# 00-Libraries -------------------------------------------
library(tidyverse)
library(readxl)

# 01-Import ----------------------------------------------
# Municipalities_db.xlsx has two title rows before the merged-cell header:
#   Row 1:  "Municipalities Database" (title)
#   Row 2:  blank
#   Row 3:  category names (forward-filled across sub-columns by `fill()`)
#   Row 4:  sub-category names
#   Row 5+: data
input_path <- "data/Municipalities_db.xlsx"
output_path <- "data/processed/m_raw.rds"

# Read only the two header rows (rows 3-4). Forcing every cell to text keeps
# numeric-looking labels from being reformatted when the rows are transposed.
header_rows <- read_xlsx(
  input_path,
  col_names = FALSE,
  col_types = "text",
  n_max = 2,
  skip = 2
)

# Transpose so each spreadsheet column becomes one row holding its
# category (row 3) and sub-category (row 4) labels.
header_cells <- t(header_rows)
colnames(header_cells) <- c("category", "subcategory")

col_names <- header_cells |>
  as_tibble() |>
  fill(category) |>
  mutate(
    # "category - subcategory" when both exist; otherwise whichever label is
    # present (str_c() yields NA as soon as either part is NA).
    label = coalesce(
      str_c(category, subcategory, sep = " - "),
      category,
      subcategory
    ),
    # snake_case: lower-case, collapse every run of other characters into a
    # single underscore, then trim underscores left at either edge.
    col_name = label |>
      str_to_lower() |>
      str_replace_all("[^a-z0-9]+", "_") |>
      str_remove_all("^_|_$")
  ) |>
  pull(col_name)

# Fail early with a clear message: the numeric conversion below excludes
# these two identifier columns by name.
stopifnot(
  "Header rows must yield `code` and `municipality` columns." =
    all(c("code", "municipality") %in% col_names)
)

# Convert a text column to numeric. Cells that cannot be parsed still become
# NA (best effort), but their count is reported per column instead of being
# silently discarded.
to_numeric <- function(x, column) {
  parsed <- suppressWarnings(as.numeric(x))
  n_unparsed <- sum(is.na(parsed) & !is.na(x))
  if (n_unparsed > 0) {
    message(
      "Column `", column, "`: ", n_unparsed,
      " non-numeric value(s) converted to NA."
    )
  }
  parsed
}

# Read the data rows (row 5 onwards) as text under the derived names, then
# convert everything except the identifier columns to numeric.
municipalities_raw <- read_xlsx(
  input_path,
  skip = 4,
  col_names = col_names,
  col_types = "text"
) |>
  mutate(across(-c(code, municipality), \(x) to_numeric(x, cur_column())))

# Make sure the output directory exists before writing.
dir.create(dirname(output_path), recursive = TRUE, showWarnings = FALSE)
write_rds(municipalities_raw, output_path)