# 00-Libraries -----------------------------------------------------------------
library(tidyverse)
library(readxl)

# 01-Import --------------------------------------------------------------------
# Municipalities_db.xlsx has two title rows before the merged-cell header:
#   Row 1:  "Municipalities Database" (title)
#   Row 2:  blank
#   Row 3:  category names (forward-filled across sub-columns below)
#   Row 4:  sub-category names
#   Row 5+: data

# Input workbook and output file, defined once so both reads stay in sync
municipalities_xlsx <- "data/Municipalities_db.xlsx"
municipalities_rds <- "data/processed/m_raw.rds"

# First, extract the two header rows (categories and sub-categories).
# Reading them as text keeps numeric-looking header cells from being guessed
# as numbers and reformatted when the rows are transposed.
header_rows <- read_xlsx(
  municipalities_xlsx,
  col_names = FALSE,
  col_types = "text",
  n_max = 2,
  skip = 2
)

# Then, reshape them into a character vector of "category_subcategory" names
col_names <- header_rows |>
  t() |>
  as_tibble(.name_repair = "unique") |>
  set_names(c("category", "subcategory")) |>
  fill(category) |>
  mutate(
    # Use whichever level is present when the other one is blank; str_c()
    # alone returns NA as soon as either part is missing.
    col_name = case_when(
      is.na(subcategory) ~ category,
      is.na(category) ~ subcategory,
      TRUE ~ str_c(category, subcategory, sep = " - ")
    ) |>
      str_to_lower() |>
      str_replace_all("[^a-z0-9]+", "_") |>
      str_remove("_$")
  ) |>
  pull(col_name)

# Finally, read the table, skipping the first four rows, and apply col_names.
# Every column is read as text; all columns except `code` and `municipality`
# are then converted to numeric. Values that cannot be parsed become NA and
# the resulting coercion warnings are deliberately silenced.
municipalities_raw <- read_xlsx(
  municipalities_xlsx,
  skip = 4,
  col_names = col_names,
  col_types = "text"
) |>
  mutate(
    across(-c(code, municipality), \(x) suppressWarnings(as.numeric(x)))
  )

# Write out as rds for future reading, creating the output folder if needed
dir.create(dirname(municipalities_rds), recursive = TRUE, showWarnings = FALSE)
write_rds(municipalities_raw, municipalities_rds)