ruralitic-qrm/src/municipalities/00-import.R

47 lines
1.1 KiB
R
Raw Normal View History

2026-05-07 14:32:28 +02:00
# 00-Libraries -------------------------------------------
library(tidyverse)
library(readxl)
# 01-Import ----------------------------------------------
# Municipalities_db.xlsx has two rows above its two-row, merged-cell header:
# Row 1: "Municipalities Database" (title)
# Row 2: blank
# Row 3: category names (merged across their sub-columns; forward-filled below)
# Row 4: sub-category names
# Row 5+: data
# Pull only the two header rows (sheet rows 3-4) as plain, unnamed data; the
# title row and the blank row above them are skipped.
header_rows <- read_xlsx(
  path = "data/Municipalities_db.xlsx",
  skip = 2,
  n_max = 2,
  col_names = FALSE
)
# Build one snake_case column name per spreadsheet column from the two header
# rows held in `header_rows`:
#   * transpose so each spreadsheet column becomes one row with a `category`
#     and a `subcategory` value;
#   * forward-fill `category` so sub-columns inherit the category named in an
#     earlier column;
#   * join the two levels as "category - subcategory", falling back to
#     whichever level is present when the other one is missing;
#   * lower-case, collapse every run of characters outside [a-z0-9] (this
#     includes non-ASCII letters) into "_", and drop one trailing "_".
# Result: `col_names`, a character vector with one element per column.
col_names <- header_rows |>
  t() |>
  as_tibble(.name_repair = "unique") |>
  set_names(c("category", "subcategory")) |>
  fill(category) |>
  mutate(
    col_name = if_else(
      is.na(category) | is.na(subcategory),
      # `fill()` only fills downwards, so columns left of the first category
      # keep `category = NA`; pasting would then produce an NA column name.
      # Use the single header level that is available instead.
      coalesce(category, subcategory),
      str_c(category, subcategory, sep = " - ")
    ) |>
      str_to_lower() |>
      str_replace_all("[^a-z0-9]+", "_") |>
      str_remove("_$")
  ) |>
  pull(col_name)
# Read the data rows (sheet row 5 onwards) as text under the derived column
# names, then convert every column except `code` and `municipality` to numeric.
# Values that cannot be parsed as numbers become NA; the resulting coercion
# warning is silenced.
municipalities_raw <- read_xlsx(
  path = "data/Municipalities_db.xlsx",
  col_names = col_names,
  col_types = "text",
  skip = 4
) |>
  mutate(
    across(
      .cols = -c(code, municipality),
      .fns = function(values) suppressWarnings(as.numeric(values))
    )
  )
# Save the imported table as an RDS file. The output folder is created first
# (no-op when it already exists) so the write does not fail on a missing
# directory.
dir.create("data/processed", recursive = TRUE, showWarnings = FALSE)
write_rds(municipalities_raw, "data/processed/m_raw.rds")