This commit is contained in:
Pablo Antonio Lillo Cea 2026-05-08 10:12:33 +02:00
parent 1631adfb02
commit eb093605b3
2 changed files with 79 additions and 64 deletions

View file

@ -57,7 +57,9 @@ truly_exclude <- c(
"land_area_ha", "land_area_ha",
all_vars[str_detect(all_vars, "^type_of_land_")], all_vars[str_detect(all_vars, "^type_of_land_")],
all_vars[str_detect(all_vars, "^use_of_land_")], all_vars[str_detect(all_vars, "^use_of_land_")],
"forest", "open_land", "total_green_space", "forest",
"open_land",
"total_green_space",
all_vars[str_detect(all_vars, "^inland_water")], all_vars[str_detect(all_vars, "^inland_water")],
all_vars[str_detect(all_vars, "^seawater")], all_vars[str_detect(all_vars, "^seawater")],
all_vars[str_detect(all_vars, "^the_four_large")], all_vars[str_detect(all_vars, "^the_four_large")],
@ -90,19 +92,30 @@ col_sup_politics <- analysis_vars[
# rather than resident persons. Projecting them as supplementary shows how # rather than resident persons. Projecting them as supplementary shows how
# they relate to the population-composition space without distorting it. # they relate to the population-composition space without distorting it.
col_sup_infra <- analysis_vars[ col_sup_infra <- analysis_vars[
str_detect(analysis_vars, str_detect(
"^number_of_(rented|tenant_owned|owner_occupied)_dwellings") | analysis_vars,
"^number_of_(rented|tenant_owned|owner_occupied)_dwellings"
) |
str_detect(analysis_vars, "^number_of_registered_passenger_cars_") | str_detect(analysis_vars, "^number_of_registered_passenger_cars_") |
str_detect(analysis_vars, "^workplaces_") | str_detect(analysis_vars, "^workplaces_") |
str_detect(analysis_vars, "^agricultural_enterprises_") | str_detect(analysis_vars, "^agricultural_enterprises_") |
str_detect(analysis_vars, "^livestock_") | str_detect(analysis_vars, "^livestock_") |
analysis_vars %in% c( analysis_vars %in%
"sex_men", "sex_women", c(
"employment_by_gender_men", "employment_by_gender_women", "sex_men",
"number_of_inmigrations", "number_of_outmigrations", "sex_women",
"births", "deaths", "marriages", "divorces", "employment_by_gender_men",
"buildings", "buildings_for_seasonal_use", "employment_by_gender_women",
"concentrations_of_holiday_homes", "holiday_home_areas", "number_of_inmigrations",
"number_of_outmigrations",
"births",
"deaths",
"marriages",
"divorces",
"buildings",
"buildings_for_seasonal_use",
"concentrations_of_holiday_homes",
"holiday_home_areas",
"social_assistance_number_of_receiver_households", "social_assistance_number_of_receiver_households",
"urban_residences_proximity_to_public_green_areas_500_meters_or_less", "urban_residences_proximity_to_public_green_areas_500_meters_or_less",
"number_of_localities" "number_of_localities"
@ -129,14 +142,6 @@ active_vars <- analysis_vars[
# Everything else → post-hoc correlations with CA dimensions # Everything else → post-hoc correlations with CA dimensions
outside_ca <- setdiff(analysis_vars, c(active_vars, col_sup_vars)) outside_ca <- setdiff(analysis_vars, c(active_vars, col_sup_vars))
cat(
"Active (person-count population composition): ", length(active_vars), "\n",
"col.sup educational provision: ", length(col_sup_edu), "\n",
"col.sup political vote counts: ", length(col_sup_politics), "\n",
"col.sup infrastructure / event counts: ", length(col_sup_infra), "\n",
"Outside CA (rates / continuous / other): ", length(outside_ca), "\n"
)
# 03-Build CA matrix ----------------------------------------------------------- # 03-Build CA matrix -----------------------------------------------------------
X <- m_sample |> X <- m_sample |>
select(all_of(c(active_vars, col_sup_vars))) |> select(all_of(c(active_vars, col_sup_vars))) |>
@ -154,9 +159,6 @@ idx_sup <- seq(length(active_vars) + 1L, ncol(X))
# 04-Run CA -------------------------------------------------------------------- # 04-Run CA --------------------------------------------------------------------
ca <- CA(X, ncp = 10, col.sup = idx_sup, graph = FALSE) ca <- CA(X, ncp = 10, col.sup = idx_sup, graph = FALSE)
cat("\nEigenvalues (first 10 dimensions):\n")
print(round(ca$eig[1:10, ], 3))
contribs <- ca$col$contrib |> contribs <- ca$col$contrib |>
as.data.frame() |> as.data.frame() |>
rownames_to_column("variable") rownames_to_column("variable")
@ -177,7 +179,11 @@ outside_data <- m_sample |>
replace_na(x, if (is.finite(m)) m else 0) replace_na(x, if (is.finite(m)) m else 0)
})) }))
posthoc_cor <- cor(ca_row_coords, outside_data, use = "pairwise.complete.obs") |> posthoc_cor <- cor(
ca_row_coords,
outside_data,
use = "pairwise.complete.obs"
) |>
as.data.frame() |> as.data.frame() |>
rownames_to_column("dimension") rownames_to_column("dimension")

View file

@ -40,16 +40,20 @@ cat("Municipalities to fetch:", length(munis), "\n")
fetch_batch <- function(muni_batch) { fetch_batch <- function(muni_batch) {
query <- list( query <- list(
query = list( query = list(
list(code = "Region", list(
selection = list(filter = "item", values = as.list(muni_batch))), code = "Region",
list(code = "Alder", selection = list(filter = "item", values = as.list(muni_batch))
selection = list(filter = "item", values = AGES)), ),
list(code = "UtbildningsNiva", list(code = "Alder", selection = list(filter = "item", values = AGES)),
selection = list(filter = "item", values = LEVELS)), list(
list(code = "Kon", code = "UtbildningsNiva",
selection = list(filter = "item", values = GENDERS)), selection = list(filter = "item", values = LEVELS)
list(code = "Tid", ),
selection = list(filter = "item", values = as.list(YEARS))) list(code = "Kon", selection = list(filter = "item", values = GENDERS)),
list(
code = "Tid",
selection = list(filter = "item", values = as.list(YEARS))
)
), ),
response = list(format = "json") response = list(format = "json")
) )
@ -80,8 +84,17 @@ batches <- split(munis, ceiling(seq_along(munis) / 50))
raw_list <- vector("list", length(batches)) raw_list <- vector("list", length(batches))
for (i in seq_along(batches)) { for (i in seq_along(batches)) {
cat(" Fetching batch", i, "/", length(batches), cat(
"(munis", batches[[i]][1], "", tail(batches[[i]], 1), ")\n") " Fetching batch",
i,
"/",
length(batches),
"(munis",
batches[[i]][1],
"",
tail(batches[[i]], 1),
")\n"
)
raw_list[[i]] <- tryCatch( raw_list[[i]] <- tryCatch(
fetch_batch(batches[[i]]), fetch_batch(batches[[i]]),
error = function(e) { error = function(e) {
@ -93,7 +106,6 @@ for (i in seq_along(batches)) {
} }
raw <- bind_rows(compact(raw_list)) raw <- bind_rows(compact(raw_list))
cat("Total rows fetched:", nrow(raw), "\n")
# Aggregate: sum across ages and genders → n per (code, year, edu_level) # Aggregate: sum across ages and genders → n per (code, year, edu_level)
attainment <- raw |> attainment <- raw |>
@ -112,6 +124,3 @@ attainment_summary <- attainment |>
write_rds(attainment, "data/processed/attainment_ts.rds") write_rds(attainment, "data/processed/attainment_ts.rds")
write_rds(attainment_summary, "data/processed/attainment_summary.rds") write_rds(attainment_summary, "data/processed/attainment_summary.rds")
cat("Saved attainment_ts.rds and attainment_summary.rds\n")
cat("Years:", paste(sort(unique(attainment$year)), collapse = ", "), "\n")
cat("Municipalities:", n_distinct(attainment$code), "\n")