125 lines
3.8 KiB
R
125 lines
3.8 KiB
R
# =============================================================================
|
||
# 03-attainment-ts.R · Educational attainment time-series from SCB PxWeb
|
||
# =============================================================================
|
||
#
|
||
# Table: UF0506B/Utbildning
|
||
# "Befolkning 16-74 år efter region, utbildningsnivå, ålder och kön. År 1985-2025"
|
||
#
|
||
# We fetch ages 25-64 (standard working-age window for education attainment),
|
||
# both genders, all 7 SUN education levels, for selected years.
|
||
# The API imposes an undocumented cell limit (~150k per request), so we
|
||
# batch across municipalities (50 per request).
|
||
#
|
||
# Education levels (SUN):
|
||
# 1-2 = förgymnasial (pre-secondary)
|
||
# 3-4 = gymnasial (upper secondary)
|
||
# 5-6 = eftergymnasial (post-secondary, non-doctoral)
|
||
# 7 = forskarutbildning (doctoral)
|
||
#
|
||
# Output: data/processed/attainment_ts.rds
|
||
# Columns: code, year, edu_level (1-7), n
|
||
# Also a summary: code, year, pct_postsec, n_25_64
|
||
|
||
library(tidyverse)
|
||
library(httr)
|
||
library(jsonlite)
|
||
|
||
SCB_URL <- "https://api.scb.se/OV0104/v1/doris/sv/ssd/START/UF/UF0506/UF0506B/Utbildning"
|
||
|
||
YEARS <- c("2000", "2005", "2010", "2015", "2022")
|
||
LEVELS <- as.list(as.character(1:7))
|
||
AGES <- as.list(as.character(25:64))
|
||
GENDERS <- list("1", "2")
|
||
|
||
# Fetch municipality codes from table metadata
|
||
meta <- fromJSON(content(GET(SCB_URL), "text", encoding = "UTF-8"))
|
||
munis <- Filter(\(x) nchar(x) == 4, meta$variables$values[[1]])
|
||
|
||
# Batch fetch: 50 municipalities per request
|
||
fetch_batch <- function(muni_batch) {
|
||
query <- list(
|
||
query = list(
|
||
list(
|
||
code = "Region",
|
||
selection = list(filter = "item", values = as.list(muni_batch))
|
||
),
|
||
list(code = "Alder", selection = list(filter = "item", values = AGES)),
|
||
list(
|
||
code = "UtbildningsNiva",
|
||
selection = list(filter = "item", values = LEVELS)
|
||
),
|
||
list(code = "Kon", selection = list(filter = "item", values = GENDERS)),
|
||
list(
|
||
code = "Tid",
|
||
selection = list(filter = "item", values = as.list(YEARS))
|
||
)
|
||
),
|
||
response = list(format = "json")
|
||
)
|
||
resp <- POST(
|
||
SCB_URL,
|
||
body = toJSON(query, auto_unbox = TRUE),
|
||
encode = "raw",
|
||
content_type("application/json"),
|
||
timeout(60)
|
||
)
|
||
if (status_code(resp) != 200) {
|
||
stop("HTTP ", status_code(resp), " for batch starting at ", muni_batch[1])
|
||
}
|
||
d <- fromJSON(content(resp, "text", encoding = "UTF-8"))$data
|
||
# key is a list of character vectors: [region, age, edu_level, gender, year]
|
||
keys <- do.call(rbind, d$key)
|
||
tibble(
|
||
code = keys[, 1],
|
||
age = as.integer(keys[, 2]),
|
||
edu_level = as.integer(keys[, 3]),
|
||
gender = as.integer(keys[, 4]),
|
||
year = as.integer(keys[, 5]),
|
||
n = as.integer(unlist(d$values))
|
||
)
|
||
}
|
||
|
||
batches <- split(munis, ceiling(seq_along(munis) / 50))
|
||
raw_list <- vector("list", length(batches))
|
||
|
||
for (i in seq_along(batches)) {
|
||
cat(
|
||
" Fetching batch",
|
||
i,
|
||
"/",
|
||
length(batches),
|
||
"(munis",
|
||
batches[[i]][1],
|
||
"–",
|
||
tail(batches[[i]], 1),
|
||
")\n"
|
||
)
|
||
raw_list[[i]] <- tryCatch(
|
||
fetch_batch(batches[[i]]),
|
||
error = function(e) {
|
||
message(" Batch ", i, " failed: ", conditionMessage(e))
|
||
NULL
|
||
}
|
||
)
|
||
Sys.sleep(0.3)
|
||
}
|
||
|
||
raw <- bind_rows(compact(raw_list))
|
||
|
||
# Aggregate: sum across ages and genders → n per (code, year, edu_level)
|
||
attainment <- raw |>
|
||
group_by(code, year, edu_level) |>
|
||
summarise(n = sum(n, na.rm = TRUE), .groups = "drop")
|
||
|
||
# Summary: % with post-secondary education among 25-64 year olds
|
||
attainment_summary <- attainment |>
|
||
group_by(code, year) |>
|
||
summarise(
|
||
n_total = sum(n),
|
||
n_postsec = sum(n[edu_level >= 5]),
|
||
pct_postsec = 100 * n_postsec / n_total,
|
||
.groups = "drop"
|
||
)
|
||
|
||
write_rds(attainment, "data/processed/attainment_ts.rds")
|
||
write_rds(attainment_summary, "data/processed/attainment_summary.rds")
|