ruralitic-qrm/skolverket.R

313 lines
12 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# =============================================================================
# skolverket.R · Educational offer dataset by municipality
# =============================================================================
#
# Sources and coverage:
#   1. data/skolenhetsadresser.xlsx: Skolverket school unit registry
#      (current snapshot; all Skolverket-regulated institution types).
#      Sheets: Förskoleklass, Grundskola, Anpassad grundskola, Specialskola,
#      Sameskola, Gymnasieskola, Anpassad gymnasieskola, Komvux
#   2. Hardcoded list: higher education institutions.
#      Source: UKÄ (Universitetskanslersämbetet) register, ~40 institutions.
#      Each institution-municipality pair is one row; multi-campus institutions
#      appear under every municipality that hosts a campus.
#   3. TODO: Yrkeshögskola: MYH (Myndigheten för yrkeshögskolan)
#      open data at myh.se; no REST API identified so far.
#   4. TODO: Folkhögskola: Folkbildningsrådet register at
#      folkbildning.se; ~155 institutions across ~100+ municipalities.
#
# Note: the Skolverket planned-educations API (api.skolverket.se) was
# explored but covers only the same types as the xlsx; it is used here
# purely as an optional check in section 03.
#
# Output:
# data/processed/edu_offer.rds : municipality × indicator/count (wide)
# data/processed/edu_offer.csv : same, plain text
library(tidyverse)
library(readxl)
# 00 Helpers -----------------------------------------------------------------
# Path (relative to the working directory) of the Skolverket school unit
# registry workbook; every sheet listed in section 01 is read from this file.
XLSX_PATH <- "data/skolenhetsadresser.xlsx"
# Give the municipality columns short names and normalise the code.
#
# df: a sheet from the registry workbook; must contain the columns
#     `BELÄGEN I KOMMUN (KOD)` and `BELÄGEN I KOMMUN (NAMN)`.
# Returns `df` with those columns renamed to `muni_code` / `muni_name`, and
# `muni_code` converted to character and left-padded with "0" to 4 characters
# (so a code read as the number 114 becomes "0114").
add_muni_code <- function(df) {
  renamed <- rename(
    df,
    muni_code = `BELÄGEN I KOMMUN (KOD)`,
    muni_name = `BELÄGEN I KOMMUN (NAMN)`
  )
  mutate(
    renamed,
    muni_code = str_pad(
      as.character(muni_code),
      width = 4,
      side = "left",
      pad = "0"
    )
  )
}
# Add an `ownership` column derived from HUVUDMANNATYP.
#
# df: a sheet from the registry workbook; must contain HUVUDMANNATYP.
# Returns `df` plus `ownership`, which is "public" when HUVUDMANNATYP is
# "Kommunal", "Region" or "Statlig" and "private" for every other value.
# A missing HUVUDMANNATYP is not in that set, so it is also labelled "private".
categorise_ownership <- function(df) {
  public_types <- c("Kommunal", "Region", "Statlig")
  df |>
    mutate(
      ownership = if_else(HUVUDMANNATYP %in% public_types, "public", "private")
    )
}
# Tally school units per municipality, split by ownership.
#
# df:         a sheet from the registry workbook (raw column names).
# type_label: value written to the `type` column of every output row.
# Returns one row per (muni_code, muni_name) with the columns
# muni_code, muni_name, type, n_total, n_public, n_private.
count_units <- function(df, type_label) {
  tallied <- count(
    categorise_ownership(add_muni_code(df)),
    muni_code,
    muni_name,
    ownership
  )
  by_owner <- pivot_wider(
    tallied,
    names_from = ownership,
    values_from = n,
    values_fill = 0L
  )

  # pivot_wider() only creates a column for ownership values that occur in
  # the data, so add whichever of the two is missing as an all-zero column.
  for (owner in c("public", "private")) {
    if (!owner %in% names(by_owner)) {
      by_owner[[owner]] <- 0L
    }
  }

  by_owner |>
    mutate(
      n_total = public + private,
      type = type_label
    ) |>
    select(
      muni_code,
      muni_name,
      type,
      n_total,
      n_public = public,
      n_private = private
    )
}
# 01 Read all xlsx sheets ----------------------------------------------------
# Names are the type labels used throughout the script; values are the sheet
# names in the workbook.
sheet_map <- c(
  forskoleklass = "Förskoleklass",
  grundskola = "Grundskola",
  anpassad_grundskola = "Anpassad grundskola",
  specialskola = "Specialskola",
  sameskola = "Sameskola",
  gymnasieskola = "Gymnasieskola",
  anpassad_gymnasieskola = "Anpassad gymnasieskola",
  komvux = "Komvux"
)

# Named list of data frames, one per sheet, keyed by type label (map() keeps
# the names of `sheet_map`). Progress is printed to stdout.
raw <- map(sheet_map, \(sheet) {
  cat("Reading sheet:", sheet, "\n")
  read_excel(XLSX_PATH, sheet = sheet)
})
# 02 Count institutions per municipality and ownership -----------------------
# Apply count_units() to every sheet, passing the list name as the type label,
# and stack the per-type tables into one long table.
unit_counts <- raw |>
  imap(count_units) |>
  bind_rows()
# Komvux: additionally extract SFI-offering units as a separate indicator.
# Column "SVENSKA FÖR INVANDRARE" = "J" means the unit offers SFI.
# The tally reuses count_units() so SFI rows are built exactly like every
# other type (same columns, same handling of a missing ownership class).
# Filtering before the helper is equivalent to filtering after it, because
# the helper's renaming/ownership steps do not touch the SFI column.
sfi_counts <- raw[["komvux"]] |>
  filter(`SVENSKA FÖR INVANDRARE` == "J") |>
  count_units("sfi")

# Append the SFI indicator to the per-type counts.
unit_counts <- bind_rows(unit_counts, sfi_counts)
# 03 Skolverket API cross-check (optional) -----------------------------------
# The planned-educations API returns the same institution types as the xlsx.
# This block downloads all school units from the API and summarises them as
# unit counts per municipality, type and ownership (column `n_api`).
# Result: `api_cross_check` holds that summary, or NULL if anything in the
# block fails (the error message is shown via message()).
# NOTE(review): nothing here compares the summary with `unit_counts`; the
# comparison against the xlsx counts still has to be done by whoever uses
# `api_cross_check`.
# Comment out if offline or if the xlsx is known to be current.
api_cross_check <- tryCatch(
  {
    cat("\nFetching Skolverket API for cross-check...\n")
    base_url <- "https://api.skolverket.se/planned-educations/school-units"

    # Download one result page (100 units per page) and return the `body`
    # element of the parsed JSON response.
    fetch_page <- function(page) {
      url <- paste0(base_url, "?page=", page, "&size=100")
      resp <- readLines(url, warn = FALSE) |>
        paste(collapse = "") |>
        jsonlite::fromJSON()
      resp$body
    }

    first <- fetch_page(0)
    n_pages <- first$page$totalPages
    cat(
      " API reports",
      first$page$totalElements,
      "units across",
      n_pages,
      "pages\n"
    )

    # Zero-based page numbers. seq_len() gives an empty index when the API
    # reports zero pages, whereas 0:(n_pages - 1) would count down to -1.
    page_index <- seq_len(n_pages) - 1L
    all_pages <- map(page_index, \(p) {
      if (p %% 10 == 0) {
        cat(" page", p, "/", n_pages, "\n")
      }
      # Page 0 was already downloaded to learn the page count; reuse it.
      page_body <- if (p == 0) first else fetch_page(p)
      page_body$`_embedded`$listedSchoolUnits
    })

    # One row per school unit, reduced to the three grouping keys.
    # NOTE(review): assumes `typeOfSchooling` is a list of data frames with a
    # `code` column and that `principalOrganizerType` uses the same labels as
    # HUVUDMANNATYP in the xlsx -- confirm against the API response.
    api_df <- bind_rows(all_pages) |>
      transmute(
        muni_code = str_pad(as.character(geographicalAreaCode), 4, "left", "0"),
        ownership = if_else(
          principalOrganizerType %in% c("Kommunal", "Region", "Statlig"),
          "public",
          "private"
        ),
        # Only the first listed schooling type of each unit is used.
        type = map_chr(typeOfSchooling, \(t) {
          if (is.null(t) || nrow(t) == 0) {
            return(NA_character_)
          }
          t$code[1]
        })
      ) |>
      filter(!is.na(type))

    # Translate API codes to the script's type labels BEFORE counting:
    # "vuxgy" and "vuxgr" both map to "komvux", so recoding after count()
    # would leave two rows with the same (muni_code, type, ownership) key.
    # Codes without a mapping are kept unchanged.
    api_summary <- api_df |>
      mutate(
        type = recode(
          type,
          fsk = "forskoleklass",
          gr = "grundskola",
          gran = "anpassad_grundskola",
          sp = "specialskola",
          sam = "sameskola",
          gy = "gymnasieskola",
          gyan = "anpassad_gymnasieskola",
          vuxgy = "komvux",
          vuxgr = "komvux",
          sfi = "sfi"
        )
      ) |>
      count(muni_code, type, ownership, name = "n_api")

    cat(" API cross-check complete\n")
    api_summary
  },
  error = function(e) {
    # Best-effort step: report why it was skipped and carry on with NULL.
    message("API cross-check skipped: ", conditionMessage(e))
    NULL
  }
)
# 04 Higher education institutions (UKÄ list, hardcoded) ---------------------
# Source: UKÄ register of accredited Swedish higher education institutions.
# Each row = one institution × one municipality (multi-campus → multiple rows).
# Columns: institution (display name), muni_code (4-character municipality
# code, kept as a string so leading zeros survive), type_he ("university" or
# "university_college").
# Verify against: https://www.uka.se/om-oss/kontakt/larosaetenas-webbplatser.html
#
# Corrections relative to the earlier version of this list:
#   * Luleå University of Technology (Luleå, 2580) was missing.
#   * Linnaeus, Malmö, Mälardalen and Mid Sweden hold university status and
#     are now typed "university" instead of "university_college".
# NOTE(review): campus coverage is not exhaustive (e.g. secondary campuses of
# the large universities are not listed) -- extend if those should count.
he_institutions <- tribble(
  ~institution                                  , ~muni_code , ~type_he            ,
  # ---- State universities ----
  "Uppsala University"                          , "0380"     , "university"        ,
  "Stockholm University"                        , "0180"     , "university"        ,
  "Lund University"                             , "1281"     , "university"        ,
  "University of Gothenburg"                    , "1480"     , "university"        ,
  "Umeå University"                             , "2480"     , "university"        ,
  "Linköping University"                        , "0580"     , "university"        ,
  "Örebro University"                           , "1880"     , "university"        ,
  "Karlstad University"                         , "1780"     , "university"        ,
  "Luleå University of Technology"              , "2580"     , "university"        ,
  "Linnaeus University Växjö"                   , "0780"     , "university"        ,
  "Linnaeus University Kalmar"                  , "0880"     , "university"        ,
  "Malmö University"                            , "1280"     , "university"        ,
  "Mälardalen University Västerås"              , "1980"     , "university"        ,
  "Mälardalen University Eskilstuna"            , "0484"     , "university"        ,
  "Mid Sweden University Sundsvall"             , "2281"     , "university"        ,
  "Mid Sweden University Östersund"             , "2380"     , "university"        ,
  # ---- State specialised universities ----
  "KTH Royal Institute of Technology"           , "0180"     , "university"        ,
  "Karolinska Institutet"                       , "0184"     , "university"        , # Solna
  "Chalmers University of Technology"           , "1480"     , "university"        , # private, state-grant
  "SLU Uppsala"                                 , "0380"     , "university"        ,
  "SLU Umeå"                                    , "2480"     , "university"        ,
  "SLU Alnarp (Lomma)"                          , "1262"     , "university"        ,
  "SLU Skara"                                   , "1495"     , "university"        ,
  # ---- State university colleges ----
  "Blekinge Institute of Technology"            , "1080"     , "university_college" , # Karlskrona
  "Dalarna University Falun"                    , "2080"     , "university_college" ,
  "Dalarna University Borlänge"                 , "2081"     , "university_college" ,
  "University of Gävle"                         , "2180"     , "university_college" ,
  "Halmstad University"                         , "1380"     , "university_college" ,
  "Kristianstad University"                     , "1290"     , "university_college" ,
  "Södertörn University"                        , "0126"     , "university_college" , # Huddinge
  "University of Borås"                         , "1490"     , "university_college" ,
  "University of Skövde"                        , "1496"     , "university_college" ,
  "University West"                             , "1488"     , "university_college" , # Trollhättan
  # ---- Private accredited institutions ----
  "Stockholm School of Economics"               , "0180"     , "university_college" ,
  "Jönköping University"                        , "0680"     , "university_college" ,
  # ---- Art, music, design, sport ----
  "Konstfack"                                   , "0180"     , "university_college" ,
  "Royal University College of Music (KMH)"     , "0180"     , "university_college" ,
  "Stockholm University of the Arts"            , "0180"     , "university_college" ,
  "Royal Institute of Art"                      , "0180"     , "university_college" ,
  "Beckmans College of Design"                  , "0180"     , "university_college" ,
  "Swedish School of Sport and Health Sciences" , "0180"     , "university_college" ,
  # ---- Defence / health ----
  "Swedish Defence University"                  , "0180"     , "university_college" ,
  "Sophiahemmet University"                     , "0180"     , "university_college" ,
  "Ersta Sköndal Bräcke University College"     , "0180"     , "university_college" ,
  # NOTE(review): this campus may be in Flemingsberg (Huddinge, 0126) rather
  # than Stockholm -- confirm against the UKÄ register before relying on it.
  "Röda Korsets Högskola"                       , "0180"     , "university_college" ,
  "Newmaninstitutet"                            , "0380"     , "university_college"
)
# Number of higher-education rows per municipality and institution type, in
# the same column layout as the school unit counts.
he_counts <- he_institutions |>
  count(muni_code, type = type_he, name = "n_total") |>
  mutate(
    # All Swedish HE institutions are state-funded or receive >90% public
    # funding; the public/private distinction used for school units does not
    # apply here, so every institution is booked as public.
    n_public = n_total,
    n_private = 0L,
    # Names are filled from the municipality reference table later on.
    muni_name = NA_character_
  )
# 05 Combine all sources and reshape to wide ---------------------------------
# Long table: one row per municipality × institution type.
long <- bind_rows(unit_counts, he_counts)

# Municipality reference table. It supplies the municipality names and makes
# sure every municipality in m_sample appears in the output, with zeros for
# institution types it does not have.
munis <- readRDS("data/processed/m_sample.rds") |>
  select(muni_code = code, muni_name_ref = municipality)

all_types <- unique(long$type)

# Build the full municipality × type grid, attach the counts, zero-fill the
# combinations with no match, then spread to one column per type and measure
# (named "<type>_<measure>", e.g. "grundskola_n_total").
wide <- cross_join(munis, tibble(type = all_types)) |>
  left_join(
    select(long, muni_code, type, n_total, n_public, n_private),
    by = join_by(muni_code, type)
  ) |>
  mutate(
    across(c(n_total, n_public, n_private), \(x) replace_na(x, 0L))
  ) |>
  pivot_wider(
    names_from = type,
    values_from = c(n_total, n_public, n_private),
    names_glue = "{type}_{.value}"
  ) |>
  rename(code = muni_code, municipality = muni_name_ref)
# 06 Save --------------------------------------------------------------------
# Write the wide table twice: as RDS and as plain-text CSV.
wide |> write_rds("data/processed/edu_offer.rds")
wide |> write_csv("data/processed/edu_offer.csv")