# ruralitic-qrm/skolverket.R
# (repository viewer metadata: 314 lines, 12 KiB, R)

# =============================================================================
# skolverket.R · Educational offer dataset by municipality
# =============================================================================
#
# Sources and coverage:
#   1. data/skolenhetsadresser.xlsx — Skolverket school unit registry
#      (current snapshot; all Skolverket-regulated institution types).
#      Sheets: Förskoleklass, Grundskola, Anpassad grundskola, Specialskola,
#      Sameskola, Gymnasieskola, Anpassad gymnasieskola, Komvux.
#   2. Hardcoded list — higher education institutions.
#      Source: UKÄ (Universitetskanslersämbetet) register, ~40 institutions.
#      Each institution-municipality pair is one row; multi-campus institutions
#      appear under every municipality that hosts a campus.
#   3. TODO: Yrkeshögskola — MYH (Myndigheten för yrkeshögskolan)
#      open data at myh.se; no REST API identified so far.
#   4. TODO: Folkhögskola — Folkbildningsrådet register at
#      folkbildning.se; ~155 institutions across ~100+ municipalities.
#
# Note: the Skolverket planned-educations API (api.skolverket.se) was
# explored but covers only the same types as the xlsx; it is used here
# purely as an optional check in section 03.
#
# Output:
# data/processed/edu_offer.rds : municipality × indicator/count (wide)
# data/processed/edu_offer.csv : same, plain text
library(tidyverse)
library(readxl)
# 00 Helpers -----------------------------------------------------------------
# Relative path of the Skolverket school unit registry workbook; every sheet
# listed in section 01 is read from this file.
XLSX_PATH <- "data/skolenhetsadresser.xlsx"
# Give the municipality columns of a registry sheet short, stable names.
#
# `BELÄGEN I KOMMUN (KOD)` becomes `muni_code` and `BELÄGEN I KOMMUN (NAMN)`
# becomes `muni_name`. The code is then coerced to character and left-padded
# with "0" to four characters, so codes read as numbers regain leading zeros.
#
# df:      data frame containing both `BELÄGEN I KOMMUN` columns.
# returns: `df` with the two columns renamed and `muni_code` padded.
add_muni_code <- function(df) {
  renamed <- rename(
    df,
    muni_code = `BELÄGEN I KOMMUN (KOD)`,
    muni_name = `BELÄGEN I KOMMUN (NAMN)`
  )
  mutate(
    renamed,
    muni_code = str_pad(
      as.character(muni_code),
      width = 4,
      side = "left",
      pad = "0"
    )
  )
}
# Derive a two-level `ownership` column from `HUVUDMANNATYP`.
#
# Units whose HUVUDMANNATYP is "Kommunal", "Region" or "Statlig" are labelled
# "public"; every other value (a missing value included, since `%in%` never
# returns NA) is labelled "private".
#
# df:      data frame with a `HUVUDMANNATYP` column.
# returns: `df` with an added character column `ownership`.
categorise_ownership <- function(df) {
  df |>
    mutate(
      ownership = case_when(
        HUVUDMANNATYP %in% c("Kommunal", "Region", "Statlig") ~ "public",
        TRUE ~ "private"
      )
    )
}
# Tally the units of one institution type per municipality, split by ownership.
#
# df:         one raw registry sheet (needs the `BELÄGEN I KOMMUN` columns and
#             `HUVUDMANNATYP`).
# type_label: value written to the `type` column of every output row.
# returns:    one row per (muni_code, muni_name) with the columns
#             muni_code, muni_name, type, n_total, n_public, n_private.
count_units <- function(df, type_label) {
  tallied <- df |>
    add_muni_code() |>
    categorise_ownership() |>
    count(muni_code, muni_name, ownership) |>
    pivot_wider(names_from = ownership, values_from = n, values_fill = 0L)

  # pivot_wider() only creates columns for ownership levels that occur in the
  # sheet, so add whichever of the two is missing as an all-zero column.
  for (col in setdiff(c("public", "private"), names(tallied))) {
    tallied[[col]] <- 0L
  }

  with_totals <- mutate(
    tallied,
    n_total = public + private,
    type = type_label,
    n_public = public,
    n_private = private
  )
  select(with_totals, muni_code, muni_name, type, n_total, n_public, n_private)
}
# 01 Read all xlsx sheets ----------------------------------------------------
# Internal type label (name) -> worksheet name in the registry workbook (value).
sheet_map <- c(
  forskoleklass = "Förskoleklass",
  grundskola = "Grundskola",
  anpassad_grundskola = "Anpassad grundskola",
  specialskola = "Specialskola",
  sameskola = "Sameskola",
  gymnasieskola = "Gymnasieskola",
  anpassad_gymnasieskola = "Anpassad gymnasieskola",
  komvux = "Komvux"
)

# Read every worksheet once. map() keeps the names of `sheet_map`, so `raw` is
# a list of data frames keyed by type label.
raw <- sheet_map |>
  map(\(sheet) {
    cat("Reading sheet:", sheet, "\n")
    read_excel(XLSX_PATH, sheet = sheet)
  })
# 02 Count institutions per municipality and ownership -----------------------
# imap() hands each sheet and its list name to count_units(df, type_label);
# the per-type tables are then stacked into one long table.
unit_counts <- raw |>
  imap(count_units) |>
  bind_rows()
# Komvux: additionally extract SFI-offering units as a separate indicator.
# Column "SVENSKA FÖR INVANDRARE" = "J" means the unit offers SFI.
#
# The filter only drops rows, so it can run before the shared helper: the
# remaining Komvux rows go through exactly the same rename / ownership /
# count / pivot steps as every other institution type, labelled "sfi".
sfi_counts <- raw[["komvux"]] |>
  filter(`SVENSKA FÖR INVANDRARE` == "J") |>
  count_units("sfi")

# Append the SFI indicator to the long table of per-type counts.
unit_counts <- bind_rows(unit_counts, sfi_counts)
# 03 Skolverket API cross-check (optional) -----------------------------------
# The planned-educations API returns the same institution types as the xlsx.
# This block downloads every page of school units and condenses them into
# `api_cross_check`: one row per municipality code × type × ownership with the
# unit count in `n_api`. It does not itself compare against `unit_counts`;
# join the two tables on muni_code / type to inspect discrepancies.
# If anything fails (offline, changed response shape, ...) a message is shown
# and `api_cross_check` is NULL, so the rest of the script still runs.
# Comment out if offline or if the xlsx is known to be current.
api_cross_check <- tryCatch(
  {
    cat("\nFetching Skolverket API for cross-check...\n")
    base_url <- "https://api.skolverket.se/planned-educations/school-units"

    # Download one result page (0-based index) and return the `body` element
    # of the parsed JSON response.
    fetch_page <- function(page) {
      url <- paste0(base_url, "?page=", page, "&size=100")
      resp <- readLines(url, warn = FALSE) |>
        paste(collapse = "") |>
        jsonlite::fromJSON()
      resp$body
    }

    first <- fetch_page(0)
    n_pages <- first$page$totalPages
    cat(
      " API reports",
      first$page$totalElements,
      "units across",
      n_pages,
      "pages\n"
    )

    # Without this guard `0:(n_pages - 1)` would count down to -1 when the
    # API reports zero pages and request a non-existent page.
    if (length(n_pages) != 1L || is.na(n_pages) || n_pages < 1) {
      stop("API reported no result pages", call. = FALSE)
    }

    all_pages <- map(seq_len(n_pages) - 1L, \(p) {
      if (p %% 10 == 0) {
        cat(" page", p, "/", n_pages, "\n")
      }
      # Page 0 is already in memory; only later pages need a request.
      page_body <- if (p == 0L) first else fetch_page(p)
      page_body$`_embedded`$listedSchoolUnits
    })

    api_df <- bind_rows(all_pages) |>
      transmute(
        muni_code = str_pad(as.character(geographicalAreaCode), 4, "left", "0"),
        # Same public/private rule as categorise_ownership().
        ownership = if_else(
          principalOrganizerType %in% c("Kommunal", "Region", "Statlig"),
          "public",
          "private"
        ),
        # Only the first listed schooling code of each unit is used.
        type = map_chr(typeOfSchooling, \(t) {
          if (is.null(t) || nrow(t) == 0) {
            return(NA_character_)
          }
          t$code[1]
        })
      ) |>
      filter(!is.na(type))

    # Translate API codes to the xlsx type labels BEFORE counting: "vuxgy" and
    # "vuxgr" both map to "komvux", and counting first would leave two rows
    # for the same muni_code × type × ownership key.
    api_summary <- api_df |>
      mutate(
        type = recode(
          type,
          fsk = "forskoleklass",
          gr = "grundskola",
          gran = "anpassad_grundskola",
          sp = "specialskola",
          sam = "sameskola",
          gy = "gymnasieskola",
          gyan = "anpassad_gymnasieskola",
          vuxgy = "komvux",
          vuxgr = "komvux",
          sfi = "sfi"
        )
      ) |>
      count(muni_code, type, ownership, name = "n_api")

    cat(" API cross-check complete\n")
    api_summary
  },
  error = function(e) {
    message("API cross-check skipped: ", conditionMessage(e))
    NULL
  }
)
# 04 Higher education institutions (UKÄ list, hardcoded) ---------------------
# Source: UKÄ register of accredited Swedish higher education institutions.
# Each row = one institution × one municipality (multi-campus → multiple rows).
# Verify against: https://www.uka.se/om-oss/kontakt/larosaetenas-webbplatser.html
#
# Columns:
#   institution : display name; campus rows carry the campus in the name.
#   muni_code   : 4-character municipality code, already zero-padded.
#   type_he     : "university" or "university_college"; these two values
#                 become institution types in the wide output of section 05.
#
# NOTE(review): Luleå University of Technology does not appear in this list —
#   confirm whether the omission is intentional.
# NOTE(review): Linnaeus, Malmö, Mälardalen and Mid Sweden are tagged
#   "university_college" although their names say "University" — verify
#   their current status against the UKÄ register.
# NOTE(review): every art/music/design/sport and defence/health institution
#   except Newmaninstitutet is mapped to "0180" — confirm the campus
#   municipality of each.
he_institutions <- tribble(
~institution , ~muni_code , ~type_he ,
# ---- State universities ----
"Uppsala University" , "0380" , "university" ,
"Stockholm University" , "0180" , "university" ,
"Lund University" , "1281" , "university" ,
"University of Gothenburg" , "1480" , "university" ,
"Umeå University" , "2480" , "university" ,
"Linköping University" , "0580" , "university" ,
"Örebro University" , "1880" , "university" ,
"Karlstad University" , "1780" , "university" ,
# ---- State specialised universities ----
"KTH Royal Institute of Technology" , "0180" , "university" ,
"Karolinska Institutet" , "0184" , "university" , # Solna
"Chalmers University of Technology" , "1480" , "university" , # private, state-grant
"SLU Uppsala" , "0380" , "university" ,
"SLU Umeå" , "2480" , "university" ,
"SLU Alnarp (Lomma)" , "1262" , "university" ,
"SLU Skara" , "1495" , "university" ,
# ---- State university colleges ----
"Blekinge Institute of Technology" , "1080" , "university_college" , # Karlskrona
"Dalarna University Falun" , "2080" , "university_college" ,
"Dalarna University Borlänge" , "2081" , "university_college" ,
"University of Gävle" , "2180" , "university_college" ,
"Halmstad University" , "1380" , "university_college" ,
"Kristianstad University" , "1290" , "university_college" ,
"Linnaeus University Växjö" , "0780" , "university_college" ,
"Linnaeus University Kalmar" , "0880" , "university_college" ,
"Malmö University" , "1280" , "university_college" ,
"Mälardalen University Västerås" , "1980" , "university_college" ,
"Mälardalen University Eskilstuna" , "0484" , "university_college" ,
"Mid Sweden University Sundsvall" , "2281" , "university_college" ,
"Mid Sweden University Östersund" , "2380" , "university_college" ,
"Södertörn University" , "0126" , "university_college" , # Huddinge
"University of Borås" , "1490" , "university_college" ,
"University of Skövde" , "1496" , "university_college" ,
"University West" , "1488" , "university_college" , # Trollhättan
# ---- Private accredited institutions ----
"Stockholm School of Economics" , "0180" , "university_college" ,
"Jönköping University" , "0680" , "university_college" ,
# ---- Art, music, design, sport ----
"Konstfack" , "0180" , "university_college" ,
"Royal University College of Music (KMH)" , "0180" , "university_college" ,
"Stockholm University of the Arts" , "0180" , "university_college" ,
"Royal Institute of Art" , "0180" , "university_college" ,
"Beckmans College of Design" , "0180" , "university_college" ,
"Swedish School of Sport and Health Sciences" , "0180" , "university_college" ,
# ---- Defence / health ----
"Swedish Defence University" , "0180" , "university_college" ,
"Sophiahemmet University" , "0180" , "university_college" ,
"Ersta Sköndal Bräcke University College" , "0180" , "university_college" ,
"Röda Korsets Högskola" , "0180" , "university_college" ,
"Newmaninstitutet" , "0380" , "university_college"
)
# Number of higher-education rows per municipality and institution type,
# shaped like the school-unit counts so both can be stacked in section 05.
# Every institution is booked as public (n_private is always 0): per the
# author's rationale, Swedish HE institutions are state-funded or receive
# >90% public funding, so the school-unit ownership split does not apply.
# muni_name is left NA; names are taken from the municipality reference later.
he_counts <- he_institutions |>
  count(muni_code, type = type_he, name = "n_total") |>
  mutate(
    n_public = n_total,
    n_private = 0L,
    muni_name = NA_character_
  )
# 05 Combine all sources and reshape to wide ---------------------------------
# Stack school-unit counts and higher-education counts into one long table.
long <- bind_rows(
  unit_counts,
  he_counts
)

# Load the municipality reference to fill in any missing names and ensure
# all 290 m_sample municipalities appear (with 0s for absent institution types)
munis <- readRDS("data/processed/m_sample.rds") |>
  select(muni_code = code, muni_name_ref = municipality)

all_types <- unique(long$type)

# Counts whose municipality code is absent from the reference would vanish in
# the left join below; surface them instead of dropping them silently (this
# also catches a code-format mismatch between the two sources).
unmatched_codes <- setdiff(long$muni_code, munis$muni_code)
if (length(unmatched_codes) > 0) {
  warning(
    "Counts dropped for municipality codes missing from m_sample: ",
    paste(unmatched_codes, collapse = ", "),
    call. = FALSE
  )
}

wide <- munis |>
  # One row per municipality × institution type, so absent types become 0.
  cross_join(tibble(type = all_types)) |>
  left_join(
    # Collapse to exactly one row per key first. count_units() groups by
    # muni_name as well, so a name spelled two ways for one code would give
    # duplicate keys, fan out the join and make pivot_wider() emit
    # list-columns.
    long |>
      summarise(
        across(c(n_total, n_public, n_private), sum),
        .by = c(muni_code, type)
      ),
    by = c("muni_code", "type")
  ) |>
  mutate(
    n_total = replace_na(n_total, 0L),
    n_public = replace_na(n_public, 0L),
    n_private = replace_na(n_private, 0L)
  ) |>
  # Columns are named <type>_<measure>, e.g. grundskola_n_total.
  pivot_wider(
    names_from = type,
    values_from = c(n_total, n_public, n_private),
    names_glue = "{type}_{.value}"
  ) |>
  rename(municipality = muni_name_ref, code = muni_code)
# 06 Save --------------------------------------------------------------------
# Persist the wide table twice: RDS for R consumers, CSV as plain text.
# Both writers return their input invisibly, which allows chaining them.
wide |>
  write_rds("data/processed/edu_offer.rds") |>
  write_csv("data/processed/edu_offer.csv")