# =============================================================================
# skolverket.R · Educational offer dataset by municipality
# =============================================================================
#
# Sources and coverage:
#   1. data/skolenhetsadresser.xlsx: Skolverket school unit registry
#      (current snapshot; all Skolverket-regulated institution types)
#      Sheets: Förskoleklass, Grundskola, Anpassad grundskola, Specialskola,
#              Sameskola, Gymnasieskola, Anpassad gymnasieskola, Komvux
#   2. Hardcoded list: higher education institutions
#      Source: UKÄ (Universitetskanslersämbetet) register, ~40 institutions.
#      Each institution-municipality pair is one row; multi-campus institutions
#      appear under every municipality that hosts a campus.
#   3. TODO: Yrkeshögskola: MYH (Myndigheten för yrkeshögskolan)
#      open data at myh.se; no REST API identified so far.
#   4. TODO: Folkhögskola: Folkbildningsrådet register at
#      folkbildning.se; ~155 institutions across ~100+ municipalities.
#
# Note: the Skolverket planned-educations API (api.skolverket.se) was
# explored but covers only the same types as the xlsx; it is used here
# purely as an optional check in section 03.
#
# Output:
#   data/processed/edu_offer.rds : municipality × indicator/count (wide)
#   data/processed/edu_offer.csv : same, plain text

library(tidyverse)
library(readxl)

# 00 – Helpers -----------------------------------------------------------------

XLSX_PATH <- "data/skolenhetsadresser.xlsx"

# Values of HUVUDMANNATYP that count as public ownership. Every other value,
# including a missing one, is classified as private.
PUBLIC_OWNER_TYPES <- c("Kommunal", "Region", "Statlig")

# Rename the registry's municipality columns to muni_code / muni_name and
# standardise muni_code to a 4-character, zero-padded string.
add_muni_code <- function(df) {
  df |>
    rename(
      muni_code = `BELÄGEN I KOMMUN (KOD)`,
      muni_name = `BELÄGEN I KOMMUN (NAMN)`
    ) |>
    mutate(muni_code = str_pad(as.character(muni_code), 4, "left", "0"))
}

# Add an `ownership` column: "public" when HUVUDMANNATYP is Kommunal, Region
# or Statlig, otherwise "private".
categorise_ownership <- function(df) {
  mutate(
    df,
    ownership = if_else(
      HUVUDMANNATYP %in% PUBLIC_OWNER_TYPES,
      "public",
      "private"
    )
  )
}

# Count school units per municipality, split by ownership.
#
# df         : one registry sheet (raw column names as read from the xlsx).
# type_label : value written to the `type` column of every output row.
#
# Returns one row per municipality found in `df` with the columns
# muni_code, muni_name, type, n_total, n_public, n_private (integer counts).
# Both ownership columns are always present, even if one kind never occurs.
count_units <- function(df, type_label) {
  df |>
    add_muni_code() |>
    categorise_ownership() |>
    group_by(muni_code, muni_name) |>
    summarise(
      n_total = n(),
      n_public = sum(ownership == "public"),
      n_private = sum(ownership == "private"),
      .groups = "drop"
    ) |>
    mutate(type = type_label) |>
    select(muni_code, muni_name, type, n_total, n_public, n_private)
}

# 01 – Read all xlsx sheets ----------------------------------------------------

# Names are the type labels used in the output; values are the sheet names.
sheet_map <- c(
  "forskoleklass" = "Förskoleklass",
  "grundskola" = "Grundskola",
  "anpassad_grundskola" = "Anpassad grundskola",
  "specialskola" = "Specialskola",
  "sameskola" = "Sameskola",
  "gymnasieskola" = "Gymnasieskola",
  "anpassad_gymnasieskola" = "Anpassad gymnasieskola",
  "komvux" = "Komvux"
)

# Named list of raw sheets, keyed by type label.
raw <- imap(sheet_map, \(sheet_name, type_label) {
  cat("Reading sheet:", sheet_name, "\n")
  read_excel(XLSX_PATH, sheet = sheet_name)
})

# 02 – Count institutions per municipality and ownership -----------------------

unit_counts <- raw |>
  imap(\(df, type_label) count_units(df, type_label)) |>
  bind_rows()

# Komvux: additionally extract SFI-offering units as a separate indicator.
# Column "SVENSKA FÖR INVANDRARE" = "J" means the unit offers SFI.
sfi_counts <- raw[["komvux"]] |>
  filter(`SVENSKA FÖR INVANDRARE` == "J") |>
  count_units("sfi")

unit_counts <- bind_rows(unit_counts, sfi_counts)

# 03 – Skolverket API cross-check (optional) -----------------------------------
# The planned-educations API returns the same institution types as the xlsx.
# This block fetches the API data and reports any discrepancies between the two.
# Comment out if offline or if the xlsx is known to be current.
# Download every school unit from the planned-educations API, summarise the
# units per municipality / type / ownership, and print how many of those cells
# disagree with the xlsx-based `unit_counts`.
#
# Evaluates to the API summary tibble (muni_code, type, ownership, n_api), or
# to NULL when any step fails (e.g. no network); the failure is reported with
# message() and the rest of the script carries on.
#
# NOTE(review): only the first entry of `typeOfSchooling` is used per unit, so
# a unit listed under several school types is counted once here; some
# differences against the xlsx are therefore expected.
api_cross_check <- tryCatch(
  {
    cat("\nFetching Skolverket API for cross-check...\n")

    base_url <- "https://api.skolverket.se/planned-educations/school-units"

    # Fetch one result page (0-based index) and return the parsed `body`.
    fetch_page <- function(page) {
      url <- paste0(base_url, "?page=", page, "&size=100")
      resp <- readLines(url, warn = FALSE) |>
        paste(collapse = "") |>
        jsonlite::fromJSON()
      resp$body
    }

    first <- fetch_page(0)
    n_pages <- first$page$totalPages
    # Guard against a missing or zero page count: `0:(n_pages - 1)` would
    # otherwise iterate over c(0, -1).
    if (length(n_pages) != 1L || is.na(n_pages) || n_pages < 1) {
      stop("API reported no result pages", call. = FALSE)
    }
    cat(
      " API reports",
      first$page$totalElements,
      "units across",
      n_pages,
      "pages\n"
    )

    all_pages <- map(seq_len(n_pages) - 1L, \(p) {
      if (p %% 10 == 0) {
        cat(" page", p, "/", n_pages, "\n")
      }
      # Page 0 was already downloaded to read the paging metadata.
      body <- if (p == 0) first else fetch_page(p)
      body$`_embedded`$listedSchoolUnits
    })

    api_df <- bind_rows(all_pages) |>
      transmute(
        muni_code = str_pad(as.character(geographicalAreaCode), 4, "left", "0"),
        ownership = if_else(
          principalOrganizerType %in% c("Kommunal", "Region", "Statlig"),
          "public",
          "private"
        ),
        type = map_chr(typeOfSchooling, \(t) {
          if (is.null(t) || nrow(t) == 0) {
            return(NA_character_)
          }
          t$code[1]
        })
      ) |>
      filter(!is.na(type))

    # Recode BEFORE counting: vuxgy and vuxgr both map to "komvux", so counting
    # first would leave two "komvux" rows per municipality and ownership.
    api_summary <- api_df |>
      mutate(
        type = recode(
          type,
          fsk = "forskoleklass",
          gr = "grundskola",
          gran = "anpassad_grundskola",
          sp = "specialskola",
          sam = "sameskola",
          gy = "gymnasieskola",
          gyan = "anpassad_gymnasieskola",
          vuxgy = "komvux",
          vuxgr = "komvux",
          sfi = "sfi"
        )
      ) |>
      count(muni_code, type, ownership, name = "n_api")

    # Bring the xlsx counts into the same long shape as the API summary.
    xlsx_summary <- unit_counts |>
      select(muni_code, type, public = n_public, private = n_private) |>
      pivot_longer(
        c(public, private),
        names_to = "ownership",
        values_to = "n_xlsx"
      ) |>
      summarise(n_xlsx = sum(n_xlsx), .by = c(muni_code, type, ownership)) |>
      filter(n_xlsx > 0L)

    # A cell missing from one source counts as 0 in that source.
    discrepancies <- api_summary |>
      full_join(xlsx_summary, by = c("muni_code", "type", "ownership")) |>
      mutate(
        n_api = replace_na(n_api, 0L),
        n_xlsx = replace_na(n_xlsx, 0L)
      ) |>
      filter(n_api != n_xlsx)

    cat(" API cross-check complete\n")
    cat(
      " ",
      nrow(discrepancies),
      "municipality/type/ownership cells differ between API and xlsx\n"
    )
    if (nrow(discrepancies) > 0) {
      print(discrepancies)
    }

    api_summary
  },
  error = function(e) {
    message("API cross-check skipped: ", conditionMessage(e))
    NULL
  }
)

# 04 – Higher education institutions (UKÄ list, hardcoded) ---------------------
# Source: UKÄ register of accredited Swedish higher education institutions.
# Each row = one institution × one municipality (multi-campus → multiple rows).
# Verify against: https://www.uka.se/om-oss/kontakt/larosaetenas-webbplatser.html
#
# NOTE(review): Luleå University of Technology (Luleå, 2580) does not appear in
# this list; confirm against the UKÄ register whether it should be added.
# NOTE(review): `type_he` follows the section headings below; check that the
# university / university_college split still matches UKÄ's current
# classification (e.g. Linnaeus, Malmö, Mälardalen, Mid Sweden).
he_institutions <- tribble(
  ~institution, ~muni_code, ~type_he,
  # ---- State universities ----
  "Uppsala University", "0380", "university",
  "Stockholm University", "0180", "university",
  "Lund University", "1281", "university",
  "University of Gothenburg", "1480", "university",
  "Umeå University", "2480", "university",
  "Linköping University", "0580", "university",
  "Örebro University", "1880", "university",
  "Karlstad University", "1780", "university",
  # ---- State specialised universities ----
  "KTH Royal Institute of Technology", "0180", "university",
  "Karolinska Institutet", "0184", "university", # Solna
  "Chalmers University of Technology", "1480", "university", # private, state-grant
  "SLU – Uppsala", "0380", "university",
  "SLU – Umeå", "2480", "university",
  "SLU – Alnarp (Lomma)", "1262", "university",
  "SLU – Skara", "1495", "university",
  # ---- State university colleges ----
  "Blekinge Institute of Technology", "1080", "university_college", # Karlskrona
  "Dalarna University – Falun", "2080", "university_college",
  "Dalarna University – Borlänge", "2081", "university_college",
  "University of Gävle", "2180", "university_college",
  "Halmstad University", "1380", "university_college",
  "Kristianstad University", "1290", "university_college",
  "Linnaeus University – Växjö", "0780", "university_college",
  "Linnaeus University – Kalmar", "0880", "university_college",
  "Malmö University", "1280", "university_college",
  "Mälardalen University – Västerås", "1980", "university_college",
  "Mälardalen University – Eskilstuna", "0484", "university_college",
  "Mid Sweden University – Sundsvall", "2281", "university_college",
  "Mid Sweden University – Östersund", "2380", "university_college",
  "Södertörn University", "0126", "university_college", # Huddinge
  "University of Borås", "1490", "university_college",
  "University of Skövde", "1496", "university_college",
  "University West", "1488", "university_college", # Trollhättan
  # ---- Private accredited institutions ----
  "Stockholm School of Economics", "0180", "university_college",
  "Jönköping University", "0680", "university_college",
  # ---- Art, music, design, sport ----
  "Konstfack", "0180", "university_college",
  "Royal University College of Music (KMH)", "0180", "university_college",
  "Stockholm University of the Arts", "0180", "university_college",
  "Royal Institute of Art", "0180", "university_college",
  "Beckmans College of Design", "0180", "university_college",
  "Swedish School of Sport and Health Sciences", "0180", "university_college",
  # ---- Defence / health ----
  "Swedish Defence University", "0180", "university_college",
  "Sophiahemmet University", "0180", "university_college",
  "Ersta Sköndal Bräcke University College", "0180", "university_college",
  "Röda Korsets Högskola", "0180", "university_college",
  "Newmaninstitutet", "0380", "university_college"
)

# One row per municipality × HE type, in the same column layout as
# `unit_counts` so the two can be stacked with bind_rows().
he_counts <- he_institutions |>
  count(muni_code, type_he, name = "n_total") |>
  rename(type = type_he) |>
  # All Swedish HE institutions are state-funded or receive >90% public funding;
  # public/private distinction used for school units does not apply here.
  # Every institution is therefore booked as public.
  mutate(n_public = n_total, n_private = 0L, muni_name = NA_character_)

# 05 – Combine all sources and reshape to wide ---------------------------------

long <- bind_rows(
  unit_counts,
  he_counts
)

# Load the municipality reference to fill in any missing names and ensure
# all 290 m_sample municipalities appear (with 0s for absent institution types)
munis <- readRDS("data/processed/m_sample.rds") |>
  select(muni_code = code, muni_name_ref = municipality)

all_types <- unique(long$type)

# Collapse to exactly one row per municipality × type before joining. If the
# registry spells a municipality name in two ways, `long` holds two rows for
# the same code and type; without this step the join would fan out and
# pivot_wider() would produce list-columns instead of counts.
long_totals <- long |>
  summarise(
    across(c(n_total, n_public, n_private), sum),
    .by = c(muni_code, type)
  )

# Codes absent from the reference table are dropped by the left join below;
# surface them instead of losing them silently.
unmatched_codes <- setdiff(long_totals$muni_code, munis$muni_code)
if (length(unmatched_codes) > 0) {
  warning(
    "Municipality codes not found in m_sample (dropped from output): ",
    paste(unmatched_codes, collapse = ", "),
    call. = FALSE
  )
}

# One row per reference municipality; one column per type × count kind, named
# "<type>_n_total", "<type>_n_public", "<type>_n_private".
wide <- munis |>
  cross_join(tibble(type = all_types)) |>
  left_join(long_totals, by = c("muni_code", "type")) |>
  mutate(
    n_total = replace_na(n_total, 0L),
    n_public = replace_na(n_public, 0L),
    n_private = replace_na(n_private, 0L)
  ) |>
  pivot_wider(
    names_from = type,
    values_from = c(n_total, n_public, n_private),
    names_glue = "{type}_{.value}"
  ) |>
  rename(municipality = muni_name_ref, code = muni_code)

# 06 – Save --------------------------------------------------------------------

write_rds(wide, "data/processed/edu_offer.rds")
write_csv(wide, "data/processed/edu_offer.csv")