# =============================================================================
# skolverket.R · Educational offer dataset by municipality
# =============================================================================
#
# Sources and coverage:
#   1. data/skolenhetsadresser.xlsx – Skolverket school unit registry
#      (current snapshot; all Skolverket-regulated institution types)
#      Sheets: Förskoleklass, Grundskola, Anpassad grundskola, Specialskola,
#      Sameskola, Gymnasieskola, Anpassad gymnasieskola, Komvux
#   2. Hardcoded list – higher education institutions
#      Source: UKÄ (Universitetskanslersämbetet) register, ~40 institutions.
#      Each institution-municipality pair is one row; multi-campus
#      institutions appear under every municipality that hosts a campus.
#   3. TODO: Yrkeshögskola – MYH (Myndigheten för yrkeshögskolan)
#      open data at myh.se; no REST API identified so far.
#   4. TODO: Folkhögskola – Folkbildningsrådet register at
#      folkbildning.se; ~155 institutions across ~100+ municipalities.
#
# Note: the Skolverket planned-educations API (api.skolverket.se) was
# explored but covers only the same types as the xlsx; it is used here
# purely as an optional check in section 03.
#
# Output:
#   data/processed/edu_offer.rds : municipality × indicator/count (wide)
#   data/processed/edu_offer.csv : same, plain text
|
|||
|
|
|
|||
|
|
library(tidyverse)
|
|||
|
|
library(readxl)
|
|||
|
|
|
|||
|
|
# 00 – Helpers -----------------------------------------------------------------
|
|||
|
|
|
|||
|
|
XLSX_PATH <- "data/skolenhetsadresser.xlsx"
|
|||
|
|
|
|||
|
|
# Standardise municipality code column to 4-char zero-padded string
|
|||
|
|
add_muni_code <- function(df) {
  # Give the registry's two municipality columns short, ASCII-only names.
  renamed <- rename(
    df,
    muni_code = `BELÄGEN I KOMMUN (KOD)`,
    muni_name = `BELÄGEN I KOMMUN (NAMN)`
  )

  # Coerce the code to character and left-pad with zeros to four characters,
  # so codes line up with the string codes used elsewhere in the script.
  mutate(
    renamed,
    muni_code = str_pad(
      as.character(muni_code),
      width = 4,
      side = "left",
      pad = "0"
    )
  )
}
|
|||
|
|
|
|||
|
|
# Public = Kommunal, Region or Statlig (publicly run organisers); else private
|
|||
|
|
categorise_ownership <- function(df) {
  # Adds an `ownership` column derived from HUVUDMANNATYP: the three organiser
  # types listed below count as "public", every other value as "private".
  # `%in%` never yields NA, so a missing organiser type also lands in "private".
  df |>
    mutate(
      ownership = if_else(
        condition = HUVUDMANNATYP %in% c("Kommunal", "Region", "Statlig"),
        true = "public",
        false = "private"
      )
    )
}
|
|||
|
|
|
|||
|
|
# Count units by municipality and ownership, then pivot to n_public / n_private / n_total
|
|||
|
|
count_units <- function(df, type_label) {
  # Tally units per municipality and ownership class.
  tallied <- count(
    categorise_ownership(add_muni_code(df)),
    muni_code, muni_name, ownership
  )

  # One row per municipality, one column per ownership class (0 when absent).
  by_owner <- pivot_wider(
    tallied,
    names_from = ownership,
    values_from = n,
    values_fill = 0L
  )

  # A sheet may contain only one ownership class; add the missing column as
  # zeros so the arithmetic below always has both operands.
  for (owner in c("public", "private")) {
    if (!owner %in% names(by_owner)) {
      by_owner[[owner]] <- 0L
    }
  }

  # Final shape: muni_code, muni_name, type, n_total, n_public, n_private.
  by_owner |>
    transmute(
      muni_code,
      muni_name,
      type = type_label,
      n_total = public + private,
      n_public = public,
      n_private = private
    )
}
|
|||
|
|
|
|||
|
|
# 01 – Read all xlsx sheets ----------------------------------------------------
|
|||
|
|
|
|||
|
|
# Names are the type labels used in the output; values are the sheet names
# in the workbook.
sheet_map <- c(
  forskoleklass          = "Förskoleklass",
  grundskola             = "Grundskola",
  anpassad_grundskola    = "Anpassad grundskola",
  specialskola           = "Specialskola",
  sameskola              = "Sameskola",
  gymnasieskola          = "Gymnasieskola",
  anpassad_gymnasieskola = "Anpassad gymnasieskola",
  komvux                 = "Komvux"
)

# Read each sheet; the resulting list keeps the type labels as element names.
raw <- map(sheet_map, function(sheet_name) {
  cat("Reading sheet:", sheet_name, "\n")
  read_excel(XLSX_PATH, sheet = sheet_name)
})
|
|||
|
|
|
|||
|
|
# 02 – Count institutions per municipality and ownership -----------------------
|
|||
|
|
|
|||
|
|
# One row per municipality × institution type, with total/public/private
# unit counts; the list names of `raw` become the `type` labels.
unit_counts <- imap_dfr(raw, \(df, type_label) count_units(df, type_label))

# Komvux: additionally extract SFI-offering units as a separate indicator.
# Column "SVENSKA FÖR INVANDRARE" = "J" means the unit offers SFI.
# The filtered sheet goes through count_units() so the SFI indicator is built
# by exactly the same counting logic as every other type (rows where the
# column is NA are dropped by filter()).
sfi_counts <- raw[["komvux"]] |>
  filter(`SVENSKA FÖR INVANDRARE` == "J") |>
  count_units("sfi")

unit_counts <- bind_rows(unit_counts, sfi_counts)
|
|||
|
|
|
|||
|
|
# 03 – Skolverket API cross-check (optional) -----------------------------------
|
|||
|
|
# The planned-educations API returns the same institution types as the xlsx.
# This block fetches the API data and summarises unit counts per municipality,
# type and ownership so they can be compared with the xlsx counts; the
# comparison itself is not performed here.
# Comment out if offline or if the xlsx is known to be current.
|
|||
|
|
|
|||
|
|
# Builds a per-municipality summary of the Skolverket API listing
# (columns: muni_code, type, ownership, n_api). Any error while fetching or
# parsing is reported with message() and the result is NULL, so the rest of
# the script still runs offline.
api_cross_check <- tryCatch(
  {
    cat("\nFetching Skolverket API for cross-check...\n")
    base_url <- "https://api.skolverket.se/planned-educations/school-units"

    # Download one result page (100 units per request) and return the `body`
    # element of the parsed JSON response.
    fetch_page <- function(page) {
      url <- paste0(base_url, "?page=", page, "&size=100")
      resp <- readLines(url, warn = FALSE) |>
        paste(collapse = "") |>
        jsonlite::fromJSON()
      resp$body
    }

    first <- fetch_page(0)
    n_pages <- first$page$totalPages

    # Stop with a clear reason when the paging metadata is missing or empty;
    # the handler below turns this into "cross-check skipped".
    if (length(n_pages) != 1L || is.na(n_pages) || n_pages < 1) {
      stop("API response reported no result pages", call. = FALSE)
    }

    cat(
      " API reports",
      first$page$totalElements,
      "units across",
      n_pages,
      "pages\n"
    )

    # Zero-based page indices. seq_len() cannot produce the c(0, -1) sequence
    # that 0:(n_pages - 1) yields when n_pages is 0.
    page_ids <- seq_len(n_pages) - 1L

    all_pages <- map(page_ids, \(p) {
      if (p %% 10 == 0) {
        cat(" page", p, "/", n_pages, "\n")
      }
      # Page 0 was already downloaded to read the paging metadata; reuse it
      # instead of requesting it a second time.
      page <- if (p == 0) first else fetch_page(p)
      page$`_embedded`$listedSchoolUnits
    })

    api_df <- bind_rows(all_pages) |>
      transmute(
        muni_code = str_pad(as.character(geographicalAreaCode), 4, "left", "0"),
        # Same public/private rule as categorise_ownership().
        ownership = if_else(
          principalOrganizerType %in% c("Kommunal", "Region", "Statlig"),
          "public",
          "private"
        ),
        # Only the first listed schooling code is used per unit.
        # NOTE(review): assumes each typeOfSchooling element is NULL or a data
        # frame with a `code` column - confirm against a live API response.
        type = map_chr(typeOfSchooling, \(t) {
          if (is.null(t) || nrow(t) == 0) {
            return(NA_character_)
          }
          t$code[1]
        })
      ) |>
      filter(!is.na(type))

    # Translate API codes to the script's type labels BEFORE counting:
    # vuxgy and vuxgr both map to "komvux", and counting first would leave
    # two separate "komvux" rows for the same municipality and ownership.
    api_summary <- api_df |>
      mutate(
        type = recode(
          type,
          fsk = "forskoleklass",
          gr = "grundskola",
          gran = "anpassad_grundskola",
          sp = "specialskola",
          sam = "sameskola",
          gy = "gymnasieskola",
          gyan = "anpassad_gymnasieskola",
          vuxgy = "komvux",
          vuxgr = "komvux",
          sfi = "sfi"
        )
      ) |>
      count(muni_code, type, ownership, name = "n_api")

    cat(" API cross-check complete\n")
    api_summary
  },
  error = function(e) {
    message("API cross-check skipped: ", conditionMessage(e))
    NULL
  }
)
|
|||
|
|
|
|||
|
|
# 04 – Higher education institutions (UKÄ list, hardcoded) ---------------------
|
|||
|
|
# Source: UKÄ register of accredited Swedish higher education institutions.
|
|||
|
|
# Each row = one institution × one municipality (multi-campus → multiple rows).
|
|||
|
|
# Verify against: https://www.uka.se/om-oss/kontakt/larosaetenas-webbplatser.html
|
|||
|
|
|
|||
|
|
# One row per institution × host municipality; `type_he` is either
# "university" or "university_college".
# NOTE(review): Luleå University of Technology (Luleå, code 2580) does not
#   appear below - confirm whether the omission is intentional.
# NOTE(review): Linnaeus, Malmö, Mälardalen and Mid Sweden are coded as
#   "university_college"; confirm against the current UKÄ classification.
he_institutions <- tribble(
  ~institution, ~muni_code, ~type_he,
  # State universities
  "Uppsala University", "0380", "university",
  "Stockholm University", "0180", "university",
  "Lund University", "1281", "university",
  "University of Gothenburg", "1480", "university",
  "Umeå University", "2480", "university",
  "Linköping University", "0580", "university",
  "Örebro University", "1880", "university",
  "Karlstad University", "1780", "university",
  # State specialised universities
  "KTH Royal Institute of Technology", "0180", "university",
  "Karolinska Institutet", "0184", "university", # Solna
  "Chalmers University of Technology", "1480", "university", # private, state-grant
  "SLU – Uppsala", "0380", "university",
  "SLU – Umeå", "2480", "university",
  "SLU – Alnarp (Lomma)", "1262", "university",
  "SLU – Skara", "1495", "university",
  # State university colleges
  "Blekinge Institute of Technology", "1080", "university_college", # Karlskrona
  "Dalarna University – Falun", "2080", "university_college",
  "Dalarna University – Borlänge", "2081", "university_college",
  "University of Gävle", "2180", "university_college",
  "Halmstad University", "1380", "university_college",
  "Kristianstad University", "1290", "university_college",
  "Linnaeus University – Växjö", "0780", "university_college",
  "Linnaeus University – Kalmar", "0880", "university_college",
  "Malmö University", "1280", "university_college",
  "Mälardalen University – Västerås", "1980", "university_college",
  "Mälardalen University – Eskilstuna", "0484", "university_college",
  "Mid Sweden University – Sundsvall", "2281", "university_college",
  "Mid Sweden University – Östersund", "2380", "university_college",
  "Södertörn University", "0126", "university_college", # Huddinge
  "University of Borås", "1490", "university_college",
  "University of Skövde", "1496", "university_college",
  "University West", "1488", "university_college", # Trollhättan
  # Private accredited institutions
  "Stockholm School of Economics", "0180", "university_college",
  "Jönköping University", "0680", "university_college",
  # Art, music, design, sport
  "Konstfack", "0180", "university_college",
  "Royal University College of Music (KMH)", "0180", "university_college",
  "Stockholm University of the Arts", "0180", "university_college",
  "Royal Institute of Art", "0180", "university_college",
  "Beckmans College of Design", "0180", "university_college",
  "Swedish School of Sport and Health Sciences", "0180", "university_college",
  # Defence / health
  "Swedish Defence University", "0180", "university_college",
  "Sophiahemmet University", "0180", "university_college",
  "Ersta Sköndal Bräcke University College", "0180", "university_college",
  "Röda Korsets Högskola", "0180", "university_college",
  "Newmaninstitutet", "0380", "university_college"
)
|
|||
|
|
|
|||
|
|
# Institutions per municipality and HE type, shaped like the school-unit
# counts (type, n_total, n_public, n_private, muni_name).
# The script treats every HE institution as public (state-funded or >90%
# public funding, per the original rationale), so n_private is always 0 and
# the school-unit public/private split is not applied here. muni_name is left
# NA in this table.
he_counts <- count(
  he_institutions,
  muni_code,
  type = type_he,
  name = "n_total"
) |>
  mutate(
    n_public = n_total,
    n_private = 0L,
    muni_name = NA_character_
  )
|
|||
|
|
|
|||
|
|
# 05 – Combine all sources and reshape to wide ---------------------------------
|
|||
|
|
|
|||
|
|
# Stack school-unit counts and higher-education counts into one long table.
long <- bind_rows(unit_counts, he_counts)

# Municipality reference: supplies the official names and guarantees that
# every m_sample municipality appears in the output, with zeros for
# institution types it does not host.
munis <- readRDS("data/processed/m_sample.rds") |>
  select(muni_code = code, muni_name_ref = municipality)

all_types <- unique(long$type)

wide <- cross_join(munis, tibble(type = all_types)) |>
  # Attach counts where they exist; unmatched municipality × type pairs
  # come back as NA and are zero-filled in the next step.
  left_join(
    select(long, muni_code, type, n_total, n_public, n_private),
    by = c("muni_code", "type")
  ) |>
  replace_na(list(n_total = 0L, n_public = 0L, n_private = 0L)) |>
  # One column per type × measure, e.g. grundskola_n_total.
  pivot_wider(
    names_from = type,
    values_from = c(n_total, n_public, n_private),
    names_glue = "{type}_{.value}"
  ) |>
  rename(code = muni_code, municipality = muni_name_ref)
|
|||
|
|
|
|||
|
|
# 06 – Save --------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
# Persist the wide table twice: RDS for downstream R scripts, CSV as a
# plain-text copy.
wide |> write_rds("data/processed/edu_offer.rds")
wide |> write_csv("data/processed/edu_offer.csv")
|