aggregate counties
This commit is contained in:
parent
5a991f1e0e
commit
c724bba819
4 changed files with 1465 additions and 0 deletions
22
data/processed/edu_offer_county.csv
Normal file
22
data/processed/edu_offer_county.csv
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
county_code,county,forskoleklass_n_total,grundskola_n_total,anpassad_grundskola_n_total,specialskola_n_total,sameskola_n_total,gymnasieskola_n_total,anpassad_gymnasieskola_n_total,komvux_n_total,sfi_n_total,university_n_total,university_college_n_total,forskoleklass_n_public,grundskola_n_public,anpassad_grundskola_n_public,specialskola_n_public,sameskola_n_public,gymnasieskola_n_public,anpassad_gymnasieskola_n_public,komvux_n_public,sfi_n_public,university_n_public,university_college_n_public,forskoleklass_n_private,grundskola_n_private,anpassad_grundskola_n_private,specialskola_n_private,sameskola_n_private,gymnasieskola_n_private,anpassad_gymnasieskola_n_private,komvux_n_private,sfi_n_private,university_n_private,university_college_n_private
|
||||||
|
01,Stockholm,632,755,139,3,0,209,48,66,43,3,12,429,481,109,3,0,78,28,65,43,3,12,203,274,30,0,0,131,20,1,0,0,0
|
||||||
|
03,Uppsala,119,148,16,0,0,34,8,23,12,2,1,101,117,13,0,0,15,5,23,12,2,1,18,31,3,0,0,19,3,0,0,0,0
|
||||||
|
04,Södermanland,103,123,24,0,0,32,10,19,9,0,1,84,95,24,0,0,20,8,19,9,0,1,19,28,0,0,0,12,2,0,0,0,0
|
||||||
|
05,Östergötland,155,237,32,0,0,73,15,31,14,1,0,138,206,30,0,0,56,14,31,14,1,0,17,31,2,0,0,17,1,0,0,0,0
|
||||||
|
06,Jönköping,142,196,35,0,0,56,13,29,13,0,1,133,182,31,0,0,46,11,29,13,0,1,9,14,4,0,0,10,2,0,0,0,0
|
||||||
|
07,Kronoberg,91,122,19,0,0,34,6,19,8,0,1,80,107,19,0,0,25,6,19,8,0,1,11,15,0,0,0,9,0,0,0,0,0
|
||||||
|
08,Kalmar,103,132,18,0,0,29,4,19,10,0,1,95,119,17,0,0,22,4,19,10,0,1,8,13,1,0,0,7,0,0,0,0,0
|
||||||
|
09,Gotland,33,37,3,0,0,6,1,2,2,0,0,29,33,3,0,0,5,1,2,2,0,0,4,4,0,0,0,1,0,0,0,0,0
|
||||||
|
10,Blekinge,64,81,7,0,0,21,4,12,5,0,1,52,68,7,0,0,19,4,12,5,0,1,12,13,0,0,0,2,0,0,0,0,0
|
||||||
|
12,Skåne,453,588,73,1,0,151,26,102,46,2,2,359,471,68,1,0,78,23,102,46,2,2,94,117,5,0,0,73,3,0,0,0,0
|
||||||
|
13,Halland,122,199,14,0,0,53,8,13,7,0,1,104,177,13,0,0,38,8,13,7,0,1,18,22,1,0,0,15,0,0,0,0,0
|
||||||
|
14,Västra Götaland,565,811,145,1,0,255,41,142,68,3,3,482,707,138,1,0,186,37,142,68,3,3,83,104,7,0,0,69,4,0,0,0,0
|
||||||
|
17,Värmland,103,154,37,0,0,40,11,29,14,1,0,93,140,36,0,0,28,11,29,14,1,0,10,14,1,0,0,12,0,0,0,0,0
|
||||||
|
18,Örebro,105,140,19,3,0,52,7,38,12,1,0,93,121,19,3,0,40,6,38,12,1,0,12,19,0,0,0,12,1,0,0,0,0
|
||||||
|
19,Västmanland,91,119,25,0,0,34,10,22,10,0,1,78,99,24,0,0,18,9,22,10,0,1,13,20,1,0,0,16,1,0,0,0,0
|
||||||
|
20,Dalarna,125,152,28,0,0,30,10,22,11,0,2,113,136,27,0,0,21,9,22,11,0,2,12,16,1,0,0,9,1,0,0,0,0
|
||||||
|
21,Gävleborg,111,141,17,0,0,35,4,23,10,0,1,93,116,16,0,0,21,4,23,10,0,1,18,25,1,0,0,14,0,0,0,0,0
|
||||||
|
22,Västernorrland,102,127,14,1,0,34,5,29,12,0,1,89,109,14,1,0,25,5,29,12,0,1,13,18,0,0,0,9,0,0,0,0,0
|
||||||
|
23,Jämtland,75,86,9,0,0,24,2,14,8,0,1,69,80,9,0,0,21,2,14,8,0,1,6,6,0,0,0,3,0,0,0,0,0
|
||||||
|
24,Västerbotten,129,162,22,1,0,39,7,31,15,2,0,115,146,21,1,0,32,7,31,15,2,0,14,16,1,0,0,7,0,0,0,0,0
|
||||||
|
25,Norrbotten,120,159,23,0,4,41,11,32,16,0,0,102,138,23,0,4,37,11,32,16,0,0,18,21,0,0,0,4,0,0,0,0,0
|
||||||
|
BIN
data/processed/edu_offer_county.rds
Normal file
BIN
data/processed/edu_offer_county.rds
Normal file
Binary file not shown.
1201
ppt/Presentation June 10.qmd
Normal file
1201
ppt/Presentation June 10.qmd
Normal file
File diff suppressed because it is too large
Load diff
242
src/scraping/skolverket_by_county.R
Normal file
242
src/scraping/skolverket_by_county.R
Normal file
|
|
@ -0,0 +1,242 @@
|
||||||
|
# =============================================================================
|
||||||
|
# skolverket_by_county.R · Educational offer dataset by COUNTY
|
||||||
|
# =============================================================================
|
||||||
|
#
|
||||||
|
# skolverket.R produces ONE ROW PER MUNICIPALITY. This script produces
|
||||||
|
# ONE ROW PER COUNTY by aggregating the same school-unit and higher-education
|
||||||
|
# data up one administrative level.
|
||||||
|
#
|
||||||
|
# KEY IDEA -----------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# Swedish municipality codes are 4 digits, structured like this:
|
||||||
|
#
|
||||||
|
#     1 4 8 0      (= Göteborg)
#     ^ ^ ^ ^
#     └─┴─────── county (län): 14 → Västra Götaland
#         └─┴─── position inside the county
|
||||||
|
#
|
||||||
|
# So aggregating municipalities into counties is just a regrouping: replace
|
||||||
|
# `muni_code` with `str_sub(muni_code, 1, 2)` in the grouping step. Reading
|
||||||
|
# the xlsx, classifying ownership, counting, and pivoting wide all stay the
|
||||||
|
# same.
|
||||||
|
#
|
||||||
|
# Sources:
|
||||||
|
# 1. data/skolenhetsadresser.xlsx Skolverket school-unit registry
|
||||||
|
# Sheets: Förskoleklass, Grundskola, Anpassad grundskola, Specialskola,
|
||||||
|
# Sameskola, Gymnasieskola, Anpassad gymnasieskola, Komvux
|
||||||
|
# 2. Hardcoded list UKÄ register of higher-education
|
||||||
|
# institutions (~40 institution-sites)
|
||||||
|
#
|
||||||
|
# Output:
|
||||||
|
# data/processed/edu_offer_county.rds : county × indicator/count (wide)
|
||||||
|
# data/processed/edu_offer_county.csv : same, plain text
|
||||||
|
|
||||||
|
library(tidyverse)
|
||||||
|
library(readxl)
|
||||||
|
|
||||||
|
XLSX_PATH <- "data/skolenhetsadresser.xlsx"
|
||||||
|
|
||||||
|
# 21 Swedish counties. Some codes are skipped (e.g. 02, 11, 15) for historical
|
||||||
|
# reasons; that's fine, we just look up by code.
|
||||||
|
county_names <- c(
|
||||||
|
"01" = "Stockholm", "03" = "Uppsala", "04" = "Södermanland",
|
||||||
|
"05" = "Östergötland", "06" = "Jönköping", "07" = "Kronoberg",
|
||||||
|
"08" = "Kalmar", "09" = "Gotland", "10" = "Blekinge",
|
||||||
|
"12" = "Skåne", "13" = "Halland", "14" = "Västra Götaland",
|
||||||
|
"17" = "Värmland", "18" = "Örebro", "19" = "Västmanland",
|
||||||
|
"20" = "Dalarna", "21" = "Gävleborg", "22" = "Västernorrland",
|
||||||
|
"23" = "Jämtland", "24" = "Västerbotten", "25" = "Norrbotten"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# 00 – Helpers ----------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# Each helper does ONE small step. They get chained with the pipe in 02 and 04.
|
||||||
|
|
||||||
|
# (a) Derive county_code (first two digits) and county_name from muni_code.
#     Works on any df that already has a `muni_code` column.
#
#     muni_code is coerced to character and left-padded with zeros to four
#     characters, so a numeric code such as 180 becomes "0180" before the
#     county prefix is taken.
#
#     Returns `df` with muni_code normalised and county_code / county_name
#     added. county_name is NA for any code that is not in `county_names`;
#     a warning lists those codes so the affected rows are not lost silently.
with_county <- function(df) {
  out <- df |>
    mutate(
      muni_code = str_pad(
        as.character(muni_code),
        width = 4, side = "left", pad = "0"
      ),
      county_code = str_sub(muni_code, 1, 2),
      # The lookup returns a vector named by county code; unname() keeps the
      # column a plain character vector.
      county_name = unname(county_names[county_code])
    )

  unknown_codes <- unique(out$county_code[is.na(out$county_name)])
  if (length(unknown_codes) > 0) {
    warning(
      "with_county(): county code(s) not found in `county_names`: ",
      paste(unknown_codes, collapse = ", "),
      call. = FALSE
    )
  }

  out
}
|
||||||
|
|
||||||
|
# (b) Add an `ownership` column derived from HUVUDMANNATYP: "public" when the
#     value is Kommunal, Region or Statlig, "private" for anything else
#     (a missing HUVUDMANNATYP therefore also counts as "private").
classify_ownership <- function(df) {
  mutate(
    df,
    ownership = if_else(
      !(HUVUDMANNATYP %in% c("Kommunal", "Region", "Statlig")),
      "private",
      "public"
    )
  )
}
|
||||||
|
|
||||||
|
# (c) Turn one raw school-unit sheet into a clean data frame: give the
#     municipality columns short names, attach the county via with_county(),
#     then label each unit public/private via classify_ownership().
prepare_sheet <- function(df) {
  renamed <- rename(
    df,
    muni_code = `BELÄGEN I KOMMUN (KOD)`,
    muni_name = `BELÄGEN I KOMMUN (NAMN)`
  )

  classify_ownership(with_county(renamed))
}
|
||||||
|
|
||||||
|
# (d) Aggregate a prepared df to a single row per county with
#     n_total / n_public / n_private columns.
#
#     df          data frame with county_code, county_name and ownership
#                 columns (as produced by prepare_sheet()).
#     type_label  value written to the `type` column of every output row.
#
#     Returns a tibble with columns county_code, county_name, type, n_total,
#     n_public, n_private (counts are integers), one row per county present
#     in `df`.
#
#     The public and private counts are summed directly rather than pivoted
#     from an ownership column. Both columns therefore always exist: when a
#     type has no private units anywhere (e.g. specialskola) n_private is 0,
#     and when `df` has no rows at all (e.g. a filter that matched nothing)
#     the result is an empty tibble with the full schema instead of an error.
count_by_county <- function(df, type_label) {
  df |>
    group_by(county_code, county_name) |>
    summarise(
      # na.rm = TRUE: a row with a missing ownership adds to neither count.
      n_public  = sum(ownership == "public", na.rm = TRUE),
      n_private = sum(ownership == "private", na.rm = TRUE),
      .groups = "drop"
    ) |>
    mutate(
      type = type_label,
      n_total = n_public + n_private
    ) |>
    select(county_code, county_name, type, n_total, n_public, n_private)
}
|
||||||
|
|
||||||
|
|
||||||
|
# 01 – Read all xlsx sheets ---------------------------------------------------
|
||||||
|
|
||||||
|
sheet_map <- c(
|
||||||
|
"forskoleklass" = "Förskoleklass",
|
||||||
|
"grundskola" = "Grundskola",
|
||||||
|
"anpassad_grundskola" = "Anpassad grundskola",
|
||||||
|
"specialskola" = "Specialskola",
|
||||||
|
"sameskola" = "Sameskola",
|
||||||
|
"gymnasieskola" = "Gymnasieskola",
|
||||||
|
"anpassad_gymnasieskola" = "Anpassad gymnasieskola",
|
||||||
|
"komvux" = "Komvux"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Read every sheet listed in `sheet_map` and run it through prepare_sheet().
# The result is a list of data frames named by the keys of `sheet_map`.
raw <- map(sheet_map, \(sheet) {
  cat("Reading sheet:", sheet, "\n")
  prepare_sheet(read_excel(XLSX_PATH, sheet = sheet))
})
|
||||||
|
|
||||||
|
|
||||||
|
# 02 – Aggregate to county level ----------------------------------------------
|
||||||
|
|
||||||
|
# Komvux units that also offer SFI (column SVENSKA FÖR INVANDRARE == "J"):
# keep only those rows and run them through the same county aggregator under
# the type label "sfi".
sfi_counts <- count_by_county(
  filter(raw[["komvux"]], `SVENSKA FÖR INVANDRARE` == "J"),
  "sfi"
)

# One block of county counts per sheet (the list name becomes the type
# label), stacked into a single long table, with the SFI rows appended last.
unit_counts <- bind_rows(imap(raw, count_by_county))
unit_counts <- bind_rows(unit_counts, sfi_counts)
|
||||||
|
|
||||||
|
|
||||||
|
# 03 – Higher-education institutions (UKÄ list, hardcoded) --------------------
|
||||||
|
# Each row is one institution sited in one municipality (multi-campus
|
||||||
|
# institutions repeat). We derive the county and aggregate the same way.
|
||||||
|
|
||||||
|
he_institutions <- tribble(
|
||||||
|
~institution , ~muni_code , ~type_he ,
|
||||||
|
# ---- State universities ----
|
||||||
|
"Uppsala University" , "0380" , "university" ,
|
||||||
|
"Stockholm University" , "0180" , "university" ,
|
||||||
|
"Lund University" , "1281" , "university" ,
|
||||||
|
"University of Gothenburg" , "1480" , "university" ,
|
||||||
|
"Umeå University" , "2480" , "university" ,
|
||||||
|
"Linköping University" , "0580" , "university" ,
|
||||||
|
"Örebro University" , "1880" , "university" ,
|
||||||
|
"Karlstad University" , "1780" , "university" ,
|
||||||
|
# ---- State specialised universities ----
|
||||||
|
"KTH Royal Institute of Technology" , "0180" , "university" ,
|
||||||
|
"Karolinska Institutet" , "0184" , "university" ,
|
||||||
|
"Chalmers University of Technology" , "1480" , "university" ,
|
||||||
|
"SLU – Uppsala" , "0380" , "university" ,
|
||||||
|
"SLU – Umeå" , "2480" , "university" ,
|
||||||
|
"SLU – Alnarp (Lomma)" , "1262" , "university" ,
|
||||||
|
"SLU – Skara" , "1495" , "university" ,
|
||||||
|
# ---- State university colleges ----
|
||||||
|
"Blekinge Institute of Technology" , "1080" , "university_college" ,
|
||||||
|
"Dalarna University – Falun" , "2080" , "university_college" ,
|
||||||
|
"Dalarna University – Borlänge" , "2081" , "university_college" ,
|
||||||
|
"University of Gävle" , "2180" , "university_college" ,
|
||||||
|
"Halmstad University" , "1380" , "university_college" ,
|
||||||
|
"Kristianstad University" , "1290" , "university_college" ,
|
||||||
|
"Linnaeus University – Växjö" , "0780" , "university_college" ,
|
||||||
|
"Linnaeus University – Kalmar" , "0880" , "university_college" ,
|
||||||
|
"Malmö University" , "1280" , "university_college" ,
|
||||||
|
"Mälardalen University – Västerås" , "1980" , "university_college" ,
|
||||||
|
"Mälardalen University – Eskilstuna" , "0484" , "university_college" ,
|
||||||
|
"Mid Sweden University – Sundsvall" , "2281" , "university_college" ,
|
||||||
|
"Mid Sweden University – Östersund" , "2380" , "university_college" ,
|
||||||
|
"Södertörn University" , "0126" , "university_college" ,
|
||||||
|
"University of Borås" , "1490" , "university_college" ,
|
||||||
|
"University of Skövde" , "1496" , "university_college" ,
|
||||||
|
"University West" , "1488" , "university_college" ,
|
||||||
|
# ---- Private accredited institutions ----
|
||||||
|
"Stockholm School of Economics" , "0180" , "university_college" ,
|
||||||
|
"Jönköping University" , "0680" , "university_college" ,
|
||||||
|
# ---- Art, music, design, sport ----
|
||||||
|
"Konstfack" , "0180" , "university_college" ,
|
||||||
|
"Royal University College of Music (KMH)" , "0180" , "university_college" ,
|
||||||
|
"Stockholm University of the Arts" , "0180" , "university_college" ,
|
||||||
|
"Royal Institute of Art" , "0180" , "university_college" ,
|
||||||
|
"Beckmans College of Design" , "0180" , "university_college" ,
|
||||||
|
"Swedish School of Sport and Health Sciences" , "0180" , "university_college" ,
|
||||||
|
# ---- Defence / health ----
|
||||||
|
"Swedish Defence University" , "0180" , "university_college" ,
|
||||||
|
"Sophiahemmet University" , "0180" , "university_college" ,
|
||||||
|
"Ersta Sköndal Bräcke University College" , "0180" , "university_college" ,
|
||||||
|
"Röda Korsets Högskola" , "0180" , "university_college" ,
|
||||||
|
"Newmaninstitutet" , "0380" , "university_college"
|
||||||
|
)
|
||||||
|
|
||||||
|
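# Simplification: HE institutions are treated as state-funded, so every one
# is counted as public (n_public = n_total, n_private = 0), including those
# listed under "Private accredited institutions" above.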
|
||||||
|
# Schema-compatible with `unit_counts` so we can bind_rows them later.
|
||||||
|
# Count institution-sites per county and type. `type_he` is renamed to `type`
# inside count(), and every site is booked as public so the columns line up
# with those of `unit_counts`.
he_counts <- he_institutions |>
  with_county() |>
  count(county_code, county_name, type = type_he, name = "n_total") |>
  mutate(
    n_public = n_total,
    n_private = 0L
  )
|
||||||
|
|
||||||
|
|
||||||
|
# 04 – Combine and reshape wide -----------------------------------------------
|
||||||
|
# Build a (county × type) skeleton so every one of Sweden's 21 counties
|
||||||
|
# appears, with zeros for institution types absent from that county.
|
||||||
|
|
||||||
|
# Stack school-unit and higher-education counts into one long table.
long <- bind_rows(unit_counts, he_counts)

# Lookup table of all counties and the institution types seen in `long`
# (in order of first appearance).
counties <- enframe(county_names, name = "county_code", value = "county_name")
all_types <- long |>
  distinct(type) |>
  pull(type)

# Cross every county with every type, attach the counts, turn the gaps left
# by the join into zeros, then spread to one row per county with columns
# named <type>_<n_total|n_public|n_private>.
wide <- expand_grid(counties, type = all_types) |>
  left_join(long, by = c("county_code", "county_name", "type")) |>
  replace_na(list(n_total = 0L, n_public = 0L, n_private = 0L)) |>
  pivot_wider(
    names_from = type,
    values_from = c(n_total, n_public, n_private),
    names_glue = "{type}_{.value}"
  ) |>
  rename(county = county_name)
|
||||||
|
|
||||||
|
|
||||||
|
# 05 – Save -------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Write the wide county table in both formats.
write_rds(wide, "data/processed/edu_offer_county.rds")
write_csv(wide, "data/processed/edu_offer_county.csv")

# Print a short summary: output paths plus the table's dimensions.
cat(
  paste0(
    "\nSaved:\n",
    " data/processed/edu_offer_county.rds\n",
    " data/processed/edu_offer_county.csv\n",
    "Rows: ", nrow(wide), " (counties)\n",
    "Cols: ", ncol(wide), "\n"
  )
)
|
||||||
Loading…
Add table
Reference in a new issue