aggregate counties

This commit is contained in:
pab 2026-06-04 12:47:08 +02:00
parent 5a991f1e0e
commit c724bba819
4 changed files with 1465 additions and 0 deletions

View file

@ -0,0 +1,22 @@
county_code,county,forskoleklass_n_total,grundskola_n_total,anpassad_grundskola_n_total,specialskola_n_total,sameskola_n_total,gymnasieskola_n_total,anpassad_gymnasieskola_n_total,komvux_n_total,sfi_n_total,university_n_total,university_college_n_total,forskoleklass_n_public,grundskola_n_public,anpassad_grundskola_n_public,specialskola_n_public,sameskola_n_public,gymnasieskola_n_public,anpassad_gymnasieskola_n_public,komvux_n_public,sfi_n_public,university_n_public,university_college_n_public,forskoleklass_n_private,grundskola_n_private,anpassad_grundskola_n_private,specialskola_n_private,sameskola_n_private,gymnasieskola_n_private,anpassad_gymnasieskola_n_private,komvux_n_private,sfi_n_private,university_n_private,university_college_n_private
01,Stockholm,632,755,139,3,0,209,48,66,43,3,12,429,481,109,3,0,78,28,65,43,3,12,203,274,30,0,0,131,20,1,0,0,0
03,Uppsala,119,148,16,0,0,34,8,23,12,2,1,101,117,13,0,0,15,5,23,12,2,1,18,31,3,0,0,19,3,0,0,0,0
04,Södermanland,103,123,24,0,0,32,10,19,9,0,1,84,95,24,0,0,20,8,19,9,0,1,19,28,0,0,0,12,2,0,0,0,0
05,Östergötland,155,237,32,0,0,73,15,31,14,1,0,138,206,30,0,0,56,14,31,14,1,0,17,31,2,0,0,17,1,0,0,0,0
06,Jönköping,142,196,35,0,0,56,13,29,13,0,1,133,182,31,0,0,46,11,29,13,0,1,9,14,4,0,0,10,2,0,0,0,0
07,Kronoberg,91,122,19,0,0,34,6,19,8,0,1,80,107,19,0,0,25,6,19,8,0,1,11,15,0,0,0,9,0,0,0,0,0
08,Kalmar,103,132,18,0,0,29,4,19,10,0,1,95,119,17,0,0,22,4,19,10,0,1,8,13,1,0,0,7,0,0,0,0,0
09,Gotland,33,37,3,0,0,6,1,2,2,0,0,29,33,3,0,0,5,1,2,2,0,0,4,4,0,0,0,1,0,0,0,0,0
10,Blekinge,64,81,7,0,0,21,4,12,5,0,1,52,68,7,0,0,19,4,12,5,0,1,12,13,0,0,0,2,0,0,0,0,0
12,Skåne,453,588,73,1,0,151,26,102,46,2,2,359,471,68,1,0,78,23,102,46,2,2,94,117,5,0,0,73,3,0,0,0,0
13,Halland,122,199,14,0,0,53,8,13,7,0,1,104,177,13,0,0,38,8,13,7,0,1,18,22,1,0,0,15,0,0,0,0,0
14,Västra Götaland,565,811,145,1,0,255,41,142,68,3,3,482,707,138,1,0,186,37,142,68,3,3,83,104,7,0,0,69,4,0,0,0,0
17,Värmland,103,154,37,0,0,40,11,29,14,1,0,93,140,36,0,0,28,11,29,14,1,0,10,14,1,0,0,12,0,0,0,0,0
18,Örebro,105,140,19,3,0,52,7,38,12,1,0,93,121,19,3,0,40,6,38,12,1,0,12,19,0,0,0,12,1,0,0,0,0
19,Västmanland,91,119,25,0,0,34,10,22,10,0,1,78,99,24,0,0,18,9,22,10,0,1,13,20,1,0,0,16,1,0,0,0,0
20,Dalarna,125,152,28,0,0,30,10,22,11,0,2,113,136,27,0,0,21,9,22,11,0,2,12,16,1,0,0,9,1,0,0,0,0
21,Gävleborg,111,141,17,0,0,35,4,23,10,0,1,93,116,16,0,0,21,4,23,10,0,1,18,25,1,0,0,14,0,0,0,0,0
22,Västernorrland,102,127,14,1,0,34,5,29,12,0,1,89,109,14,1,0,25,5,29,12,0,1,13,18,0,0,0,9,0,0,0,0,0
23,Jämtland,75,86,9,0,0,24,2,14,8,0,1,69,80,9,0,0,21,2,14,8,0,1,6,6,0,0,0,3,0,0,0,0,0
24,Västerbotten,129,162,22,1,0,39,7,31,15,2,0,115,146,21,1,0,32,7,31,15,2,0,14,16,1,0,0,7,0,0,0,0,0
25,Norrbotten,120,159,23,0,4,41,11,32,16,0,0,102,138,23,0,4,37,11,32,16,0,0,18,21,0,0,0,4,0,0,0,0,0
1 county_code county forskoleklass_n_total grundskola_n_total anpassad_grundskola_n_total specialskola_n_total sameskola_n_total gymnasieskola_n_total anpassad_gymnasieskola_n_total komvux_n_total sfi_n_total university_n_total university_college_n_total forskoleklass_n_public grundskola_n_public anpassad_grundskola_n_public specialskola_n_public sameskola_n_public gymnasieskola_n_public anpassad_gymnasieskola_n_public komvux_n_public sfi_n_public university_n_public university_college_n_public forskoleklass_n_private grundskola_n_private anpassad_grundskola_n_private specialskola_n_private sameskola_n_private gymnasieskola_n_private anpassad_gymnasieskola_n_private komvux_n_private sfi_n_private university_n_private university_college_n_private
2 01 Stockholm 632 755 139 3 0 209 48 66 43 3 12 429 481 109 3 0 78 28 65 43 3 12 203 274 30 0 0 131 20 1 0 0 0
3 03 Uppsala 119 148 16 0 0 34 8 23 12 2 1 101 117 13 0 0 15 5 23 12 2 1 18 31 3 0 0 19 3 0 0 0 0
4 04 Södermanland 103 123 24 0 0 32 10 19 9 0 1 84 95 24 0 0 20 8 19 9 0 1 19 28 0 0 0 12 2 0 0 0 0
5 05 Östergötland 155 237 32 0 0 73 15 31 14 1 0 138 206 30 0 0 56 14 31 14 1 0 17 31 2 0 0 17 1 0 0 0 0
6 06 Jönköping 142 196 35 0 0 56 13 29 13 0 1 133 182 31 0 0 46 11 29 13 0 1 9 14 4 0 0 10 2 0 0 0 0
7 07 Kronoberg 91 122 19 0 0 34 6 19 8 0 1 80 107 19 0 0 25 6 19 8 0 1 11 15 0 0 0 9 0 0 0 0 0
8 08 Kalmar 103 132 18 0 0 29 4 19 10 0 1 95 119 17 0 0 22 4 19 10 0 1 8 13 1 0 0 7 0 0 0 0 0
9 09 Gotland 33 37 3 0 0 6 1 2 2 0 0 29 33 3 0 0 5 1 2 2 0 0 4 4 0 0 0 1 0 0 0 0 0
10 10 Blekinge 64 81 7 0 0 21 4 12 5 0 1 52 68 7 0 0 19 4 12 5 0 1 12 13 0 0 0 2 0 0 0 0 0
11 12 Skåne 453 588 73 1 0 151 26 102 46 2 2 359 471 68 1 0 78 23 102 46 2 2 94 117 5 0 0 73 3 0 0 0 0
12 13 Halland 122 199 14 0 0 53 8 13 7 0 1 104 177 13 0 0 38 8 13 7 0 1 18 22 1 0 0 15 0 0 0 0 0
13 14 Västra Götaland 565 811 145 1 0 255 41 142 68 3 3 482 707 138 1 0 186 37 142 68 3 3 83 104 7 0 0 69 4 0 0 0 0
14 17 Värmland 103 154 37 0 0 40 11 29 14 1 0 93 140 36 0 0 28 11 29 14 1 0 10 14 1 0 0 12 0 0 0 0 0
15 18 Örebro 105 140 19 3 0 52 7 38 12 1 0 93 121 19 3 0 40 6 38 12 1 0 12 19 0 0 0 12 1 0 0 0 0
16 19 Västmanland 91 119 25 0 0 34 10 22 10 0 1 78 99 24 0 0 18 9 22 10 0 1 13 20 1 0 0 16 1 0 0 0 0
17 20 Dalarna 125 152 28 0 0 30 10 22 11 0 2 113 136 27 0 0 21 9 22 11 0 2 12 16 1 0 0 9 1 0 0 0 0
18 21 Gävleborg 111 141 17 0 0 35 4 23 10 0 1 93 116 16 0 0 21 4 23 10 0 1 18 25 1 0 0 14 0 0 0 0 0
19 22 Västernorrland 102 127 14 1 0 34 5 29 12 0 1 89 109 14 1 0 25 5 29 12 0 1 13 18 0 0 0 9 0 0 0 0 0
20 23 Jämtland 75 86 9 0 0 24 2 14 8 0 1 69 80 9 0 0 21 2 14 8 0 1 6 6 0 0 0 3 0 0 0 0 0
21 24 Västerbotten 129 162 22 1 0 39 7 31 15 2 0 115 146 21 1 0 32 7 31 15 2 0 14 16 1 0 0 7 0 0 0 0 0
22 25 Norrbotten 120 159 23 0 4 41 11 32 16 0 0 102 138 23 0 4 37 11 32 16 0 0 18 21 0 0 0 4 0 0 0 0 0

Binary file not shown.

1201
ppt/Presentation June 10.qmd Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,242 @@
# =============================================================================
# skolverket_by_county.R · Educational offer dataset by COUNTY
# =============================================================================
#
# skolverket.R produces ONE ROW PER MUNICIPALITY. This script produces
# ONE ROW PER COUNTY by aggregating the same school-unit and higher-education
# data up one administrative level.
#
# KEY IDEA -----------------------------------------------------------------
#
# Swedish municipality codes are 4 digits, structured like this:
#
#   1 4 8 0   (= Göteborg)
#   ^ ^ ^ ^
#   └─┘ │ │   digits 1-2: county (län), 14 → Västra Götaland
#       └─┘   digits 3-4: position inside the county
#
# So aggregating municipalities into counties is just a regrouping: group by
# `str_sub(muni_code, 1, 2)` instead of `muni_code`. Reading the xlsx,
# classifying ownership, counting and pivoting wide all stay the same as in
# skolverket.R.
#
# Sources:
# 1. data/skolenhetsadresser.xlsx Skolverket school-unit registry
# Sheets: Förskoleklass, Grundskola, Anpassad grundskola, Specialskola,
# Sameskola, Gymnasieskola, Anpassad gymnasieskola, Komvux
# 2. Hardcoded list UKÄ register of higher-education
# institutions (~40 institution-sites)
#
# Output:
# data/processed/edu_offer_county.rds : county × indicator/count (wide)
# data/processed/edu_offer_county.csv : same, plain text
library(tidyverse)
library(readxl)
# Location of the Skolverket school-unit registry workbook read in section 01.
XLSX_PATH <- "data/skolenhetsadresser.xlsx"

# Lookup table: two-digit county code -> county name, one entry per county
# (21 in total). The code sequence has gaps (e.g. 02, 11, 15 are unused);
# that is harmless because every lookup is by code, never by position.
county_names <- c(
  "01" = "Stockholm",
  "03" = "Uppsala",
  "04" = "Södermanland",
  "05" = "Östergötland",
  "06" = "Jönköping",
  "07" = "Kronoberg",
  "08" = "Kalmar",
  "09" = "Gotland",
  "10" = "Blekinge",
  "12" = "Skåne",
  "13" = "Halland",
  "14" = "Västra Götaland",
  "17" = "Värmland",
  "18" = "Örebro",
  "19" = "Västmanland",
  "20" = "Dalarna",
  "21" = "Gävleborg",
  "22" = "Västernorrland",
  "23" = "Jämtland",
  "24" = "Västerbotten",
  "25" = "Norrbotten"
)
# 00 Helpers ----------------------------------------------------------------
#
# Each helper does ONE small step. They get chained with the pipe in 02 and 04.
# (a) Derive county_code (first two digits) and county_name from muni_code.
#     Works on any df that already has a `muni_code` column.
#
#     - `muni_code` is coerced to character and left-padded with "0" to four
#       characters, so a code that was read as the number 180 becomes "0180".
#     - `county_name` is looked up in the file-level `county_names` vector.
#       The lookup result is passed through `unname()` so the new column is a
#       plain character vector rather than one carrying the county codes as a
#       names attribute.
#     - Codes that are missing or not present in `county_names` get an NA
#       `county_name`. A warning lists them, because such rows cannot be
#       matched to any of the known counties further down the pipeline.
#
#     Returns `df` with `muni_code` normalised and the two columns added.
with_county <- function(df) {
  out <- df |>
    mutate(
      muni_code   = str_pad(as.character(muni_code), 4, "left", "0"),
      county_code = str_sub(muni_code, 1, 2),
      county_name = unname(county_names[county_code])
    )

  unknown_codes <- setdiff(unique(out$county_code), names(county_names))
  if (length(unknown_codes) > 0) {
    warning(
      "Rows with missing or unknown county code(s): ",
      paste(unknown_codes, collapse = ", "),
      call. = FALSE
    )
  }

  out
}
# (b) Add an `ownership` column derived from HUVUDMANNATYP: the owner types
#     "Kommunal", "Region" and "Statlig" count as "public"; every other value
#     (including a missing one, since `%in%` never yields NA) is "private".
classify_ownership <- function(df) {
  public_owner_types <- c("Kommunal", "Region", "Statlig")
  mutate(
    df,
    ownership = if_else(
      HUVUDMANNATYP %in% public_owner_types,
      "public",
      "private"
    )
  )
}
# (c) Turn one raw school-unit sheet into a clean table: give the two
#     "BELÄGEN I KOMMUN" columns short names, then attach the county columns
#     (with_county) and the public/private flag (classify_ownership).
prepare_sheet <- function(df) {
  renamed <- rename(
    df,
    muni_code = `BELÄGEN I KOMMUN (KOD)`,
    muni_name = `BELÄGEN I KOMMUN (NAMN)`
  )
  classify_ownership(with_county(renamed))
}
# (d) Aggregate a prepared df to a single row per county with
#     n_total / n_public / n_private columns.
#
#     df          a data frame with `county_code`, `county_name` and
#                 `ownership` ("public" / "private") columns.
#     type_label  string stored in the `type` column of every output row.
#
#     Returns a tibble with columns county_code, county_name, type, n_total,
#     n_public, n_private (integer counts), sorted by county.
#
#     The counts come from a grouped summarise rather than
#     count() + complete() + pivot_wider(): summing the two conditions always
#     produces both columns, so a type with no private units anywhere
#     (e.g. specialskola) still gets n_private = 0, and an EMPTY input (for
#     instance a filter that matches no rows) yields a zero-row result of the
#     right shape instead of failing on missing `public` / `private` columns.
count_by_county <- function(df, type_label) {
  df |>
    group_by(county_code, county_name) |>
    summarise(
      # na.rm: a missing ownership value counts towards neither category.
      n_public  = sum(ownership == "public", na.rm = TRUE),
      n_private = sum(ownership == "private", na.rm = TRUE),
      .groups = "drop"
    ) |>
    transmute(
      county_code, county_name,
      type    = type_label,
      n_total = n_public + n_private,
      n_public,
      n_private
    )
}
# 01 Read all xlsx sheets ---------------------------------------------------
# Names are the type labels used in the output column names; values are the
# worksheet names inside XLSX_PATH.
sheet_map <- c(
  "forskoleklass"          = "Förskoleklass",
  "grundskola"             = "Grundskola",
  "anpassad_grundskola"    = "Anpassad grundskola",
  "specialskola"           = "Specialskola",
  "sameskola"              = "Sameskola",
  "gymnasieskola"          = "Gymnasieskola",
  "anpassad_gymnasieskola" = "Anpassad gymnasieskola",
  "komvux"                 = "Komvux"
)

# Read and prepare every sheet. The result is a list of prepared data frames
# that keeps the names of `sheet_map`, i.e. it is keyed by type label.
raw <- map(sheet_map, \(sheet) {
  cat("Reading sheet:", sheet, "\n")
  prepare_sheet(read_excel(XLSX_PATH, sheet = sheet))
})
# 02 Aggregate to county level ----------------------------------------------
# Run the county aggregator once per sheet. The list names of `raw` supply
# the type labels, and the per-type results are stacked into one long table.
unit_counts <- bind_rows(imap(raw, count_by_county))

# SFI has no sheet of its own: it is the subset of Komvux units whose
# SVENSKA FÖR INVANDRARE column equals "J". Those rows go through the same
# county aggregator under the type label "sfi" and are appended.
sfi_counts <- count_by_county(
  filter(raw[["komvux"]], `SVENSKA FÖR INVANDRARE` == "J"),
  "sfi"
)
unit_counts <- bind_rows(unit_counts, sfi_counts)
# 03 Higher-education institutions (UKÄ list, hardcoded) --------------------
# Hand-maintained table with one row per institution SITE: an institution
# with campuses in several municipalities appears once per campus (e.g. the
# four SLU rows, the two Dalarna University rows).
#
# Columns:
#   institution  display name of the institution / site (not used downstream
#                in this script; kept for readability)
#   muni_code    4-digit municipality code as a string; the first two digits
#                are the county code
#   type_he      "university" or "university_college"; becomes the `type`
#                label when the rows are counted per county
#
# NOTE(review): the university / university_college split is hardcoded and
# may lag behind the current UKÄ register. Some rows tagged
# "university_college" (e.g. Malmö, Linnaeus, Mid Sweden, Mälardalen) may
# hold university status today. Verify before relying on the split.
he_institutions <- tribble(
~institution , ~muni_code , ~type_he ,
# ---- State universities ----
"Uppsala University" , "0380" , "university" ,
"Stockholm University" , "0180" , "university" ,
"Lund University" , "1281" , "university" ,
"University of Gothenburg" , "1480" , "university" ,
"Umeå University" , "2480" , "university" ,
"Linköping University" , "0580" , "university" ,
"Örebro University" , "1880" , "university" ,
"Karlstad University" , "1780" , "university" ,
# ---- State specialised universities ----
"KTH Royal Institute of Technology" , "0180" , "university" ,
"Karolinska Institutet" , "0184" , "university" ,
"Chalmers University of Technology" , "1480" , "university" ,
"SLU Uppsala" , "0380" , "university" ,
"SLU Umeå" , "2480" , "university" ,
"SLU Alnarp (Lomma)" , "1262" , "university" ,
"SLU Skara" , "1495" , "university" ,
# ---- State university colleges ----
"Blekinge Institute of Technology" , "1080" , "university_college" ,
"Dalarna University Falun" , "2080" , "university_college" ,
"Dalarna University Borlänge" , "2081" , "university_college" ,
"University of Gävle" , "2180" , "university_college" ,
"Halmstad University" , "1380" , "university_college" ,
"Kristianstad University" , "1290" , "university_college" ,
"Linnaeus University Växjö" , "0780" , "university_college" ,
"Linnaeus University Kalmar" , "0880" , "university_college" ,
"Malmö University" , "1280" , "university_college" ,
"Mälardalen University Västerås" , "1980" , "university_college" ,
"Mälardalen University Eskilstuna" , "0484" , "university_college" ,
"Mid Sweden University Sundsvall" , "2281" , "university_college" ,
"Mid Sweden University Östersund" , "2380" , "university_college" ,
"Södertörn University" , "0126" , "university_college" ,
"University of Borås" , "1490" , "university_college" ,
"University of Skövde" , "1496" , "university_college" ,
"University West" , "1488" , "university_college" ,
# ---- Private accredited institutions ----
"Stockholm School of Economics" , "0180" , "university_college" ,
"Jönköping University" , "0680" , "university_college" ,
# ---- Art, music, design, sport ----
"Konstfack" , "0180" , "university_college" ,
"Royal University College of Music (KMH)" , "0180" , "university_college" ,
"Stockholm University of the Arts" , "0180" , "university_college" ,
"Royal Institute of Art" , "0180" , "university_college" ,
"Beckmans College of Design" , "0180" , "university_college" ,
"Swedish School of Sport and Health Sciences" , "0180" , "university_college" ,
# ---- Defence / health ----
"Swedish Defence University" , "0180" , "university_college" ,
"Sophiahemmet University" , "0180" , "university_college" ,
"Ersta Sköndal Bräcke University College" , "0180" , "university_college" ,
"Röda Korsets Högskola" , "0180" , "university_college" ,
"Newmaninstitutet" , "0380" , "university_college"
)
# Count higher-education sites per county and type. Every site is booked as
# public (n_public = n_total, n_private = 0), which gives the same columns
# as `unit_counts` so the two tables can be stacked with bind_rows later.
# NOTE(review): `he_institutions` contains a group commented
# "Private accredited institutions"; those rows are counted as public here
# as well. Confirm this is intended.
he_counts <- he_institutions |>
  with_county() |>
  count(county_code, county_name, type = type_he, name = "n_total") |>
  mutate(
    n_public  = n_total,
    n_private = 0L
  )
# 04 Combine and reshape wide -----------------------------------------------
# Stack school-unit and higher-education counts into one long table
# (one row per county × type that actually occurs).
long <- bind_rows(unit_counts, he_counts)

# Skeleton of all 21 counties × every type present in `long`. Joining the
# counts onto it guarantees that each county appears in the output, with 0
# for any type it does not have.
counties <- tibble(
  county_code = names(county_names),
  county_name = unname(county_names)
)
all_types <- unique(long[["type"]])

# One row per county; one column per (type, measure) pair, named
# "<type>_n_total", "<type>_n_public", "<type>_n_private".
wide <- expand_grid(counties, type = all_types) |>
  left_join(long, by = c("county_code", "county_name", "type")) |>
  replace_na(list(n_total = 0L, n_public = 0L, n_private = 0L)) |>
  rename(county = county_name) |>
  pivot_wider(
    names_from = type,
    values_from = c(n_total, n_public, n_private),
    names_glue = "{type}_{.value}"
  )
# 05 Save -------------------------------------------------------------------
# Write the wide county table in both formats, then print a short summary
# (paths written, number of rows and columns) to the console.
rds_path <- "data/processed/edu_offer_county.rds"
csv_path <- "data/processed/edu_offer_county.csv"

write_rds(wide, rds_path)
write_csv(wide, csv_path)

cat(
  "\nSaved:\n",
  " ", rds_path, "\n",
  " ", csv_path, "\n",
  "Rows: ", nrow(wide), " (counties)\n",
  "Cols: ", ncol(wide), "\n",
  sep = ""
)