diff --git a/ppt/.~lock.Ruralitic-Hig-20260526-eng.pptx# b/ppt/.~lock.Ruralitic-Hig-20260526-eng.pptx# deleted file mode 100644 index 519aa28..0000000 --- a/ppt/.~lock.Ruralitic-Hig-20260526-eng.pptx# +++ /dev/null @@ -1 +0,0 @@ -,pab,sunny,26.05.2026 08:21,file:///home/pab/.config/libreoffice/4; \ No newline at end of file diff --git a/ppt/Ruralitic-Hig-20260526-eng.pptx b/ppt/Ruralitic-Hig-20260526-eng.pptx index 684fae7..95ba00c 100644 Binary files a/ppt/Ruralitic-Hig-20260526-eng.pptx and b/ppt/Ruralitic-Hig-20260526-eng.pptx differ diff --git a/ppt/content.R b/ppt/content.R new file mode 100644 index 0000000..e95116a --- /dev/null +++ b/ppt/content.R @@ -0,0 +1,325 @@ +# ============================================================ +# PPT CONTENT — Empirical urban-rural typology of Swedish municipalities +# Correspondence Analysis + hierarchical clustering (2022 sampling) +# Six-cluster cut +# +# Run from the project root (ruralitic-qrm/). +# Figures are saved to ppt/figures/. Slide text is in the +# comments below each section header. 
+# ============================================================ + +library(tidyverse) +library(FactoMineR) +library(factoextra) +library(ggrepel) +library(showtext) + +font_add_google("Source Sans 3", "source_sans_3") +showtext_auto() + +theme_ppt <- theme_minimal(base_size = 14, base_family = "source_sans_3") + + theme( + panel.grid.minor = element_blank(), + panel.grid.major = element_line(colour = "grey92"), + legend.position = "bottom", + legend.title = element_text(face = "bold"), + plot.margin = margin(8, 12, 8, 8) + ) +theme_set(theme_ppt) + +cluster_labels <- c( + "1" = "Remote & peripheral", + "2" = "Central industrial towns", + "3" = "Peri-rural commuter belt", + "4" = "Regional service centres", + "5" = "Affluent suburbs & university satellites", + "6" = "Inner Stockholm core" +) +cluster_palette <- c( + "1" = "#B07F4F", + "2" = "#D7A86E", + "3" = "#7CB07C", + "4" = "#6FA8DC", + "5" = "#C2738B", + "6" = "#7A3A4F" +) + +county_names <- c( + "01" = "Stockholm", "03" = "Uppsala", "04" = "Södermanland", + "05" = "Östergötland", "06" = "Jönköping", "07" = "Kronoberg", + "08" = "Kalmar", "09" = "Gotland", "10" = "Blekinge", + "12" = "Skåne", "13" = "Halland", "14" = "Västra Götaland", + "17" = "Värmland", "18" = "Örebro", "19" = "Västmanland", + "20" = "Dalarna", "21" = "Gävleborg", "22" = "Västernorrland", + "23" = "Jämtland", "24" = "Västerbotten", "25" = "Norrbotten" +) + +# Load data +afc <- read_rds("data/processed/proportions_CA.rds") +hcpc <- read_rds("data/processed/proportions_HCPC_6.rds") +clusters <- read_csv("data/processed/cluster_assignment_6.csv", + show_col_types = FALSE) |> + rename(cluster = c6) |> + mutate(cluster_label = cluster_labels[as.character(cluster)]) + +panel_raw <- readxl::read_excel("data/Municipalities_db_2.xlsx", + col_types = "text", n_max = 290) |> + transmute(municipality, code = str_pad(code, 4, "left", "0")) + +dim1_pct <- round(afc$eig[1, 2], 1) +dim2_pct <- round(afc$eig[2, 2], 1) + +row_df <- 
as.data.frame(afc$row$coord[, 1:2]) |> + rownames_to_column("municipality") |> + left_join(clusters, by = "municipality") + + +# ============================================================ +# SLIDE 1 — Data & motivation +# Title: "Building an urban–rural typology from the data" +# +# Bullet points: +# • Sweden's 290 municipalities are routinely classified by +# administrative or population-size rules (SCB). These categories +# are imposed from outside the data. +# • This analysis asks instead: how do municipalities actually differ +# across structural dimensions? +# • Data source: Statistics Sweden, 2022 project sampling. +# • Six ACTIVE variable blocks: +# Education (4 attainment levels) +# Employment (16 activity sectors) +# Housing (rented / tenant-owned / owner-occupied) +# Workplace mobility (commuters in, commuters out, working locally) +# Migration (in- and outmigration) +# Demography (retirees, number of localities) +# • Two SUPPLEMENTARY blocks projected post-hoc: +# Educational provision (pre-school through HE, by ownership) +# Opinion (survey satisfaction with local schools) +# • Key pre-processing: block normalisation. Within each block, +# every municipality is rescaled to the same total, preventing +# Stockholm from dominating the analysis due to sheer size. +# +# No figure needed — use the variable block list as a visual +# schematic or table in the slide. +# ============================================================ + + +# ============================================================ +# SLIDE 2 — Correspondence analysis: the space of municipalities +# Title: "Two dimensions capture two-thirds of the variation" +# +# Bullet points: +# • CA places all 290 municipalities in a low-dimensional space +# where proximity = similarity on the active variables. +# • Dim 1 (rural–urban): Left pole → agriculture, mining & +# manufacturing, owner-occupied housing, upper-secondary education. 
+# Right pole → IT, finance, professional services, post-graduate +# attainment, apartment housing. +# • Dim 2 (labour-market self-containment): Top → residents work +# where they live (Göteborg, Malmö, Umeå). Bottom → outbound +# commuters, residential satellites (Knivsta, Salem, Staffanstorp). +# • Together Dim 1 + Dim 2 account for ~65% of total variability. +# +# Figure: slide2_biplot.png +# ============================================================ + +label_munis <- c("Stockholm", "Göteborg", "Malmö", "Uppsala", "Lund", "Umeå", + "Linköping", "Solna", "Danderyd", "Kiruna", "Gotland", + "Knivsta", "Falköping", "Tomelilla", "Skellefteå", "Piteå", + "Partille", "Sundbyberg", "Lindesberg") + +contribs <- as.data.frame(afc$col$contrib) |> + rownames_to_column("variable") |> + mutate(total = `Dim 1` + `Dim 2`) |> + arrange(desc(total)) |> + head(15) + +col_df <- as.data.frame(afc$col$coord[, 1:2]) |> + rownames_to_column("variable") |> + filter(variable %in% contribs$variable) + +fig_biplot <- ggplot() + + geom_hline(yintercept = 0, linetype = "dashed", colour = "grey60") + + geom_vline(xintercept = 0, linetype = "dashed", colour = "grey60") + + geom_point(data = row_df, + aes(`Dim 1`, `Dim 2`, colour = cluster_label), + alpha = 0.75, size = 2.2) + + geom_text_repel( + data = row_df |> filter(municipality %in% label_munis), + aes(`Dim 1`, `Dim 2`, label = municipality), + size = 3.5, colour = "grey20", family = "source_sans_3", + max.overlaps = 30, segment.size = 0.25 + ) + + geom_point(data = col_df, aes(`Dim 1`, `Dim 2`), + shape = 17, colour = "firebrick", size = 3) + + geom_text_repel( + data = col_df, aes(`Dim 1`, `Dim 2`, label = variable), + colour = "firebrick", size = 3, family = "source_sans_3", + max.overlaps = 30, segment.size = 0.25 + ) + + scale_colour_manual(values = cluster_palette |> set_names(cluster_labels), + name = NULL) + + labs( + x = paste0("Dim 1 — rural–urban (", dim1_pct, "%)"), + y = paste0("Dim 2 — labour-market self-containment (", 
dim2_pct, "%)") + ) + + guides(colour = guide_legend(nrow = 2)) + +ggsave("ppt/figures/slide2_biplot.png", fig_biplot, + width = 11, height = 7, dpi = 150) +message("Saved: ppt/figures/slide2_biplot.png") + + +# ============================================================ +# SLIDE 3 — Six empirical types of municipality +# Title: "Six coherent types emerge from Ward clustering" +# +# Cluster descriptions: +# +# Cl 1 — Remote & peripheral +# Most rural extreme. Agriculture/forestry/fishing, sparsely +# populated, own full educational infrastructure at every age +# (komvux, preschool). Examples: Kiruna, Piteå, Skellefteå, Gotland. +# +# Cl 2 — Central industrial towns +# Rural-industrial, but not as remote. Mining & manufacturing +# dominant. Owner-occupied housing, upper-secondary ceiling. +# Examples: Falköping, Lindesberg, Hedemora. +# +# Cl 3 — Peri-rural commuter belt +# Small southern and central rural municipalities. Many residents +# commute out. Owner-occupied, construction and agriculture visible. +# Below-average satisfaction with local high schools. +# Examples: Tomelilla, Osby, Klippan, Sölvesborg. +# +# Cl 4 — Regional service centres +# Mid-sized cities with self-contained labour markets. Rented and +# tenant-owned housing, public administration, post-secondary +# attainment. Examples: Göteborg, Malmö, Umeå, Linköping, Kalmar. +# +# Cl 5 — Affluent suburbs & university satellites +# Outbound commuters, post-secondary attainment, tenant-owned +# housing. Residential satellites whose labour markets sit elsewhere. +# Examples: Lund, Mölndal, Partille, Huddinge, Knivsta, Kungsbacka. +# +# Cl 6 — Inner Stockholm core +# Inbound commuting, IT and finance employment, apartment housing, +# post-graduate attainment at extreme levels. These are destinations +# in the commuting network, not origins. +# Examples: Stockholm, Solna, Sundbyberg, Danderyd, Lidingö, Täby. 
+# +# Figures: slide3_clusters.png (main) · slide3_dendrogram.png (inset) +# ============================================================ + +centroids <- row_df |> + group_by(cluster_label) |> + summarise(`Dim 1` = mean(`Dim 1`), `Dim 2` = mean(`Dim 2`), .groups = "drop") + +fig_clusters <- ggplot(row_df, aes(`Dim 1`, `Dim 2`, colour = cluster_label)) + + geom_hline(yintercept = 0, linetype = "dashed", colour = "grey70") + + geom_vline(xintercept = 0, linetype = "dashed", colour = "grey70") + + geom_point(alpha = 0.5, size = 2) + + stat_ellipse(level = 0.68, linewidth = 0.7) + + geom_point(data = centroids, size = 5, shape = 18, colour = "grey15") + + geom_label_repel( + data = centroids, aes(label = cluster_label), + fill = "white", colour = "black", + family = "source_sans_3", size = 3.5, + label.size = 0.25, label.padding = unit(0.3, "lines"), + min.segment.length = 0, max.overlaps = 20 + ) + + scale_colour_manual(values = cluster_palette |> set_names(cluster_labels), + guide = "none") + + labs( + x = paste0("Dim 1 — rural–urban (", dim1_pct, "%)"), + y = paste0("Dim 2 — labour-market self-containment (", dim2_pct, "%)") + ) + +ggsave("ppt/figures/slide3_clusters.png", fig_clusters, + width = 11, height = 7, dpi = 150) +message("Saved: ppt/figures/slide3_clusters.png") + +# Dendrogram with 6-cluster cut +tree <- hcpc$call$t$tree +h_max <- max(tree$height) + +fig_dendro <- fviz_dend( + tree, k = 6, show_labels = FALSE, + rect = TRUE, + rect_border = unname(cluster_palette), + rect_fill = TRUE, + k_colors = unname(cluster_palette), + main = "", ylab = "Merge distance (Ward)" +) + + coord_cartesian(ylim = c(0, h_max * 1.05)) + + guides(linewidth = "none") + + theme(plot.title = element_blank(), + text = element_text(family = "source_sans_3", size = 13)) + +ggsave("ppt/figures/slide3_dendrogram.png", fig_dendro, + width = 11, height = 4.5, dpi = 150) +message("Saved: ppt/figures/slide3_dendrogram.png") + + +# 
============================================================ +# SLIDE 4 — Geography & key takeaways +# Title: "The typology maps coherently onto Swedish geography" +# +# Bullet points: +# • Clusters 1 & 2 (both rural types) dominate almost everywhere +# outside the metropolitan areas — the rural majority. +# • Cluster 6 (inner Stockholm core) is confined to Stockholm and +# Uppsala counties; cluster 5 (affluent suburbs) spreads into +# Skåne (Lund) and Västra Götaland (Mölndal, Partille). +# • Cluster 4 (regional centres) appears thinly but consistently +# across most counties — one or two per county. +# +# Key takeaways: +# • Biggest empirical break: NOT metro vs. non-metro, but among +# three kinds of rural — remote & peripheral (Cl. 1), central +# industrial towns (Cl. 2), and peri-rural commuters (Cl. 3). +# • At the urban end, two kinds of city: self-contained regional +# centres (Cl. 4) vs. the metropolitan region (Cl. 5 & 6). +# • Within the metropolitan region, the data cleanly separate +# residential suburbs that send commuters OUT (Cl. 5) from the +# inner core that receives them (Cl. 6). 
+# +# Figure: slide4_county.png +# ============================================================ + +county_order <- c( + "Skåne", "Blekinge", "Halland", "Kronoberg", "Kalmar", "Gotland", + "Jönköping", "Östergötland", "Södermanland", "Västra Götaland", + "Örebro", "Västmanland", "Stockholm", "Uppsala", "Dalarna", "Värmland", + "Gävleborg", "Västernorrland", "Jämtland", "Västerbotten", "Norrbotten" +) + +clusters_geo <- clusters |> + left_join(panel_raw, by = "municipality") |> + mutate(county = county_names[str_sub(code, 1, 2)]) |> + filter(!is.na(county)) + +fig_county <- clusters_geo |> + count(county, cluster_label) |> + mutate( + county = factor(county, levels = county_order), + cluster_label = factor(cluster_label, levels = cluster_labels) + ) |> + ggplot(aes(county, n, fill = cluster_label)) + + geom_col(position = "fill") + + scale_fill_manual(values = cluster_palette |> set_names(cluster_labels), + name = NULL) + + scale_y_continuous(labels = scales::percent_format(), expand = c(0, 0)) + + labs(x = NULL, y = "Share of municipalities") + + theme( + axis.text.x = element_text(angle = 45, hjust = 1, size = 11), + legend.position = "bottom", + legend.text = element_text(size = 10) + ) + + guides(fill = guide_legend(nrow = 2)) + +ggsave("ppt/figures/slide4_county.png", fig_county, + width = 12, height = 6.5, dpi = 150) +message("Saved: ppt/figures/slide4_county.png") + +message("\nAll figures written to ppt/figures/. 
Ready to paste into the slide deck.") diff --git a/ppt/figures/slide2_biplot.png b/ppt/figures/slide2_biplot.png new file mode 100644 index 0000000..ab5938d Binary files /dev/null and b/ppt/figures/slide2_biplot.png differ diff --git a/ppt/figures/slide3_clusters.png b/ppt/figures/slide3_clusters.png new file mode 100644 index 0000000..cb06437 Binary files /dev/null and b/ppt/figures/slide3_clusters.png differ diff --git a/ppt/figures/slide3_dendrogram.png b/ppt/figures/slide3_dendrogram.png new file mode 100644 index 0000000..60d14f6 Binary files /dev/null and b/ppt/figures/slide3_dendrogram.png differ diff --git a/ppt/figures/slide4_county.png b/ppt/figures/slide4_county.png new file mode 100644 index 0000000..09b8759 Binary files /dev/null and b/ppt/figures/slide4_county.png differ