ruralitic-qrm/ppt/content.R

325 lines
13 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# ============================================================
# PPT CONTENT — Empirical urban-rural typology of Swedish municipalities
# Correspondence Analysis + hierarchical clustering (2022 sampling)
# Six-cluster cut
#
# Run from the project root (ruralitic-qrm/).
# Figures are saved to ppt/figures/. Slide text is in the
# comments below each section header.
# ============================================================
library(tidyverse)
library(FactoMineR)
library(factoextra)
library(ggrepel)
library(showtext)
font_add_google("Source Sans 3", "source_sans_3")
showtext_auto()
theme_ppt <- theme_minimal(base_size = 14, base_family = "source_sans_3") +
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_line(colour = "grey92"),
legend.position = "bottom",
legend.title = element_text(face = "bold"),
plot.margin = margin(8, 12, 8, 8)
)
theme_set(theme_ppt)
cluster_labels <- c(
"1" = "Remote & peripheral",
"2" = "Central industrial towns",
"3" = "Peri-rural commuter belt",
"4" = "Regional service centres",
"5" = "Affluent suburbs & university satellites",
"6" = "Inner Stockholm core"
)
cluster_palette <- c(
"1" = "#B07F4F",
"2" = "#D7A86E",
"3" = "#7CB07C",
"4" = "#6FA8DC",
"5" = "#C2738B",
"6" = "#7A3A4F"
)
county_names <- c(
"01" = "Stockholm", "03" = "Uppsala", "04" = "Södermanland",
"05" = "Östergötland", "06" = "Jönköping", "07" = "Kronoberg",
"08" = "Kalmar", "09" = "Gotland", "10" = "Blekinge",
"12" = "Skåne", "13" = "Halland", "14" = "Västra Götaland",
"17" = "Värmland", "18" = "Örebro", "19" = "Västmanland",
"20" = "Dalarna", "21" = "Gävleborg", "22" = "Västernorrland",
"23" = "Jämtland", "24" = "Västerbotten", "25" = "Norrbotten"
)
# Load data
afc <- read_rds("data/processed/proportions_CA.rds")
hcpc <- read_rds("data/processed/proportions_HCPC_6.rds")
clusters <- read_csv("data/processed/cluster_assignment_6.csv",
show_col_types = FALSE) |>
rename(cluster = c6) |>
mutate(cluster_label = cluster_labels[as.character(cluster)])
panel_raw <- readxl::read_excel("data/Municipalities_db_2.xlsx",
col_types = "text", n_max = 290) |>
transmute(municipality, code = str_pad(code, 4, "left", "0"))
dim1_pct <- round(afc$eig[1, 2], 1)
dim2_pct <- round(afc$eig[2, 2], 1)
row_df <- as.data.frame(afc$row$coord[, 1:2]) |>
rownames_to_column("municipality") |>
left_join(clusters, by = "municipality")
# ============================================================
# SLIDE 1 — Data & motivation
# Title: "Building an urbanrural typology from the data"
#
# Bullet points:
# • Sweden's 290 municipalities are routinely classified by
# administrative or population-size rules (SCB). These categories
# are imposed from outside the data.
# • This analysis asks instead: how do municipalities actually differ
# across structural dimensions?
# • Data source: Statistics Sweden, 2022 project sampling.
# • Six ACTIVE variable blocks:
# Education (4 attainment levels)
# Employment (16 activity sectors)
# Housing (rented / tenant-owned / owner-occupied)
# Workplace mobility (commuters in, commuters out, working locally)
# Migration (in- and outmigration)
# Demography (retirees, number of localities)
# • Two SUPPLEMENTARY blocks projected post-hoc:
# Educational provision (pre-school through HE, by ownership)
# Opinion (survey satisfaction with local schools)
# • Key pre-processing: block normalisation. Within each block,
# every municipality is rescaled to the same total, preventing
# Stockholm from dominating the analysis due to sheer size.
#
# No figure needed — use the variable block list as a visual
# schematic or table in the slide.
# ============================================================
# ============================================================
# SLIDE 2 — Correspondence analysis: the space of municipalities
# Title: "Two dimensions capture two-thirds of the variation"
#
# Bullet points:
# • CA places all 290 municipalities in a low-dimensional space
# where proximity = similarity on the active variables.
# • Dim 1 (ruralurban): Left pole → agriculture, mining &
# manufacturing, owner-occupied housing, upper-secondary education.
# Right pole → IT, finance, professional services, post-graduate
# attainment, apartment housing.
# • Dim 2 (labour-market self-containment): Top → residents work
# where they live (Göteborg, Malmö, Umeå). Bottom → outbound
# commuters, residential satellites (Knivsta, Salem, Staffanstorp).
# • Together Dim 1 + Dim 2 account for ~65% of total variability.
#
# Figure: slide2_biplot.png
# ============================================================
label_munis <- c("Stockholm", "Göteborg", "Malmö", "Uppsala", "Lund", "Umeå",
"Linköping", "Solna", "Danderyd", "Kiruna", "Gotland",
"Knivsta", "Falköping", "Tomelilla", "Skellefteå", "Piteå",
"Partille", "Sundbyberg", "Lindesberg")
contribs <- as.data.frame(afc$col$contrib) |>
rownames_to_column("variable") |>
mutate(total = `Dim 1` + `Dim 2`) |>
arrange(desc(total)) |>
head(15)
col_df <- as.data.frame(afc$col$coord[, 1:2]) |>
rownames_to_column("variable") |>
filter(variable %in% contribs$variable)
fig_biplot <- ggplot() +
geom_hline(yintercept = 0, linetype = "dashed", colour = "grey60") +
geom_vline(xintercept = 0, linetype = "dashed", colour = "grey60") +
geom_point(data = row_df,
aes(`Dim 1`, `Dim 2`, colour = cluster_label),
alpha = 0.75, size = 2.2) +
geom_text_repel(
data = row_df |> filter(municipality %in% label_munis),
aes(`Dim 1`, `Dim 2`, label = municipality),
size = 3.5, colour = "grey20", family = "source_sans_3",
max.overlaps = 30, segment.size = 0.25
) +
geom_point(data = col_df, aes(`Dim 1`, `Dim 2`),
shape = 17, colour = "firebrick", size = 3) +
geom_text_repel(
data = col_df, aes(`Dim 1`, `Dim 2`, label = variable),
colour = "firebrick", size = 3, family = "source_sans_3",
max.overlaps = 30, segment.size = 0.25
) +
scale_colour_manual(values = cluster_palette |> set_names(cluster_labels),
name = NULL) +
labs(
x = paste0("Dim 1 — ruralurban (", dim1_pct, "%)"),
y = paste0("Dim 2 — labour-market self-containment (", dim2_pct, "%)")
) +
guides(colour = guide_legend(nrow = 2))
ggsave("ppt/figures/slide2_biplot.png", fig_biplot,
width = 11, height = 7, dpi = 150)
message("Saved: ppt/figures/slide2_biplot.png")
# ============================================================
# SLIDE 3 — Six empirical types of municipality
# Title: "Six coherent types emerge from Ward clustering"
#
# Cluster descriptions:
#
# Cl 1 — Remote & peripheral
# Most rural extreme. Agriculture/forestry/fishing, sparsely
# populated, own full educational infrastructure at every age
# (komvux, preschool). Examples: Kiruna, Piteå, Skellefteå, Gotland.
#
# Cl 2 — Central industrial towns
# Rural-industrial, but not as remote. Mining & manufacturing
# dominant. Owner-occupied housing, upper-secondary ceiling.
# Examples: Falköping, Lindesberg, Hedemora.
#
# Cl 3 — Peri-rural commuter belt
# Small southern and central rural municipalities. Many residents
# commute out. Owner-occupied, construction and agriculture visible.
# Below-average satisfaction with local high schools.
# Examples: Tomelilla, Osby, Klippan, Sölvesborg.
#
# Cl 4 — Regional service centres
# Mid-sized cities with self-contained labour markets. Rented and
# tenant-owned housing, public administration, post-secondary
# attainment. Examples: Göteborg, Malmö, Umeå, Linköping, Kalmar.
#
# Cl 5 — Affluent suburbs & university satellites
# Outbound commuters, post-secondary attainment, tenant-owned
# housing. Residential satellites whose labour markets sit elsewhere.
# Examples: Lund, Mölndal, Partille, Huddinge, Knivsta, Kungsbacka.
#
# Cl 6 — Inner Stockholm core
# Inbound commuting, IT and finance employment, apartment housing,
# post-graduate attainment at extreme levels. These are destinations
# in the commuting network, not origins.
# Examples: Stockholm, Solna, Sundbyberg, Danderyd, Lidingö, Täby.
#
# Figures: slide3_clusters.png (main) · slide3_dendrogram.png (inset)
# ============================================================
centroids <- row_df |>
group_by(cluster_label) |>
summarise(`Dim 1` = mean(`Dim 1`), `Dim 2` = mean(`Dim 2`), .groups = "drop")
fig_clusters <- ggplot(row_df, aes(`Dim 1`, `Dim 2`, colour = cluster_label)) +
geom_hline(yintercept = 0, linetype = "dashed", colour = "grey70") +
geom_vline(xintercept = 0, linetype = "dashed", colour = "grey70") +
geom_point(alpha = 0.5, size = 2) +
stat_ellipse(level = 0.68, linewidth = 0.7) +
geom_point(data = centroids, size = 5, shape = 18, colour = "grey15") +
geom_label_repel(
data = centroids, aes(label = cluster_label),
fill = "white", colour = "black",
family = "source_sans_3", size = 3.5,
label.size = 0.25, label.padding = unit(0.3, "lines"),
min.segment.length = 0, max.overlaps = 20
) +
scale_colour_manual(values = cluster_palette |> set_names(cluster_labels),
guide = "none") +
labs(
x = paste0("Dim 1 — ruralurban (", dim1_pct, "%)"),
y = paste0("Dim 2 — labour-market self-containment (", dim2_pct, "%)")
)
ggsave("ppt/figures/slide3_clusters.png", fig_clusters,
width = 11, height = 7, dpi = 150)
message("Saved: ppt/figures/slide3_clusters.png")
# Dendrogram with 6-cluster cut
tree <- hcpc$call$t$tree
h_max <- max(tree$height)
fig_dendro <- fviz_dend(
tree, k = 6, show_labels = FALSE,
rect = TRUE,
rect_border = unname(cluster_palette),
rect_fill = TRUE,
k_colors = unname(cluster_palette),
main = "", ylab = "Merge distance (Ward)"
) +
coord_cartesian(ylim = c(0, h_max * 1.05)) +
guides(linewidth = "none") +
theme(plot.title = element_blank(),
text = element_text(family = "source_sans_3", size = 13))
ggsave("ppt/figures/slide3_dendrogram.png", fig_dendro,
width = 11, height = 4.5, dpi = 150)
message("Saved: ppt/figures/slide3_dendrogram.png")
# ============================================================
# SLIDE 4 — Geography & key takeaways
# Title: "The typology maps coherently onto Swedish geography"
#
# Bullet points:
# • Clusters 1 & 2 (both rural types) dominate almost everywhere
# outside the metropolitan areas — the rural majority.
# • Cluster 6 (inner Stockholm core) is confined to Stockholm and
# Uppsala counties; cluster 5 (affluent suburbs) spreads into
# Skåne (Lund) and Västra Götaland (Mölndal, Partille).
# • Cluster 4 (regional centres) appears thinly but consistently
# across most counties — one or two per county.
#
# Key takeaways:
# • Biggest empirical break: NOT metro vs. non-metro, but between
# two kinds of rural — remote & peripheral (Cl. 1), central
# industrial towns (Cl. 2), and peri-rural commuters (Cl. 3).
# • At the urban end, two kinds of city: self-contained regional
# centres (Cl. 4) vs. the metropolitan region (Cl. 5 & 6).
# • Within the metropolitan region, the data cleanly separate
# residential suburbs that commute IN (Cl. 5) from the inner core
# that receives commuters (Cl. 6).
#
# Figure: slide4_county.png
# ============================================================
county_order <- c(
"Skåne", "Blekinge", "Halland", "Kronoberg", "Kalmar", "Gotland",
"Jönköping", "Östergötland", "Södermanland", "Västra Götaland",
"Örebro", "Västmanland", "Stockholm", "Uppsala", "Dalarna", "Värmland",
"Gävleborg", "Västernorrland", "Jämtland", "Västerbotten", "Norrbotten"
)
clusters_geo <- clusters |>
left_join(panel_raw, by = "municipality") |>
mutate(county = county_names[str_sub(code, 1, 2)]) |>
filter(!is.na(county))
fig_county <- clusters_geo |>
count(county, cluster_label) |>
mutate(
county = factor(county, levels = county_order),
cluster_label = factor(cluster_label, levels = cluster_labels)
) |>
ggplot(aes(county, n, fill = cluster_label)) +
geom_col(position = "fill") +
scale_fill_manual(values = cluster_palette |> set_names(cluster_labels),
name = NULL) +
scale_y_continuous(labels = scales::percent_format(), expand = c(0, 0)) +
labs(x = NULL, y = "Share of municipalities") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 11),
legend.position = "bottom",
legend.text = element_text(size = 10)
) +
guides(fill = guide_legend(nrow = 2))
ggsave("ppt/figures/slide4_county.png", fig_county,
width = 12, height = 6.5, dpi = 150)
message("Saved: ppt/figures/slide4_county.png")
message("\nAll figures written to ppt/figures/. Ready to paste into the slide deck.")