updated figures for ppt
1
ppt/.~lock.Ruralitic-Hig-20260526-eng.pptx#
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
,pab,onigashima,26.05.2026 10:40,file:///home/pab/.config/libreoffice/4;
|
||||||
205
ppt/content.R
|
|
@ -1,11 +1,7 @@
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# PPT CONTENT — Empirical urban-rural typology of Swedish municipalities
|
# PPT CONTENT: Empirical urban-rural typology of Swedish municipalities
|
||||||
# Correspondence Analysis + hierarchical clustering (2022 sampling)
|
# Correspondence Analysis + hierarchical clustering (2022 sampling)
|
||||||
# Six-cluster cut
|
# Six-cluster cut
|
||||||
#
|
|
||||||
# Run from the project root (ruralitic-qrm/).
|
|
||||||
# Figures are saved to ppt/figures/. Slide text is in the
|
|
||||||
# comments below each section header.
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
|
|
@ -17,7 +13,7 @@ library(showtext)
|
||||||
font_add_google("Source Sans 3", "source_sans_3")
|
font_add_google("Source Sans 3", "source_sans_3")
|
||||||
showtext_auto()
|
showtext_auto()
|
||||||
|
|
||||||
theme_ppt <- theme_minimal(base_size = 14, base_family = "source_sans_3") +
|
theme_ppt <- theme_minimal(base_size = 18, base_family = "source_sans_3") +
|
||||||
theme(
|
theme(
|
||||||
panel.grid.minor = element_blank(),
|
panel.grid.minor = element_blank(),
|
||||||
panel.grid.major = element_line(colour = "grey92"),
|
panel.grid.major = element_line(colour = "grey92"),
|
||||||
|
|
@ -73,56 +69,6 @@ row_df <- as.data.frame(afc$row$coord[, 1:2]) |>
|
||||||
rownames_to_column("municipality") |>
|
rownames_to_column("municipality") |>
|
||||||
left_join(clusters, by = "municipality")
|
left_join(clusters, by = "municipality")
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# SLIDE 1 — Data & motivation
|
|
||||||
# Title: "Building an urban–rural typology from the data"
|
|
||||||
#
|
|
||||||
# Bullet points:
|
|
||||||
# • Sweden's 290 municipalities are routinely classified by
|
|
||||||
# administrative or population-size rules (SCB). These categories
|
|
||||||
# are imposed from outside the data.
|
|
||||||
# • This analysis asks instead: how do municipalities actually differ
|
|
||||||
# across structural dimensions?
|
|
||||||
# • Data source: Statistics Sweden, 2022 project sampling.
|
|
||||||
# • Six ACTIVE variable blocks:
|
|
||||||
# Education (4 attainment levels)
|
|
||||||
# Employment (16 activity sectors)
|
|
||||||
# Housing (rented / tenant-owned / owner-occupied)
|
|
||||||
# Workplace mobility (commuters in, commuters out, working locally)
|
|
||||||
# Migration (in- and outmigration)
|
|
||||||
# Demography (retirees, number of localities)
|
|
||||||
# • Two SUPPLEMENTARY blocks projected post-hoc:
|
|
||||||
# Educational provision (pre-school through HE, by ownership)
|
|
||||||
# Opinion (survey satisfaction with local schools)
|
|
||||||
# • Key pre-processing: block normalisation. Within each block,
|
|
||||||
# every municipality is rescaled to the same total, preventing
|
|
||||||
# Stockholm from dominating the analysis due to sheer size.
|
|
||||||
#
|
|
||||||
# No figure needed — use the variable block list as a visual
|
|
||||||
# schematic or table in the slide.
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# SLIDE 2 — Correspondence analysis: the space of municipalities
|
|
||||||
# Title: "Two dimensions capture two-thirds of the variation"
|
|
||||||
#
|
|
||||||
# Bullet points:
|
|
||||||
# • CA places all 290 municipalities in a low-dimensional space
|
|
||||||
# where proximity = similarity on the active variables.
|
|
||||||
# • Dim 1 (rural–urban): Left pole → agriculture, mining &
|
|
||||||
# manufacturing, owner-occupied housing, upper-secondary education.
|
|
||||||
# Right pole → IT, finance, professional services, post-graduate
|
|
||||||
# attainment, apartment housing.
|
|
||||||
# • Dim 2 (labour-market self-containment): Top → residents work
|
|
||||||
# where they live (Göteborg, Malmö, Umeå). Bottom → outbound
|
|
||||||
# commuters, residential satellites (Knivsta, Salem, Staffanstorp).
|
|
||||||
# • Together Dim 1 + Dim 2 account for ~65% of total variability.
|
|
||||||
#
|
|
||||||
# Figure: slide2_biplot.png
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
label_munis <- c("Stockholm", "Göteborg", "Malmö", "Uppsala", "Lund", "Umeå",
|
label_munis <- c("Stockholm", "Göteborg", "Malmö", "Uppsala", "Lund", "Umeå",
|
||||||
"Linköping", "Solna", "Danderyd", "Kiruna", "Gotland",
|
"Linköping", "Solna", "Danderyd", "Kiruna", "Gotland",
|
||||||
"Knivsta", "Falköping", "Tomelilla", "Skellefteå", "Piteå",
|
"Knivsta", "Falköping", "Tomelilla", "Skellefteå", "Piteå",
|
||||||
|
|
@ -147,14 +93,14 @@ fig_biplot <- ggplot() +
|
||||||
geom_text_repel(
|
geom_text_repel(
|
||||||
data = row_df |> filter(municipality %in% label_munis),
|
data = row_df |> filter(municipality %in% label_munis),
|
||||||
aes(`Dim 1`, `Dim 2`, label = municipality),
|
aes(`Dim 1`, `Dim 2`, label = municipality),
|
||||||
size = 3.5, colour = "grey20", family = "source_sans_3",
|
size = 5, colour = "grey20", family = "source_sans_3",
|
||||||
max.overlaps = 30, segment.size = 0.25
|
max.overlaps = 30, segment.size = 0.25
|
||||||
) +
|
) +
|
||||||
geom_point(data = col_df, aes(`Dim 1`, `Dim 2`),
|
geom_point(data = col_df, aes(`Dim 1`, `Dim 2`),
|
||||||
shape = 17, colour = "firebrick", size = 3) +
|
shape = 17, colour = "firebrick", size = 3.5) +
|
||||||
geom_text_repel(
|
geom_text_repel(
|
||||||
data = col_df, aes(`Dim 1`, `Dim 2`, label = variable),
|
data = col_df, aes(`Dim 1`, `Dim 2`, label = variable),
|
||||||
colour = "firebrick", size = 3, family = "source_sans_3",
|
colour = "firebrick", size = 4.5, family = "source_sans_3",
|
||||||
max.overlaps = 30, segment.size = 0.25
|
max.overlaps = 30, segment.size = 0.25
|
||||||
) +
|
) +
|
||||||
scale_colour_manual(values = cluster_palette |> set_names(cluster_labels),
|
scale_colour_manual(values = cluster_palette |> set_names(cluster_labels),
|
||||||
|
|
@ -163,54 +109,13 @@ fig_biplot <- ggplot() +
|
||||||
x = paste0("Dim 1 — rural–urban (", dim1_pct, "%)"),
|
x = paste0("Dim 1 — rural–urban (", dim1_pct, "%)"),
|
||||||
y = paste0("Dim 2 — labour-market self-containment (", dim2_pct, "%)")
|
y = paste0("Dim 2 — labour-market self-containment (", dim2_pct, "%)")
|
||||||
) +
|
) +
|
||||||
guides(colour = guide_legend(nrow = 2))
|
theme(legend.position = "none")
|
||||||
|
#guides(colour = guide_legend(nrow = 2))
|
||||||
|
|
||||||
ggsave("ppt/figures/slide2_biplot.png", fig_biplot,
|
ggsave("ppt/figures/slide2_biplot.png", fig_biplot,
|
||||||
width = 11, height = 7, dpi = 150)
|
width = 11, height = 7, dpi = 150)
|
||||||
message("Saved: ppt/figures/slide2_biplot.png")
|
message("Saved: ppt/figures/slide2_biplot.png")
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# SLIDE 3 — Six empirical types of municipality
|
|
||||||
# Title: "Six coherent types emerge from Ward clustering"
|
|
||||||
#
|
|
||||||
# Cluster descriptions:
|
|
||||||
#
|
|
||||||
# Cl 1 — Remote & peripheral
|
|
||||||
# Most rural extreme. Agriculture/forestry/fishing, sparsely
|
|
||||||
# populated, own full educational infrastructure at every age
|
|
||||||
# (komvux, preschool). Examples: Kiruna, Piteå, Skellefteå, Gotland.
|
|
||||||
#
|
|
||||||
# Cl 2 — Central industrial towns
|
|
||||||
# Rural-industrial, but not as remote. Mining & manufacturing
|
|
||||||
# dominant. Owner-occupied housing, upper-secondary ceiling.
|
|
||||||
# Examples: Falköping, Lindesberg, Hedemora.
|
|
||||||
#
|
|
||||||
# Cl 3 — Peri-rural commuter belt
|
|
||||||
# Small southern and central rural municipalities. Many residents
|
|
||||||
# commute out. Owner-occupied, construction and agriculture visible.
|
|
||||||
# Below-average satisfaction with local high schools.
|
|
||||||
# Examples: Tomelilla, Osby, Klippan, Sölvesborg.
|
|
||||||
#
|
|
||||||
# Cl 4 — Regional service centres
|
|
||||||
# Mid-sized cities with self-contained labour markets. Rented and
|
|
||||||
# tenant-owned housing, public administration, post-secondary
|
|
||||||
# attainment. Examples: Göteborg, Malmö, Umeå, Linköping, Kalmar.
|
|
||||||
#
|
|
||||||
# Cl 5 — Affluent suburbs & university satellites
|
|
||||||
# Outbound commuters, post-secondary attainment, tenant-owned
|
|
||||||
# housing. Residential satellites whose labour markets sit elsewhere.
|
|
||||||
# Examples: Lund, Mölndal, Partille, Huddinge, Knivsta, Kungsbacka.
|
|
||||||
#
|
|
||||||
# Cl 6 — Inner Stockholm core
|
|
||||||
# Inbound commuting, IT and finance employment, apartment housing,
|
|
||||||
# post-graduate attainment at extreme levels. These are destinations
|
|
||||||
# in the commuting network, not origins.
|
|
||||||
# Examples: Stockholm, Solna, Sundbyberg, Danderyd, Lidingö, Täby.
|
|
||||||
#
|
|
||||||
# Figures: slide3_clusters.png (main) · slide3_dendrogram.png (inset)
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
centroids <- row_df |>
|
centroids <- row_df |>
|
||||||
group_by(cluster_label) |>
|
group_by(cluster_label) |>
|
||||||
summarise(`Dim 1` = mean(`Dim 1`), `Dim 2` = mean(`Dim 2`), .groups = "drop")
|
summarise(`Dim 1` = mean(`Dim 1`), `Dim 2` = mean(`Dim 2`), .groups = "drop")
|
||||||
|
|
@ -224,7 +129,7 @@ fig_clusters <- ggplot(row_df, aes(`Dim 1`, `Dim 2`, colour = cluster_label)) +
|
||||||
geom_label_repel(
|
geom_label_repel(
|
||||||
data = centroids, aes(label = cluster_label),
|
data = centroids, aes(label = cluster_label),
|
||||||
fill = "white", colour = "black",
|
fill = "white", colour = "black",
|
||||||
family = "source_sans_3", size = 3.5,
|
family = "source_sans_3", size = 5,
|
||||||
label.size = 0.25, label.padding = unit(0.3, "lines"),
|
label.size = 0.25, label.padding = unit(0.3, "lines"),
|
||||||
min.segment.length = 0, max.overlaps = 20
|
min.segment.length = 0, max.overlaps = 20
|
||||||
) +
|
) +
|
||||||
|
|
@ -254,39 +159,12 @@ fig_dendro <- fviz_dend(
|
||||||
coord_cartesian(ylim = c(0, h_max * 1.05)) +
|
coord_cartesian(ylim = c(0, h_max * 1.05)) +
|
||||||
guides(linewidth = "none") +
|
guides(linewidth = "none") +
|
||||||
theme(plot.title = element_blank(),
|
theme(plot.title = element_blank(),
|
||||||
text = element_text(family = "source_sans_3", size = 13))
|
text = element_text(family = "source_sans_3", size = 17))
|
||||||
|
|
||||||
ggsave("ppt/figures/slide3_dendrogram.png", fig_dendro,
|
ggsave("ppt/figures/slide3_dendrogram.png", fig_dendro,
|
||||||
width = 11, height = 4.5, dpi = 150)
|
width = 11, height = 4.5, dpi = 150)
|
||||||
message("Saved: ppt/figures/slide3_dendrogram.png")
|
message("Saved: ppt/figures/slide3_dendrogram.png")
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# SLIDE 4 — Geography & key takeaways
|
|
||||||
# Title: "The typology maps coherently onto Swedish geography"
|
|
||||||
#
|
|
||||||
# Bullet points:
|
|
||||||
# • Clusters 1 & 2 (both rural types) dominate almost everywhere
|
|
||||||
# outside the metropolitan areas — the rural majority.
|
|
||||||
# • Cluster 6 (inner Stockholm core) is confined to Stockholm and
|
|
||||||
# Uppsala counties; cluster 5 (affluent suburbs) spreads into
|
|
||||||
# Skåne (Lund) and Västra Götaland (Mölndal, Partille).
|
|
||||||
# • Cluster 4 (regional centres) appears thinly but consistently
|
|
||||||
# across most counties — one or two per county.
|
|
||||||
#
|
|
||||||
# Key takeaways:
|
|
||||||
# • Biggest empirical break: NOT metro vs. non-metro, but between
|
|
||||||
# three kinds of rural — remote & peripheral (Cl. 1), central
|
|
||||||
# industrial towns (Cl. 2), and peri-rural commuters (Cl. 3).
|
|
||||||
# • At the urban end, two kinds of city: self-contained regional
|
|
||||||
# centres (Cl. 4) vs. the metropolitan region (Cl. 5 & 6).
|
|
||||||
# • Within the metropolitan region, the data cleanly separate
|
|
||||||
# residential suburbs that commute OUT (Cl. 5) from the inner core
|
|
||||||
# that receives commuters (Cl. 6).
|
|
||||||
#
|
|
||||||
# Figure: slide4_county.png
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
county_order <- c(
|
county_order <- c(
|
||||||
"Skåne", "Blekinge", "Halland", "Kronoberg", "Kalmar", "Gotland",
|
"Skåne", "Blekinge", "Halland", "Kronoberg", "Kalmar", "Gotland",
|
||||||
"Jönköping", "Östergötland", "Södermanland", "Västra Götaland",
|
"Jönköping", "Östergötland", "Södermanland", "Västra Götaland",
|
||||||
|
|
@ -294,33 +172,76 @@ county_order <- c(
|
||||||
"Gävleborg", "Västernorrland", "Jämtland", "Västerbotten", "Norrbotten"
|
"Gävleborg", "Västernorrland", "Jämtland", "Västerbotten", "Norrbotten"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Population per municipality (2022)
|
||||||
|
pop_raw <- readxl::read_excel("data/Municipalities_db_2.xlsx",
|
||||||
|
col_types = "text") |>
|
||||||
|
filter(year == "2022") |>
|
||||||
|
transmute(municipality, pop = as.numeric(Population))
|
||||||
|
|
||||||
clusters_geo <- clusters |>
|
clusters_geo <- clusters |>
|
||||||
left_join(panel_raw, by = "municipality") |>
|
left_join(panel_raw, by = "municipality") |>
|
||||||
|
left_join(pop_raw, by = "municipality") |>
|
||||||
mutate(county = county_names[str_sub(code, 1, 2)]) |>
|
mutate(county = county_names[str_sub(code, 1, 2)]) |>
|
||||||
filter(!is.na(county))
|
filter(!is.na(county))
|
||||||
|
|
||||||
fig_county <- clusters_geo |>
|
county_pop <- clusters_geo |>
|
||||||
count(county, cluster_label) |>
|
group_by(county, cluster_label) |>
|
||||||
|
summarise(pop = sum(pop, na.rm = TRUE), .groups = "drop") |>
|
||||||
|
group_by(county) |>
|
||||||
mutate(
|
mutate(
|
||||||
# south at bottom → north at top: keep county_order as factor levels
|
share = pop / sum(pop),
|
||||||
|
pop_label = ifelse(pop >= 1e6,
|
||||||
|
paste0(round(pop / 1e6, 1), "M"),
|
||||||
|
paste0(round(pop / 1e3), "k")),
|
||||||
county = factor(county, levels = county_order),
|
county = factor(county, levels = county_order),
|
||||||
cluster_label = factor(cluster_label, levels = cluster_labels)
|
cluster_label = factor(cluster_label, levels = cluster_labels)
|
||||||
) |>
|
) |>
|
||||||
ggplot(aes(x = n, y = county, fill = cluster_label)) +
|
ungroup()
|
||||||
geom_col(position = "fill") +
|
|
||||||
|
# Combined faceted figure: municipalities (left) and population (right)
|
||||||
|
county_munis <- clusters_geo |>
|
||||||
|
count(county, cluster_label) |>
|
||||||
|
group_by(county) |>
|
||||||
|
mutate(share = n / sum(n)) |>
|
||||||
|
ungroup() |>
|
||||||
|
mutate(
|
||||||
|
county = factor(county, levels = county_order),
|
||||||
|
cluster_label = factor(cluster_label, levels = cluster_labels),
|
||||||
|
facet = "Share of municipalities",
|
||||||
|
pop_label = NA_character_
|
||||||
|
)
|
||||||
|
|
||||||
|
county_pop_f <- county_pop |>
|
||||||
|
mutate(facet = "Share of population")
|
||||||
|
|
||||||
|
combined <- bind_rows(
|
||||||
|
county_munis |> select(county, cluster_label, share, facet, pop_label),
|
||||||
|
county_pop_f |> select(county, cluster_label, share, facet, pop_label)
|
||||||
|
) |>
|
||||||
|
mutate(facet = factor(facet, levels = c("Share of municipalities",
|
||||||
|
"Share of population")))
|
||||||
|
|
||||||
|
fig_county <- ggplot(combined, aes(x = share, y = county, fill = cluster_label)) +
|
||||||
|
geom_col(position = "stack", width = 0.8) +
|
||||||
|
geom_text(
|
||||||
|
aes(label = pop_label),
|
||||||
|
position = position_stack(vjust = 0.5),
|
||||||
|
size = 3, family = "source_sans_3", colour = "grey20",
|
||||||
|
check_overlap = TRUE, na.rm = TRUE
|
||||||
|
) +
|
||||||
|
facet_wrap(~facet) +
|
||||||
scale_fill_manual(values = cluster_palette |> set_names(cluster_labels),
|
scale_fill_manual(values = cluster_palette |> set_names(cluster_labels),
|
||||||
name = NULL) +
|
name = NULL) +
|
||||||
scale_x_continuous(labels = scales::percent_format(), expand = c(0, 0)) +
|
scale_x_continuous(labels = scales::percent_format(), expand = c(0, 0)) +
|
||||||
labs(y = NULL, x = "Share of municipalities") +
|
labs(y = NULL, x = NULL) +
|
||||||
theme(
|
theme(
|
||||||
axis.text.y = element_text(size = 11),
|
axis.text.y = element_text(size = 13),
|
||||||
|
strip.text = element_text(size = 14, face = "bold"),
|
||||||
legend.position = "bottom",
|
legend.position = "bottom",
|
||||||
legend.text = element_text(size = 10)
|
legend.text = element_text(size = 12)
|
||||||
) +
|
) +
|
||||||
guides(fill = guide_legend(nrow = 3))
|
guides(fill = guide_legend(nrow = 3))
|
||||||
|
|
||||||
ggsave("ppt/figures/slide4_county.png", fig_county,
|
ggsave("ppt/figures/slide4_county.png", fig_county,
|
||||||
width = 8, height = 9, dpi = 150)
|
width = 14, height = 9, dpi = 150)
|
||||||
message("Saved: ppt/figures/slide4_county.png")
|
message("Saved: ppt/figures/slide4_county.png")
|
||||||
|
|
||||||
message("\nAll figures written to ppt/figures/. Ready to paste into the slide deck.")
|
|
||||||
|
|
|
||||||
|
Before Width: | Height: | Size: 176 KiB After Width: | Height: | Size: 205 KiB |
|
Before Width: | Height: | Size: 197 KiB After Width: | Height: | Size: 212 KiB |
|
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 19 KiB |
|
Before Width: | Height: | Size: 48 KiB After Width: | Height: | Size: 93 KiB |
BIN
ppt/figures/slide4_county_munis.png
Normal file
|
After Width: | Height: | Size: 58 KiB |
BIN
ppt/figures/slide4_county_pop.png
Normal file
|
After Width: | Height: | Size: 90 KiB |