2026-05-26 09:10:52 +02:00
|
|
|
|
# ============================================================
|
2026-05-26 10:41:13 +02:00
|
|
|
|
# PPT CONTENT: Empirical urban-rural typology of Swedish municipalities
|
2026-05-26 09:10:52 +02:00
|
|
|
|
# Correspondence Analysis + hierarchical clustering (2022 sampling)
|
|
|
|
|
|
# Six-cluster cut
|
|
|
|
|
|
# ============================================================
|
|
|
|
|
|
|
|
|
|
|
|
library(tidyverse)
|
|
|
|
|
|
library(FactoMineR)
|
|
|
|
|
|
library(factoextra)
|
|
|
|
|
|
library(ggrepel)
|
|
|
|
|
|
library(showtext)
|
|
|
|
|
|
|
|
|
|
|
|
font_add_google("Source Sans 3", "source_sans_3")
|
|
|
|
|
|
showtext_auto()
|
|
|
|
|
|
|
2026-05-26 10:41:13 +02:00
|
|
|
|
theme_ppt <- theme_minimal(base_size = 18, base_family = "source_sans_3") +
|
2026-05-26 09:10:52 +02:00
|
|
|
|
theme(
|
|
|
|
|
|
panel.grid.minor = element_blank(),
|
|
|
|
|
|
panel.grid.major = element_line(colour = "grey92"),
|
|
|
|
|
|
legend.position = "bottom",
|
|
|
|
|
|
legend.title = element_text(face = "bold"),
|
|
|
|
|
|
plot.margin = margin(8, 12, 8, 8)
|
|
|
|
|
|
)
|
|
|
|
|
|
theme_set(theme_ppt)
|
|
|
|
|
|
|
|
|
|
|
|
cluster_labels <- c(
|
|
|
|
|
|
"1" = "Remote & peripheral",
|
|
|
|
|
|
"2" = "Central industrial towns",
|
|
|
|
|
|
"3" = "Peri-rural commuter belt",
|
|
|
|
|
|
"4" = "Regional service centres",
|
|
|
|
|
|
"5" = "Affluent suburbs & university satellites",
|
|
|
|
|
|
"6" = "Inner Stockholm core"
|
|
|
|
|
|
)
|
|
|
|
|
|
cluster_palette <- c(
|
|
|
|
|
|
"1" = "#B07F4F",
|
|
|
|
|
|
"2" = "#D7A86E",
|
|
|
|
|
|
"3" = "#7CB07C",
|
|
|
|
|
|
"4" = "#6FA8DC",
|
|
|
|
|
|
"5" = "#C2738B",
|
|
|
|
|
|
"6" = "#7A3A4F"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
county_names <- c(
|
|
|
|
|
|
"01" = "Stockholm", "03" = "Uppsala", "04" = "Södermanland",
|
|
|
|
|
|
"05" = "Östergötland", "06" = "Jönköping", "07" = "Kronoberg",
|
|
|
|
|
|
"08" = "Kalmar", "09" = "Gotland", "10" = "Blekinge",
|
|
|
|
|
|
"12" = "Skåne", "13" = "Halland", "14" = "Västra Götaland",
|
|
|
|
|
|
"17" = "Värmland", "18" = "Örebro", "19" = "Västmanland",
|
|
|
|
|
|
"20" = "Dalarna", "21" = "Gävleborg", "22" = "Västernorrland",
|
|
|
|
|
|
"23" = "Jämtland", "24" = "Västerbotten", "25" = "Norrbotten"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# Load data
|
|
|
|
|
|
afc <- read_rds("data/processed/proportions_CA.rds")
|
|
|
|
|
|
hcpc <- read_rds("data/processed/proportions_HCPC_6.rds")
|
|
|
|
|
|
clusters <- read_csv("data/processed/cluster_assignment_6.csv",
|
|
|
|
|
|
show_col_types = FALSE) |>
|
|
|
|
|
|
rename(cluster = c6) |>
|
|
|
|
|
|
mutate(cluster_label = cluster_labels[as.character(cluster)])
|
|
|
|
|
|
|
|
|
|
|
|
panel_raw <- readxl::read_excel("data/Municipalities_db_2.xlsx",
|
|
|
|
|
|
col_types = "text", n_max = 290) |>
|
|
|
|
|
|
transmute(municipality, code = str_pad(code, 4, "left", "0"))
|
|
|
|
|
|
|
|
|
|
|
|
dim1_pct <- round(afc$eig[1, 2], 1)
|
|
|
|
|
|
dim2_pct <- round(afc$eig[2, 2], 1)
|
|
|
|
|
|
|
|
|
|
|
|
row_df <- as.data.frame(afc$row$coord[, 1:2]) |>
|
|
|
|
|
|
rownames_to_column("municipality") |>
|
|
|
|
|
|
left_join(clusters, by = "municipality")
|
|
|
|
|
|
|
|
|
|
|
|
label_munis <- c("Stockholm", "Göteborg", "Malmö", "Uppsala", "Lund", "Umeå",
|
|
|
|
|
|
"Linköping", "Solna", "Danderyd", "Kiruna", "Gotland",
|
|
|
|
|
|
"Knivsta", "Falköping", "Tomelilla", "Skellefteå", "Piteå",
|
2026-05-26 11:03:19 +02:00
|
|
|
|
"Partille", "Sundbyberg", "Lindesberg", "Gävle")
|
2026-05-26 09:10:52 +02:00
|
|
|
|
|
|
|
|
|
|
contribs <- as.data.frame(afc$col$contrib) |>
|
|
|
|
|
|
rownames_to_column("variable") |>
|
|
|
|
|
|
mutate(total = `Dim 1` + `Dim 2`) |>
|
|
|
|
|
|
arrange(desc(total)) |>
|
|
|
|
|
|
head(15)
|
|
|
|
|
|
|
|
|
|
|
|
col_df <- as.data.frame(afc$col$coord[, 1:2]) |>
|
|
|
|
|
|
rownames_to_column("variable") |>
|
|
|
|
|
|
filter(variable %in% contribs$variable)
|
|
|
|
|
|
|
2026-05-26 11:03:19 +02:00
|
|
|
|
highlight_munis <- c("Gävle")
|
|
|
|
|
|
|
2026-05-26 09:10:52 +02:00
|
|
|
|
fig_biplot <- ggplot() +
|
|
|
|
|
|
geom_hline(yintercept = 0, linetype = "dashed", colour = "grey60") +
|
|
|
|
|
|
geom_vline(xintercept = 0, linetype = "dashed", colour = "grey60") +
|
|
|
|
|
|
geom_point(data = row_df,
|
|
|
|
|
|
aes(`Dim 1`, `Dim 2`, colour = cluster_label),
|
|
|
|
|
|
alpha = 0.75, size = 2.2) +
|
2026-05-26 11:03:19 +02:00
|
|
|
|
geom_point(data = row_df |> filter(municipality %in% highlight_munis),
|
|
|
|
|
|
aes(`Dim 1`, `Dim 2`),
|
|
|
|
|
|
shape = 21, size = 5, stroke = 1.5,
|
|
|
|
|
|
fill = NA, colour = "black") +
|
2026-05-26 09:10:52 +02:00
|
|
|
|
geom_text_repel(
|
|
|
|
|
|
data = row_df |> filter(municipality %in% label_munis),
|
2026-05-26 11:03:19 +02:00
|
|
|
|
aes(`Dim 1`, `Dim 2`, label = municipality,
|
|
|
|
|
|
fontface = if_else(municipality %in% highlight_munis, "bold", "plain")),
|
2026-05-26 10:41:13 +02:00
|
|
|
|
size = 5, colour = "grey20", family = "source_sans_3",
|
2026-05-26 09:10:52 +02:00
|
|
|
|
max.overlaps = 30, segment.size = 0.25
|
|
|
|
|
|
) +
|
|
|
|
|
|
geom_point(data = col_df, aes(`Dim 1`, `Dim 2`),
|
2026-05-26 10:41:13 +02:00
|
|
|
|
shape = 17, colour = "firebrick", size = 3.5) +
|
2026-05-26 09:10:52 +02:00
|
|
|
|
geom_text_repel(
|
|
|
|
|
|
data = col_df, aes(`Dim 1`, `Dim 2`, label = variable),
|
2026-05-26 10:41:13 +02:00
|
|
|
|
colour = "firebrick", size = 4.5, family = "source_sans_3",
|
2026-05-26 09:10:52 +02:00
|
|
|
|
max.overlaps = 30, segment.size = 0.25
|
|
|
|
|
|
) +
|
|
|
|
|
|
scale_colour_manual(values = cluster_palette |> set_names(cluster_labels),
|
|
|
|
|
|
name = NULL) +
|
|
|
|
|
|
labs(
|
|
|
|
|
|
x = paste0("Dim 1 — rural–urban (", dim1_pct, "%)"),
|
|
|
|
|
|
y = paste0("Dim 2 — labour-market self-containment (", dim2_pct, "%)")
|
|
|
|
|
|
) +
|
2026-05-26 10:41:13 +02:00
|
|
|
|
theme(legend.position = "none")
|
|
|
|
|
|
#guides(colour = guide_legend(nrow = 2))
|
2026-05-26 09:10:52 +02:00
|
|
|
|
|
|
|
|
|
|
ggsave("ppt/figures/slide2_biplot.png", fig_biplot,
|
|
|
|
|
|
width = 11, height = 7, dpi = 150)
|
|
|
|
|
|
message("Saved: ppt/figures/slide2_biplot.png")
|
|
|
|
|
|
|
|
|
|
|
|
centroids <- row_df |>
|
|
|
|
|
|
group_by(cluster_label) |>
|
|
|
|
|
|
summarise(`Dim 1` = mean(`Dim 1`), `Dim 2` = mean(`Dim 2`), .groups = "drop")
|
|
|
|
|
|
|
|
|
|
|
|
fig_clusters <- ggplot(row_df, aes(`Dim 1`, `Dim 2`, colour = cluster_label)) +
|
|
|
|
|
|
geom_hline(yintercept = 0, linetype = "dashed", colour = "grey70") +
|
|
|
|
|
|
geom_vline(xintercept = 0, linetype = "dashed", colour = "grey70") +
|
|
|
|
|
|
geom_point(alpha = 0.5, size = 2) +
|
|
|
|
|
|
stat_ellipse(level = 0.68, linewidth = 0.7) +
|
|
|
|
|
|
geom_point(data = centroids, size = 5, shape = 18, colour = "grey15") +
|
|
|
|
|
|
geom_label_repel(
|
|
|
|
|
|
data = centroids, aes(label = cluster_label),
|
|
|
|
|
|
fill = "white", colour = "black",
|
2026-05-26 10:41:13 +02:00
|
|
|
|
family = "source_sans_3", size = 5,
|
2026-05-26 09:10:52 +02:00
|
|
|
|
label.size = 0.25, label.padding = unit(0.3, "lines"),
|
|
|
|
|
|
min.segment.length = 0, max.overlaps = 20
|
|
|
|
|
|
) +
|
|
|
|
|
|
scale_colour_manual(values = cluster_palette |> set_names(cluster_labels),
|
|
|
|
|
|
guide = "none") +
|
|
|
|
|
|
labs(
|
|
|
|
|
|
x = paste0("Dim 1 — rural–urban (", dim1_pct, "%)"),
|
|
|
|
|
|
y = paste0("Dim 2 — labour-market self-containment (", dim2_pct, "%)")
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
ggsave("ppt/figures/slide3_clusters.png", fig_clusters,
|
|
|
|
|
|
width = 11, height = 7, dpi = 150)
|
|
|
|
|
|
message("Saved: ppt/figures/slide3_clusters.png")
|
|
|
|
|
|
|
|
|
|
|
|
# Dendrogram with 6-cluster cut
|
|
|
|
|
|
tree <- hcpc$call$t$tree
|
|
|
|
|
|
h_max <- max(tree$height)
|
|
|
|
|
|
|
|
|
|
|
|
fig_dendro <- fviz_dend(
|
|
|
|
|
|
tree, k = 6, show_labels = FALSE,
|
|
|
|
|
|
rect = TRUE,
|
|
|
|
|
|
rect_border = unname(cluster_palette),
|
|
|
|
|
|
rect_fill = TRUE,
|
|
|
|
|
|
k_colors = unname(cluster_palette),
|
|
|
|
|
|
main = "", ylab = "Merge distance (Ward)"
|
|
|
|
|
|
) +
|
|
|
|
|
|
coord_cartesian(ylim = c(0, h_max * 1.05)) +
|
|
|
|
|
|
guides(linewidth = "none") +
|
|
|
|
|
|
theme(plot.title = element_blank(),
|
2026-05-26 10:41:13 +02:00
|
|
|
|
text = element_text(family = "source_sans_3", size = 17))
|
2026-05-26 09:10:52 +02:00
|
|
|
|
|
|
|
|
|
|
ggsave("ppt/figures/slide3_dendrogram.png", fig_dendro,
|
|
|
|
|
|
width = 11, height = 4.5, dpi = 150)
|
|
|
|
|
|
message("Saved: ppt/figures/slide3_dendrogram.png")
|
|
|
|
|
|
|
|
|
|
|
|
county_order <- c(
|
|
|
|
|
|
"Skåne", "Blekinge", "Halland", "Kronoberg", "Kalmar", "Gotland",
|
|
|
|
|
|
"Jönköping", "Östergötland", "Södermanland", "Västra Götaland",
|
|
|
|
|
|
"Örebro", "Västmanland", "Stockholm", "Uppsala", "Dalarna", "Värmland",
|
|
|
|
|
|
"Gävleborg", "Västernorrland", "Jämtland", "Västerbotten", "Norrbotten"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
2026-05-26 10:41:13 +02:00
|
|
|
|
# Population per municipality (2022)
|
|
|
|
|
|
pop_raw <- readxl::read_excel("data/Municipalities_db_2.xlsx",
|
|
|
|
|
|
col_types = "text") |>
|
|
|
|
|
|
filter(year == "2022") |>
|
|
|
|
|
|
transmute(municipality, pop = as.numeric(Population))
|
|
|
|
|
|
|
2026-05-26 09:10:52 +02:00
|
|
|
|
clusters_geo <- clusters |>
|
|
|
|
|
|
left_join(panel_raw, by = "municipality") |>
|
2026-05-26 10:41:13 +02:00
|
|
|
|
left_join(pop_raw, by = "municipality") |>
|
2026-05-26 09:10:52 +02:00
|
|
|
|
mutate(county = county_names[str_sub(code, 1, 2)]) |>
|
|
|
|
|
|
filter(!is.na(county))
|
|
|
|
|
|
|
2026-05-26 10:41:13 +02:00
|
|
|
|
county_pop <- clusters_geo |>
|
|
|
|
|
|
group_by(county, cluster_label) |>
|
|
|
|
|
|
summarise(pop = sum(pop, na.rm = TRUE), .groups = "drop") |>
|
|
|
|
|
|
group_by(county) |>
|
2026-05-26 09:10:52 +02:00
|
|
|
|
mutate(
|
2026-05-26 10:41:13 +02:00
|
|
|
|
share = pop / sum(pop),
|
|
|
|
|
|
pop_label = ifelse(pop >= 1e6,
|
|
|
|
|
|
paste0(round(pop / 1e6, 1), "M"),
|
|
|
|
|
|
paste0(round(pop / 1e3), "k")),
|
2026-05-26 09:10:52 +02:00
|
|
|
|
county = factor(county, levels = county_order),
|
|
|
|
|
|
cluster_label = factor(cluster_label, levels = cluster_labels)
|
|
|
|
|
|
) |>
|
2026-05-26 10:41:13 +02:00
|
|
|
|
ungroup()
|
|
|
|
|
|
|
|
|
|
|
|
# Combined faceted figure: municipalities (left) and population (right)
|
|
|
|
|
|
county_munis <- clusters_geo |>
|
|
|
|
|
|
count(county, cluster_label) |>
|
|
|
|
|
|
group_by(county) |>
|
|
|
|
|
|
mutate(share = n / sum(n)) |>
|
|
|
|
|
|
ungroup() |>
|
|
|
|
|
|
mutate(
|
|
|
|
|
|
county = factor(county, levels = county_order),
|
|
|
|
|
|
cluster_label = factor(cluster_label, levels = cluster_labels),
|
|
|
|
|
|
facet = "Share of municipalities",
|
|
|
|
|
|
pop_label = NA_character_
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
county_pop_f <- county_pop |>
|
|
|
|
|
|
mutate(facet = "Share of population")
|
|
|
|
|
|
|
|
|
|
|
|
combined <- bind_rows(
|
|
|
|
|
|
county_munis |> select(county, cluster_label, share, facet, pop_label),
|
|
|
|
|
|
county_pop_f |> select(county, cluster_label, share, facet, pop_label)
|
|
|
|
|
|
) |>
|
|
|
|
|
|
mutate(facet = factor(facet, levels = c("Share of municipalities",
|
|
|
|
|
|
"Share of population")))
|
|
|
|
|
|
|
|
|
|
|
|
fig_county <- ggplot(combined, aes(x = share, y = county, fill = cluster_label)) +
|
|
|
|
|
|
geom_col(position = "stack", width = 0.8) +
|
|
|
|
|
|
geom_text(
|
|
|
|
|
|
aes(label = pop_label),
|
|
|
|
|
|
position = position_stack(vjust = 0.5),
|
|
|
|
|
|
size = 3, family = "source_sans_3", colour = "grey20",
|
|
|
|
|
|
check_overlap = TRUE, na.rm = TRUE
|
|
|
|
|
|
) +
|
|
|
|
|
|
facet_wrap(~facet) +
|
2026-05-26 09:10:52 +02:00
|
|
|
|
scale_fill_manual(values = cluster_palette |> set_names(cluster_labels),
|
|
|
|
|
|
name = NULL) +
|
2026-05-26 09:35:04 +02:00
|
|
|
|
scale_x_continuous(labels = scales::percent_format(), expand = c(0, 0)) +
|
2026-05-26 10:41:13 +02:00
|
|
|
|
labs(y = NULL, x = NULL) +
|
2026-05-26 09:10:52 +02:00
|
|
|
|
theme(
|
2026-05-26 10:41:13 +02:00
|
|
|
|
axis.text.y = element_text(size = 13),
|
|
|
|
|
|
strip.text = element_text(size = 14, face = "bold"),
|
2026-05-26 09:10:52 +02:00
|
|
|
|
legend.position = "bottom",
|
2026-05-26 10:41:13 +02:00
|
|
|
|
legend.text = element_text(size = 12)
|
2026-05-26 09:10:52 +02:00
|
|
|
|
) +
|
2026-05-26 09:35:04 +02:00
|
|
|
|
guides(fill = guide_legend(nrow = 3))
|
2026-05-26 09:10:52 +02:00
|
|
|
|
|
|
|
|
|
|
ggsave("ppt/figures/slide4_county.png", fig_county,
|
2026-05-26 10:41:13 +02:00
|
|
|
|
width = 14, height = 9, dpi = 150)
|
2026-05-26 09:10:52 +02:00
|
|
|
|
message("Saved: ppt/figures/slide4_county.png")
|
2026-05-26 11:03:19 +02:00
|
|
|
|
|
|
|
|
|
|
# Standalone: absolute population stacked bars (bar length = total population)
|
|
|
|
|
|
fig_county_abs <- ggplot(county_pop,
|
|
|
|
|
|
aes(x = pop, y = county, fill = cluster_label)) +
|
|
|
|
|
|
geom_col(width = 0.8) +
|
|
|
|
|
|
geom_text(
|
|
|
|
|
|
aes(label = pop_label),
|
|
|
|
|
|
position = position_stack(vjust = 0.5),
|
|
|
|
|
|
size = 3.3, family = "source_sans_3", colour = "grey20",
|
|
|
|
|
|
check_overlap = TRUE
|
|
|
|
|
|
) +
|
|
|
|
|
|
scale_fill_manual(values = cluster_palette |> set_names(cluster_labels),
|
|
|
|
|
|
name = NULL) +
|
|
|
|
|
|
scale_x_continuous(labels = \(x) paste0(round(x / 1e6, 1), "M"),
|
|
|
|
|
|
expand = c(0, 0)) +
|
|
|
|
|
|
labs(y = NULL, x = "Total population") +
|
|
|
|
|
|
theme(
|
|
|
|
|
|
axis.text.y = element_text(size = 14),
|
|
|
|
|
|
legend.position = "bottom",
|
|
|
|
|
|
legend.text = element_text(size = 13)
|
|
|
|
|
|
) +
|
|
|
|
|
|
guides(fill = guide_legend(nrow = 3))
|
|
|
|
|
|
|
|
|
|
|
|
ggsave("ppt/figures/slide4_county_abs.png", fig_county_abs,
|
|
|
|
|
|
width = 9, height = 9, dpi = 150)
|
|
|
|
|
|
message("Saved: ppt/figures/slide4_county_abs.png")
|
2026-05-26 11:14:31 +02:00
|
|
|
|
|
|
|
|
|
|
# Clusters detail
|
|
|
|
|
|
cl_lb <- as.data.frame(cluster_labels) |>
|
|
|
|
|
|
rowid_to_column("c6")
|
|
|
|
|
|
|
|
|
|
|
|
cl <- read.csv("data/processed/cluster_assignment_6.csv") |>
|
|
|
|
|
|
arrange(-2) |>
|
|
|
|
|
|
left_join(
|
|
|
|
|
|
cl_lb
|
|
|
|
|
|
) |>
|
|
|
|
|
|
select(-c6)
|
2026-06-02 10:45:48 +02:00
|
|
|
|
|
|
|
|
|
|
write.csv(cl, "data/processed/cluster_assignment_6_labels.csv")
|