# ============================================================ # PPT CONTENT: Empirical urban-rural typology of Swedish municipalities # Correspondence Analysis + hierarchical clustering (2022 sampling) # Six-cluster cut # ============================================================ library(tidyverse) library(FactoMineR) library(factoextra) library(ggrepel) library(showtext) font_add_google("Source Sans 3", "source_sans_3") showtext_auto() theme_ppt <- theme_minimal(base_size = 18, base_family = "source_sans_3") + theme( panel.grid.minor = element_blank(), panel.grid.major = element_line(colour = "grey92"), legend.position = "bottom", legend.title = element_text(face = "bold"), plot.margin = margin(8, 12, 8, 8) ) theme_set(theme_ppt) cluster_labels <- c( "1" = "Remote & peripheral", "2" = "Central industrial towns", "3" = "Peri-rural commuter belt", "4" = "Regional service centres", "5" = "Affluent suburbs & university satellites", "6" = "Inner Stockholm core" ) cluster_palette <- c( "1" = "#B07F4F", "2" = "#D7A86E", "3" = "#7CB07C", "4" = "#6FA8DC", "5" = "#C2738B", "6" = "#7A3A4F" ) county_names <- c( "01" = "Stockholm", "03" = "Uppsala", "04" = "Södermanland", "05" = "Östergötland", "06" = "Jönköping", "07" = "Kronoberg", "08" = "Kalmar", "09" = "Gotland", "10" = "Blekinge", "12" = "Skåne", "13" = "Halland", "14" = "Västra Götaland", "17" = "Värmland", "18" = "Örebro", "19" = "Västmanland", "20" = "Dalarna", "21" = "Gävleborg", "22" = "Västernorrland", "23" = "Jämtland", "24" = "Västerbotten", "25" = "Norrbotten" ) # Load data afc <- read_rds("data/processed/proportions_CA.rds") hcpc <- read_rds("data/processed/proportions_HCPC_6.rds") clusters <- read_csv("data/processed/cluster_assignment_6.csv", show_col_types = FALSE) |> rename(cluster = c6) |> mutate(cluster_label = cluster_labels[as.character(cluster)]) panel_raw <- readxl::read_excel("data/Municipalities_db_2.xlsx", col_types = "text", n_max = 290) |> transmute(municipality, code = str_pad(code, 4, "left", "0")) dim1_pct <- round(afc$eig[1, 2], 1) dim2_pct <- round(afc$eig[2, 2], 1) row_df <- as.data.frame(afc$row$coord[, 1:2]) |> rownames_to_column("municipality") |> left_join(clusters, by = "municipality") label_munis <- c("Stockholm", "Göteborg", "Malmö", "Uppsala", "Lund", "Umeå", "Linköping", "Solna", "Danderyd", "Kiruna", "Gotland", "Knivsta", "Falköping", "Tomelilla", "Skellefteå", "Piteå", "Partille", "Sundbyberg", "Lindesberg", "Gävle") contribs <- as.data.frame(afc$col$contrib) |> rownames_to_column("variable") |> mutate(total = `Dim 1` + `Dim 2`) |> arrange(desc(total)) |> head(15) col_df <- as.data.frame(afc$col$coord[, 1:2]) |> rownames_to_column("variable") |> filter(variable %in% contribs$variable) highlight_munis <- c("Gävle") fig_biplot <- ggplot() + geom_hline(yintercept = 0, linetype = "dashed", colour = "grey60") + geom_vline(xintercept = 0, linetype = "dashed", colour = "grey60") + geom_point(data = row_df, aes(`Dim 1`, `Dim 2`, colour = cluster_label), alpha = 0.75, size = 2.2) + geom_point(data = row_df |> filter(municipality %in% highlight_munis), aes(`Dim 1`, `Dim 2`), shape = 21, size = 5, stroke = 1.5, fill = NA, colour = "black") + geom_text_repel( data = row_df |> filter(municipality %in% label_munis), aes(`Dim 1`, `Dim 2`, label = municipality, fontface = if_else(municipality %in% highlight_munis, "bold", "plain")), size = 5, colour = "grey20", family = "source_sans_3", max.overlaps = 30, segment.size = 0.25 ) + geom_point(data = col_df, aes(`Dim 1`, `Dim 2`), shape = 17, colour = "firebrick", size = 3.5) + geom_text_repel( data = col_df, aes(`Dim 1`, `Dim 2`, label = variable), colour = "firebrick", size = 4.5, family = "source_sans_3", max.overlaps = 30, segment.size = 0.25 ) + scale_colour_manual(values = cluster_palette |> set_names(cluster_labels), name = NULL) + labs( x = paste0("Dim 1 — rural–urban (", dim1_pct, "%)"), y = paste0("Dim 2 — labour-market self-containment (", dim2_pct, "%)") ) + theme(legend.position = "none") #guides(colour = guide_legend(nrow = 2)) ggsave("ppt/figures/slide2_biplot.png", fig_biplot, width = 11, height = 7, dpi = 150) message("Saved: ppt/figures/slide2_biplot.png") centroids <- row_df |> group_by(cluster_label) |> summarise(`Dim 1` = mean(`Dim 1`), `Dim 2` = mean(`Dim 2`), .groups = "drop") fig_clusters <- ggplot(row_df, aes(`Dim 1`, `Dim 2`, colour = cluster_label)) + geom_hline(yintercept = 0, linetype = "dashed", colour = "grey70") + geom_vline(xintercept = 0, linetype = "dashed", colour = "grey70") + geom_point(alpha = 0.5, size = 2) + stat_ellipse(level = 0.68, linewidth = 0.7) + geom_point(data = centroids, size = 5, shape = 18, colour = "grey15") + geom_label_repel( data = centroids, aes(label = cluster_label), fill = "white", colour = "black", family = "source_sans_3", size = 5, label.size = 0.25, label.padding = unit(0.3, "lines"), min.segment.length = 0, max.overlaps = 20 ) + scale_colour_manual(values = cluster_palette |> set_names(cluster_labels), guide = "none") + labs( x = paste0("Dim 1 — rural–urban (", dim1_pct, "%)"), y = paste0("Dim 2 — labour-market self-containment (", dim2_pct, "%)") ) ggsave("ppt/figures/slide3_clusters.png", fig_clusters, width = 11, height = 7, dpi = 150) message("Saved: ppt/figures/slide3_clusters.png") # Dendrogram with 6-cluster cut tree <- hcpc$call$t$tree h_max <- max(tree$height) fig_dendro <- fviz_dend( tree, k = 6, show_labels = FALSE, rect = TRUE, rect_border = unname(cluster_palette), rect_fill = TRUE, k_colors = unname(cluster_palette), main = "", ylab = "Merge distance (Ward)" ) + coord_cartesian(ylim = c(0, h_max * 1.05)) + guides(linewidth = "none") + theme(plot.title = element_blank(), text = element_text(family = "source_sans_3", size = 17)) ggsave("ppt/figures/slide3_dendrogram.png", fig_dendro, width = 11, height = 4.5, dpi = 150) message("Saved: ppt/figures/slide3_dendrogram.png") county_order <- c( "Skåne", "Blekinge", "Halland", "Kronoberg", "Kalmar", "Gotland", "Jönköping", "Östergötland", "Södermanland", "Västra Götaland", "Örebro", "Västmanland", "Stockholm", "Uppsala", "Dalarna", "Värmland", "Gävleborg", "Västernorrland", "Jämtland", "Västerbotten", "Norrbotten" ) # Population per municipality (2022) pop_raw <- readxl::read_excel("data/Municipalities_db_2.xlsx", col_types = "text") |> filter(year == "2022") |> transmute(municipality, pop = as.numeric(Population)) clusters_geo <- clusters |> left_join(panel_raw, by = "municipality") |> left_join(pop_raw, by = "municipality") |> mutate(county = county_names[str_sub(code, 1, 2)]) |> filter(!is.na(county)) county_pop <- clusters_geo |> group_by(county, cluster_label) |> summarise(pop = sum(pop, na.rm = TRUE), .groups = "drop") |> group_by(county) |> mutate( share = pop / sum(pop), pop_label = ifelse(pop >= 1e6, paste0(round(pop / 1e6, 1), "M"), paste0(round(pop / 1e3), "k")), county = factor(county, levels = county_order), cluster_label = factor(cluster_label, levels = cluster_labels) ) |> ungroup() # Combined faceted figure: municipalities (left) and population (right) county_munis <- clusters_geo |> count(county, cluster_label) |> group_by(county) |> mutate(share = n / sum(n)) |> ungroup() |> mutate( county = factor(county, levels = county_order), cluster_label = factor(cluster_label, levels = cluster_labels), facet = "Share of municipalities", pop_label = NA_character_ ) county_pop_f <- county_pop |> mutate(facet = "Share of population") combined <- bind_rows( county_munis |> select(county, cluster_label, share, facet, pop_label), county_pop_f |> select(county, cluster_label, share, facet, pop_label) ) |> mutate(facet = factor(facet, levels = c("Share of municipalities", "Share of population"))) fig_county <- ggplot(combined, aes(x = share, y = county, fill = cluster_label)) + geom_col(position = "stack", width = 0.8) + geom_text( aes(label = pop_label), position = position_stack(vjust = 0.5), size = 3, family = "source_sans_3", colour = "grey20", check_overlap = TRUE, na.rm = TRUE ) + facet_wrap(~facet) + scale_fill_manual(values = cluster_palette |> set_names(cluster_labels), name = NULL) + scale_x_continuous(labels = scales::percent_format(), expand = c(0, 0)) + labs(y = NULL, x = NULL) + theme( axis.text.y = element_text(size = 13), strip.text = element_text(size = 14, face = "bold"), legend.position = "bottom", legend.text = element_text(size = 12) ) + guides(fill = guide_legend(nrow = 3)) ggsave("ppt/figures/slide4_county.png", fig_county, width = 14, height = 9, dpi = 150) message("Saved: ppt/figures/slide4_county.png") # Standalone: absolute population stacked bars (bar length = total population) fig_county_abs <- ggplot(county_pop, aes(x = pop, y = county, fill = cluster_label)) + geom_col(width = 0.8) + geom_text( aes(label = pop_label), position = position_stack(vjust = 0.5), size = 3.3, family = "source_sans_3", colour = "grey20", check_overlap = TRUE ) + scale_fill_manual(values = cluster_palette |> set_names(cluster_labels), name = NULL) + scale_x_continuous(labels = \(x) paste0(round(x / 1e6, 1), "M"), expand = c(0, 0)) + labs(y = NULL, x = "Total population") + theme( axis.text.y = element_text(size = 14), legend.position = "bottom", legend.text = element_text(size = 13) ) + guides(fill = guide_legend(nrow = 3)) ggsave("ppt/figures/slide4_county_abs.png", fig_county_abs, width = 9, height = 9, dpi = 150) message("Saved: ppt/figures/slide4_county_abs.png") # Clusters detail cl_lb <- as.data.frame(cluster_labels) |> rowid_to_column("c6") cl <- read.csv("data/processed/cluster_assignment_6.csv") |> arrange(-2) |> left_join( cl_lb ) |> select(-c6)