updated figures for ppt

2026-05-26 10:41:13 +02:00 · 2026-05-26 10:41:13 +02:00 · e0ad813644
commit e0ad813644
parent 5d4f816bc9
9 changed files with 64 additions and 142 deletions
--- a/ppt/.~lock.Ruralitic-Hig-20260526-eng.pptx#
+++ b/ppt/.~lock.Ruralitic-Hig-20260526-eng.pptx#
@ -0,0 +1 @@
 ,pab,onigashima,26.05.2026 10:40,file:///home/pab/.config/libreoffice/4;
--- a/ppt/Ruralitic-Hig-20260526-eng.pptx
+++ b/ppt/Ruralitic-Hig-20260526-eng.pptx
--- a/ppt/content.R
+++ b/ppt/content.R
@ -1,11 +1,7 @@
 # ============================================================
-#  PPT CONTENT — Empirical urban-rural typology of Swedish municipalities
+#  PPT CONTENT: Empirical urban-rural typology of Swedish municipalities
 #  Correspondence Analysis + hierarchical clustering (2022 sampling)
 #  Six-cluster cut
 #
 #  Run from the project root (ruralitic-qrm/).
 #  Figures are saved to ppt/figures/. Slide text is in the
 #  comments below each section header.
 # ============================================================
 library(tidyverse)
@ -17,7 +13,7 @@ library(showtext)
 font_add_google("Source Sans 3", "source_sans_3")
 showtext_auto()
-theme_ppt <- theme_minimal(base_size = 14, base_family = "source_sans_3") +
+theme_ppt <- theme_minimal(base_size = 18, base_family = "source_sans_3") +
  theme(
    panel.grid.minor  = element_blank(),
    panel.grid.major  = element_line(colour = "grey92"),
@ -73,56 +69,6 @@ row_df <- as.data.frame(afc$row$coord[, 1:2]) |>
  rownames_to_column("municipality") |>
  left_join(clusters, by = "municipality")
 # ============================================================
 #  SLIDE 1 — Data & motivation
 #  Title: "Building an urban–rural typology from the data"
 #
 #  Bullet points:
 #   • Sweden's 290 municipalities are routinely classified by
 #     administrative or population-size rules (SCB). These categories
 #     are imposed from outside the data.
 #   • This analysis asks instead: how do municipalities actually differ
 #     across structural dimensions?
 #   • Data source: Statistics Sweden, 2022 project sampling.
 #   • Six ACTIVE variable blocks:
 #       Education (4 attainment levels)
 #       Employment (16 activity sectors)
 #       Housing (rented / tenant-owned / owner-occupied)
 #       Workplace mobility (commuters in, commuters out, working locally)
 #       Migration (in- and outmigration)
 #       Demography (retirees, number of localities)
 #   • Two SUPPLEMENTARY blocks projected post-hoc:
 #       Educational provision (pre-school through HE, by ownership)
 #       Opinion (survey satisfaction with local schools)
 #   • Key pre-processing: block normalisation. Within each block,
 #     every municipality is rescaled to the same total, preventing
 #     Stockholm from dominating the analysis due to sheer size.
 #
 #  No figure needed — use the variable block list as a visual
 #  schematic or table in the slide.
 # ============================================================
 # ============================================================
 #  SLIDE 2 — Correspondence analysis: the space of municipalities
 #  Title: "Two dimensions capture two-thirds of the variation"
 #
 #  Bullet points:
 #   • CA places all 290 municipalities in a low-dimensional space
 #     where proximity = similarity on the active variables.
 #   • Dim 1 (rural–urban): Left pole → agriculture, mining &
 #     manufacturing, owner-occupied housing, upper-secondary education.
 #     Right pole → IT, finance, professional services, post-graduate
 #     attainment, apartment housing.
 #   • Dim 2 (labour-market self-containment): Top → residents work
 #     where they live (Göteborg, Malmö, Umeå). Bottom → outbound
 #     commuters, residential satellites (Knivsta, Salem, Staffanstorp).
 #   • Together Dim 1 + Dim 2 account for ~65% of total variability.
 #
 #  Figure: slide2_biplot.png
 # ============================================================
 label_munis <- c("Stockholm", "Göteborg", "Malmö", "Uppsala", "Lund", "Umeå",
                 "Linköping", "Solna", "Danderyd", "Kiruna", "Gotland",
                 "Knivsta", "Falköping", "Tomelilla", "Skellefteå", "Piteå",
@ -147,14 +93,14 @@ fig_biplot <- ggplot() +
  geom_text_repel(
    data    = row_df |> filter(municipality %in% label_munis),
    aes(`Dim 1`, `Dim 2`, label = municipality),
-    size = 3.5, colour = "grey20", family = "source_sans_3",
+    size = 5, colour = "grey20", family = "source_sans_3",
    max.overlaps = 30, segment.size = 0.25
  ) +
  geom_point(data = col_df, aes(`Dim 1`, `Dim 2`),
-             shape = 17, colour = "firebrick", size = 3) +
+             shape = 17, colour = "firebrick", size = 3.5) +
  geom_text_repel(
    data = col_df, aes(`Dim 1`, `Dim 2`, label = variable),
-    colour = "firebrick", size = 3, family = "source_sans_3",
+    colour = "firebrick", size = 4.5, family = "source_sans_3",
    max.overlaps = 30, segment.size = 0.25
  ) +
  scale_colour_manual(values = cluster_palette |> set_names(cluster_labels),
@ -163,54 +109,13 @@ fig_biplot <- ggplot() +
    x = paste0("Dim 1 — rural–urban (", dim1_pct, "%)"),
    y = paste0("Dim 2 — labour-market self-containment (", dim2_pct, "%)")
  ) +
-  guides(colour = guide_legend(nrow = 2))
+  theme(legend.position = "none")
  #guides(colour = guide_legend(nrow = 2))
 ggsave("ppt/figures/slide2_biplot.png", fig_biplot,
       width = 11, height = 7, dpi = 150)
 message("Saved: ppt/figures/slide2_biplot.png")
 # ============================================================
 #  SLIDE 3 — Six empirical types of municipality
 #  Title: "Six coherent types emerge from Ward clustering"
 #
 #  Cluster descriptions:
 #
 #  Cl 1 — Remote & peripheral
 #    Most rural extreme. Agriculture/forestry/fishing, sparsely
 #    populated, own full educational infrastructure at every age
 #    (komvux, preschool). Examples: Kiruna, Piteå, Skellefteå, Gotland.
 #
 #  Cl 2 — Central industrial towns
 #    Rural-industrial, but not as remote. Mining & manufacturing
 #    dominant. Owner-occupied housing, upper-secondary ceiling.
 #    Examples: Falköping, Lindesberg, Hedemora.
 #
 #  Cl 3 — Peri-rural commuter belt
 #    Small southern and central rural municipalities. Many residents
 #    commute out. Owner-occupied, construction and agriculture visible.
 #    Below-average satisfaction with local high schools.
 #    Examples: Tomelilla, Osby, Klippan, Sölvesborg.
 #
 #  Cl 4 — Regional service centres
 #    Mid-sized cities with self-contained labour markets. Rented and
 #    tenant-owned housing, public administration, post-secondary
 #    attainment. Examples: Göteborg, Malmö, Umeå, Linköping, Kalmar.
 #
 #  Cl 5 — Affluent suburbs & university satellites
 #    Outbound commuters, post-secondary attainment, tenant-owned
 #    housing. Residential satellites whose labour markets sit elsewhere.
 #    Examples: Lund, Mölndal, Partille, Huddinge, Knivsta, Kungsbacka.
 #
 #  Cl 6 — Inner Stockholm core
 #    Inbound commuting, IT and finance employment, apartment housing,
 #    post-graduate attainment at extreme levels. These are destinations
 #    in the commuting network, not origins.
 #    Examples: Stockholm, Solna, Sundbyberg, Danderyd, Lidingö, Täby.
 #
 #  Figures: slide3_clusters.png (main) · slide3_dendrogram.png (inset)
 # ============================================================
 centroids <- row_df |>
  group_by(cluster_label) |>
  summarise(`Dim 1` = mean(`Dim 1`), `Dim 2` = mean(`Dim 2`), .groups = "drop")
@ -224,7 +129,7 @@ fig_clusters <- ggplot(row_df, aes(`Dim 1`, `Dim 2`, colour = cluster_label)) +
  geom_label_repel(
    data  = centroids, aes(label = cluster_label),
    fill  = "white", colour = "black",
-    family = "source_sans_3", size = 3.5,
+    family = "source_sans_3", size = 5,
    label.size = 0.25, label.padding = unit(0.3, "lines"),
    min.segment.length = 0, max.overlaps = 20
  ) +
@ -254,39 +159,12 @@ fig_dendro <- fviz_dend(
  coord_cartesian(ylim = c(0, h_max * 1.05)) +
  guides(linewidth = "none") +
  theme(plot.title = element_blank(),
-        text = element_text(family = "source_sans_3", size = 13))
+        text = element_text(family = "source_sans_3", size = 17))
 ggsave("ppt/figures/slide3_dendrogram.png", fig_dendro,
       width = 11, height = 4.5, dpi = 150)
 message("Saved: ppt/figures/slide3_dendrogram.png")
 # ============================================================
 #  SLIDE 4 — Geography & key takeaways
 #  Title: "The typology maps coherently onto Swedish geography"
 #
 #  Bullet points:
 #   • Clusters 1 & 2 (two of the three rural types) dominate almost
 #     everywhere outside the metropolitan areas — the rural majority.
 #   • Cluster 6 (inner Stockholm core) is confined to Stockholm and
 #     Uppsala counties; cluster 5 (affluent suburbs) spreads into
 #     Skåne (Lund) and Västra Götaland (Mölndal, Partille).
 #   • Cluster 4 (regional centres) appears thinly but consistently
 #     across most counties — one or two per county.
 #
 #  Key takeaways:
 #   • Biggest empirical break: NOT metro vs. non-metro, but between
 #     three kinds of rural — remote & peripheral (Cl. 1), central
 #     industrial towns (Cl. 2), and peri-rural commuters (Cl. 3).
 #   • At the urban end, two kinds of city: self-contained regional
 #     centres (Cl. 4) vs. the metropolitan region (Cl. 5 & 6).
 #   • Within the metropolitan region, the data cleanly separate
 #     residential suburbs whose residents commute OUT (Cl. 5) from the
 #     inner core that receives commuters (Cl. 6).
 #
 #  Figure: slide4_county.png
 # ============================================================
 county_order <- c(
  "Skåne", "Blekinge", "Halland", "Kronoberg", "Kalmar", "Gotland",
  "Jönköping", "Östergötland", "Södermanland", "Västra Götaland",
@ -294,33 +172,76 @@ county_order <- c(
  "Gävleborg", "Västernorrland", "Jämtland", "Västerbotten", "Norrbotten"
 )
 # Population per municipality (2022)
 pop_raw <- readxl::read_excel("data/Municipalities_db_2.xlsx",
                              col_types = "text") |>
  filter(year == "2022") |>
  transmute(municipality, pop = as.numeric(Population))
 clusters_geo <- clusters |>
  left_join(panel_raw, by = "municipality") |>
  left_join(pop_raw,   by = "municipality") |>
  mutate(county = county_names[str_sub(code, 1, 2)]) |>
  filter(!is.na(county))
-fig_county <- clusters_geo |>
+county_pop <- clusters_geo |>
-  count(county, cluster_label) |>
+  group_by(county, cluster_label) |>
  summarise(pop = sum(pop, na.rm = TRUE), .groups = "drop") |>
  group_by(county) |>
  mutate(
-    # south at bottom → north at top: keep county_order as factor levels
+    share     = pop / sum(pop),
    pop_label = ifelse(pop >= 1e6,
                       paste0(round(pop / 1e6, 1), "M"),
                       paste0(round(pop / 1e3), "k")),
    county        = factor(county, levels = county_order),
    cluster_label = factor(cluster_label, levels = cluster_labels)
  ) |>
-  ggplot(aes(x = n, y = county, fill = cluster_label)) +
+  ungroup()
-  geom_col(position = "fill") +
+
 # Combined faceted figure: municipalities (left) and population (right)
 county_munis <- clusters_geo |>
  count(county, cluster_label) |>
  group_by(county) |>
  mutate(share = n / sum(n)) |>
  ungroup() |>
  mutate(
    county        = factor(county, levels = county_order),
    cluster_label = factor(cluster_label, levels = cluster_labels),
    facet         = "Share of municipalities",
    pop_label     = NA_character_
  )
 county_pop_f <- county_pop |>
  mutate(facet = "Share of population")
 combined <- bind_rows(
  county_munis |> select(county, cluster_label, share, facet, pop_label),
  county_pop_f |> select(county, cluster_label, share, facet, pop_label)
 ) |>
  mutate(facet = factor(facet, levels = c("Share of municipalities",
                                          "Share of population")))
 fig_county <- ggplot(combined, aes(x = share, y = county, fill = cluster_label)) +
  geom_col(position = "stack", width = 0.8) +
  geom_text(
    aes(label = pop_label),
    position = position_stack(vjust = 0.5),
    size = 3, family = "source_sans_3", colour = "grey20",
    check_overlap = TRUE, na.rm = TRUE
  ) +
  facet_wrap(~facet) +
  scale_fill_manual(values = cluster_palette |> set_names(cluster_labels),
                    name = NULL) +
  scale_x_continuous(labels = scales::percent_format(), expand = c(0, 0)) +
-  labs(y = NULL, x = "Share of municipalities") +
+  labs(y = NULL, x = NULL) +
  theme(
-    axis.text.y     = element_text(size = 11),
+    axis.text.y     = element_text(size = 13),
    strip.text      = element_text(size = 14, face = "bold"),
    legend.position = "bottom",
-    legend.text     = element_text(size = 10)
+    legend.text     = element_text(size = 12)
  ) +
  guides(fill = guide_legend(nrow = 3))
 ggsave("ppt/figures/slide4_county.png", fig_county,
-       width = 8, height = 9, dpi = 150)
+       width = 14, height = 9, dpi = 150)
 message("Saved: ppt/figures/slide4_county.png")
 message("\nAll figures written to ppt/figures/. Ready to paste into the slide deck.")
--- a/ppt/figures/slide2_biplot.png
+++ b/ppt/figures/slide2_biplot.png
--- a/ppt/figures/slide3_clusters.png
+++ b/ppt/figures/slide3_clusters.png
--- a/ppt/figures/slide3_dendrogram.png
+++ b/ppt/figures/slide3_dendrogram.png
--- a/ppt/figures/slide4_county.png
+++ b/ppt/figures/slide4_county.png
--- a/ppt/figures/slide4_county_munis.png
+++ b/ppt/figures/slide4_county_munis.png
--- a/ppt/figures/slide4_county_pop.png
+++ b/ppt/figures/slide4_county_pop.png
		`@ -0,0 +1 @@`
							`,pab,onigashima,26.05.2026 10:40,file:///home/pab/.config/libreoffice/4;`