## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) library(ggplot2) library(sf) library(dplyr) library(tidyr) library(tidycensuskr) library(biscale) library(cowplot) library(grid) library(ggspatial) ## ----load-data_1-------------------------------------------------------------- # Load 2020 boundaries data(adm2_sf_2020) ## ----filter-merge_1----------------------------------------------------------- # Load census population data for 2010 and 2020 df_2010_pop <- anycensus(year = 2010, codes = c("Gyeongsangnam-do", "Busan", "Ulsan"), type = "population") df_2020_pop <- anycensus(year = 2020, codes = c("Gyeongsangnam-do", "Busan", "Ulsan"), type = "population") # Merge with spatial data and compute population change sf_target <- adm2_sf_2020 |> inner_join(df_2010_pop, by = "adm2_code") |> inner_join(df_2020_pop, by = "adm2_code") |> mutate(change = `all households_total_prs.y` - `all households_total_prs.x`) ## ----choropleth_1, fig.width=7, fig.height=5---------------------------------- # Choropleth map for population change map <- ggplot(sf_target) + geom_sf(aes(fill = change), color = "gray80", size = 0.1) + labs(title = "Population change between 2010 and 2020") + scale_fill_gradient2( low = "#2C7BB6", mid = "white", high = "#D7191C", midpoint = 0, name = "Change" ) + theme_void() + annotation_scale(location = "tr", width_hint = 0.25, text_cex = 0.7, line_width = 0.7) # National boundary (union of all districts) sf_korea_boundary <- adm2_sf_2020 |> summarise(geometry = st_union(geometry)) # Target region boundary (union of selected provinces/cities) sf_target_boundary <- sf_target |> summarise(geometry = st_union(geometry)) # Inset map: whole Korea + highlighted target region korea_inset <- ggplot() + geom_sf(data = sf_korea_boundary, fill = "grey90", color = "grey90") + geom_sf(data = sf_target_boundary, fill = "grey30", color = "grey30") + theme_void() # Combine main map and inset cowplot::ggdraw() + cowplot::draw_plot(map) + cowplot::draw_plot(korea_inset, x = 0.7, y = 0.05, width = 0.25, height = 0.25) + draw_grob(grid::rectGrob(gp = gpar(col = "black", fill = NA, lwd = 0.6)), x = 0.7, y = 0.05, width = 0.25, height = 0.25) ## ----filter-merge_2----------------------------------------------------------- # Load census data df_2020_pop <- anycensus(year = 2020, type = "population") df_2020_tax <- anycensus(year = 2020, type = "tax") # Merge population with boundaries adm2_sf_2020_pop <- adm2_sf_2020 |> left_join(df_2020_pop, by = "adm2_code") |> mutate( adm2_code_chr = as.character(adm2_code), adm2_prefix4 = substr(adm2_code_chr, 1, 4), last_digit = substr(adm2_code_chr, 5, 5) ) # Aggregate smaller units (adm2_code ending not with 0) into 4-digit groups sf_union_needed <- adm2_sf_2020_pop |> filter(last_digit != "0") |> group_by(adm2_prefix4) |> summarise( across(where(is.numeric), ~ sum(.x, na.rm = TRUE)), geometry = st_union(geometry), .groups = "drop" ) |> mutate(adm2_code = as.numeric(paste0(adm2_prefix4, "0"))) # Combine aggregated units with existing "0"-ending districts adm2_sf_2020_unioned <- adm2_sf_2020_pop |> filter(last_digit == "0") |> bind_rows(sf_union_needed) # Join with tax data sf_final <- adm2_sf_2020_unioned |> left_join(df_2020_tax, by = "adm2_code") ## ----bivariate_2, fig.width=7, fig.height=5----------------------------------- # Create 3x3 bivariate classes (population vs tax) bi_data <- bi_class( sf_final, x = `all households_total_prs`, y = income_general_mkr, style = "quantile", dim = 3 ) # Bivariate legend legend <- bi_legend( pal = "DkCyan", dim = 3, xlab = "Low to High population", ylab = "Low to High tax", size = 7 ) # Mapping cowplot::ggdraw() + cowplot::draw_plot( ggplot() + geom_sf(data = bi_data, aes(fill = bi_class), color = NA) + bi_scale_fill(pal = "DkCyan", dim = 3, guide = "none") + bi_theme() + labs(title = "Population vs Tax (2020)") + theme(plot.title = element_text(size = 10)) ) + cowplot::draw_plot(legend, x = 0.7, y = 0.1, width = 0.3, height = 0.3) ## ----data_load_3, include=TRUE------------------------------------------------ # Load population data for the Seoul Metropolitan Area (SMA) df_sma <- anycensus( year = 2020, codes = c("Seoul", "Gyeonggi-do", "Incheon"), type = "population" ) # Calculate sex ratio (males per 100 females) df_sma <- df_sma |> mutate(sex_ratio = `all households_male_prs` / `all households_female_prs` * 100) # Extract overall distribution (all SMA combined) df_all <- df_sma |> select(sex_ratio) ## ----histogram_3, include=TRUE, fig.width=7, fig.height=3--------------------- ggplot() + # Background: overall distribution across all SMA geom_histogram( data = df_all, aes(x = sex_ratio, y = after_stat(density)), bins = 20, fill = "grey80", color = NA, alpha = 1 ) + # Regional distributions geom_histogram( data = df_sma, aes(x = sex_ratio, y = after_stat(density), fill = adm1), bins = 20, alpha = 0.7, color = NA, position = "identity" ) + facet_wrap(~ adm1, ncol = 3, scales = "free_y") + scale_fill_manual(values = c( "Seoul" = "darkorange", "Incheon" = "purple", "Gyeonggi-do" = "cyan4" )) + labs( title = "Sex ratio distribution by region (SMA, 2020)", x = "Sex ratio (male per 100 females)", y = "Density" ) + theme_bw() ## ----choropleth_3, fig.width=7, fig.height=3.5-------------------------------- # Merge SMA population data with boundaries adm2_sf_2020_sma <- adm2_sf_2020 |> inner_join(df_sma, by = "adm2_code") # Choropleth map for sex ratio ggplot(adm2_sf_2020_sma) + geom_sf(aes(fill = sex_ratio), color = "gray", size = 0.01) + scale_fill_gradient2( low = "#2C7BB6", mid = "white", high = "#D7191C", midpoint = 100, # 100 = equal male/female name = "Sex ratio\n(males per 100 females)" ) + labs(title = "Sex ratio in the Seoul Metropolitan Area (2020)") + theme_void()