--- title: "Practical Workflows" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Practical Workflows} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) library(hexify) library(sf) library(ggplot2) ``` This vignette covers practical workflows for common spatial analysis tasks: sharing grids across datasets, generating grid polygons, choosing resolution, and exporting to GIS formats. ## One Grid, Many Datasets The most powerful pattern in hexify is defining a grid once and reusing it across multiple datasets. This ensures spatial consistency and eliminates parameter repetition. ### The Problem You often have: - Several independent datasets (observations, sensors, surveys) - All in longitude/latitude coordinates - Collected at different times or from different sources You want to: - Put everything on one common global grid - Be sure the grids actually match - Combine results later without subtle errors ### The Solution: Shared Grid Objects ```{r one-grid-many-datasets} # Step 1: Define the grid once # This is your shared spatial reference system grid <- hex_grid(area_km2 = 5000) print(grid) # Step 2: Create multiple datasets with different structures set.seed(123) # Dataset 1: Bird observations (note different column names) bird_obs <- data.frame( species = sample(c("Passer domesticus", "Turdus merula", "Parus major"), 50, replace = TRUE), longitude = runif(50, -10, 30), latitude = runif(50, 35, 60) ) # Dataset 2: Mammal records mammal_obs <- data.frame( species = sample(c("Vulpes vulpes", "Meles meles", "Sciurus vulgaris"), 40, replace = TRUE), lon = runif(40, -10, 30), lat = runif(40, 35, 60) ) # Dataset 3: Climate stations climate_data <- data.frame( station_id = paste0("WS", 1:20), x = runif(20, -10, 30), y = runif(20, 35, 60), temp_c = rnorm(20, 12, 5) ) # Step 3: Attach all datasets to the SAME grid birds <- hexify(bird_obs, lon = "longitude", lat = "latitude", grid = grid) mammals <- hexify(mammal_obs, lon = "lon", lat = "lat", grid = grid) climate <- hexify(climate_data, lon = "x", lat = "y", grid = grid) cat("Birds: ", nrow(as.data.frame(birds)), "observations in", n_cells(birds), "cells\n") cat("Mammals:", nrow(as.data.frame(mammals)), "observations in", n_cells(mammals), "cells\n") cat("Climate:", nrow(as.data.frame(climate)), "stations in", n_cells(climate), "cells\n") ``` ### Combining at the Cell Level Once data are hexified, longitude/latitude no longer matter for analysis. The `cell_id` becomes the shared spatial key: ```{r cell-level-analysis} # Extract data frames with cell IDs birds_df <- as.data.frame(birds) birds_df$cell_id <- birds@cell_id mammals_df <- as.data.frame(mammals) mammals_df$cell_id <- mammals@cell_id climate_df <- as.data.frame(climate) climate_df$cell_id <- climate@cell_id # Aggregate each dataset by cell bird_richness <- aggregate( species ~ cell_id, data = birds_df, FUN = function(x) length(unique(x)) ) names(bird_richness)[2] <- "bird_species" mammal_richness <- aggregate( species ~ cell_id, data = mammals_df, FUN = function(x) length(unique(x)) ) names(mammal_richness)[2] <- "mammal_species" mean_temp <- aggregate( temp_c ~ cell_id, data = climate_df, FUN = mean ) names(mean_temp)[2] <- "mean_temp" # Join datasets by cell_id - guaranteed to align because same grid combined <- merge(bird_richness, mammal_richness, by = "cell_id", all = TRUE) combined <- merge(combined, mean_temp, by = "cell_id", all = TRUE) head(combined) ``` ## Grid Generation hexify provides functions to generate grid polygons over regions for visualization and analysis. ### Grid Over a Rectangular Region ```{r grid-rect, fig.width=7, fig.height=5} # Create grid specification grid <- hex_grid(area_km2 = 5000) # Generate hexagons over Western Europe europe_hexes <- grid_rect(c(-10, 35, 25, 60), grid) # Get European countries for context europe <- hexify_world[hexify_world$continent == "Europe", ] ggplot() + geom_sf(data = europe, fill = "gray95", color = "gray60") + geom_sf(data = europe_hexes, fill = NA, color = "steelblue", linewidth = 0.4) + coord_sf(xlim = c(-10, 25), ylim = c(35, 60)) + labs(title = sprintf("Hexagonal Grid (~%.0f km² cells)", grid@area_km2)) + theme_minimal() ``` ### Grid Over a Polygon (Shapefile) Clip a grid to any sf polygon boundary: ```{r grid-polygon, fig.width=6, fig.height=6} # Get France boundary france <- hexify_world[hexify_world$name == "France", ] # Generate grid covering mainland France grid <- hex_grid(area_km2 = 2000) france_grid <- grid_rect(c(-5, 41, 10, 52), grid) # Clip grid to France boundary france_grid_clipped <- st_intersection(france_grid, st_geometry(france)) ggplot() + geom_sf(data = france, fill = "gray95", color = "gray40", linewidth = 0.5) + geom_sf(data = france_grid_clipped, fill = alpha("steelblue", 0.3), color = "steelblue", linewidth = 0.3) + coord_sf(xlim = c(-5, 10), ylim = c(41, 52)) + labs(title = sprintf("Hexagonal Grid Clipped to France (~%.0f km² cells)", grid@area_km2)) + theme_minimal() ``` ### Global Grid ```{r grid-global, fig.width=8, fig.height=4, warning=FALSE} # Coarse global grid (be careful with fine grids - many cells!) grid <- hex_grid(area_km2 = 500000) global_hexes <- grid_global(grid) ggplot() + geom_sf(data = hexify_world, fill = "gray90", color = "gray70", linewidth = 0.2) + geom_sf(data = global_hexes, fill = NA, color = "darkgreen", linewidth = 0.3) + labs(title = sprintf("Global Hexagonal Grid (~%.0f km² cells)", grid@area_km2)) + theme_minimal() + theme(axis.text = element_blank(), axis.ticks = element_blank()) ``` ## Multi-Resolution Analysis Analyze data at multiple spatial scales using different target areas. ```{r multi-resolution} # Sample data set.seed(42) observations <- data.frame( species = sample(c("Species A", "Species B", "Species C"), 100, replace = TRUE), lon = runif(100, -10, 30), lat = runif(100, 35, 60) ) # Fine resolution (~1000 km² cells) grid_fine <- hex_grid(area_km2 = 1000) obs_fine <- hexify(observations, lon = "lon", lat = "lat", grid = grid_fine) # Coarse resolution (~10000 km² cells) grid_coarse <- hex_grid(area_km2 = 10000) obs_coarse <- hexify(observations, lon = "lon", lat = "lat", grid = grid_coarse) cat(sprintf("Fine resolution: %d unique cells (area: %.1f km²)\n", n_cells(obs_fine), grid_fine@area_km2)) cat(sprintf("Coarse resolution: %d unique cells (area: %.1f km²)\n", n_cells(obs_coarse), grid_coarse@area_km2)) ``` ### Scale-Dependent Patterns ```{r multi-scale-aggregate} # Extract data with cell IDs fine_df <- as.data.frame(obs_fine) fine_df$cell_id <- obs_fine@cell_id coarse_df <- as.data.frame(obs_coarse) coarse_df$cell_id <- obs_coarse@cell_id # Species richness at each scale richness_fine <- aggregate(species ~ cell_id, data = fine_df, FUN = function(x) length(unique(x))) richness_coarse <- aggregate(species ~ cell_id, data = coarse_df, FUN = function(x) length(unique(x))) cat(sprintf("Fine scale: mean %.2f species per cell\n", mean(richness_fine$species))) cat(sprintf("Coarse scale: mean %.2f species per cell\n", mean(richness_coarse$species))) ``` ## Spatial Joins Join datasets based on shared grid cells rather than proximity. ```{r spatial-joins} # Dataset 1: Weather stations stations <- data.frame( station_id = paste0("ST", 1:50), lon = runif(50, -10, 30), lat = runif(50, 35, 60), temperature = rnorm(50, 15, 5) ) # Dataset 2: Cities cities <- data.frame( city = c("Vienna", "Paris", "London", "Berlin", "Rome", "Madrid", "Prague", "Warsaw", "Budapest", "Amsterdam"), lon = c(16.37, 2.35, -0.12, 13.40, 12.50, -3.70, 14.42, 21.01, 19.04, 4.90), lat = c(48.21, 48.86, 51.51, 52.52, 41.90, 40.42, 50.08, 52.23, 47.50, 52.37) ) # Use a coarse grid for joining disparate points grid <- hex_grid(area_km2 = 50000) # Hexify both datasets with the same grid stations_hex <- hexify(stations, lon = "lon", lat = "lat", grid = grid) cities_hex <- hexify(cities, lon = "lon", lat = "lat", grid = grid) # Extract with cell IDs stations_df <- as.data.frame(stations_hex) stations_df$cell_id <- stations_hex@cell_id cities_df <- as.data.frame(cities_hex) cities_df$cell_id <- cities_hex@cell_id # Join by cell_id city_weather <- merge( cities_df[, c("city", "cell_id")], aggregate(temperature ~ cell_id, data = stations_df, FUN = mean), by = "cell_id", all.x = TRUE ) city_weather ``` ## Choosing Resolution ### By Target Area Use `hex_grid()` with `area_km2` to get the closest available resolution: ```{r choosing-resolution} # Target: 100 km² cells grid_100 <- hex_grid(area_km2 = 100, aperture = 3) cat(sprintf("Target ~100 km²: resolution %d (actual: %.1f km²)\n", grid_100@resolution, grid_100@area_km2)) # Target: 1000 km² cells grid_1000 <- hex_grid(area_km2 = 1000, aperture = 3) cat(sprintf("Target ~1000 km²: resolution %d (actual: %.1f km²)\n", grid_1000@resolution, grid_1000@area_km2)) # Target: 10000 km² cells grid_10000 <- hex_grid(area_km2 = 10000, aperture = 3) cat(sprintf("Target ~10000 km²: resolution %d (actual: %.1f km²)\n", grid_10000@resolution, grid_10000@area_km2)) ``` ### Resolution Table (Aperture 3) ```{r resolution-table, echo=FALSE} comparison <- hexify_compare_resolutions(aperture = 3, res_range = 0:15) comparison$n_cells_fmt <- ifelse( comparison$n_cells > 1e6, sprintf("%.1fM", comparison$n_cells / 1e6), ifelse(comparison$n_cells > 1e3, sprintf("%.1fK", comparison$n_cells / 1e3), as.character(comparison$n_cells)) ) knitr::kable( comparison[, c("resolution", "n_cells_fmt", "cell_area_km2", "cell_spacing_km")], col.names = c("Resolution", "# Cells", "Cell Area (km²)", "Spacing (km)"), digits = 1 ) ``` ### Comparing Apertures Different apertures offer different resolution steps: ```{r compare-apertures} target_area <- 1000 # km² cat(sprintf("Target: ~%d km² cells\n\n", target_area)) for (ap in c(3, 4, 7)) { grid <- hex_grid(area_km2 = target_area, aperture = ap) n_cells <- 10 * (ap^grid@resolution) + 2 cat(sprintf("Aperture %d: resolution %d -> %.1f km² (%s cells globally)\n", ap, grid@resolution, grid@area_km2, format(n_cells, big.mark = ","))) } ``` | Aperture | Best For | Trade-offs | |----------|----------|------------| | 3 | Fine resolution control, dggridR compatibility | Slowest cell growth | | 4 | Power-of-2 scaling, GIS workflows | Moderate resolution steps | | 7 | Rapid cell count growth, coarse analysis | Largest resolution jumps | | 4/3 | Balance of 4's fast start + 3's fine control | More complex indexing | ## Working with sf ### Convert HexData to sf ```{r sf-workflow} # Hexify some data grid <- hex_grid(area_km2 = 20000) result <- hexify(cities, lon = "lon", lat = "lat", grid = grid) # Convert to sf points (uses cell centers) sf_points <- as_sf(result, geometry = "point") class(sf_points) # Convert to sf polygons (for choropleth maps) sf_polys <- as_sf(result, geometry = "polygon") class(sf_polys) # Or generate polygons directly from cell IDs unique_cells <- cells(result) cell_polys <- cell_to_sf(unique_cells, grid) ``` ### Visualize with ggplot2 ```{r sf-plot, fig.width=7, fig.height=5} europe <- hexify_world[hexify_world$continent == "Europe", ] ggplot() + geom_sf(data = europe, fill = "ivory", color = "gray70") + geom_sf(data = cell_polys, fill = "steelblue", alpha = 0.5, color = "darkblue") + coord_sf(xlim = c(-10, 25), ylim = c(35, 58)) + labs(title = "European Cities - Hexagonal Grid") + theme_minimal() ``` ## Export to GIS Formats Use sf's `st_write()` to export grids for use in external GIS software: ```{r export-formats, eval=FALSE} # Generate a grid grid <- hex_grid(area_km2 = 10000) europe <- grid_rect(c(-10, 35, 25, 60), grid) # Export to various formats st_write(europe, "europe_grid.gpkg", layer = "hexgrid") # GeoPackage st_write(europe, "europe_grid.shp") # Shapefile st_write(europe, "europe_grid.geojson") # GeoJSON st_write(europe, "europe_grid.kml", layer = "hexgrid") # KML (Google Earth) ``` ## Edge Cases ### Handling NA Coordinates ```{r edge-cases} data_with_na <- data.frame( lon = c(16.37, NA, 2.35, 13.40), lat = c(48.21, 48.86, NA, 52.52) ) grid <- hex_grid(area_km2 = 1000) result <- hexify(data_with_na, lon = "lon", lat = "lat", grid = grid) # Check which rows have valid cell assignments cat("Cell IDs:", result@cell_id, "\n") cat("NA indicates invalid coordinates\n") ``` ### Polar Regions Coordinates with latitude > 89° may have projection artifacts. The grid remains valid, but polygon visualization can be distorted near poles. ### Date Line The date line (lon = ±180°) is handled correctly. Use `st_wrap_dateline()` when visualizing polygons that cross it: ```{r dateline, eval=FALSE} # For polygons crossing the date line wrapped <- st_wrap_dateline( polygons, options = c("WRAPDATELINE=YES", "DATELINEOFFSET=180"), quiet = TRUE ) ``` ## Function Summary | Task | Function | |------|----------| | Create grid specification | `hex_grid(area_km2 = ...)` | | Assign points to cells | `hexify(df, lon, lat, grid)` | | Get grid from HexData | `grid_info(result)` | | Get unique cell IDs | `cells(result)` | | Count cells | `n_cells(result)` | | Extract data frame | `as.data.frame(result)` | | Convert to sf | `as_sf(result, geometry = "polygon")` | | Generate polygons | `cell_to_sf(cell_ids, grid)` | | Grid over region | `grid_rect(bbox, grid)` | | Global grid | `grid_global(grid)` | | Coordinate conversion | `lonlat_to_cell()`, `cell_to_lonlat()` | | Compare resolutions | `hexify_compare_resolutions()` | ## See Also - `vignette("quickstart")` - Getting started with hexify - `vignette("visualization")` - Plotting with `plot()`, `hexify_heatmap()` - `vignette("theory")` - Mathematical foundations (ISEA projection, apertures)