## ----setup, include = FALSE---------------------------------------------------
# Global chunk options for this document. Because eval = FALSE is set here,
# the example chunks below are not executed and are kept as commented-out code.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## ----inspect------------------------------------------------------------------
# raw <- import_steps_data("my_data.sav")
# cols <- detect_steps_columns(raw)
#
# # Logical mask of entries that were not detected (NULL).
# # vapply() always returns a logical vector, even when `cols` is empty,
# # whereas sapply() would return list() and break the negation below.
# is_undetected <- vapply(cols, is.null, logical(1))
#
# # See all detected columns
# str(cols[!is_undetected])
#
# # See what was NOT detected
# names(cols[is_undetected])

## ----convert------------------------------------------------------------------
# # Glucose: mg/dL to mmol/L (divide by 18)
# raw$b5 <- raw$b5 / 18
#
# # Cholesterol: mg/dL to mmol/L (divide by 38.67)
# raw$b8 <- raw$b8 / 38.67

## ----merge--------------------------------------------------------------------
# step1 <- haven::read_spss("step1_interview.sav")
# step2 <- haven::read_spss("step2_measurements.sav")
# step3 <- haven::read_spss("step3_biochemistry.sav")
#
# # Join the three files on the shared "pid" column, keeping every row of step1
# combined <- dplyr::left_join(step1, step2, by = "pid") |>
#   dplyr::left_join(step3, by = "pid")
#
# # Save combined file
# haven::write_sav(combined, file.path(tempdir(), "steps_combined.sav"))
#
# # Or import directly
# raw <- combined |> janitor::clean_names()

## ----diagnostic---------------------------------------------------------------
# library(stepssurvey)
#
# raw <- import_steps_data("my_data.sav")
# cols <- detect_steps_columns(raw)
#
# # TRUE for every entry of `cols` that was not detected (NULL)
# is_undetected <- vapply(cols, is.null, logical(1))
#
# # Summary
# cat("Rows:", nrow(raw), "\n")
# cat("Columns:", ncol(raw), "\n")
# cat("Detected:", sum(!is_undetected), "/", length(cols), "\n")
#
# # Check key variables
# if (!is.null(cols$age)) {
#   cat("\nAge range:", range(raw[[cols$age]], na.rm = TRUE), "\n")
#   cat("Age NAs:", sum(is.na(raw[[cols$age]])), "\n")
# }
# if (!is.null(cols$sex)) {
#   cat("\nSex distribution:\n")
#   print(table(raw[[cols$sex]], useNA = "ifany"))
# }
# if (!is.null(cols$weight_step1)) {
#   wt <- raw[[cols$weight_step1]]
#   cat("\nWeight range:", round(range(wt, na.rm = TRUE), 3), "\n")
#   cat("Weight NAs:", sum(is.na(wt)), "\n")
# }
#
# # List undetected variables
# # (named `undetected` rather than `missing` to avoid masking base::missing())
# undetected <- names(cols[is_undetected])
# cat("\nUndetected variables (", length(undetected), "):\n")
# cat(paste(" ", undetected, collapse = "\n"), "\n")