## ----setup, include = FALSE---------------------------------------------------
# Chunk options used when this script is knitted as a vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----libraries, message = FALSE, warning = FALSE------------------------------
library(dplyr)
library(rbmi)
library(rbmiUtils)

## ----load-data----------------------------------------------------------------
data("ADMI", package = "rbmiUtils")  # Full imputed dataset
data("ADEFF", package = "rbmiUtils") # Original data with missing values

# Check dimensions
cat("Full imputed dataset (ADMI):", nrow(ADMI), "rows\n")
cat("Number of imputations:", length(unique(ADMI$IMPID)), "\n")

## ----prepare-original---------------------------------------------------------
# Add the TRT column (copied from TRT01P) that `vars` refers to below, and
# store the subject identifier as character.
original <- ADEFF |>
  mutate(
    TRT = TRT01P,
    USUBJID = as.character(USUBJID)
  )

# Count missing values
n_missing <- sum(is.na(original$CHG))
cat("Missing values in original data:", n_missing, "\n")

## ----define-vars--------------------------------------------------------------
# Column roles shared by the reduce and expand calls below.
vars <- set_vars(
  subjid = "USUBJID",
  visit = "AVISIT",
  group = "TRT",
  outcome = "CHG"
)

## ----reduce-------------------------------------------------------------------
# Reduce the full imputed dataset against the original data for storage.
reduced <- reduce_imputed_data(ADMI, original, vars)

cat("Full imputed rows:", nrow(ADMI), "\n")
cat("Reduced rows:", nrow(reduced), "\n")
cat("Compression ratio:", round(100 * nrow(reduced) / nrow(ADMI), 1), "%\n")

## ----examine-reduced----------------------------------------------------------
# First few rows
head(reduced)

# Structure matches original imputed data
cat("\nColumns in reduced data:\n")
cat(paste(names(reduced), collapse = ", "))

## ----expand-------------------------------------------------------------------
# Rebuild the full imputed dataset from the reduced form plus the original data.
expanded <- expand_imputed_data(reduced, original, vars)

cat("Expanded rows:", nrow(expanded), "\n")
cat("Original ADMI rows:", nrow(ADMI), "\n")

## ----verify-------------------------------------------------------------------
# Sort both datasets for comparison
admi_sorted <- ADMI |>
  arrange(IMPID, USUBJID, AVISIT)

expanded_sorted <- expanded |>
  arrange(IMPID, USUBJID, AVISIT)

# Compare CHG values.
# all.equal() returns TRUE or a character vector describing the differences
# (never FALSE), so keep the raw report and derive a strict logical flag from
# it with isTRUE().
chg_comparison <- all.equal(
  admi_sorted$CHG,
  expanded_sorted$CHG,
  tolerance = 1e-10
)
all_equal <- isTRUE(chg_comparison)

cat("Data integrity check:", all_equal, "\n")
if (!all_equal) {
  # Show what differed instead of silently reporting only FALSE.
  cat("Differences:\n", paste0("  ", chg_comparison, collapse = "\n"), "\n", sep = "")
}

## ----save-workflow, eval = FALSE----------------------------------------------
# # After imputation
# impute_obj <- impute(draws_obj, references = c("Placebo" = "Placebo", "Drug A" = "Placebo"))
# full_imputed <- get_imputed_data(impute_obj)
#
# # Reduce for storage
# reduced <- reduce_imputed_data(full_imputed, original_data, vars)
#
# # Save both (reduced is much smaller)
# saveRDS(reduced, "imputed_reduced.rds")
# saveRDS(original_data, "original_data.rds")

## ----load-workflow, eval = FALSE----------------------------------------------
# # Load saved data
# reduced <- readRDS("imputed_reduced.rds")
# original_data <- readRDS("original_data.rds")
#
# # Expand when needed for analysis
# full_imputed <- expand_imputed_data(reduced, original_data, vars)
#
# # Run analysis
# ana_obj <- analyse_mi_data(
#   data = full_imputed,
#   vars = vars,
#   method = method,
#   fun = ancova
# )

## ----no-missing, eval = FALSE-------------------------------------------------
# # If original has no missing values
# reduced <- reduce_imputed_data(full_imputed, complete_data, vars)
# nrow(reduced)
# #> [1] 0
#
# # expand_imputed_data handles this correctly
# expanded <- expand_imputed_data(reduced, complete_data, vars)
# # Returns original data with IMPID = "1"