--- title: "Cross-Validation and Advanced Features in missoNet" author: "Yixiao Zeng, Celia M. T. Greenwood" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 3 number_sections: true fig_width: 7 fig_height: 5 vignette: > %\VignetteIndexEntry{Cross-Validation and Advanced Features in missoNet} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, fig.align = "center", out.width = "90%", dpi = 90, message = FALSE, warning = FALSE ) options(width = 100) ``` # Introduction This vignette covers advanced features of **missoNet**: * Cross-validation for optimal parameter selection * 1-SE rule for enhanced regularization * Parallel processing for large-scale problems * Adaptive search strategies * Convergence diagnostics and troubleshooting ```{r} library(missoNet) ``` # Cross-Validation Framework ## Basic Cross-Validation Cross-validation provides data-driven selection of regularization parameters: ```{r} # Generate example data sim <- generateData(n = 300, p = 30, q = 12, rho = 0.1, missing.type = "MCAR") # 5-fold cross-validation cvfit <- cv.missoNet( X = sim$X, Y = sim$Z, kfold = 5, compute.1se = TRUE, # Also compute 1-SE models seed = 123, # For reproducibility verbose = 1 ) # ====== Cross-Validation Summary ====== { cat("\nNumber of folds:", cvfit$kfold, "\n") cat("Lambda pairs evaluated:", length(cvfit$param_set$cv.errors.mean), "\n") cat("Minimum CV error:", round(min(cvfit$param_set$cv.errors.mean), 4), "\n") } # ====== Optimal parameters (min CV error) ====== { cat("\nlambda.beta:", cvfit$est.min$lambda.beta, "\n") cat("lambda.theta:", cvfit$est.min$lambda.theta, "\n") } # ====== 1-SE parameters ====== { cat("\nlambda.beta:", cvfit$est.1se.beta$lambda.beta, "\n") cat("lambda.theta:", cvfit$est.1se.theta$lambda.theta, "\n") } ``` ## Understanding the 1-SE Rule The 1-SE rule selects simpler models within one standard error of minimum: ```{r} # Compare three models models <- list( "Minimum CV" = cvfit$est.min, "1SE Beta" = cvfit$est.1se.beta, "1SE Theta" = cvfit$est.1se.theta ) # Create comparison table model_comparison <- data.frame( Model = names(models), Lambda.Beta = numeric(3), Lambda.Theta = numeric(3), Nonzero.Coef = numeric(3), Network.Edges = numeric(3) ) for (i in 1:3) { if (!is.null(models[[i]])) { model_comparison$Lambda.Beta[i] <- models[[i]]$lambda.beta model_comparison$Lambda.Theta[i] <- models[[i]]$lambda.theta model_comparison$Nonzero.Coef[i] <- sum(abs(models[[i]]$Beta) > 1e-8) model_comparison$Network.Edges[i] <- sum(abs(models[[i]]$Theta[upper.tri(models[[i]]$Theta)]) > 1e-8) } } print(model_comparison, digits = 4) ``` ## Visualizing CV Results ```{r} # Heatmap of CV errors plot(cvfit, type = "heatmap", detailed.axes = FALSE) ``` ```{r} # 3D surface plot plot(cvfit, type = "scatter", plt.surf = TRUE) ``` ## CV Error Analysis ```{r} # Extract and analyze CV errors cv_errors <- cvfit$param_set$cv.errors.mean cv_errors_sd <- (cvfit$param_set$cv.errors.upper - cvfit$param_set$cv.errors.lower) / 2 # Create error surface data lambda_beta_unique <- unique(cvfit$param_set$cv.grid.beta) lambda_theta_unique <- unique(cvfit$param_set$cv.grid.theta) # Find optimal region min_error <- min(cv_errors) near_optimal <- which(cv_errors <= min_error * 1.05) # Within 5% of minimum # ====== Regularization path statistics ====== { cat("\n Error range: [", round(min(cv_errors), 4), ", ", round(max(cv_errors), 4), "]\n", sep = "") cat(" Models within 5% of minimum:", length(near_optimal), "\n") cat(" This suggests", ifelse(length(near_optimal) > 10, "stable", "sensitive"), "parameter selection\n") } ``` # Parallel Processing ## Setting Up Parallel CV Parallel processing is most beneficial for: * Large datasets (max(p, q) > 100) * Many folds (k > 5) * Dense lambda grids ```{r, eval = FALSE} library(parallel) # Detect available cores n_cores <- detectCores() cat("Available cores:", n_cores, "\n") # Create cluster, use 2 cores for illustration cl <- makeCluster(min(n_cores - 1, 2)) # Run parallel cross-validation cvfit_parallel <- cv.missoNet( X = sim$X, Y = sim$Z, kfold = 5, parallel = TRUE, cl = cl, verbose = 1 ) # Clean up stopCluster(cl) ``` # Advanced Parameter Tuning ## Adaptive Grid Search ```{r} # Standard grid search time_standard <- system.time({ fit_standard <- missoNet( X = sim$X, Y = sim$Z, adaptive.search = FALSE, verbose = 0 ) }) # Adaptive search (faster) time_adaptive <- system.time({ fit_adaptive <- missoNet( X = sim$X, Y = sim$Z, adaptive.search = TRUE, verbose = 0 ) }) # ====== Computation time comparison ====== { cat("\n Standard search:", round(time_standard[3], 2), "seconds\n") cat(" Adaptive search:", round(time_adaptive[3], 2), "seconds\n") cat(" Speedup:", round(time_standard[3] / time_adaptive[3], 1), "x\n") } ``` # Custom Penalty Factors ```{r} p <- ncol(sim$X) q <- ncol(sim$Z) # Scenario: Prior knowledge about important predictors # Group 1: Known important (predictors 1-10) - light penalty # Group 2: Possibly important (predictors 11-30) - moderate penalty # Group 3: Likely unimportant (predictors 31-60) - heavy penalty beta.pen.factor <- matrix(1, p, q) beta.pen.factor[1:10, ] <- 0.1 # Light penalty beta.pen.factor[11:20, ] <- 1.0 # Standard penalty beta.pen.factor[21:p, ] <- 10.0 # Heavy penalty # Network: Encourage block structure theta.pen.factor <- matrix(1, q, q) # Block 1: responses 1-6 theta.pen.factor[1:6, 1:6] <- 0.5 # Block 2: responses 7-12 theta.pen.factor[7:12, 7:12] <- 0.5 # Between blocks: heavy penalty theta.pen.factor[1:6, 7:12] <- 2.0 theta.pen.factor[7:12, 1:6] <- 2.0 # Fit with structured penalties fit_structured <- missoNet( X = sim$X, Y = sim$Z, beta.pen.factor = beta.pen.factor, theta.pen.factor = theta.pen.factor, verbose = 0 ) # Analyze selection pattern selected_groups <- c( Group1 = sum(rowSums(abs(fit_structured$est.min$Beta[1:10, ])) > 1e-8), Group2 = sum(rowSums(abs(fit_structured$est.min$Beta[11:20, ])) > 1e-8), Group3 = sum(rowSums(abs(fit_structured$est.min$Beta[21:p, ])) > 1e-8) ) # Penalty factors successfully guided selection toward prior knowledge # Selected predictors by group: print(selected_groups) ``` # Multi-Resolution Lambda Grids ```{r, eval = FALSE} # Create focused grid around expected optimal region # Based on preliminary analysis or domain knowledge # Log-scale grid with concentration create_focused_grid <- function(center, width, n_points) { log_center <- log10(center) log_seq <- seq(log_center - width/2, log_center + width/2, length.out = n_points) return(10^log_seq) } # Focused grids based on problem characteristics lambda.beta.focused <- create_focused_grid( center = 0.05, # Expected optimal region width = 1.5, # Log10 scale width n_points = 30 ) lambda.theta.focused <- create_focused_grid( center = 0.01, width = 1.0, n_points = 30 ) # Fit with focused grid fit_focused <- missoNet( X = sim$X, Y = sim$Z, lambda.beta = lambda.beta.focused, lambda.theta = lambda.theta.focused ) ``` # Convergence Diagnostics ## Monitoring Convergence ```{r} # Fit with detailed verbosity fit_verbose <- missoNet( X = sim$X, Y = sim$Z, lambda.beta = 0.05, lambda.theta = 0.01, verbose = 2 # Detailed output ) # Check convergence information if (!is.null(fit_verbose$est.min$converged)) { cat("\nConvergence Status:\n") cat(" Beta optimization:", ifelse(fit_verbose$est.min$converged$Beta, "✓ Converged", "✗ Not converged"), "\n") cat(" Theta optimization:", ifelse(fit_verbose$est.min$converged$Theta, "✓ Converged", "✗ Not converged"), "\n") } if (!is.null(fit_verbose$est.min$iterations)) { cat("\nIterations used:\n") cat(" Beta:", fit_verbose$est.min$iterations$Beta, "\n") cat(" Theta:", fit_verbose$est.min$iterations$Theta, "\n") } ``` ## Handling Convergence Issues Strategies for difficult problems: * Relax convergence tolerances * Increase maximum iterations * Use adaptive search for better initialization ```{r, eval = FALSE} # Simulate a difficult problem sim_difficult <- generateData( n = 100, p = 150, # p > n (high-dimensional) q = 20, rho = 0.25, # Higher missing rate missing.type = "MAR" ) # Strategy 1: Relax tolerances fit_relaxed <- missoNet( X = sim_difficult$X, Y = sim_difficult$Z, beta.tol = 1e-3, theta.tol = 1e-3, verbose = 0 ) # Strategy 2: Increase iterations fit_more_iter <- missoNet( X = sim_difficult$X, Y = sim_difficult$Z, beta.max.iter = 10000, theta.max.iter = 10000, verbose = 0 ) # Strategy 3: Use adaptive search for better initialization fit_adaptive_init <- missoNet( X = sim_difficult$X, Y = sim_difficult$Z, adaptive.search = TRUE, verbose = 0 ) ``` # Relaxed Network Estimation ## De-biased Precision Matrix ```{r} # Fit with relaxed network option fit_relax <- missoNet( X = sim$X, Y = sim$Z, relax.net = TRUE, # Refit network without penalty verbose = 0 ) # Compare penalized vs relaxed estimates if (!is.null(fit_relax$est.min$relax.net)) { Theta_pen <- fit_relax$est.min$Theta Theta_relax <- fit_relax$est.min$relax.net # Extract edges edges_pen <- Theta_pen[upper.tri(Theta_pen)] edges_relax <- Theta_relax[upper.tri(Theta_relax)] # Select non-zero edges from penalized model active_edges <- abs(edges_pen) > 1e-8 # === Relaxed network estimation === cat("\n Active edges:", sum(active_edges), "\n") cat(" Mean |edge| (penalized):", round(mean(abs(edges_pen[active_edges])), 4), "\n") cat(" Mean |edge| (relaxed):", round(mean(abs(edges_relax[active_edges])), 4), "\n") cat(" Relaxed estimates are typically larger (less shrinkage)\n") } ``` # Model Selection Strategies ## Nested Cross-Validation ```{r} # Nested CV for unbiased performance estimation # Outer loop: Performance evaluation # Inner loop: Parameter selection evaluate_nested_cv <- function(X, Y, outer_folds = 5, inner_folds = 5) { n <- nrow(X) outer_fold_ids <- rep(1:outer_folds, length.out = n) outer_fold_ids <- sample(outer_fold_ids) test_errors <- numeric(outer_folds) for (k in 1:outer_folds) { # Split data test_idx <- which(outer_fold_ids == k) train_idx <- setdiff(1:n, test_idx) # Inner CV for parameter selection cvfit_inner <- cv.missoNet( X = X[train_idx, ], Y = Y[train_idx, ], kfold = inner_folds, verbose = 0 ) # Predict on test fold y_pred <- predict(cvfit_inner, newx = X[test_idx, ]) y_true <- Y[test_idx, ] # Calculate error (ignoring missing values) mask <- !is.na(y_true) test_errors[k] <- mean((y_pred[mask] - y_true[mask])^2) } return(list( mean_error = mean(test_errors), se_error = sd(test_errors) / sqrt(outer_folds), fold_errors = test_errors )) } # Run nested CV nested_results <- evaluate_nested_cv( sim$X, sim$Z, outer_folds = 3, inner_folds = 3 ) { cat("\nNested Cross-Validation Results:\n") cat(" Mean test error:", round(nested_results$mean_error, 4), "\n") cat(" Standard error:", round(nested_results$se_error, 4), "\n") cat(" 95% CI: [", round(nested_results$mean_error - 1.96 * nested_results$se_error, 4), ", ", round(nested_results$mean_error + 1.96 * nested_results$se_error, 4), "]\n", sep = "") } ``` # Best Practices Summary ## Recommended Workflow ```{r, echo = FALSE, include = TRUE} { cat("=== Recommended missoNet Workflow ===\n\n") cat("1. Data Preparation:\n") cat(" - Check missing patterns\n") cat(" - Consider QC (usually recommended)\n") cat(" - Assess dimensionality (p vs n)\n\n") cat("2. Initial Exploration:\n") cat(" - Quick fit with adaptive.search = TRUE\n") cat(" - Examine selected lambda ranges\n") cat(" - Check convergence\n\n") cat("3. Cross-Validation:\n") cat(" - Use 5-10 folds depending on sample size\n") cat(" - Enable compute.1se = TRUE\n") cat(" - Consider parallel processing for large data\n\n") cat("4. Model Selection:\n") cat(" - Use eBIC for high-dimensional settings\n") cat(" - Consider 1-SE models for better generalization\n") cat(" - Validate on independent test set if available\n\n") cat("5. Diagnostics:\n") cat(" - Check convergence status\n") cat(" - Examine sparsity patterns\n") cat(" - Validate predictions\n") } ``` ## Performance Tips ```{r, echo = FALSE, include = TRUE} # Create performance recommendation table tips <- data.frame( Scenario = c( "Large n (>1000)", "Large p (>500)", "Large q (>50)", "High missing (>30%)", "Dense solutions expected" ), Recommendation = c( "Use parallel processing, adaptive search", "Reduce n.lambda.beta, use eBIC", "Reduce n.lambda.theta, consider diagonal penalty", "Increase tolerances, check MAR assumption", "Reduce lambda ranges, focus grid" ) ) print(tips, right = FALSE) ```