## ----setup, include = FALSE----------------------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, fig.align = "center", out.width = "90%", dpi = 90, message = FALSE, warning = FALSE ) options(width = 100) ## ----eval = FALSE--------------------------------------------------------------------------------- # # Install from CRAN (when available) # install.packages("missoNet") # # # Or install development version from GitHub # devtools::install_github("yixiao-zeng/missoNet") ## ------------------------------------------------------------------------------------------------- # Load the package library(missoNet) ## ------------------------------------------------------------------------------------------------- # Generate synthetic data sim <- generateData( n = 200, # Sample size p = 50, # Number of predictors q = 10, # Number of responses rho = 0.1, # Missing rate (10%) missing.type = "MCAR" # Missing completely at random ) # Examine the data structure str(sim, max.level = 1) ## ----echo = TRUE, include = TRUE------------------------------------------------------------------ # Check dimensions cat("Predictors (X):", dim(sim$X), "\n") cat("Complete responses (Y):", dim(sim$Y), "\n") cat("Observed responses (Z):", dim(sim$Z), "\n") cat("Missing rate:", sprintf("%.1f%%", mean(is.na(sim$Z)) * 100), "\n") ## ------------------------------------------------------------------------------------------------- # Fit missoNet with automatic parameter selection fit <- missoNet( X = sim$X, Y = sim$Z, # Use observed responses with missing values GoF = "BIC" # Goodness-of-fit criterion ) # Extract optimal estimates Beta.hat <- fit$est.min$Beta Theta.hat <- fit$est.min$Theta mu.hat <- fit$est.min$mu ## ----echo = TRUE, include = TRUE------------------------------------------------------------------ # Model summary cat("Selected lambda.beta:", fit$est.min$lambda.beta, "\n") cat("Selected lambda.theta:", fit$est.min$lambda.theta, "\n") cat("Active predictors:", sum(rowSums(abs(Beta.hat)) > 1e-8), "/", nrow(Beta.hat), "\n") cat("Network edges:", sum(abs(Theta.hat[upper.tri(Theta.hat)]) > 1e-8), "/", ncol(Theta.hat) * (ncol(Theta.hat)-1) / 2, "\n") ## ------------------------------------------------------------------------------------------------- # Visualize the regularization path plot(fit, type = "heatmap") ## ------------------------------------------------------------------------------------------------- # Visualize the regularization path in a scatter plot plot(fit, type = "scatter") ## ------------------------------------------------------------------------------------------------- # Split data for demonstration train_idx <- 1:150 test_idx <- 151:200 # Refit on training data fit_train <- missoNet( X = sim$X[train_idx, ], Y = sim$Z[train_idx, ], GoF = "BIC", verbose = 0 # Suppress output ) # Predict on test data Y_pred <- predict(fit_train, newx = sim$X[test_idx, ]) # Evaluate predictions (using complete data for comparison) mse <- mean((Y_pred - sim$Y[test_idx, ])^2) cat("Test set MSE:", round(mse, 4), "\n") ## ------------------------------------------------------------------------------------------------- # Generate data with different missing mechanisms n <- 300; p <- 30; q <- 8; rho <- 0.15 sim_mcar <- generateData(n, p, q, rho, missing.type = "MCAR") sim_mar <- generateData(n, p, q, rho, missing.type = "MAR") sim_mnar <- generateData(n, p, q, rho, missing.type = "MNAR") # Visualize missing patterns par(mfrow = c(1, 3), mar = c(4, 4, 3, 1)) # MCAR pattern image(1:q, 1:n, t(is.na(sim_mcar$Z)), col = c("white", "darkred"), xlab = "Response", ylab = "Observation", main = "MCAR: Random Pattern") # MAR pattern image(1:q, 1:n, t(is.na(sim_mar$Z)), col = c("white", "darkred"), xlab = "Response", ylab = "Observation", main = "MAR: Depends on X") # MNAR pattern image(1:q, 1:n, t(is.na(sim_mnar$Z)), col = c("white", "darkred"), xlab = "Response", ylab = "Observation", main = "MNAR: Depends on Y") ## ------------------------------------------------------------------------------------------------- # Fit with different criteria criteria <- c("AIC", "BIC", "eBIC") results <- list() for (crit in criteria) { results[[crit]] <- missoNet( X = sim$X, Y = sim$Z, GoF = crit, verbose = 0 ) } # Compare selected models comparison <- data.frame( Criterion = criteria, Lambda.Beta = sapply(results, function(x) x$est.min$lambda.beta), Lambda.Theta = sapply(results, function(x) x$est.min$lambda.theta), Active.Predictors = sapply(results, function(x) sum(rowSums(abs(x$est.min$Beta)) > 1e-8)), Network.Edges = sapply(results, function(x) sum(abs(x$est.min$Theta[upper.tri(x$est.min$Theta)]) > 1e-8)), GoF.Value = sapply(results, function(x) x$est.min$gof) ) print(comparison, digits = 4) ## ------------------------------------------------------------------------------------------------- # Define custom regularization paths lambda.beta <- 10^seq(0, -2, length.out = 15) lambda.theta <- 10^seq(0, -2, length.out = 15) # Fit with custom grid fit_custom <- missoNet( X = sim$X, Y = sim$Z, lambda.beta = lambda.beta, lambda.theta = lambda.theta, verbose = 0 ) ## ----echo = TRUE, include = TRUE------------------------------------------------------------------ # Grid coverage summary cat(" Beta range: [", sprintf("%.4f", min(fit_custom$param_set$gof.grid.beta)), ", ", sprintf("%.4f", max(fit_custom$param_set$gof.grid.beta)), "]\n", sep = "") cat(" Theta range: [", sprintf("%.4f", min(fit_custom$param_set$gof.grid.theta)), ", ", sprintf("%.4f", max(fit_custom$param_set$gof.grid.theta)), "]\n", sep = "") cat(" Total models evaluated:", length(fit_custom$param_set$gof), "\n") ## ------------------------------------------------------------------------------------------------- # Create data with variable missing rates across responses n <- 300; p <- 30; q <- 8; rho <- 0.15 rho_vec <- seq(0.05, 0.30, length.out = q) sim_var <- generateData( n = 300, p = 30, q = 8, rho = rho_vec, # Different missing rate for each response missing.type = "MAR" ) # Examine missing patterns miss_summary <- data.frame( Response = paste0("Y", 1:q), Target = rho_vec, Actual = colMeans(is.na(sim_var$Z)) ) print(miss_summary, digits = 3) # Fit model accounting for variable missingness fit_var <- missoNet( X = sim_var$X, Y = sim_var$Z, adaptive.search = TRUE, # Fast adaptive search verbose = 0 ) # Visualize plot(fit_var) ## ----eval = FALSE--------------------------------------------------------------------------------- # # Use penalty factors to incorporate prior information # p <- ncol(sim$X) # q <- ncol(sim$Z) # # # Example: We know predictors 1-10 are important # beta.pen.factor <- matrix(1, p, q) # beta.pen.factor[1:10, ] <- 0.1 # Lighter penalty for known important predictors # # # Example: We expect certain response pairs to be connected # theta.pen.factor <- matrix(1, q, q) # theta.pen.factor[1, 2] <- theta.pen.factor[2, 1] <- 0.1 # theta.pen.factor[3, 4] <- theta.pen.factor[4, 3] <- 0.1 # # # Fit with prior information # fit_prior <- missoNet( # X = sim$X, # Y = sim$Z, # beta.pen.factor = beta.pen.factor, # theta.pen.factor = theta.pen.factor # ) ## ----eval = FALSE--------------------------------------------------------------------------------- # # Standardization is recommended (default: TRUE) # # for numerical stability and comparable penalties # fit_std <- missoNet(X = sim$X, Y = sim$Z, # standardize = TRUE, # standardize.response = TRUE) # # # Without standardization (for pre-scaled data) # fit_no_std <- missoNet(X = scale(sim$X), Y = scale(sim$Z), # standardize = FALSE, # standardize.response = FALSE) # ## ----eval = FALSE--------------------------------------------------------------------------------- # # Adjust convergence settings based on problem difficulty and time constraints # fit_tight <- missoNet( # X = sim$X, # Y = sim$Z, # beta.tol = 1e-6, # Tighter tolerance # theta.tol = 1e-6, # beta.max.iter = 10000, # More iterations allowed # theta.max.iter = 10000 # ) # # # For quick exploration, use looser settings # fit_quick <- missoNet( # X = sim$X, # Y = sim$Z, # beta.tol = 1e-3, # Looser tolerance # theta.tol = 1e-3, # beta.max.iter = 1000, # Fewer iterations # theta.max.iter = 1000, # adaptive.search = TRUE # Fast adaptive search # )