## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, echo = TRUE, comment = "#>", fig.align = "center") # Set to TRUE to regenerate long-running simulation results run_long <- FALSE library(maskedcauses) old_opts <- options(digits = 4) ## ----hom-setup---------------------------------------------------------------- m <- 5 theta_hom <- c(k = 1.5, beta1 = 300, beta2 = 400, beta3 = 500, beta4 = 600, beta5 = 700) model_hom <- wei_series_homogeneous_md_c1_c2_c3() # Right-censoring time at the 80th percentile of the system lifetime beta_sys <- wei_series_system_scale(theta_hom[1], theta_hom[-1]) tau <- qweibull(0.8, shape = theta_hom[1], scale = beta_sys) cat("System scale:", round(beta_sys, 1), "\n") cat("Right-censoring time (q = 0.8):", round(tau, 1), "\n") ## ----hom-generate------------------------------------------------------------- set.seed(2024) gen_hom <- rdata(model_hom) df <- gen_hom(theta_hom, n = 300, p = 0.2, observe = observe_right_censor(tau = tau)) cat("Observation types:\n") print(table(df$omega)) ## ----hom-fit-all, warning=FALSE----------------------------------------------- # Exponential (m params) model_exp <- exp_series_md_c1_c2_c3() fit_e <- fit(model_exp)(df, par = rep(0.002, m), method = "Nelder-Mead") # Homogeneous Weibull (m+1 params) fit_h <- fit(model_hom)(df, par = c(1, rep(500, m)), method = "Nelder-Mead", control = list(maxit = 5000)) # Heterogeneous Weibull (2m params) — warm-start from hom fit model_het <- wei_series_md_c1_c2_c3() het_start <- as.numeric(rbind(rep(fit_h$par[1], m), fit_h$par[-1])) fit_w <- fit(model_het)(df, par = het_start, method = "Nelder-Mead", control = list(maxit = 10000)) ## ----hom-lrt------------------------------------------------------------------ np_exp <- m np_hom <- m + 1 np_het <- 2 * m n_obs <- nrow(df) # LRT: heterogeneous vs homogeneous LRT_shape <- -2 * (fit_h$loglik - fit_w$loglik) df_shape <- np_het - np_hom # m - 1 = 4 p_shape <- pchisq(LRT_shape, df = df_shape, lower.tail = FALSE) # LRT: homogeneous vs exponential LRT_aging <- -2 * (fit_e$loglik - fit_h$loglik) df_aging <- np_hom - np_exp # 1 p_aging <- pchisq(LRT_aging, df = df_aging, lower.tail = FALSE) # AIC and BIC aic_val <- function(ll, k) -2 * ll + 2 * k bic_val <- function(ll, k, n) -2 * ll + log(n) * k comparison <- data.frame( Model = c("Exponential", "Hom. Weibull", "Het. Weibull"), Params = c(np_exp, np_hom, np_het), LogLik = c(fit_e$loglik, fit_h$loglik, fit_w$loglik), AIC = c(aic_val(fit_e$loglik, np_exp), aic_val(fit_h$loglik, np_hom), aic_val(fit_w$loglik, np_het)), BIC = c(bic_val(fit_e$loglik, np_exp, n_obs), bic_val(fit_h$loglik, np_hom, n_obs), bic_val(fit_w$loglik, np_het, n_obs)) ) knitr::kable(comparison, digits = 2, caption = "Model comparison (true model: homogeneous Weibull)", col.names = c("Model", "# Params", "Log-Lik", "AIC", "BIC")) ## ----hom-lrt-results---------------------------------------------------------- cat("Step 1: Het vs Hom (shape heterogeneity)\n") cat(sprintf(" LRT = %.2f, df = %d, p = %.4f\n", LRT_shape, df_shape, p_shape)) if (p_shape < 0.05) { cat(" Decision: Reject homogeneous -> adopt heterogeneous model\n\n") } else { cat(" Decision: Fail to reject -> retain homogeneous model\n\n") } cat("Step 2: Hom vs Exp (aging)\n") cat(sprintf(" LRT = %.2f, df = %d, p = %.4f\n", LRT_aging, df_aging, p_aging)) if (p_aging < 0.05) { cat(" Decision: Reject exponential -> adopt homogeneous model\n") } else { cat(" Decision: Fail to reject -> retain exponential model\n") } ## ----het-setup---------------------------------------------------------------- # Shapes span DFR to strong IFR theta_het_true <- c(0.8, 300, # DFR (electronics) 1.0, 400, # CFR (random) 1.5, 500, # mild IFR (seals) 2.0, 600, # moderate IFR (bearings) 2.5, 700) # strong IFR (wear-out) model_het <- wei_series_md_c1_c2_c3() gen_het <- rdata(model_het) set.seed(7231) df_het <- gen_het(theta_het_true, n = 300, p = 0.2, observe = observe_right_censor(tau = tau)) cat("Observation types:\n") print(table(df_het$omega)) ## ----het-fit-all, warning=FALSE----------------------------------------------- fit_e2 <- fit(model_exp)(df_het, par = rep(0.002, m), method = "Nelder-Mead") fit_h2 <- fit(model_hom)(df_het, par = c(1, rep(500, m)), method = "Nelder-Mead", control = list(maxit = 5000)) het_start2 <- as.numeric(rbind(rep(fit_h2$par[1], m), fit_h2$par[-1])) fit_w2 <- fit(model_het)(df_het, par = het_start2, method = "Nelder-Mead", control = list(maxit = 10000)) ## ----het-lrt------------------------------------------------------------------ # LRT: heterogeneous vs homogeneous LRT_shape2 <- -2 * (fit_h2$loglik - fit_w2$loglik) p_shape2 <- pchisq(LRT_shape2, df = m - 1, lower.tail = FALSE) # LRT: homogeneous vs exponential LRT_aging2 <- -2 * (fit_e2$loglik - fit_h2$loglik) p_aging2 <- pchisq(LRT_aging2, df = 1, lower.tail = FALSE) comparison2 <- data.frame( Model = c("Exponential", "Hom. Weibull", "Het. Weibull"), Params = c(np_exp, np_hom, np_het), LogLik = c(fit_e2$loglik, fit_h2$loglik, fit_w2$loglik), AIC = c(aic_val(fit_e2$loglik, np_exp), aic_val(fit_h2$loglik, np_hom), aic_val(fit_w2$loglik, np_het)), BIC = c(bic_val(fit_e2$loglik, np_exp, nrow(df_het)), bic_val(fit_h2$loglik, np_hom, nrow(df_het)), bic_val(fit_w2$loglik, np_het, nrow(df_het))) ) knitr::kable(comparison2, digits = 2, caption = "Model comparison (true model: heterogeneous Weibull)", col.names = c("Model", "# Params", "Log-Lik", "AIC", "BIC")) cat(sprintf("\nStep 1: Het vs Hom — LRT = %.2f, df = %d, p = %.4f\n", LRT_shape2, m - 1, p_shape2)) cat(sprintf("Step 2: Hom vs Exp — LRT = %.2f, df = %d, p = %.4f\n", LRT_aging2, 1, p_aging2)) ## ----load-precomputed-mc, include=FALSE, eval=!run_long----------------------- mc_data <- readRDS("precomputed_model_selection.rds") ## ----mc-setup, eval=run_long-------------------------------------------------- # set.seed(42) # # m <- 5 # base_k <- 1.5 # base_scales <- c(300, 400, 500, 600, 700) # cv_levels <- c(0, 0.05, 0.10, 0.20, 0.30, 0.50) # n_reps <- 200 # n_obs <- 500 # p_mask <- 0.2 # alpha <- 0.05 # # # Fixed offsets: mean 0, sd 1 # offsets <- c(-2, -1, 0, 1, 2) # offsets <- offsets / sd(offsets) # # # Right-censoring time (based on homogeneous baseline at CV = 0) # beta_sys <- wei_series_system_scale(base_k, base_scales) # tau <- qweibull(0.8, shape = base_k, scale = beta_sys) # # # Models and solvers # model_exp <- exp_series_md_c1_c2_c3() # model_hom <- wei_series_homogeneous_md_c1_c2_c3() # model_het <- wei_series_md_c1_c2_c3() # # solver_exp <- fit(model_exp) # solver_hom <- fit(model_hom) # solver_het <- fit(model_het) # # gen_het <- rdata(model_het) # # np_exp <- m # np_hom <- m + 1 # np_het <- 2 * m ## ----mc-run, eval=run_long, cache=TRUE, warning=FALSE, message=FALSE---------- # results_list <- vector("list", length(cv_levels) * n_reps) # idx <- 1 # # for (cv in cv_levels) { # shapes <- base_k * (1 + cv * offsets) # theta <- as.numeric(rbind(shapes, base_scales)) # # for (r in seq_len(n_reps)) { # df_r <- gen_het(theta, n = n_obs, p = p_mask, # observe = observe_right_censor(tau = tau)) # # row <- list(cv = cv, rep = r, # reject_het_vs_hom = NA, reject_hom_vs_exp = NA, # ll_exp = NA, ll_hom = NA, ll_het = NA, # aic_exp = NA, aic_hom = NA, aic_het = NA, # bic_exp = NA, bic_hom = NA, bic_het = NA, # converged_all = FALSE) # # tryCatch({ # fe <- solver_exp(df_r, par = rep(0.002, m), method = "Nelder-Mead") # # # Two-phase for hom: Nelder-Mead warm-up -> L-BFGS-B refinement # fh_nm <- solver_hom(df_r, par = c(1, rep(500, m)), # method = "Nelder-Mead", # control = list(maxit = 5000)) # fh <- solver_hom(df_r, par = fh_nm$par, # method = "L-BFGS-B", lower = rep(1e-6, np_hom)) # # # Two-phase for het: warm-start from hom, Nelder-Mead -> L-BFGS-B # het_start <- as.numeric(rbind(rep(fh$par[1], m), fh$par[-1])) # fw_nm <- solver_het(df_r, par = het_start, # method = "Nelder-Mead", # control = list(maxit = 10000)) # fw <- solver_het(df_r, par = fw_nm$par, # method = "L-BFGS-B", lower = rep(1e-6, np_het)) # # if (fe$converged && fh$converged && fw$converged) { # row$ll_exp <- fe$loglik # row$ll_hom <- fh$loglik # row$ll_het <- fw$loglik # # row$aic_exp <- -2 * fe$loglik + 2 * np_exp # row$aic_hom <- -2 * fh$loglik + 2 * np_hom # row$aic_het <- -2 * fw$loglik + 2 * np_het # # row$bic_exp <- -2 * fe$loglik + log(n_obs) * np_exp # row$bic_hom <- -2 * fh$loglik + log(n_obs) * np_hom # row$bic_het <- -2 * fw$loglik + log(n_obs) * np_het # # LRT_het <- -2 * (fh$loglik - fw$loglik) # LRT_hom <- -2 * (fe$loglik - fh$loglik) # # row$reject_het_vs_hom <- pchisq(LRT_het, df = m - 1, # lower.tail = FALSE) < alpha # row$reject_hom_vs_exp <- pchisq(LRT_hom, df = 1, # lower.tail = FALSE) < alpha # row$converged_all <- TRUE # } # }, error = function(e) NULL) # # results_list[[idx]] <- row # idx <- idx + 1 # } # # cat(sprintf("CV = %.0f%% complete\n", cv * 100)) # } # # power_study <- do.call(rbind, lapply(results_list, as.data.frame)) ## ----mc-save, eval=run_long, include=FALSE------------------------------------ # mc_data <- list( # power_study = power_study, # config = list(m = m, n = n_obs, p = p_mask, tau = tau, # n_reps = n_reps, alpha = alpha, # base_k = base_k, base_scales = base_scales, # offsets = offsets) # ) # saveRDS(mc_data, "precomputed_model_selection.rds") ## ----mc-rejection-rates------------------------------------------------------- ps <- mc_data$power_study ps_conv <- ps[ps$converged_all, ] rej_summary <- do.call(rbind, lapply(split(ps_conv, ps_conv$cv), function(d) { data.frame( CV_pct = d$cv[1] * 100, n_converged = nrow(d), reject_het_vs_hom = mean(d$reject_het_vs_hom), reject_hom_vs_exp = mean(d$reject_hom_vs_exp) ) })) knitr::kable(rej_summary, digits = 3, row.names = FALSE, caption = sprintf( "Rejection rates at alpha = %.2f (n = %d, m = %d)", mc_data$config$alpha, mc_data$config$n, mc_data$config$m), col.names = c("Shape CV (%)", "Converged", "Reject Hom (Het vs Hom)", "Reject Exp (Hom vs Exp)")) ## ----mc-power-curve, fig.width=7, fig.height=5-------------------------------- plot(rej_summary$CV_pct, rej_summary$reject_het_vs_hom, type = "b", pch = 19, col = "steelblue", lwd = 2, xlab = "Coefficient of Variation of Shapes (%)", ylab = "Rejection Rate", ylim = c(0, 1), main = "Power of the LRT Cascade") lines(rej_summary$CV_pct, rej_summary$reject_hom_vs_exp, type = "b", pch = 17, col = "firebrick", lwd = 2, lty = 2) abline(h = mc_data$config$alpha, lty = 3, col = "gray50") legend("right", c("Het vs Hom (shape heterogeneity)", "Hom vs Exp (aging)", expression(alpha == 0.05)), col = c("steelblue", "firebrick", "gray50"), lty = c(1, 2, 3), pch = c(19, 17, NA), lwd = 2, cex = 0.85) grid() ## ----mc-aic-accuracy---------------------------------------------------------- aic_correct <- do.call(rbind, lapply(split(ps_conv, ps_conv$cv), function(d) { cv <- d$cv[1] aic_choice <- ifelse(d$aic_het < d$aic_hom & d$aic_het < d$aic_exp, "het", ifelse(d$aic_hom < d$aic_exp, "hom", "exp")) bic_choice <- ifelse(d$bic_het < d$bic_hom & d$bic_het < d$bic_exp, "het", ifelse(d$bic_hom < d$bic_exp, "hom", "exp")) correct_model <- if (cv == 0) "hom" else "het" data.frame( CV_pct = cv * 100, AIC_correct = mean(aic_choice == correct_model), BIC_correct = mean(bic_choice == correct_model) ) })) knitr::kable(aic_correct, digits = 3, row.names = FALSE, caption = "Proportion selecting the correct model", col.names = c("Shape CV (%)", "AIC Correct", "BIC Correct")) ## ----cleanup, include=FALSE--------------------------------------------------- options(old_opts)