## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, echo = TRUE, comment = "#>", fig.align = "center") # Set to TRUE to regenerate long-running simulation results run_long <- FALSE library(maskedcauses) old_opts <- options(digits = 4) ## ----define-theta------------------------------------------------------------- theta <- c(0.7, 200, # component 1: DFR (electronics) 1.0, 150, # component 2: CFR (seals) 2.0, 100) # component 3: IFR (bearing) m <- length(theta) / 2 ## ----hazard-plot, fig.width=7, fig.height=4.5--------------------------------- model <- wei_series_md_c1_c2_c3( shapes = theta[seq(1, 6, 2)], scales = theta[seq(2, 6, 2)] ) h1 <- component_hazard(model, 1) h2 <- component_hazard(model, 2) h3 <- component_hazard(model, 3) t_grid <- seq(0.5, 120, length.out = 300) plot(t_grid, h1(t_grid, theta), type = "l", col = "steelblue", lwd = 2, ylim = c(0, 0.05), xlab = "Time", ylab = "Hazard rate h(t)", main = "Component hazard functions") lines(t_grid, h2(t_grid, theta), col = "forestgreen", lwd = 2, lty = 2) lines(t_grid, h3(t_grid, theta), col = "firebrick", lwd = 2, lty = 3) legend("topright", legend = c("Comp 1: DFR (k=0.7)", "Comp 2: CFR (k=1.0)", "Comp 3: IFR (k=2.0)"), col = c("steelblue", "forestgreen", "firebrick"), lty = 1:3, lwd = 2, cex = 0.85) grid() ## ----cause-prob-plot, fig.width=7, fig.height=4.5----------------------------- ccp_fn <- conditional_cause_probability(model) probs <- ccp_fn(t_grid, theta) plot(t_grid, probs[, 1], type = "l", col = "steelblue", lwd = 2, ylim = c(0, 1), xlab = "Time", ylab = "P(K = j | T = t)", main = "Conditional cause-of-failure probability") lines(t_grid, probs[, 2], col = "forestgreen", lwd = 2, lty = 2) lines(t_grid, probs[, 3], col = "firebrick", lwd = 2, lty = 3) legend("right", legend = c("Comp 1: DFR", "Comp 2: CFR", "Comp 3: IFR"), col = c("steelblue", "forestgreen", "firebrick"), lty = 1:3, lwd = 2, cex = 0.85) grid() ## ----integrand-demo----------------------------------------------------------- shapes <- theta[seq(1, 6, 2)] scales <- theta[seq(2, 6, 2)] cind <- c(TRUE, TRUE, FALSE) # candidate set = {1, 2} # Evaluate the integrand h_c(t) * S(t) at a few time points t_eval <- c(10, 50, 100) vals <- maskedcauses:::wei_series_integrand( t_eval, shapes, scales, cind ) names(vals) <- paste0("t=", t_eval) vals ## ----integrand-plot, fig.width=7, fig.height=4-------------------------------- t_fine <- seq(0.5, 150, length.out = 500) # Full candidate set: all components vals_full <- maskedcauses:::wei_series_integrand( t_fine, shapes, scales, cind = rep(TRUE, 3) ) plot(t_fine, vals_full, type = "l", col = "black", lwd = 2, xlab = "Time", ylab = expression(h[c](t) %.% S(t)), main = "Weibull series integrand (full candidate set)") grid() ## ----timing-comparison-------------------------------------------------------- set.seed(123) gen <- rdata(model) ll_fn <- loglik(model) # Exact + right only df_exact <- gen(theta, n = 100, tau = 120, p = 0.3, observe = observe_right_censor(tau = 120)) # Mixed: includes left and interval censoring df_mixed <- gen(theta, n = 100, p = 0.3, observe = observe_mixture( observe_right_censor(tau = 120), observe_left_censor(tau = 80), observe_periodic(delta = 20, tau = 120), weights = c(0.5, 0.25, 0.25) )) cat("Observation type counts (exact/right):\n") print(table(df_exact$omega)) cat("\nObservation type counts (mixed):\n") print(table(df_mixed$omega)) t_exact <- system.time(replicate(5, ll_fn(df_exact, theta)))["elapsed"] t_mixed <- system.time(replicate(5, ll_fn(df_mixed, theta)))["elapsed"] cat(sprintf("\nLoglik time (exact/right only): %.4f s per eval\n", t_exact / 5)) cat(sprintf("Loglik time (mixed censoring): %.4f s per eval\n", t_mixed / 5)) cat(sprintf("Slowdown factor: %.1fx\n", t_mixed / t_exact)) ## ----mle-generate------------------------------------------------------------- set.seed(7231) n_mle <- 300 # Right-censoring only (fast, closed-form likelihood) gen <- rdata(model) df <- gen(theta, n = n_mle, tau = 120, p = 0.3, observe = observe_right_censor(tau = 120)) cat("Observation types:\n") print(table(df$omega)) cat("\nFirst few rows:\n") print(head(df), row.names = FALSE) ## ----mle-fit, warning=FALSE--------------------------------------------------- solver <- fit(model) # Initial guess: all shapes = 1, scales = 100 theta0 <- rep(c(1, 100), m) estimate <- solver(df, par = theta0, method = "Nelder-Mead") print(estimate) ## ----mle-recovery------------------------------------------------------------- # Parameter recovery table par_names <- paste0( rep(c("k", "beta"), m), "_", rep(1:m, each = 2) ) recovery <- data.frame( Parameter = par_names, True = theta, Estimate = estimate$par, SE = sqrt(diag(estimate$vcov)), Rel_Error_Pct = 100 * (estimate$par - theta) / theta ) knitr::kable(recovery, digits = 3, caption = paste0("Parameter recovery (n = ", n_mle, ")"), col.names = c("Parameter", "True", "MLE", "SE", "Rel. Error %")) ## ----model-comparison-generate------------------------------------------------ set.seed(999) # Generate from the TRUE heterogeneous DGP df_comp <- gen(theta, n = 800, tau = 150, p = 0.2, observe = observe_right_censor(tau = 150)) cat("Observation types:\n") print(table(df_comp$omega)) ## ----model-comparison-fit, warning=FALSE-------------------------------------- # Fit heterogeneous model (6 parameters) model_het <- wei_series_md_c1_c2_c3() solver_het <- fit(model_het) fit_het <- solver_het(df_comp, par = rep(c(1, 120), m), method = "Nelder-Mead") # Fit homogeneous model (4 parameters) model_hom <- wei_series_homogeneous_md_c1_c2_c3() solver_hom <- fit(model_hom) theta0_hom <- c(1, 120, 120, 120) fit_hom <- solver_hom(df_comp, par = theta0_hom, method = "Nelder-Mead") cat("Heterogeneous model (2m = 6 params):\n") cat(" Log-likelihood:", fit_het$loglik, "\n") cat(" Parameters:", round(fit_het$par, 2), "\n\n") cat("Homogeneous model (m+1 = 4 params):\n") cat(" Log-likelihood:", fit_hom$loglik, "\n") cat(" Parameters:", round(fit_hom$par, 2), "\n") ## ----model-comparison-lrt----------------------------------------------------- # Likelihood ratio test # H0: k1 = k2 = k3 (homogeneous, 4 params) # H1: k1, k2, k3 free (heterogeneous, 6 params) LRT <- 2 * (fit_het$loglik - fit_hom$loglik) df_lrt <- 6 - 4 # difference in parameters p_value <- pchisq(LRT, df = df_lrt, lower.tail = FALSE) comparison <- data.frame( Model = c("Heterogeneous (2m)", "Homogeneous (m+1)"), Params = c(6, 4), LogLik = c(fit_het$loglik, fit_hom$loglik), AIC = c(-2 * fit_het$loglik + 2 * 6, -2 * fit_hom$loglik + 2 * 4) ) knitr::kable(comparison, digits = 2, caption = "Model comparison: heterogeneous vs homogeneous", col.names = c("Model", "# Params", "Log-Lik", "AIC")) cat(sprintf("\nLikelihood ratio statistic: %.2f (df = %d, p = %.4f)\n", LRT, df_lrt, p_value)) ## ----load-precomputed, include=FALSE, eval=!run_long-------------------------- results <- readRDS("precomputed_weibull.rds") ## ----mc-setup, eval=run_long-------------------------------------------------- # set.seed(42) # # theta_mc <- c(0.8, 150, 1.5, 120, 2.0, 100) # m_mc <- length(theta_mc) / 2 # n_mc <- 1000 # B <- 100 # p_mc <- 0.2 # tau_mc <- 200 # alpha <- 0.05 # # model_mc <- wei_series_md_c1_c2_c3() # gen_mc <- rdata(model_mc) # solver_mc <- fit(model_mc) # # # Storage # estimates <- matrix(NA, nrow = B, ncol = 2 * m_mc) # se_estimates <- matrix(NA, nrow = B, ncol = 2 * m_mc) # ci_lower <- matrix(NA, nrow = B, ncol = 2 * m_mc) # ci_upper <- matrix(NA, nrow = B, ncol = 2 * m_mc) # converged <- logical(B) # logliks <- numeric(B) ## ----mc-run, eval=run_long, cache=TRUE, warning=FALSE, message=FALSE---------- # theta0_mc <- rep(c(1, 130), m_mc) # # for (b in seq_len(B)) { # df_b <- gen_mc(theta_mc, n = n_mc, tau = tau_mc, p = p_mc, # observe = observe_right_censor(tau = tau_mc)) # # tryCatch({ # fit_b <- solver_mc(df_b, par = theta0_mc, # method = "L-BFGS-B", # lower = rep(1e-6, 2 * m_mc)) # estimates[b, ] <- fit_b$par # se_estimates[b, ] <- sqrt(diag(fit_b$vcov)) # logliks[b] <- fit_b$loglik # # z <- qnorm(1 - alpha / 2) # ci_lower[b, ] <- fit_b$par - z * se_estimates[b, ] # ci_upper[b, ] <- fit_b$par + z * se_estimates[b, ] # converged[b] <- fit_b$converged # }, error = function(e) { # converged[b] <<- FALSE # }) # # if (b %% 20 == 0) cat(sprintf("Replication %d/%d\n", b, B)) # } # # cat("Convergence rate:", mean(converged, na.rm = TRUE), "\n") ## ----mc-save, eval=run_long, include=FALSE------------------------------------ # results <- list( # estimates = estimates, se_estimates = se_estimates, # ci_lower = ci_lower, ci_upper = ci_upper, converged = converged, # logliks = logliks, B = B, alpha = alpha, # theta_mc = theta_mc, m_mc = m_mc, n_mc = n_mc, # p_mc = p_mc, tau_mc = tau_mc # ) # saveRDS(results, "precomputed_weibull.rds") ## ----mc-results--------------------------------------------------------------- theta_mc <- results$theta_mc m_mc <- results$m_mc alpha <- results$alpha valid <- results$converged & !is.na(results$estimates[, 1]) est_valid <- results$estimates[valid, , drop = FALSE] bias <- colMeans(est_valid) - theta_mc variance <- apply(est_valid, 2, var) mse <- bias^2 + variance rmse <- sqrt(mse) par_names_mc <- paste0( rep(c("k", "beta"), m_mc), "_", rep(1:m_mc, each = 2) ) results_df <- data.frame( Parameter = par_names_mc, True = theta_mc, Mean_Est = colMeans(est_valid), Bias = bias, Variance = variance, MSE = mse, RMSE = rmse, Rel_Bias_Pct = 100 * bias / theta_mc ) knitr::kable(results_df, digits = 4, caption = paste0("Monte Carlo results (n = ", results$n_mc, ", B = ", sum(valid), " converged)"), col.names = c("Parameter", "True", "Mean Est.", "Bias", "Variance", "MSE", "RMSE", "Rel. Bias %")) ## ----mc-coverage-------------------------------------------------------------- coverage <- numeric(2 * m_mc) for (j in seq_len(2 * m_mc)) { valid_j <- valid & !is.na(results$ci_lower[, j]) & !is.na(results$ci_upper[, j]) covered <- (results$ci_lower[valid_j, j] <= theta_mc[j]) & (theta_mc[j] <= results$ci_upper[valid_j, j]) coverage[j] <- mean(covered) } mean_width <- colMeans( results$ci_upper[valid, ] - results$ci_lower[valid, ], na.rm = TRUE ) coverage_df <- data.frame( Parameter = par_names_mc, True = theta_mc, Coverage = coverage, Nominal = 1 - alpha, Mean_Width = mean_width ) knitr::kable(coverage_df, digits = 4, caption = paste0(100 * (1 - alpha), "% CI coverage (nominal = ", 1 - alpha, ")"), col.names = c("Parameter", "True", "Coverage", "Nominal", "Mean Width")) ## ----mc-sampling-dist, fig.width=8, fig.height=6------------------------------ oldpar <- par(mfrow = c(2, 3), mar = c(4, 4, 2, 1)) on.exit(par(oldpar)) for (j in seq_len(2 * m_mc)) { hist(est_valid[, j], breaks = 20, probability = TRUE, main = par_names_mc[j], xlab = expression(hat(theta)), col = "lightblue", border = "white") abline(v = theta_mc[j], col = "red", lwd = 2, lty = 2) abline(v = mean(est_valid[, j]), col = "blue", lwd = 2) legend("topright", legend = c("True", "Mean"), col = c("red", "blue"), lty = c(2, 1), lwd = 2, cex = 0.7) } ## ----cleanup, include=FALSE--------------------------------------------------- options(old_opts)