## Purled R code from the maskedcauses vignette: maximum-likelihood estimation
## of exponential series-system component failure rates from masked
## failure-cause data, followed by Monte Carlo studies of bias, variance,
## CI coverage, and sensitivity to masking/censoring levels.
##
## Chunks gated by `eval=run_long` are emitted commented-out by knitr::purl();
## they re-run only when run_long <- TRUE. Otherwise their results are loaded
## from precomputed_results.rds (see the load-precomputed chunk).

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  echo = TRUE,
  comment = "#>")
# Set to TRUE to regenerate long-running simulation results
run_long <- FALSE
# maskedcauses re-exports generics from likelihood.model
# (loglik, score, hess_loglik, fit, assumptions, fim)
library(maskedcauses)
# md_encode_matrix and md_boolean_matrix_to_charsets are provided by this package
old_opts <- options(digits = 4)

## -----------------------------------------------------------------------------
# True component failure rates (exponential rate parameters) for the
# m-component series system. The trailing numbers are the original
# author's labels for each component.
theta <- c(1,    # component 1 failure rate
           1.1,  # 3
           0.95, # 5
           1.15, # 6
           1.1)  # 7
m <- length(theta)

## -----------------------------------------------------------------------------
# Simulate n i.i.d. systems: column j holds component j's latent lifetime.
set.seed(7231) # set seed for reproducibility
n <- 7500
comp_times <- matrix(nrow=n,ncol=m)
for (j in 1:m)
  comp_times[,j] <- rexp(n,theta[j])
# Tag the columns with the "t" variable prefix expected by the md_* pipeline.
comp_times <- md_encode_matrix(comp_times,"t")
head(comp_times, 4)

## ----right-censoring----------------------------------------------------------
# Choose tau so that a fraction q of series lifetimes survive past tau:
# the system lifetime is Exp(sum(theta)), so S(tau) = q at tau below.
q <- 0.25
tau <- rep(-(1/sum(theta))*log(q),n)
data <- comp_times |> md_series_lifetime_right_censoring(tau)
# Hide latent (unobserved) columns when displaying.
latent <- attr(data, "latent")
head(data[, !colnames(data) %in% latent], 4)

## ----bernoulli-cand, warning=F, message=F-------------------------------------
# Generate candidate sets under conditions C1-C3 with masking probability p:
# each non-failed component enters the candidate set independently w.p. p.
p <- .3
data <- data |> md_bernoulli_cand_c1_c2_c3(p)
head(data[, paste0("q", 1:m)], 4)

## ----cand-sampler-------------------------------------------------------------
# Draw the observed candidate sets and label each observation type.
data <- data |> md_cand_sampler()
data$omega <- ifelse(data$delta, "exact", "right")
# Compact display: boolean candidate columns as character sets.
display <- md_boolean_matrix_to_charsets(data, drop_set = TRUE)
latent <- attr(display, "latent")
head(display[, !colnames(display) %in% latent], 6)

## ----likelihood-model---------------------------------------------------------
# Likelihood model for exponential series systems with masked data (C1-C3).
model <- exp_series_md_c1_c2_c3()

## ----loglike-function---------------------------------------------------------
ll <- loglik(model)

## ----loglike-eval-------------------------------------------------------------
ll(data, theta)

## ----score-function, message=FALSE, warning=FALSE-----------------------------
grad <- score(model)

## ----score-eval---------------------------------------------------------------
grad(data, theta)

## ----fit-model, warning=FALSE-------------------------------------------------
# Get the solver from the model using generic dispatch
solver <- fit(model)
# Solve for MLE with initial guess
theta0 <- rep(1, m)
estimate <- solver(data, par = theta0, method = "Nelder-Mead")

## ----mle-results--------------------------------------------------------------
# Print summary with confidence intervals
print(estimate)

## ----mle-accessors------------------------------------------------------------
# Point estimate
theta.hat <- estimate$par
cat("MLE:", round(theta.hat, 4), "\n")
# Standard errors (sqrt of diagonal of variance-covariance matrix)
cat("SE:", round(sqrt(diag(estimate$vcov)), 4), "\n")
# Log-likelihood at MLE
cat("Log-likelihood:", round(estimate$loglik, 4), "\n")

## ----observe-functors---------------------------------------------------------
# Periodic inspection every 0.5 time units, study ends at tau = 5
obs_periodic <- observe_periodic(delta = 0.5, tau = 5)
# Single inspection at tau = 3
obs_left <- observe_left_censor(tau = 3)
# Mix of continuous and periodic monitoring
obs_mixed <- observe_mixture(
  observe_right_censor(tau = 5),
  observe_left_censor(tau = 3),
  weights = c(0.7, 0.3)
)

## ----mixed-censoring-data-----------------------------------------------------
# Data generator dispatched from the model; `observe` controls censoring.
gen <- rdata(model)
# Periodic inspections
set.seed(7231)
df_periodic <- gen(theta, n = 500, p = 0.3,
                   observe = observe_periodic(delta = 0.5, tau = 5))
cat("Periodic inspection observation types:\n")
print(table(df_periodic$omega))
# Mixed monitoring
set.seed(7231)
df_mixed <- gen(theta, n = 500, p = 0.3,
                observe = observe_mixture(
                  observe_right_censor(tau = 5),
                  observe_left_censor(tau = 3),
                  weights = c(0.7, 0.3)
                ))
cat("\nMixed monitoring observation types:\n")
print(table(df_mixed$omega))

## ----mixed-censoring-eval-----------------------------------------------------
ll_fn <- loglik(model)
scr_fn <- score(model)
hess_fn <- hess_loglik(model)
# Evaluate at true parameters
ll_val <- ll_fn(df_periodic, theta)
scr_val <- scr_fn(df_periodic, theta)
hess_val <- hess_fn(df_periodic, theta)
cat("Log-likelihood (periodic):", round(ll_val, 4), "\n")
cat("Score (periodic):", round(scr_val, 4), "\n")
# Negative eigenvalues indicate a local maximum of the log-likelihood.
cat("Hessian eigenvalues:", round(eigen(hess_val)$values, 4), "\n")

## ----score-verify-------------------------------------------------------------
# Cross-check the analytical score against a numerical gradient.
scr_numerical <- numDeriv::grad(
  func = function(th) ll_fn(df_periodic, th),
  x = theta
)
cat("Max |analytical - numerical| score:",
    formatC(max(abs(scr_val - scr_numerical)), format = "e", digits = 2), "\n")

## ----load-precomputed, include=FALSE, eval=!run_long--------------------------
# Load pre-computed simulation results when not re-running
list2env(readRDS("precomputed_results.rds"), envir = environment())

## ----monte-carlo-setup, cache=TRUE, eval=run_long-----------------------------
# set.seed(7231)
#
# # Simulation parameters
# B <- 200       # Number of Monte Carlo replications
# alpha <- 0.05  # Significance level for CIs
#
# # Storage for results
# estimates <- matrix(NA, nrow = B, ncol = m)
# se_estimates <- matrix(NA, nrow = B, ncol = m)
# ci_lower <- matrix(NA, nrow = B, ncol = m)
# ci_upper <- matrix(NA, nrow = B, ncol = m)
# converged <- logical(B)

## ----monte-carlo-run, cache=TRUE, warning=FALSE, eval=run_long----------------
# for (b in 1:B) {
#   # Generate component lifetimes
#   comp_times_b <- matrix(nrow = n, ncol = m)
#   for (j in 1:m) {
#     comp_times_b[, j] <- rexp(n, theta[j])
#   }
#   comp_times_b <- md_encode_matrix(comp_times_b, "t")
#
#   # Apply masking pipeline
#   data_b <- comp_times_b |>
#     md_series_lifetime_right_censoring(tau) |>
#     md_bernoulli_cand_c1_c2_c3(p) |>
#     md_cand_sampler()
#   data_b$omega <- ifelse(data_b$delta, "exact", "right")
#
#   # Fit model
#   tryCatch({
#     result_b <- solver(data_b, par = theta0, method = "Nelder-Mead")
#     estimates[b, ] <- result_b$par
#     se_estimates[b, ] <- sqrt(diag(result_b$vcov))
#
#     # Asymptotic Wald CIs
#     z <- qnorm(1 - alpha/2)
#     ci_lower[b, ] <- result_b$par - z * sqrt(diag(result_b$vcov))
#     ci_upper[b, ] <- result_b$par + z * sqrt(diag(result_b$vcov))
#     converged[b] <- result_b$converged
#   }, error = function(e) {
#     converged[b] <<- FALSE
#   })
# }
#
# cat("Convergence rate:", mean(converged, na.rm = TRUE), "\n")

## ----monte-carlo-results------------------------------------------------------
# Compute summary statistics (only for converged runs)
valid <- converged & !is.na(estimates[, 1])
est_valid <- estimates[valid, , drop = FALSE]
bias <- colMeans(est_valid) - theta
variance <- apply(est_valid, 2, var)
mse <- bias^2 + variance
rmse <- sqrt(mse)
# Create results table
results_df <- data.frame(
  Component = 1:m,
  True = theta,
  Mean_Est = colMeans(est_valid),
  Bias = bias,
  Variance = variance,
  MSE = mse,
  RMSE = rmse,
  Rel_Bias_Pct = 100 * bias / theta
)
knitr::kable(results_df, digits = 4,
             caption = "Monte Carlo Results: Bias, Variance, and MSE",
             col.names = c("Component", "True θ", "Mean θ̂", "Bias",
                           "Variance", "MSE", "RMSE", "Rel. Bias %"))

## ----ci-coverage--------------------------------------------------------------
# Compute coverage for each component
coverage <- numeric(m)
for (j in 1:m) {
  valid_j <- valid & !is.na(ci_lower[, j]) & !is.na(ci_upper[, j])
  covered <- (ci_lower[valid_j, j] <= theta[j]) &
    (theta[j] <= ci_upper[valid_j, j])
  coverage[j] <- mean(covered)
}
# Mean CI width
mean_width <- colMeans(ci_upper[valid, ] - ci_lower[valid, ], na.rm = TRUE)
coverage_df <- data.frame(
  Component = 1:m,
  True = theta,
  Coverage = coverage,
  Nominal = 1 - alpha,
  Mean_Width = mean_width
)
knitr::kable(coverage_df, digits = 4,
             caption = paste0("Coverage Probability of ", 100*(1-alpha),
                              "% Confidence Intervals"),
             col.names = c("Component", "True θ", "Coverage", "Nominal",
                           "Mean Width"))

## ----sampling-dist-plot, fig.width=8, fig.height=4----------------------------
# Plot sampling distributions
oldpar <- par(mfrow = c(1, min(m, 5)), mar = c(4, 4, 2, 1))
# NOTE(review): on.exit() only registers inside a function frame; at the top
# level of a purled script it has no effect — confirm par() is restored, or
# restore explicitly with par(oldpar) after plotting.
on.exit(par(oldpar))
for (j in 1:min(m, 5)) {
  hist(est_valid[, j], breaks = 20, probability = TRUE,
       main = paste0("Component ", j),
       xlab = expression(hat(theta)[j]),
       col = "lightblue", border = "white")
  abline(v = theta[j], col = "red", lwd = 2, lty = 2)   # true value
  abline(v = mean(est_valid[, j]), col = "blue", lwd = 2) # MC mean
  legend("topright", legend = c("True", "Mean Est."),
         col = c("red", "blue"), lty = c(2, 1), lwd = 2, cex = 0.7)
}

## ----masking-sensitivity-setup, cache=TRUE, eval=run_long---------------------
# set.seed(7231)
#
# # Use smaller sample for sensitivity study
# n_sens <- 500
# B_sens <- 100
#
# # Masking probabilities to test
# p_values <- seq(0, 0.5, by = 0.1)
#
# # Fixed right-censoring (25% censored)
# q_sens <- 0.25
# tau_sens <- rep(-(1/sum(theta))*log(q_sens), n_sens)
#
# # Storage
# mask_results <- list()

## ----masking-sensitivity-run, cache=TRUE, warning=FALSE, message=FALSE, eval=run_long----
# for (p_idx in seq_along(p_values)) {
#   p_curr <- p_values[p_idx]
#   est_p <- matrix(NA, nrow = B_sens, ncol = m)
#
#   for (b in 1:B_sens) {
#     # Generate data
#     comp_b <- matrix(nrow = n_sens, ncol = m)
#     for (j in 1:m) comp_b[, j] <- rexp(n_sens, theta[j])
#     comp_b <- md_encode_matrix(comp_b, "t")
#
#     data_b <- comp_b |>
#       md_series_lifetime_right_censoring(tau_sens) |>
#       md_bernoulli_cand_c1_c2_c3(p_curr) |>
#       md_cand_sampler()
#     data_b$omega <- ifelse(data_b$delta, "exact", "right")
#
#     tryCatch({
#       fit_b <- solver(data_b, par = theta0, method = "Nelder-Mead")
#       if (fit_b$converged) est_p[b, ] <- fit_b$par
#     }, error = function(e) NULL)
#   }
#
#   # Compute statistics
#   valid_p <- !is.na(est_p[, 1])
#   mask_results[[p_idx]] <- list(
#     p = p_curr,
#     bias = colMeans(est_p[valid_p, , drop = FALSE]) - theta,
#     variance = apply(est_p[valid_p, , drop = FALSE], 2, var),
#     mse = (colMeans(est_p[valid_p, , drop = FALSE]) - theta)^2 +
#       apply(est_p[valid_p, , drop = FALSE], 2, var)
#   )
# }

## ----masking-sensitivity-plot, fig.width=8, fig.height=5----------------------
# Extract bias and MSE for plotting
bias_mat <- sapply(mask_results, function(x) mean(abs(x$bias)))
mse_mat <- sapply(mask_results, function(x) mean(x$mse))
oldpar <- par(mfrow = c(1, 2), mar = c(4, 4, 2, 1))
# NOTE(review): see earlier note — on.exit() is a no-op at top level.
on.exit(par(oldpar))
# Mean absolute bias vs masking probability
plot(p_values, bias_mat, type = "b", pch = 19, col = "blue",
     xlab = "Masking Probability (p)", ylab = "Mean Absolute Bias",
     main = "Bias vs Masking Probability")
grid()
# Mean MSE vs masking probability
plot(p_values, mse_mat, type = "b", pch = 19, col = "red",
     xlab = "Masking Probability (p)", ylab = "Mean MSE",
     main = "MSE vs Masking Probability")
grid()

## ----masking-sensitivity-table------------------------------------------------
mask_df <- data.frame(
  p = p_values,
  Mean_Abs_Bias = bias_mat,
  Mean_MSE = mse_mat,
  Mean_RMSE = sqrt(mse_mat)
)
knitr::kable(mask_df, digits = 4,
             caption = "Effect of Masking Probability on Estimation Accuracy",
             col.names = c("Masking Prob.", "Mean |Bias|", "Mean MSE",
                           "Mean RMSE"))

## ----censoring-sensitivity-setup, cache=TRUE, eval=run_long-------------------
# set.seed(7231)
#
# # Censoring quantiles (proportion surviving past tau)
# # q = 0.1 means 10% survive past tau (light censoring, ~10% censored)
# # q = 0.9 means 90% survive past tau (heavy censoring, ~90% censored)
# q_values <- seq(0.1, 0.9, by = 0.1) # Survival probabilities (matches simulation framework)
#
# # Fixed masking probability
# p_cens <- 0.2
#
# # Storage
# cens_results <- list()

## ----censoring-sensitivity-run, cache=TRUE, warning=FALSE, message=FALSE, eval=run_long----
# for (q_idx in seq_along(q_values)) {
#   q_curr <- q_values[q_idx]
#   tau_curr <- rep(-(1/sum(theta))*log(q_curr), n_sens)
#   est_q <- matrix(NA, nrow = B_sens, ncol = m)
#
#   for (b in 1:B_sens) {
#     # Generate data
#     comp_b <- matrix(nrow = n_sens, ncol = m)
#     for (j in 1:m) comp_b[, j] <- rexp(n_sens, theta[j])
#     comp_b <- md_encode_matrix(comp_b, "t")
#
#     data_b <- comp_b |>
#       md_series_lifetime_right_censoring(tau_curr) |>
#       md_bernoulli_cand_c1_c2_c3(p_cens) |>
#       md_cand_sampler()
#     data_b$omega <- ifelse(data_b$delta, "exact", "right")
#
#     tryCatch({
#       fit_b <- solver(data_b, par = theta0, method = "Nelder-Mead")
#       if (fit_b$converged) est_q[b, ] <- fit_b$par
#     }, error = function(e) NULL)
#   }
#
#   # Compute statistics
#   valid_q <- !is.na(est_q[, 1])
#   cens_rate <- mean(data_b$omega == "right") # Actual censoring rate
#   cens_results[[q_idx]] <- list(
#     q = q_curr,
#     cens_rate = cens_rate,
#     bias = colMeans(est_q[valid_q, , drop = FALSE]) - theta,
#     variance = apply(est_q[valid_q, , drop = FALSE], 2, var),
#     mse = (colMeans(est_q[valid_q, , drop = FALSE]) - theta)^2 +
#       apply(est_q[valid_q, , drop = FALSE], 2, var)
#   )
# }

## ----censoring-sensitivity-plot, fig.width=8, fig.height=5--------------------
# Extract statistics
cens_rates <- sapply(cens_results, function(x) x$cens_rate)
bias_cens <- sapply(cens_results, function(x) mean(abs(x$bias)))
mse_cens <- sapply(cens_results, function(x) mean(x$mse))
oldpar <- par(mfrow = c(1, 2), mar = c(4, 4, 2, 1))
# NOTE(review): see earlier note — on.exit() is a no-op at top level.
on.exit(par(oldpar))
# Mean absolute bias vs censoring rate
plot(cens_rates * 100, bias_cens, type = "b", pch = 19, col = "blue",
     xlab = "Censoring Rate (%)", ylab = "Mean Absolute Bias",
     main = "Bias vs Censoring Rate")
grid()
# Mean MSE vs censoring rate
plot(cens_rates * 100, mse_cens, type = "b", pch = 19, col = "red",
     xlab = "Censoring Rate (%)", ylab = "Mean MSE",
     main = "MSE vs Censoring Rate")
grid()

## ----censoring-sensitivity-table----------------------------------------------
cens_df <- data.frame(
  Cens_Rate_Pct = round(cens_rates * 100, 1),
  Mean_Abs_Bias = bias_cens,
  Mean_MSE = mse_cens,
  Mean_RMSE = sqrt(mse_cens)
)
knitr::kable(cens_df, digits = 4,
             caption = "Effect of Right-Censoring Rate on Estimation Accuracy",
             col.names = c("Censoring %", "Mean |Bias|", "Mean MSE",
                           "Mean RMSE"))

## ----save-precomputed, include=FALSE, eval=run_long---------------------------
# # Save simulation results for future fast vignette builds
# saveRDS(list(
#   estimates = estimates, se_estimates = se_estimates,
#   ci_lower = ci_lower, ci_upper = ci_upper, converged = converged,
#   B = B, alpha = alpha,
#   mask_results = mask_results, p_values = p_values,
#   cens_results = cens_results, q_values = q_values,
#   theta = theta, m = m, n_sens = n_sens, B_sens = B_sens, p_cens = p_cens
# ), "precomputed_results.rds")

## ----cleanup, include=FALSE---------------------------------------------------
# Restore the digits option saved in the setup chunk.
options(old_opts)