## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, comment = NA, collapse = TRUE, comment = "#>", fig.width = 8, fig.height = 6, warning = FALSE, message = FALSE, dev = 'png', fig.align = 'center', dpi = 96, out.width = "95%", results = "hide" ) ## ----setup, message=FALSE, warning=FALSE-------------------------------------- library(Rtwalk) library(mvtnorm) library(coda) ## ----analysis-functions------------------------------------------------------- calculate_diagnostics <- function(samples, burnin_frac = 0.2, param_names = NULL, title = "") { n_iter <- nrow(samples) n_burnin <- floor(burnin_frac * n_iter) post_burnin_samples <- samples[(n_burnin + 1):n_iter, , drop = FALSE] n_param <- ncol(samples) if (is.null(param_names)) { param_names <- paste0("theta", 1:n_param) } means <- apply(post_burnin_samples, 2, mean) sds <- apply(post_burnin_samples, 2, sd) quantiles <- apply(post_burnin_samples, 2, quantile, probs = c(0.025, 0.5, 0.975)) chain <- coda::as.mcmc(post_burnin_samples) ess <- coda::effectiveSize(chain) results_table <- data.frame( Parameter = param_names, Mean = round(means, 4), SD = round(sds, 4), Q2.5 = round(quantiles["2.5%", ], 4), Median = round(quantiles["50%", ], 4), Q97.5 = round(quantiles["97.5%", ], 4), ESS = round(ess, 0) ) cat("\nCONVERGENCE DIAGNOSTICS -", title, "\n") print(results_table, row.names = FALSE) return(invisible(results_table)) } visualize_results <- function(samples, true_values = NULL, title = "Results", burnin_frac = 0.2, true_covariance = NULL, show_acf = TRUE) { n_total <- nrow(samples) start_index <- ceiling(n_total * burnin_frac) + 1 analysis_samples <- samples[start_index:n_total, , drop = FALSE] n_dim <- ncol(analysis_samples) oldpar <- par(no.readonly = TRUE) on.exit(par(oldpar)) colors <- c("#2E86AB", "#A23B72", "#F18F01", "#C73E1D", "#7209B7", "#2F9599") bg_color <- "#FAFAFA" if (n_dim == 1) { layout(matrix(c(1, 2, 3, 3), 2, 2, byrow = TRUE)) par(mar = c(4, 4, 3, 1), bg = bg_color, col.axis = "gray30", col.lab = "gray30", col.main = "gray20") hist(analysis_samples[,1], main = paste(title, "- Distribution"), xlab = bquote(theta[1]), freq = FALSE, col = colors[1], border = "white", breaks = 30, las = 1) plot(samples[start_index:n_total, 1], type = 'l', col = colors[1], lwd = 0.8, main = paste(title, "- Trace"), xlab = "Iteration", ylab = bquote(theta[1]), las = 1) if (show_acf) { acf(analysis_samples[,1], main = paste(title, "- Autocorrelation"), col = colors[2], lwd = 2, las = 1) } } else if (n_dim == 2) { layout(matrix(c(1, 2, 3, 4), 2, 2, byrow = TRUE)) par(mar = c(4, 4, 3, 1), bg = bg_color, col.axis = "gray30", col.lab = "gray30", col.main = "gray20") plot(analysis_samples, pch = ".", col = adjustcolor(colors[1], alpha = 0.6), main = title, xlab = bquote(theta[1]), ylab = bquote(theta[2]), las = 1, cex = 1.2) if (!is.null(true_covariance) && requireNamespace("ellipse", quietly = TRUE)) { lines(ellipse::ellipse(true_covariance, centre = true_values), col = colors[2], lwd = 2) } plot(samples[start_index:n_total, 1], type = 'l', col = colors[1], lwd = 0.8, main = bquote(paste("Trace ", theta[1])), xlab = "Iteration", ylab = bquote(theta[1]), las = 1) plot(samples[start_index:n_total, 2], type = 'l', col = colors[2], lwd = 0.8, main = bquote(paste("Trace ", theta[2])), xlab = "Iteration", ylab = bquote(theta[2]), las = 1) dens1 <- density(analysis_samples[,1]) plot(dens1, main = bquote(paste("Marginal Density ", theta[1])), xlab = bquote(theta[1]), col = colors[1], lwd = 3, las = 1, zero.line = FALSE) polygon(dens1, col = adjustcolor(colors[1], alpha = 0.3), border = NA) } else { n_plots <- min(6, n_dim) if (n_plots <= 4) { layout(matrix(1:4, 2, 2, byrow = TRUE)) } else { layout(matrix(1:6, 3, 2, byrow = TRUE)) } par(mar = c(4, 4, 3, 1), bg = bg_color, col.axis = "gray30", col.lab = "gray30", col.main = "gray20") for (i in 1:n_plots) { if (i <= 3) { plot(samples[start_index:n_total, i], type = 'l', col = colors[i], lwd = 0.8, main = bquote(paste("Trace ", theta[.(i)])), xlab = "Iteration", ylab = bquote(theta[.(i)]), las = 1) } else { dens <- density(analysis_samples[,i]) plot(dens, main = bquote(paste("Density ", theta[.(i)])), xlab = bquote(theta[.(i)]), col = colors[i], lwd = 3, las = 1, zero.line = FALSE) polygon(dens, col = adjustcolor(colors[i], alpha = 0.3), border = NA) } } if (n_dim > 6) { cat("\nNote: Only showing first 6 dimensions. Total dimensions:", n_dim, "\n") } } } ## ----eval=TRUE, cache=FALSE--------------------------------------------------- N_ITER_VIGNETTE <- 5000 BURN_FRAC <- 0.2 # --- TEST 1: Standard Univariate Normal --- log_posterior_1 <- function(x) dnorm(x, log = TRUE) result_1 <- twalk(log_posterior_1, n_iter = N_ITER_VIGNETTE, x0 = -2, xp0 = 2) calculate_diagnostics(result_1$all_samples, BURN_FRAC, "theta", "Standard Normal") visualize_results(result_1$all_samples, 0, "Test 1: Standard Normal") # --- TEST 2: Correlated Bivariate Normal --- true_mean_2 <- c(1, -0.5) true_cov_2 <- matrix(c(4, 0.7*2*1.5, 0.7*2*1.5, 2.25), 2, 2) log_posterior_2 <- function(x) mvtnorm::dmvnorm(x, mean = true_mean_2, sigma = true_cov_2, log = TRUE) result_2 <- twalk(log_posterior_2, n_iter = N_ITER_VIGNETTE, x0 = c(0,0), xp0 = c(2,-1)) calculate_diagnostics(result_2$all_samples, BURN_FRAC, c("theta1", "theta2"), "Bivariate Normal") visualize_results(result_2$all_samples, true_mean_2, "Test 2: Bivariate Normal", true_covariance = true_cov_2) # --- TEST 3: Funnel Distribution --- log_posterior_3 <- function(x) { x1 <- x[1] x2 <- x[2] log_prior_x1 <- dnorm(x1, mean = 0, sd = 3, log = TRUE) log_lik_x2 <- dnorm(x2, mean = 0, sd = exp(x1 / 2), log = TRUE) return(log_prior_x1 + log_lik_x2) } result_3 <- twalk(log_posterior_3, n_iter = N_ITER_VIGNETTE, x0 = c(-1.5, -0.2), xp0 = c(1.5, -0.2)) calculate_diagnostics(result_3$all_samples, BURN_FRAC, c("theta1", "theta2"), "Funnel") visualize_results(result_3$all_samples, NULL, "Test 3: Funnel") # --- TEST 4: Rosenbrock Distribution --- log_posterior_4 <- function(x) { x1 <- x[1] x2 <- x[2] k <- 1 / 20 return(-k * (100 * (x2 - x1^2)^2 + (1 - x1)^2)) } result_4 <- twalk(log_posterior_4, n_iter = 100000, x0 = c(0,0), xp0 = c(-1,1)) calculate_diagnostics(result_4$all_samples, BURN_FRAC, c("theta1", "theta2"), "Rosenbrock") visualize_results(result_4$all_samples, c(1,1), "Test 4: Rosenbrock") # --- TEST 5: Gaussian Mixture --- weight1 <- 0.7; mean1 <- c(6, 0); sigma1_1 <- 4; sigma1_2 <- 5; rho1 <- 0.8 cov1 <- matrix(c(sigma1_1^2, rho1*sigma1_1*sigma1_2, rho1*sigma1_1*sigma1_2, sigma1_2^2), nrow=2) weight2 <- 0.3; mean2 <- c(-3, 10); sigma2_1 <- 1; sigma2_2 <- 1; rho2 <- 0.1 cov2 <- matrix(c(sigma2_1^2, rho2*sigma2_1*sigma2_2, rho2*sigma2_1*sigma2_2, sigma2_2^2), nrow=2) log_posterior_5 <- function(x) { log(weight1 * mvtnorm::dmvnorm(x, mean1, cov1) + weight2 * mvtnorm::dmvnorm(x, mean2, cov2)) } result_5 <- twalk(log_posterior_5, n_iter = 100000, x0 = mean1, xp0 = mean2) calculate_diagnostics(result_5$all_samples, BURN_FRAC, c("theta1", "theta2"), "Gaussian Mixture") visualize_results(result_5$all_samples, NULL, "Test 5: Gaussian Mixture") # --- TEST 6: High Dimensionality (10D) --- n_dim_6 <- 10; true_mean_6 <- 1:n_dim_6; rho <- 0.7 true_cov_6 <- matrix(rho^abs(outer(1:n_dim_6, 1:n_dim_6, "-")), n_dim_6, n_dim_6) log_posterior_6 <- function(x) mvtnorm::dmvnorm(x, mean = true_mean_6, sigma = true_cov_6, log = TRUE) result_6 <- twalk(log_posterior_6, n_iter = N_ITER_VIGNETTE, x0 = rep(0, n_dim_6), xp0 = rep(2, n_dim_6)) calculate_diagnostics(result_6$all_samples, BURN_FRAC, paste0("theta", 1:n_dim_6), "10D Normal") visualize_results(result_6$all_samples, true_mean_6, "Test 6: 10D Normal") # --- TEST 7: Bayesian Logistic Regression --- set.seed(123); n_obs <- 2000 true_beta <- c(0.5, -1.2, 0.8) X <- cbind(1, rnorm(n_obs, 0, 1), rnorm(n_obs, 0, 1.5)) eta <- X %*% true_beta; prob <- 1 / (1 + exp(-eta)); y <- rbinom(n_obs, 1, prob) log_posterior_7 <- function(beta, X, y) { eta <- X %*% beta log_lik <- sum(y * eta - log(1 + exp(eta))) log_prior <- sum(dnorm(beta, 0, 5, log = TRUE)) return(log_lik + log_prior) } result_7 <- twalk(log_posterior_7, n_iter = N_ITER_VIGNETTE, x0 = c(0,0,0), xp0 = c(0.2,-0.2,0.1), X = X, y = y) calculate_diagnostics(result_7$all_samples, BURN_FRAC, c("beta0", "beta1", "beta2"), "Logistic Regression") visualize_results(result_7$all_samples, true_beta, "Test 7: Logistic Regression")