## ----setup, include = FALSE--------------------------------------------------- #file.edit(normalizePath("~/.Renviron")) LOCAL <- identical(Sys.getenv("LOCAL"), "TRUE") #LOCAL=TRUE knitr::opts_chunk$set(purl = LOCAL) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----cache=TRUE, eval=LOCAL--------------------------------------------------- set.seed(1) library(gamlss) library(SelectBoost.gamlss) n <- 400 x1 <- rnorm(n); x2 <- rnorm(n); x3 <- rnorm(n) mu <- 1 + 1.5*x1 - 1.2*x3 y <- gamlss.dist::rNO(n, mu = mu, sigma = 1) dat <- data.frame(y, x1, x2, x3) fit <- sb_gamlss( y ~ 1, mu_scope = ~ x1 + x2 + x3, sigma_scope = ~ x1 + x2, family = gamlss.dist::NO(), data = dat, B = 50, sample_fraction = 0.7, pi_thr = 0.6, k = 2, pre_standardize = TRUE, trace = FALSE ) fit$final_formula sel <- selection_table(fit) sel <- sel[order(sel$parameter, -sel$prop), , drop = FALSE] head(sel, n = min(8L, nrow(sel))) stable <- sel[sel$prop >= fit$pi_thr, , drop = FALSE] if (nrow(stable)) { stable } else { cat("No terms reached the stability threshold of", fit$pi_thr, "for this run.\n") } plot_sb_gamlss(fit, top = 10) if (requireNamespace("ggplot2", quietly = TRUE)) { print(effect_plot(fit, "x1", dat, what = "mu")) } ## ----cache=TRUE, eval=LOCAL--------------------------------------------------- set.seed(2) f <- factor(sample(letters[1:3], n, TRUE)) dat2 <- transform(dat, f = f, x4 = rnorm(n)) fit_grouped <- sb_gamlss( y ~ 1, mu_scope = ~ f + x1 + pb(x2) + x3 + x1:f, sigma_scope = ~ f + x1 + pb(x2), family = gamlss.dist::NO(), data = dat2, engine = "grpreg", # μ: grouped penalty engine_sigma = "sgl", # σ: sparse group lasso engine_tau = "glmnet", # τ (if present) would fall back automatically grpreg_penalty = "grLasso", sgl_alpha = 0.7, B = 40, pi_thr = 0.6, pre_standardize = TRUE, trace = FALSE ) sel_grouped <- selection_table(fit_grouped) sel_grouped <- sel_grouped[order(sel_grouped$parameter, -sel_grouped$prop), , drop = FALSE] head(sel_grouped, n = min(8L, nrow(sel_grouped))) stable_grouped <- sel_grouped[sel_grouped$prop >= fit_grouped$pi_thr, , drop = FALSE] if (nrow(stable_grouped)) { stable_grouped } else { cat("No terms reached the stability threshold of", fit_grouped$pi_thr, "for this run.\n") } ## ----cache=TRUE, eval=LOCAL--------------------------------------------------- grid_obj <- sb_gamlss_c0_grid( y ~ 1, data = dat, family = gamlss.dist::NO(), mu_scope = ~ x1 + x2 + x3, sigma_scope = ~ x1 + x2, c0_grid = seq(0.2, 0.8, by = 0.2), B = 30, pi_thr = 0.6, pre_standardize = TRUE, trace = FALSE, progress = TRUE ) plot(grid_obj) confidence_table(grid_obj) auto_obj <- autoboost_gamlss( y ~ 1, data = dat, family = gamlss.dist::NO(), mu_scope = ~ x1 + x2 + x3, sigma_scope = ~ x1 + x2, c0_grid = seq(0.2, 0.8, by = 0.2), B = 30, pi_thr = 0.6, pre_standardize = TRUE, trace = FALSE ) attr(auto_obj, "chosen_c0") fast_obj <- fastboost_gamlss( y ~ 1, data = dat, family = gamlss.dist::NO(), mu_scope = ~ x1 + x2 + x3, sigma_scope = ~ x1 + x2, B = 30, sample_fraction = 0.6, pi_thr = 0.6, pre_standardize = TRUE, trace = FALSE ) plot_sb_gamlss(fast_obj) ## ----cache=TRUE, eval=LOCAL--------------------------------------------------- cf <- confidence_functionals( grid_obj, pi_thr = 0.6, q = c(0.5, 0.8, 0.9), conservative = TRUE, B = 30 ) head(cf) plot(cf, top = 8) plot_stability_curves(grid_obj, terms = c("x1", "x3"), parameter = "mu") ## ----cache=TRUE, eval=LOCAL--------------------------------------------------- configs <- list( list(engine = "stepGAIC"), list(engine = "glmnet", glmnet_alpha = 1), list(engine = "grpreg", grpreg_penalty = "grLasso", engine_sigma = "sgl", sgl_alpha = 0.8) ) baseline <- list( formula = y ~ 1, data = dat2, family = gamlss.dist::NO(), mu_scope = ~ f + x1 + pb(x2) + x3 + x1:f, sigma_scope = ~ f + x1 + pb(x2), pi_thr = 0.6, pre_standardize = TRUE, sample_fraction = 0.7, trace = FALSE, parallel = "auto" ) tuned <- tune_sb_gamlss(configs, base_args = baseline, B_small = 25, metric = "stability", progress = TRUE) tuned$best_config ## ----eval=FALSE--------------------------------------------------------------- # # Example: compare deviance routes on a fitted model # fit_ga <- gamlss::gamlss(y ~ x1 + x2, sigma.formula = ~ x1, data = dat, family = gamlss.dist::NO()) # fast_vs_generic_ll(fit_ga, newdata = dat, reps = 30) # check_fast_vs_generic(fit_ga, newdata = dat, tol = 1e-6) ## ----eval=FALSE--------------------------------------------------------------- # future::plan(multisession, workers = 4) # fit_parallel <- sb_gamlss( # y ~ 1, # mu_scope = ~ x1 + x2 + x3, # sigma_scope = ~ x1 + x2, # family = gamlss.dist::NO(), # data = dat, # B = 200, # pi_thr = 0.6, # pre_standardize = TRUE, # parallel = "auto", # progress = TRUE, # trace = FALSE # )