---
title: "Benchmark: Stepwise vs Grouped vs Glmnet Engines"
shorttitle: "Benchmark: Stepwise vs Grouped vs Glmnet Engines"
author:
- name: "Frédéric Bertrand"
  affiliation:
  - Cedric, Cnam, Paris
  email: frederic.bertrand@lecnam.net
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Benchmark: Stepwise vs Grouped vs Glmnet Engines}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
#file.edit(normalizePath("~/.Renviron"))
LOCAL <- identical(Sys.getenv("LOCAL"), "TRUE")
#LOCAL=TRUE
knitr::opts_chunk$set(purl = LOCAL)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

This vignette provides quick timing comparisons across engines on a synthetic dataset. Timings are indicative (single run) and depend on your machine and BLAS.

## What you'll learn

* How to benchmark `sb_gamlss()` with different `engine` settings (stepwise, glmnet, grpreg, sgl).
* How to collect elapsed timings via `system.time()` and visualise them with base R plots.
* How to extend the template to include your own custom configurations (e.g., alternative `glmnet_alpha`).

```{r, cache=TRUE, eval=LOCAL}
library(gamlss)
library(SelectBoost.gamlss)

set.seed(123)

# Synthetic Gaussian data: n observations, p standard-normal predictors,
# of which only x1, x3 and x5 enter the linear predictor of the mean.
n <- 800
p <- 30
X <- replicate(p, rnorm(n))
colnames(X) <- paste0("x", seq_len(p))
eta <- 1 + X[, 1] * 1.0 - X[, 3] * 1.2 + X[, 5] * 0.8
y <- gamlss.dist::rNO(n, mu = eta, sigma = 1)
dat <- data.frame(y, X)

# One entry per engine: a display name plus the engine-specific arguments
# that are merged over the shared sb_gamlss() arguments below.
engines <- list(
  list(name = "stepGAIC", args = list(engine = "stepGAIC")),
  list(name = "glmnet-lasso", args = list(engine = "glmnet", glmnet_alpha = 1)),
  list(name = "grpreg", args = list(engine = "grpreg", grpreg_penalty = "grLasso")),
  list(name = "sgl", args = list(engine = "sgl", sgl_alpha = 0.9))
)

# Arguments shared by every run. They are built once, outside the timed
# region, so that formula construction is not part of the measurement.
B <- 40
base_args <- list(
  formula = y ~ 1,
  data = dat,
  family = gamlss.dist::NO(),
  mu_scope = as.formula(paste("~", paste(colnames(X), collapse = " + "))),
  B = B,
  pi_thr = 0.6,
  pre_standardize = TRUE,
  trace = FALSE
)

# Preallocate the result table instead of growing it with rbind() in the loop.
res <- data.frame(
  engine = vapply(engines, function(e) e$name, character(1)),
  elapsed = NA_real_,
  stringsAsFactors = FALSE
)

for (i in seq_along(engines)) {
  e <- engines[[i]]
  cat("Running", e$name, "...\n")
  # Time exactly one fit per engine. Only the engine-specific call is inside
  # system.time(), so the elapsed time reflects that engine alone.
  timing <- system.time({
    fit <- do.call(sb_gamlss, modifyList(base_args, e$args))
  })
  res$elapsed[i] <- timing[["elapsed"]]
}
print(res)

# Simple barplot of wall-clock time; the bottom margin is widened for the
# rotated engine labels, and the previous par() settings are restored after.
op <- par(mar = c(8, 4, 2, 1))
barplot(
  res$elapsed,
  names.arg = res$engine,
  las = 2,
  ylab = "Elapsed (s)",
  main = sprintf("Engine wall time (n=%d, p=%d, B=%d)", n, p, B)
)
par(op)
```