Fast Deviance: Microbenchmarks

Frédéric Bertrand

Cedric, Cnam, Paris
frederic.bertrand@lecnam.net

2025-11-21

This vignette compares the fast deviance evaluator (loglik_gamlss_newdata_fast) with the generic route for several common families. Results will vary by machine/BLAS and by R package versions.

Recent releases added optimized shortcuts for additional families (LOGITNO, GEOM, LO, BE) and automatic routing through native gamlss.dist densities for zero-adjusted/zero-inflated variants (e.g., ZAGA, ZIPF, BETA4). Pair this benchmark with check_fast_vs_generic() for numerical validation and fast_vs_generic_ll() for micro-timing comparisons.

What you’ll learn

library(gamlss)
library(SelectBoost.gamlss)

set.seed(2025)
n <- 5000  # larger n makes the differences clearer

families <- list(
  NO = gamlss.dist::NO(),
  PO = gamlss.dist::PO(),
  LOGNO = gamlss.dist::LOGNO(),
  GA = gamlss.dist::GA(),
  IG = gamlss.dist::IG(),
  LO = gamlss.dist::LO(),
  LOGITNO = gamlss.dist::LOGITNO(),
  GEOM = gamlss.dist::GEOM(),
  BE = gamlss.dist::BE()
)

gen_fun <- list(
  NO = function(n) gamlss.dist::rNO(n, mu = 0, sigma = 1),
  PO = function(n) gamlss.dist::rPO(n, mu = 2.5),
  LOGNO = function(n) gamlss.dist::rLOGNO(n, mu = 0, sigma = 0.6),
  GA = function(n) gamlss.dist::rGA(n, mu = 2, sigma = 0.5),
  IG = function(n) gamlss.dist::rIG(n, mu = 2, sigma = 0.5),
  LO = function(n) gamlss.dist::rLO(n, mu = 0, sigma = 1),
  LOGITNO = function(n) gamlss.dist::rLOGITNO(n, mu = 0, sigma = 1),
  GEOM = function(n) gamlss.dist::rGEOM(n, mu = 3),
  BE = function(n) gamlss.dist::rBE(n, mu = 0.4, sigma = 0.2)
)

bench_one <- function(fname) {
  fam <- families[[fname]]
  gen <- gen_fun[[fname]]
  if (is.null(fam) || is.null(gen)) return(NULL)

  y <- gen(n)
  dat <- data.frame(y = y)

  fit <- try(gamlss::gamlss(y ~ 1, data = dat, family = fam), silent = TRUE)
  if (inherits(fit, "try-error")) return(NULL)

  # ensure predictions are available on newdata (same data is fine)
  fast_vs_generic_ll(fit, newdata = dat, reps = 50)
}

res_list <- lapply(names(families), bench_one)
#> GAMLSS-RS iteration 1: Global Deviance = 14284.31 
#> GAMLSS-RS iteration 2: Global Deviance = 14284.31
#> Warning in microbenchmark::microbenchmark(fast = f_fast(), generic = f_gen(), :
#> less accurate nanosecond times to avoid potential integer overflows
#> GAMLSS-RS iteration 1: Global Deviance = 18412.08 
#> GAMLSS-RS iteration 2: Global Deviance = 18412.08 
#> GAMLSS-RS iteration 1: Global Deviance = 9135.418 
#> GAMLSS-RS iteration 2: Global Deviance = 9135.418 
#> GAMLSS-RS iteration 1: Global Deviance = 13230.48 
#> GAMLSS-RS iteration 2: Global Deviance = 13230.48 
#> GAMLSS-RS iteration 1: Global Deviance = 14685.78 
#> GAMLSS-RS iteration 2: Global Deviance = 14685.78 
#> GAMLSS-RS iteration 1: Global Deviance = 20001.95 
#> GAMLSS-RS iteration 2: Global Deviance = 20001.95 
#> GAMLSS-RS iteration 1: Global Deviance = 22420.06 
#> GAMLSS-RS iteration 1: Global Deviance = -8775.955 
#> GAMLSS-RS iteration 2: Global Deviance = -9082.524 
#> GAMLSS-RS iteration 3: Global Deviance = -9082.579 
#> GAMLSS-RS iteration 4: Global Deviance = -9082.579
names(res_list) <- names(families)
res_list <- Filter(Negate(is.null), res_list)

# Present results
res <- do.call(rbind, lapply(names(res_list), function(nm) {
  df <- res_list[[nm]]
  df$family <- nm
  df
}))

res
#>     method   median unit rel_speed family
#> 1     fast 2517.933   us 0.8576360     NO
#> 2  generic 2159.470   us 1.1659958     NO
#> 3     fast 2028.454   us 1.2275415     PO
#> 4  generic 2490.012   us 0.8146364     PO
#> 5     fast 2606.739   us 1.0451171  LOGNO
#> 6  generic 2724.347   us 0.9568306  LOGNO
#> 7     fast 2659.609   us 1.0337298     GA
#> 8  generic 2749.316   us 0.9673708     GA
#> 9     fast 2666.476   us 1.0182668     IG
#> 10 generic 2715.184   us 0.9820609     IG
#> 11    fast 2565.698   us 0.8574259     LO
#> 12 generic 2199.896   us 1.1662815     LO
#> 13    fast 1953.445   us 1.2350824   GEOM
#> 14 generic 2412.666   us 0.8096626   GEOM
#> 15    fast 2907.392   us 0.8569777     BE
#> 16 generic 2491.570   us 1.1668916     BE
if (nrow(res)) {
  # Plot median times by family (if microbenchmark unit)
  if (!is.null(attr(res_list[[1]], "unit"))) {
    # simple barplot: generic vs fast for each family
    op <- par(mfrow=c(1,1), mar=c(8,4,2,1))
    fams <- unique(res$family)
    med_fast <- tapply(res$median[res$method=="fast"], res$family[res$method=="fast"], median)
    med_gen  <- tapply(res$median[res$method=="generic"], res$family[res$method=="generic"], median)
    mat <- rbind(med_gen[fams], med_fast[fams])
    barplot(mat, beside=TRUE, las=2, legend.text = c("generic","fast"),
            main="Median time (microbenchmark units)", ylab=attr(res_list[[1]], "unit"))
    par(op)
  }
}