## -----------------------------------------------------------------------------
# Global knitr chunk options: collapsed source/output, "#>" output prefix,
# figure path and size, and suppressed messages and warnings.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "figures/benchmark-short-",
  fig.width = 6,
  fig.height = 4,
  dpi = 150,
  message = FALSE,
  warning = FALSE
)

# The chunks below are evaluated only when the LOCAL environment variable is
# exactly "TRUE".
LOCAL <- identical(Sys.getenv("LOCAL"), "TRUE")

## ----eval=LOCAL, cache=TRUE---------------------------------------------------
library(bigPLSR)
library(ggplot2)
library(dplyr)
library(tidyr)

# Load the benchmark results shipped with bigPLSR and show their structure.
data("external_pls_benchmarks", package = "bigPLSR")
str(external_pls_benchmarks)

## ----eval=LOCAL, cache=TRUE---------------------------------------------------
# Rank every row within its (task, n, p, q, ncomp) configuration, once by
# median runtime and once by allocated memory. With ties.method = "min", all
# tied rows share the best rank, so several rows can have rank 1.
summ_best <- external_pls_benchmarks %>%
  group_by(task, n, p, q, ncomp) %>%
  mutate(
    rank_time = rank(median_time_s, ties.method = "min"),
    rank_mem = rank(mem_alloc_bytes, ties.method = "min")
  ) %>%
  ungroup()

# Number of configurations in which each (task, package, algorithm) is fastest.
best_time <- summ_best %>%
  filter(rank_time == 1L) %>%
  count(task, package, algorithm, name = "n_best_time")

# Number of configurations in which each (task, package, algorithm) allocates
# the least memory.
best_mem <- summ_best %>%
  filter(rank_mem == 1L) %>%
  count(task, package, algorithm, name = "n_best_mem")

best_time
best_mem

## ----eval=LOCAL, cache=TRUE---------------------------------------------------
# Pick the (n, p, q) combination with the most "pls1" rows. Rows with a missing
# n, p or q are excluded. The tally column is named explicitly because the data
# already has a column called `n`, which would otherwise make count() store the
# tally under the fallback name `nn`.
example_pls1_size <- external_pls_benchmarks %>%
  filter(task == "pls1", !is.na(n), !is.na(p), !is.na(q)) %>%
  count(n, p, q, sort = TRUE, name = "n_rows") %>%
  slice(1L) %>%
  select(n, p, q)

# Keep only the "pls1" rows measured at that size.
example_pls1 <- external_pls_benchmarks %>%
  semi_join(example_pls1_size, by = c("n", "p", "q")) %>%
  filter(task == "pls1")

## ----eval=LOCAL, cache=TRUE---------------------------------------------------
# PLS1 runtime against the number of components, on a log10 y axis.
ggplot(
  example_pls1,
  aes(
    x = ncomp,
    y = median_time_s,
    colour = package,
    linetype = algorithm
  )
) +
  geom_line() +
  geom_point() +
  scale_y_log10() +
  labs(
    x = "Number of components",
    y = "Median runtime (seconds, log scale)",
    title = "PLS1 benchmark, fixed (n, p, q)",
    subtitle = "Comparison across packages and algorithms"
  ) +
  theme_minimal()

## ----eval=LOCAL, cache=TRUE---------------------------------------------------
# PLS1 allocated memory against the number of components; bytes are converted
# to MiB by dividing by 1024^2.
ggplot(
  example_pls1,
  aes(
    x = ncomp,
    y = mem_alloc_bytes / 1024^2,
    colour = package,
    linetype = algorithm
  )
) +
  geom_line() +
  geom_point() +
  labs(
    x = "Number of components",
    y = "Memory allocated (MiB)",
    title = "PLS1 benchmark, fixed (n, p, q)"
  ) +
  theme_minimal()

## ----eval=LOCAL, cache=TRUE---------------------------------------------------
# Same selection as for PLS1, applied to the "pls2" rows: take the (n, p, q)
# combination with the most rows, excluding rows with a missing dimension and
# naming the tally column explicitly to avoid the clash with the `n` column.
example_pls2_size <- external_pls_benchmarks %>%
  filter(task == "pls2", !is.na(n), !is.na(p), !is.na(q)) %>%
  count(n, p, q, sort = TRUE, name = "n_rows") %>%
  slice(1L) %>%
  select(n, p, q)

# Keep only the "pls2" rows measured at that size.
example_pls2 <- external_pls_benchmarks %>%
  semi_join(example_pls2_size, by = c("n", "p", "q")) %>%
  filter(task == "pls2")

## ----eval=LOCAL, cache=TRUE---------------------------------------------------
# PLS2 runtime against the number of components, on a log10 y axis.
ggplot(
  example_pls2,
  aes(
    x = ncomp,
    y = median_time_s,
    colour = package,
    linetype = algorithm
  )
) +
  geom_line() +
  geom_point() +
  scale_y_log10() +
  labs(
    x = "Number of components",
    y = "Median runtime (seconds, log scale)",
    title = "PLS2 benchmark, fixed (n, p, q)",
    subtitle = "Comparison across packages and algorithms"
  ) +
  theme_minimal()

## ----eval=LOCAL, cache=TRUE---------------------------------------------------
# PLS2 allocated memory against the number of components; bytes are converted
# to MiB by dividing by 1024^2.
ggplot(
  example_pls2,
  aes(
    x = ncomp,
    y = mem_alloc_bytes / 1024^2,
    colour = package,
    linetype = algorithm
  )
) +
  geom_line() +
  geom_point() +
  labs(
    x = "Number of components",
    y = "Memory allocated (MiB)",
    title = "PLS2 benchmark, fixed (n, p, q)"
  ) +
  theme_minimal()