pkg_root <- if (file.exists("DESCRIPTION")) "." else ".."
if (requireNamespace("pkgload", quietly = TRUE) &&
file.exists(file.path(pkg_root, "DESCRIPTION"))) {
pkgload::load_all(pkg_root, export_all = FALSE, helpers = FALSE, quiet = TRUE)
}
library(growkar)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(knitr)
data(yeast_growth_data)High-throughput microbial growth assays are widely used to characterize phenotypic responses to genetic perturbations, environmental conditions, and drug treatments. These assays generate time-resolved measurements across large numbers of samples, analogous to other high-throughput experimental platforms in functional genomics.
While tools exist for transcriptomic and epigenomic data analysis within the Bioconductor ecosystem, there is a relative lack of standardized infrastructure for analyzing growth-based phenotypic data and integrating it with omics datasets.
The growkar package addresses this gap by providing a scalable and reproducible framework for high-throughput microbial phenotyping from growth assays. It uses Bioconductor data structures to represent assay measurements and derived phenotypes, while still accepting tidy and wide inputs as import adapters for exploratory analysis and visualization.
The primary workflow is:
SummarizedExperiment.For Bioconductor-oriented workflows, growth measurements can be represented as a SummarizedExperiment, with OD values stored in the assay, time values in rowData(), sample annotations in colData(), and derived summaries in metadata().
growkar_obj <- as_growkar(yeast_growth_data)
se <- methods::as(growkar_obj, "SummarizedExperiment")
se <- growth_metrics(se, method = "rolling_window", average_replicates = TRUE)
S4Vectors::metadata(se)$growth_metrics
#> # A tibble: 3 × 10
#> sample mu start_time end_time r_squared method n_points degraded note
#> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <int> <lgl> <chr>
#> 1 Cg 0.569 4.5 6.5 1.000 rolling_… 5 FALSE roll…
#> 2 CgFlu 0.404 4.5 6.5 1.000 rolling_… 5 FALSE roll…
#> 3 YPD 0.00128 12.5 14.5 0.500 rolling_… 5 FALSE roll…
#> # ℹ 1 more variable: doubling_time <dbl>This workflow is useful when growth phenotypes need to be carried forward into other Bioconductor analyses or linked to omics-derived sample annotations.
The canonical SE layout in growkar is:
assay(se, "od"): OD matrix with timepoints in rows and samples in columnsrowData(se): timepoint metadatacolData(se): sample metadatametadata(se): derived results such as growth metrics, phase windows, and fitsHelper accessors make these components easier to inspect:
growth_assay(se)[1:3, 1:3]
#> Cg_R1 Cg_R2 Cg_R3
#> 0 0.115 0.116 0.117
#> 0.5 0.116 0.118 0.117
#> 1 0.118 0.119 0.118
timepoints(se)
#> # A tibble: 49 × 1
#> time
#> <dbl>
#> 1 0
#> 2 0.5
#> 3 1
#> 4 1.5
#> 5 2
#> 6 2.5
#> 7 3
#> 8 3.5
#> 9 4
#> 10 4.5
#> # ℹ 39 more rows
sample_data(se)
#> # A tibble: 9 × 3
#> sample condition replicate
#> <chr> <fct> <fct>
#> 1 Cg_R1 Cg R1
#> 2 Cg_R2 Cg R2
#> 3 Cg_R3 Cg R3
#> 4 CgFlu_R1 CgFlu R1
#> 5 CgFlu_R2 CgFlu R2
#> 6 CgFlu_R3 CgFlu R3
#> 7 YPD_R1 YPD R1
#> 8 YPD_R2 YPD R2
#> 9 YPD_R3 YPD R3tidy_growth <- as_tidy_growth_data(yeast_growth_data)
head(tidy_growth)
#> # A tibble: 6 × 5
#> time sample od condition replicate
#> <dbl> <chr> <dbl> <chr> <chr>
#> 1 0 Cg_R1 0.115 Cg R1
#> 2 0 Cg_R2 0.116 Cg R2
#> 3 0 Cg_R3 0.117 Cg R3
#> 4 0 CgFlu_R1 0.131 CgFlu R1
#> 5 0 CgFlu_R2 0.133 CgFlu R2
#> 6 0 CgFlu_R3 0.132 CgFlu R3The canonical columns are sample, time, and od. Additional metadata such as condition and replicate can be carried alongside them.
validate_growth_data(tidy_growth)
#> # A tibble: 441 × 5
#> time sample od condition replicate
#> <dbl> <chr> <dbl> <chr> <chr>
#> 1 0 Cg_R1 0.115 Cg R1
#> 2 0 Cg_R2 0.116 Cg R2
#> 3 0 Cg_R3 0.117 Cg R3
#> 4 0 CgFlu_R1 0.131 CgFlu R1
#> 5 0 CgFlu_R2 0.133 CgFlu R2
#> 6 0 CgFlu_R3 0.132 CgFlu R3
#> 7 0 YPD_R1 0.105 YPD R1
#> 8 0 YPD_R2 0.105 YPD R2
#> 9 0 YPD_R3 0.104 YPD R3
#> 10 0.5 Cg_R1 0.116 Cg R1
#> # ℹ 431 more rowsThe same tidy data can also be converted back into a SummarizedExperiment whenever a container-based workflow is needed.
Observed growth curves can be plotted directly from the canonical SummarizedExperiment:
plot_growth_curve(
se,
average_replicates = TRUE,
colour_col = "condition",
palette_name = "Dark2"
)Replicate-level faceting remains available when averaging is disabled:
plot_growth_curve(
se,
average_replicates = FALSE,
colour_col = "condition",
facet_col = "replicate",
palette_name = "Dark2"
)metrics <- summarize_growth_metrics(
se,
method = "rolling_window",
average_replicates = TRUE
)
knitr::kable(metrics, digits = 3)| sample | mu | start_time | end_time | r_squared | method | n_points | degraded | note | doubling_time |
|---|---|---|---|---|---|---|---|---|---|
| Cg | 0.569 | 4.5 | 6.5 | 1.0 | rolling_window | 5 | FALSE | rolling_window_ranked | 1.219 |
| CgFlu | 0.404 | 4.5 | 6.5 | 1.0 | rolling_window | 5 | FALSE | rolling_window_ranked | 1.714 |
| YPD | 0.001 | 12.5 | 14.5 | 0.5 | rolling_window | 5 | FALSE | rolling_window_ranked | 539.788 |
For a single sample, the empirical growth-rate estimate can be inspected in more detail:
gr <- compute_growth_rate(se, method = "rolling_window")
#> Warning: Sample `YPD_R1`: Exponential phase detection did not yield a positive
#> growth slope (rolling_window_ranked).
#> Warning: Sample `YPD_R2`: Exponential phase detection did not yield a positive
#> growth slope (rolling_window_ranked).
knitr::kable(head(gr), digits = 3)| sample | mu | start_time | end_time | r_squared | method | n_points | degraded | note |
|---|---|---|---|---|---|---|---|---|
| Cg_R1 | 0.576 | 4.5 | 6.5 | 1.000 | rolling_window | 5 | FALSE | rolling_window_ranked |
| Cg_R2 | 0.560 | 4.5 | 6.5 | 1.000 | rolling_window | 5 | FALSE | rolling_window_ranked |
| Cg_R3 | 0.571 | 4.5 | 6.5 | 0.999 | rolling_window | 5 | FALSE | rolling_window_ranked |
| CgFlu_R1 | 0.403 | 4.5 | 6.5 | 1.000 | rolling_window | 5 | FALSE | rolling_window_ranked |
| CgFlu_R2 | 0.407 | 4.5 | 6.5 | 1.000 | rolling_window | 5 | FALSE | rolling_window_ranked |
| CgFlu_R3 | 0.403 | 4.5 | 6.5 | 1.000 | rolling_window | 5 | FALSE | rolling_window_ranked |
The doubling time helper is useful when you already have growth-rate values:
knitr::kable(
tibble(
sample = gr$sample,
growth_rate = gr$mu,
doubling_time = compute_doubling_time(gr$mu)
),
digits = 3
)| sample | growth_rate | doubling_time |
|---|---|---|
| Cg_R1 | 0.576 | 1.204 |
| Cg_R2 | 0.560 | 1.238 |
| Cg_R3 | 0.571 | 1.214 |
| CgFlu_R1 | 0.403 | 1.719 |
| CgFlu_R2 | 0.407 | 1.702 |
| CgFlu_R3 | 0.403 | 1.721 |
| YPD_R1 | NA | NA |
| YPD_R2 | NA | NA |
| YPD_R3 | 0.004 | 179.350 |
detect_exponential_phase() returns the ranked candidate windows and metadata describing whether the chosen interval required any degraded fallback.
| sample | rank | start_time | end_time | slope | r_squared | n_points | selection_reason | degraded |
|---|---|---|---|---|---|---|---|---|
| Cg_R1 | 1 | 4.5 | 6.5 | 0.576 | 1.000 | 5 | rolling_window_ranked | FALSE |
| Cg_R1 | 2 | 4.0 | 6.0 | 0.551 | 0.997 | 5 | rolling_window_ranked | FALSE |
| Cg_R1 | 3 | 5.0 | 7.0 | 0.551 | 0.997 | 5 | rolling_window_ranked | FALSE |
| Cg_R1 | 4 | 3.5 | 5.5 | 0.490 | 0.990 | 5 | rolling_window_ranked | FALSE |
| Cg_R1 | 5 | 5.5 | 7.5 | 0.490 | 0.991 | 5 | rolling_window_ranked | FALSE |
| Cg_R1 | 6 | 6.0 | 8.0 | 0.414 | 0.987 | 5 | rolling_window_ranked | FALSE |
For a full-curve model-based summary, fit one of the supported parametric models:
sample_id <- unique(gr$sample)[1]
fit_input <- as_tidy_growth_data(se) |>
filter(sample == sample_id)
cg_fit <- fit_growth_curve(fit_input, model = "logistic")
extract_params(cg_fit)
#> # A tibble: 1 × 6
#> sample model asymptote r t0 doubling_time_model
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Cg_R1 logistic 2.03 0.777 6.97 0.892
summary(cg_fit)
#> # A tibble: 1 × 9
#> sample model converged status message n_points rss aic bic
#> <chr> <chr> <lgl> <chr> <chr> <int> <dbl> <dbl> <dbl>
#> 1 Cg_R1 logistic TRUE converged <NA> 49 0.0688 -175. -167.Failed fits remain machine-readable and do not crash downstream helpers:
flat_fit <- fit_growth_curve(
tibble(
sample = "flat",
time = 0:4,
od = rep(0.2, 5)
),
model = "logistic"
)
summary(flat_fit)
#> # A tibble: 1 × 9
#> sample model converged status message n_points rss aic bic
#> <chr> <chr> <lgl> <chr> <chr> <int> <dbl> <dbl> <dbl>
#> 1 flat logistic FALSE flat_curve Model fitting… 5 NA NA NAObserved and fitted values can be visualized together:
The supported SE-native interface includes:
as_tidy_growth_data()validate_growth_data()compute_growth_rate()summarize_growth_metrics()detect_exponential_phase()fit_growth_curve()growkar_fit object with status and diagnostics rather than throwing a cryptic nls error.sessionInfo()
#> R version 4.6.0 RC (2026-04-17 r89917)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /home/biocbuild/bbs-3.23-bioc/R/lib/libRblas.so
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0 LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_GB LC_COLLATE=C
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: America/New_York
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] knitr_1.51 dplyr_1.2.1 growkar_0.99.0 testthat_3.3.2
#>
#> loaded via a namespace (and not attached):
#> [1] utf8_1.2.6 tidyr_1.3.2
#> [3] sass_0.4.10 generics_0.1.4
#> [5] SparseArray_1.12.2 lattice_0.22-9
#> [7] digest_0.6.39 magrittr_2.0.5
#> [9] evaluate_1.0.5 grid_4.6.0
#> [11] RColorBrewer_1.1-3 pkgload_1.5.2
#> [13] fastmap_1.2.0 rprojroot_2.1.1
#> [15] jsonlite_2.0.0 Matrix_1.7-5
#> [17] pkgbuild_1.4.8 brio_1.1.5
#> [19] purrr_1.2.2 scales_1.4.0
#> [21] jquerylib_0.1.4 abind_1.4-8
#> [23] cli_3.6.6 rlang_1.2.0
#> [25] XVector_0.52.0 Biobase_2.72.0
#> [27] withr_3.0.2 cachem_1.1.0
#> [29] DelayedArray_0.38.1 yaml_2.3.12
#> [31] otel_0.2.0 S4Arrays_1.12.0
#> [33] tools_4.6.0 ggplot2_4.0.3
#> [35] SummarizedExperiment_1.42.0 BiocGenerics_0.58.0
#> [37] vctrs_0.7.3 R6_2.6.1
#> [39] matrixStats_1.5.0 stats4_4.6.0
#> [41] lifecycle_1.0.5 Seqinfo_1.2.0
#> [43] S4Vectors_0.50.0 IRanges_2.46.0
#> [45] pkgconfig_2.0.3 desc_1.4.3
#> [47] bslib_0.10.0 pillar_1.11.1
#> [49] gtable_0.3.6 glue_1.8.1
#> [51] xfun_0.57 tibble_3.3.1
#> [53] GenomicRanges_1.64.0 tidyselect_1.2.1
#> [55] dichromat_2.0-0.1 MatrixGenerics_1.24.0
#> [57] farver_2.1.2 htmltools_0.5.9
#> [59] labeling_0.4.3 rmarkdown_2.31
#> [61] compiler_4.6.0 S7_0.2.2