Contents

1 Overview

RBedMethyl provides disk-backed access to nanoporetech modkit bedMethyl files generated from ONT data, enabling ONT-scale workflows without loading full tables into memory. This vignette demonstrates a typical workflow: ingest, subset by region, compute per-region summaries, and coerce to downstream Bioconductor classes.

2 Ingest a bedMethyl file

# Example bedMethyl-like content (small for illustration; headerless)
lines <- c(
  paste("chr1", 0, 1, "m", 0, "+", 0, 1, 0, 10, 0.5, 5, 5, 0, 0, 0, 0, 0, sep = "\t"),
  paste("chr1", 10, 11, "m", 0, "+", 10, 11, 0, 20, 0.25, 5, 15, 0, 0, 0, 0, 0, sep = "\t"),
  paste("chr2", 0, 1, "m", 0, "-", 0, 1, 0, 8, 0.375, 3, 5, 0, 0, 0, 0, 0, sep = "\t")
)

bed <- tempfile(fileext = ".bed")
writeLines(lines, bed)

bm <- readBedMethyl(bed, mod = "m", fields = c("coverage", "pct", "mod_reads"))

bm
#> An object of class "RBedMethyl"
#> Slot "assays":
#> List of length 7
#> names(7): chrom chromStart chromEnd strand coverage pct mod_reads
#> 
#> Slot "chrom_levels":
#> [1] "chr1" "chr2"
#> 
#> Slot "strand_levels":
#> [1] "+" "-"
#> 
#> Slot "chr_index":
#>      starts ends
#> chr1      1    2
#> chr2      3    3
#> 
#> Slot "index":
#> [1] 1 2 3
#> 
#> Slot "mod":
#> [1] "m"

3 Region-level summaries

regions <- GenomicRanges::GRanges(
  seqnames = c("chr1", "chr2"),
  ranges = IRanges::IRanges(start = c(1, 1), end = c(12, 2))
)

summarizeByRegion(bm, regions)
#> DataFrame with 2 rows and 4 columns
#>    coverage mod_reads      beta   n_sites
#>   <numeric> <numeric> <numeric> <integer>
#> 1        30        10  0.333333         2
#> 2         8         3  0.375000         1

4 Coercion to downstream classes

# RangedSummarizedExperiment
rse <- as(bm, "RangedSummarizedExperiment")
rse
#> class: RangedSummarizedExperiment 
#> dim: 3 1 
#> metadata(0):
#> assays(3): coverage mod_reads pct
#> rownames: NULL
#> rowData names(0):
#> colnames: NULL
#> colData names(0):

# bsseq (if installed)
if (requireNamespace("bsseq", quietly = TRUE) &&
    methods::hasMethod("coerce", c("RBedMethyl", "BSseq"))) {
  bs <- as(bm, "BSseq")
  bs
} else {
  message("bsseq is not installed or BSseq coercion is unavailable; skipping BSseq coercion.")
}
#> An object of type 'BSseq' with
#>   3 methylation loci
#>   1 samples
#> has not been smoothed
#> All assays are in-memory

4.1 Session info

sessionInfo()
#> R Under development (unstable) (2026-03-05 r89546)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#> 
#> Matrix products: default
#> BLAS:   /home/biocbuild/bbs-3.23-bioc/R/lib/libRblas.so 
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_GB              LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: America/New_York
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] RBedMethyl_0.99.0 BiocStyle_2.39.0 
#> 
#> loaded via a namespace (and not attached):
#>  [1] SummarizedExperiment_1.41.1 rjson_0.2.23               
#>  [3] xfun_0.56                   bslib_0.10.0               
#>  [5] rhdf5_2.55.13               Biobase_2.71.0             
#>  [7] lattice_0.22-9              bitops_1.0-9               
#>  [9] rhdf5filters_1.23.3         tools_4.6.0                
#> [11] generics_0.1.4              curl_7.0.0                 
#> [13] stats4_4.6.0                parallel_4.6.0             
#> [15] R.oo_1.27.1                 Matrix_1.7-4               
#> [17] data.table_1.18.2.1         BSgenome_1.79.1            
#> [19] RColorBrewer_1.1-3          cigarillo_1.1.0            
#> [21] S4Vectors_0.49.0            sparseMatrixStats_1.23.0   
#> [23] lifecycle_1.0.5             compiler_4.6.0             
#> [25] farver_2.1.2                Rsamtools_2.27.1           
#> [27] Biostrings_2.79.5           statmod_1.5.1              
#> [29] Seqinfo_1.1.0               codetools_0.2-20           
#> [31] permute_0.9-10              htmltools_0.5.9            
#> [33] sass_0.4.10                 RCurl_1.98-1.17            
#> [35] yaml_2.3.12                 crayon_1.5.3               
#> [37] jquerylib_0.1.4             R.utils_2.13.0             
#> [39] BiocParallel_1.45.0         DelayedArray_0.37.0        
#> [41] cachem_1.1.0                limma_3.67.0               
#> [43] abind_1.4-8                 gtools_3.9.5               
#> [45] locfit_1.5-9.12             digest_0.6.39              
#> [47] restfulr_0.0.16             bookdown_0.46              
#> [49] fastmap_1.2.0               grid_4.6.0                 
#> [51] cli_3.6.5                   SparseArray_1.11.11        
#> [53] S4Arrays_1.11.1             XML_3.99-0.22              
#> [55] dichromat_2.0-0.1           h5mread_1.3.2              
#> [57] DelayedMatrixStats_1.33.0   scales_1.4.0               
#> [59] httr_1.4.8                  rmarkdown_2.30             
#> [61] XVector_0.51.0              matrixStats_1.5.0          
#> [63] otel_0.2.0                  R.methodsS3_1.8.2          
#> [65] beachmat_2.27.3             HDF5Array_1.39.0           
#> [67] evaluate_1.0.5              knitr_1.51                 
#> [69] BiocIO_1.21.0               GenomicRanges_1.63.1       
#> [71] IRanges_2.45.0              rtracklayer_1.71.3         
#> [73] rlang_1.1.7                 Rcpp_1.1.1                 
#> [75] glue_1.8.0                  BiocManager_1.30.27        
#> [77] BiocGenerics_0.57.0         bsseq_1.47.1               
#> [79] jsonlite_2.0.0              R6_2.6.1                   
#> [81] Rhdf5lib_1.33.0             GenomicAlignments_1.47.0   
#> [83] MatrixGenerics_1.23.0