## ----style, echo = FALSE, results = 'asis', message=FALSE---------------- BiocStyle::markdown() library(knitr) ## ----loadingPackage, warning=FALSE, message=FALSE------------------------ library(consensusSeekeR) ## ----input, echo=FALSE--------------------------------------------------- ### Load dataset data(A549_FOSL2_01_NarrowPeaks_partial) ### Remove dataset metadata field "name" A549_FOSL2_01_NarrowPeaks_partial$name <- NULL A549_FOSL2_01_NarrowPeaks_partial$score <- NULL A549_FOSL2_01_NarrowPeaks_partial$qValue <- NULL A549_FOSL2_01_NarrowPeaks_partial$pValue <- NULL A549_FOSL2_01_NarrowPeaks_partial$signalValue <- NULL A549_FOSL2_01_NarrowPeaks_partial$peak <- NULL ## ----setName, collapse=TRUE---------------------------------------------- ### Initial dataset without metadata field head(A549_FOSL2_01_NarrowPeaks_partial, n = 3) ### Adding a new metadata field "name" unique to each entry A549_FOSL2_01_NarrowPeaks_partial$name <- paste0("FOSL2_01_entry_", 1:length(A549_FOSL2_01_NarrowPeaks_partial)) ### Assing the same row name to each entry names(A549_FOSL2_01_NarrowPeaks_partial) <- rep("FOSL2_01", length(A549_FOSL2_01_NarrowPeaks_partial)) ### Final dataset with metadata field 'name' and row names head(A549_FOSL2_01_NarrowPeaks_partial, n = 3) ## ----inputClean, echo=FALSE, message=FALSE------------------------------- ### Remove dataset rm(A549_FOSL2_01_NarrowPeaks_partial) ## ----knownGenome, collapse=TRUE------------------------------------------ ### Import library library(GenomeInfoDb) ### Get the information for Human genome version 19 hg19Info <- Seqinfo(genome="hg19") ### Subset the object to keep only the analysed chromosomes hg19Subset <- hg19Info[c("chr1", "chr10", "chrX")] ## ----newGenome, collapse=FALSE------------------------------------------- ### Create an Seqinfo Object chrInfo <- Seqinfo(seqnames=c("chr1", "chr2", "chr3"), seqlengths=c(1000, 2000, 1500), isCircular=c(FALSE, FALSE, FALSE), genome="BioconductorAlien") ## ----deleteChr, echo=FALSE, message=FALSE-------------------------------- rm(chrInfo) rm(hg19Subset) rm(hg19Info) ## ----rtracklayer, collapse=TRUE------------------------------------------ ### Load the needed packages library(rtracklayer) library(GenomicRanges) ### Demo file contain inside the consensusSeekeR package file_narrowPeak <- system.file("extdata", "A549_FOSL2_ENCSR000BQO_MZW_part_chr_1_and_12.narrowPeak", package = "consensusSeekeR") ### Information about the extra columns present in the file need ### to be specified extraCols <- c(signalValue = "numeric", pValue = "numeric", qValue = "numeric", peak = "integer") ### Create genomic ranges for the regions regions <- import(file_narrowPeak, format = "BED", extraCols = extraCols) ### Create genomic ranges for the peaks peaks <- regions ranges(peaks) <- IRanges(start = (start(regions) + regions$peak), width = rep(1, length(regions$peak))) ### First rows of each GRanges object head(regions, n = 2) head(peaks, n = 2) ## ----secretRM, echo=FALSE, collapse=TRUE--------------------------------- rm(peaks) rm(regions) ## ----readNarrowPeak_1, collapse=TRUE------------------------------------- library(consensusSeekeR) ### Demo file contain inside the consensusSeekeR package file_narrowPeak <- system.file("extdata", "A549_FOSL2_ENCSR000BQO_MZW_part_chr_1_and_12.narrowPeak", package = "consensusSeekeR") ### Create genomic ranges for both the regions and the peaks result <- readNarrowPeakFile(file_narrowPeak, extractRegions = TRUE, extractPeaks = TRUE) ### First rows of each GRanges object head(result$narrowPeak, n = 2) head(result$peak, n = 2) ## ----secretRM_02, echo=FALSE, collapse=TRUE------------------------------ rm(result) ## ----libraryLoadNucl, warning=FALSE, message=FALSE----------------------- library(consensusSeekeR) ## ----loadingDatasetsNucl------------------------------------------------- ### Loading dataset from NOrMAL data(NOrMAL_nucleosome_positions) ; data(NOrMAL_nucleosome_ranges) ### Loading dataset from PING data(PING_nucleosome_positions) ; data(PING_nucleosome_ranges) ### Loading dataset from NucPosSimulator data(NucPosSimulator_nucleosome_positions) ; data(NucPosSimulator_nucleosome_ranges) ## ----prepareData, echo=FALSE--------------------------------------------- rownames(NOrMAL_nucleosome_positions) <- NULL rownames(NOrMAL_nucleosome_ranges) <- NULL ## ----datasetNuc, collapse=TRUE------------------------------------------- ### Each entry in the positions dataset has an equivalent metadata ### "name" entry in the ranges dataset head(NOrMAL_nucleosome_positions, n = 2) head(NOrMAL_nucleosome_ranges, n = 2) ## ----nuclAssignment, collapse=TRUE--------------------------------------- ### Assigning software name "NOrMAL" names(NOrMAL_nucleosome_positions) <- rep("NOrMAL", length(NOrMAL_nucleosome_positions)) names(NOrMAL_nucleosome_ranges) <- rep("NOrMAL", length(NOrMAL_nucleosome_ranges)) ### Assigning experiment name "PING" names(PING_nucleosome_positions) <- rep("PING", length(PING_nucleosome_positions)) names(PING_nucleosome_ranges) <- rep("PING", length(PING_nucleosome_ranges)) ### Assigning experiment name "NucPosSimulator" names(NucPosSimulator_nucleosome_positions) <- rep("NucPosSimulator", length(NucPosSimulator_nucleosome_positions)) names(NucPosSimulator_nucleosome_ranges) <- rep("NucPosSimulator", length(NucPosSimulator_nucleosome_ranges)) ### Row names are unique to each software head(NOrMAL_nucleosome_positions, n = 2) head(PING_nucleosome_positions, n = 2) head(NucPosSimulator_nucleosome_positions, n = 2) ## ----nuclConsensus, collapse=FALSE--------------------------------------- ### Only choromsome 1 is going to be analysed chrList <- Seqinfo("chr1", 135534747, NA) ### Find consensus regions with both replicates inside it results <- findConsensusPeakRegions( narrowPeaks = c(NOrMAL_nucleosome_ranges, PING_nucleosome_ranges, NucPosSimulator_nucleosome_ranges), peaks = c(NOrMAL_nucleosome_positions, PING_nucleosome_positions, NucPosSimulator_nucleosome_positions), chrInfo = chrList, extendingSize = 25, expandToFitPeakRegion = TRUE, shrinkToFitPeakRegion = TRUE, minNbrExp = 2, nbrThreads = 1) ## ----nuclResults, collapse=TRUE------------------------------------------ ### Print the call results$call ### Print the 3 first consensus regions head(results$consensusRanges, n = 3) ## ----removeDatasetsNucl, echo=FALSE-------------------------------------- ### Remove dataset from NOrMAL rm(NOrMAL_nucleosome_positions) rm(NOrMAL_nucleosome_ranges) ### Remove dataset from PING rm(PING_nucleosome_positions) rm(PING_nucleosome_ranges) ### Remove dataset from NucPosSimulator rm(NucPosSimulator_nucleosome_positions) rm(NucPosSimulator_nucleosome_ranges) ## ----libraryLoad, warning=FALSE, message=FALSE--------------------------- library(consensusSeekeR) ## ----loadingDatasets----------------------------------------------------- ### Loading datasets data(A549_CTCF_MYN_NarrowPeaks_partial) ; data(A549_CTCF_MYN_Peaks_partial) data(A549_CTCF_MYJ_NarrowPeaks_partial) ; data(A549_CTCF_MYJ_Peaks_partial) ## ----repAssignment------------------------------------------------------- ### Assigning experiment name "rep01" to the first replicate names(A549_CTCF_MYJ_NarrowPeaks_partial) <- rep("rep01", length(A549_CTCF_MYJ_NarrowPeaks_partial)) names(A549_CTCF_MYJ_Peaks_partial) <- rep("rep01", length(A549_CTCF_MYJ_Peaks_partial)) ### Assigning experiment name "rep02" to the second replicate names(A549_CTCF_MYN_NarrowPeaks_partial) <- rep("rep02", length(A549_CTCF_MYN_NarrowPeaks_partial)) names(A549_CTCF_MYN_Peaks_partial) <- rep("rep02", length(A549_CTCF_MYN_Peaks_partial)) ## ----replicateConsensus, collapse=FALSE---------------------------------- ### Only choromsome 10 is going to be analysed chrList <- Seqinfo("chr10", 135534747, NA) ### Find consensus regions with both replicates inside it results <- findConsensusPeakRegions( narrowPeaks = c(A549_CTCF_MYJ_NarrowPeaks_partial, A549_CTCF_MYN_NarrowPeaks_partial), peaks = c(A549_CTCF_MYJ_Peaks_partial, A549_CTCF_MYN_Peaks_partial), chrInfo = chrList, extendingSize = 100, expandToFitPeakRegion = TRUE, shrinkToFitPeakRegion = TRUE, minNbrExp = 2, nbrThreads = 1) ## ----replicateResults, collapse=TRUE------------------------------------- ### Print the call results$call ### Print the 3 first consensus regions head(results$consensusRanges, n = 3) ## ----rmCurrentDatasets, echo=FALSE, message=FALSE------------------------ ### Remove datasets rm(A549_CTCF_MYN_NarrowPeaks_partial) rm(A549_CTCF_MYN_Peaks_partial) rm(A549_CTCF_MYJ_NarrowPeaks_partial) rm(A549_CTCF_MYJ_Peaks_partial) ## ----libLoad, warning=FALSE, message=FALSE------------------------------- library(consensusSeekeR) ## ----loadingDatasetsExp-------------------------------------------------- ### Loading datasets data(A549_NR3C1_CFQ_NarrowPeaks_partial) ; data(A549_NR3C1_CFQ_Peaks_partial) data(A549_NR3C1_CFR_NarrowPeaks_partial) ; data(A549_NR3C1_CFR_Peaks_partial) data(A549_NR3C1_CFS_NarrowPeaks_partial) ; data(A549_NR3C1_CFS_Peaks_partial) ## ----expAssignment------------------------------------------------------- ### Assigning experiment name "ENCFF002CFQ" to the first experiment names(A549_NR3C1_CFQ_NarrowPeaks_partial) <- rep("ENCFF002CFQ", length(A549_NR3C1_CFQ_NarrowPeaks_partial)) names(A549_NR3C1_CFQ_Peaks_partial) <- rep("ENCFF002CFQ", length(A549_NR3C1_CFQ_Peaks_partial)) ### Assigning experiment name "ENCFF002CFQ" to the second experiment names(A549_NR3C1_CFR_NarrowPeaks_partial) <- rep("ENCFF002CFR", length(A549_NR3C1_CFR_NarrowPeaks_partial)) names(A549_NR3C1_CFR_Peaks_partial) <- rep("ENCFF002CFR", length(A549_NR3C1_CFR_Peaks_partial)) ### Assigning experiment name "ENCFF002CFQ" to the third experiment names(A549_NR3C1_CFS_NarrowPeaks_partial) <- rep("ENCFF002CFS", length(A549_NR3C1_CFS_NarrowPeaks_partial)) names(A549_NR3C1_CFS_Peaks_partial) <- rep("ENCFF002CFS", length(A549_NR3C1_CFS_Peaks_partial)) ## ----rowNames------------------------------------------------------------ ### Assigning specific name to each entry of to first experiment ### NarrowPeak name must fit Peaks name for same experiment A549_NR3C1_CFQ_NarrowPeaks_partial$name <- paste0("NR3C1_CFQ_region_", 1:length(A549_NR3C1_CFQ_NarrowPeaks_partial)) A549_NR3C1_CFQ_Peaks_partial$name <- paste0("NR3C1_CFQ_region_", 1:length(A549_NR3C1_CFQ_NarrowPeaks_partial)) ### Assigning specific name to each entry of to second experiment ### NarrowPeak name must fit Peaks name for same experiment A549_NR3C1_CFR_NarrowPeaks_partial$name <- paste0("NR3C1_CFR_region_", 1:length(A549_NR3C1_CFR_NarrowPeaks_partial)) A549_NR3C1_CFR_Peaks_partial$name <- paste0("NR3C1_CFR_region_", 1:length(A549_NR3C1_CFR_Peaks_partial)) ### Assigning specific name to each entry of to third experiment ### NarrowPeak name must fit Peaks name for same experiment A549_NR3C1_CFS_NarrowPeaks_partial$name <- paste0("NR3C1_CFS_region_", 1:length(A549_NR3C1_CFS_NarrowPeaks_partial)) A549_NR3C1_CFS_Peaks_partial$name <- paste0("NR3C1_CFS_region_", 1:length(A549_NR3C1_CFS_Peaks_partial)) ## ----experimentConsensus, collapse=FALSE--------------------------------- ### Only choromsome 2 is going to be analysed chrList <- Seqinfo("chr2", 243199373, NA) ### Find consensus regions with both replicates inside it results <- findConsensusPeakRegions( narrowPeaks = c(A549_NR3C1_CFQ_NarrowPeaks_partial, A549_NR3C1_CFR_NarrowPeaks_partial, A549_NR3C1_CFS_NarrowPeaks_partial), peaks = c(A549_NR3C1_CFQ_Peaks_partial, A549_NR3C1_CFR_Peaks_partial, A549_NR3C1_CFS_Peaks_partial), chrInfo = chrList, extendingSize = 200, expandToFitPeakRegion = FALSE, shrinkToFitPeakRegion = TRUE, minNbrExp = 2, nbrThreads = 1) ## ----experimentResults, collapse=TRUE------------------------------------ ### Print the call results$call ### Print the 3 first consensus regions head(results$consensusRanges, n = 3) ## ----rmDatasetsExp, echo=FALSE, message=FALSE---------------------------- ### Remove datasets rm(A549_NR3C1_CFQ_NarrowPeaks_partial) rm(A549_NR3C1_CFQ_Peaks_partial) rm(A549_NR3C1_CFR_NarrowPeaks_partial) rm(A549_NR3C1_CFR_Peaks_partial) rm(A549_NR3C1_CFS_NarrowPeaks_partial) rm(A549_NR3C1_CFS_Peaks_partial) ## ----sizeEffect, collapse=TRUE, eval=FALSE------------------------------- ## ### Set different values for the extendingSize parameter ## size <- c(1, 10, 50, 100, 300, 500, 750, 1000) ## ## ### Only chrompsome 10 is going to be analysed ## chrList <- Seqinfo("chr10", 135534747, NA) ## ## ### Find consensus regions using all the size values ## resultsBySize <- lapply(size, FUN = function(size) findConsensusPeakRegions( ## narrowPeaks = c(A549_CTCF_MYJ_NarrowPeaks_partial, A549_CTCF_MYN_NarrowPeaks_partial), ## peaks = c(A549_CTCF_MYJ_Peaks_partial, A549_CTCF_MYN_Peaks_partial), ## chrInfo = chrList, ## extendingSize = size, ## expandToFitPeakRegion = TRUE, ## shrinkToFitPeakRegion = TRUE, ## minNbrExp = 2, ## nbrThreads = 1)) ## ## ### Extract the number of consensus regions obtained for each extendingSize ## nbrRegions <- mapply(resultsBySize, FUN = function(x) return(length(x$consensusRanges))) ## ## ----graphSizeEffect, warning=FALSE, eval=FALSE-------------------------- ## ## library(ggplot2) ## ## data <- data.frame(extendingSize = size, nbrRegions = nbrRegions) ## ## ggplot(data, aes(extendingSize, nbrRegions)) + scale_x_log10("Extending size") + ## stat_smooth(se = FALSE, method = "loess", size=1.4) + ## ylab("Number of consensus regions") + ## ggtitle(paste0("Number of consensus regions in function of the extendingSize")) ## ## ----parallelExample, eval=FALSE, collapse=TRUE-------------------------- ## ### Load data ## data(A549_FOSL2_01_NarrowPeaks_partial) ; data(A549_FOSL2_01_Peaks_partial) ## data(A549_FOXA1_01_NarrowPeaks_partial) ; data(A549_FOXA1_01_Peaks_partial) ## ## ### Assigning name "FOSL2" ## names(A549_FOSL2_01_NarrowPeaks_partial) <- rep("FOSL2", length(A549_FOSL2_01_NarrowPeaks_partial)) ## names(A549_FOSL2_01_Peaks_partial) <- rep("FOSL2", length(A549_FOSL2_01_Peaks_partial)) ## ## ### Assigning name "FOXA1" ## names(A549_FOXA1_01_NarrowPeaks_partial) <- rep("FOXA1", length(A549_FOXA1_01_NarrowPeaks_partial)) ## names(A549_FOXA1_01_Peaks_partial) <- rep("FOXA1", length(A549_FOXA1_01_Peaks_partial)) ## ## ### Two chromosomes to analyse ## chrList <- Seqinfo(paste0("chr", c(1,10)), c(249250621, 135534747), NA) ## ## ### Find consensus regions using 2 threads ## results <- findConsensusPeakRegions( ## narrowPeaks = c(A549_FOSL2_01_NarrowPeaks_partial, A549_FOXA1_01_Peaks_partial), ## peaks = c(A549_FOSL2_01_Peaks_partial, A549_FOXA1_01_NarrowPeaks_partial), ## chrInfo = chrList, extendingSize = 200, minNbrExp = 2, ## expandToFitPeakRegion = FALSE, shrinkToFitPeakRegion = FALSE, ## nbrThreads = 4) ## ----sessionInfo, echo=FALSE--------------------------------------------- sessionInfo()