## ----code, echo = FALSE-------------------------------------------------- date = "`r doc_date()`" pkg = "`r pkg_ver('CNEr')`" ## ----global_options, echo=FALSE------------------------------------------ short=TRUE #if short==TRUE, do not echo code chunks debug=FALSE knitr::opts_chunk$set(echo=!short, warning=debug, message=debug, error=FALSE, cache.path = "cache/", fig.path = "figures/") ## ----Axt, eval=TRUE, echo=TRUE------------------------------------------- library(CNEr) axtFilesHg19DanRer7 <- file.path(system.file("extdata", package="CNEr"), "hg19.danRer7.net.axt") axtFilesDanRer7Hg19 <- file.path(system.file("extdata", package="CNEr"), "danRer7.hg19.net.axt") ## ----readAxt, eval=FALSE, echo=TRUE-------------------------------------- # axtHg19DanRer7 <- readAxt(axtFilesHg19DanRer7) # axtDanRer7Hg19 <- readAxt(axtFilesDanRer7Hg19) ## ----loadAxt, eval=TRUE, echo=FALSE-------------------------------------- data(axtHg19DanRer7) data(axtDanRer7Hg19) ## ----showAxt, eval=TRUE, echo=TRUE--------------------------------------- axtHg19DanRer7 axtDanRer7Hg19 ## ----UCSC, eval=FALSE, echo=TRUE----------------------------------------- # ## To fetch rmak table from UCSC # library(rtracklayer) # mySession <- browserSession("UCSC") # genome(mySession) <- "hg19" # hg19.rmsk <- getTable(ucscTableQuery(mySession, track="RepeatMasker", # table="rmsk")) # hg19.rmskGRanges <- GRanges(seqnames=hg19.rmsk$genoName, # ## The UCSC coordinate is 0-based. # ranges=IRanges(start=hg19.rmsk$genoStart+1, # end=hg19.rmsk$genoEnd), # strand=hg19.rmsk$strand) # ## To fetch ensembl gene exons from BioMart # library(biomaRt) # ensembl <- useMart("ensembl", host="feb2014.archive.ensembl.org") # ensembl <- useDataset("hsapiens_gene_ensembl",mart=ensembl) # attributes <- listAttributes(ensembl) # exons <- getBM(attributes=c("chromosome_name", "exon_chrom_start", "exon_chrom_end", "strand"), # mart = ensembl) # exonsRanges <- GRanges(seqnames=exons$chromosome_name, # ranges=IRanges(start=exons$exon_chrom_start, # end=exons$exon_chrom_end), # strand=ifelse(exons$strand==1L, "+", "-") # ) # ## Use the existing Bioconductor annotation package # library(TxDb.Hsapiens.UCSC.hg19.knownGene) # exonsRanges <- exons(TxDb.Hsapiens.UCSC.hg19.knownGene) ## ----Bed, eval=TRUE, echo=TRUE------------------------------------------- bedHg19Fn <- file.path(system.file("extdata", package="CNEr"), "filter_regions.hg19.bed") bedHg19 <- readBed(bedHg19Fn) bedHg19 bedDanRer7Fn <- file.path(system.file("extdata", package="CNEr"), "filter_regions.danRer7.bed") bedDanRer7 <- readBed(bedDanRer7Fn) bedDanRer7 ## ----chromSizes, eval=FALSE, echo=TRUE----------------------------------- # qSizesHg19 <- fetchChromSizes("hg19") # qSizesDanRer7 <- fetchChromSizes("danRer7") ## ----chromSizesData, eval=TRUE, echo=FALSE------------------------------- data(qSizesHg19) data(qSizesDanRer7) ## ----showchromSizesData, eval=TRUE, echo=TRUE---------------------------- qSizesHg19 qSizesDanRer7 ## ----CNEScan, eval=TRUE, echo=TRUE--------------------------------------- ## axt, GRanges objects as input CNEHg19DanRer7 <- ceScan(axts=axtHg19DanRer7, tFilter=bedHg19, qFilter=bedDanRer7, qSizes=qSizesDanRer7, thresholds=c("45_50", "48_50", "49_50")) CNEDanRer7Hg19 <- ceScan(axts=axtDanRer7Hg19, tFilter=bedDanRer7, qFilter=bedHg19, qSizes=qSizesHg19, thresholds=c("45_50", "48_50", "49_50")) ## axt and bed files as input CNEHg19DanRer7 <- ceScan(axts=axtFilesHg19DanRer7, tFilter=bedHg19Fn, qFilter=bedDanRer7Fn, qSizes=qSizesDanRer7, thresholds=c("45_50", "48_50", "49_50")) CNEDanRer7Hg19 <- ceScan(axts=axtFilesDanRer7Hg19, tFilter=bedDanRer7Fn, qFilter=bedHg19Fn, qSizes=qSizesHg19, thresholds=c("45_50", "48_50", "49_50")) ## ----CNEScanHead, eval=TRUE, echo=TRUE----------------------------------- lapply(CNEHg19DanRer7, head) ## ----CNEMerge, eval=TRUE, echo=TRUE-------------------------------------- cneMergedDanRer7Hg19 <- mapply(cneMerge, CNEDanRer7Hg19, CNEHg19DanRer7, SIMPLIFY=FALSE) lapply(cneMergedDanRer7Hg19, head) ## ----CNERealignment, eval=FALSE, echo=TRUE------------------------------- # assemblyHg19Twobit <- "/Users/gtan/CSC/CNEr/2bit/hg19.2bit" # assemblyDanRer7Twobit <- "/Users/gtan/CSC/CNEr/2bit/danRer7.2bit" # cneBlatedDanRer7Hg19 <- list() # for(i in 1:length(cneMergedDanRer7Hg19)){ # cneBlatedDanRer7Hg19[[names(cneMergedDanRer7Hg19)[i]]] <- # blatCNE(cneMergedDanRer7Hg19[[i]], # as.integer(sub(".+_.+_\\d+_", "", names(cneMergedDanRer7Hg19)[i])), # cutoffs1=8L, cutoffs2=8L, # assembly1Twobit=assemblyDanRer7Twobit, # assembly2Twobit=assemblyHg19Twobit, # blatBinary="blat") # } ## ----ceScanOneStep, eval=FALSE, echo=TRUE-------------------------------- # assemblyHg19Twobit <- "/Users/gtan/CSC/CNEr/2bit/hg19.2bit" # assemblyDanRer7Twobit <- "/Users/gtan/CSC/CNEr/2bit/danRer7.2bit" # finalCNE <- ceScanOneStep(axt1=axtHg19DanRer7, filter1=bedHg19, # sizes1=qSizesHg19, assembly1="hg19", # twoBit1=assemblyHg19Twobit, # axt2=axtDanRer7Hg19, filter2=bedDanRer7, # sizes2=qSizesDanRer7, assembly2="danRer7", # twoBit2=assemblyDanRer7Twobit, # thresholds=c("45_50", "48_50", "49_50"), # blatBinary="blat", blatCutoff1=8L, blatCutoff2=8L) ## ----saveCNE, eval=TRUE, echo=TRUE--------------------------------------- ## on individual tables dbName <- tempfile() data(cneBlatedDanRer7Hg19) for(i in 1:length(cneBlatedDanRer7Hg19)){ tableName <- paste("danRer7_hg19", names(cneBlatedDanRer7Hg19)[i], sep="_") saveCNEToSQLite(cneBlatedDanRer7Hg19[[i]], dbName, tableName, overwrite=TRUE) } ## on CNE class data(finalCNE) saveCNEToSQLite(finalCNE, dbName=dbName, overwrite=TRUE) ## ----queryCNE, eval=TRUE, echo=TRUE-------------------------------------- chr <- "chr11" start <- 31000000L end <- 33000000L minLength <- 50L tableName <- "danRer7_hg19_45_50" fetchedCNERanges <- readCNERangesFromSQLite(dbName, tableName, chr, start, end, whichAssembly="L", minLength=minLength) ## ----queryUCSC, eval=FALSE, echo=TRUE------------------------------------ # library(Gviz) # genome <- "hg19" # chr <- "chr11" # start <- 31000000L # end <- 33000000L # axisTrack <- GenomeAxisTrack() # ideoTrack <- IdeogramTrack(genome=genome, chromosome=chr) # cpgIslands <- UcscTrack(genome=genome, chromosome=chr, # track="cpgIslandExt", from=start, to=end, # trackType="AnnotationTrack", start="chromStart", # end="chromEnd", id="name", shape="box", # fill="#006400", name="CpG Islands") # refGenes <- UcscTrack(genome="hg19", chromosome=chr, # track="refGene", from=start, to=end, # trackType="GeneRegionTrack", rstarts="exonStarts", # rends="exonEnds", gene="name2", symbol="name2", # transcript="name", strand="strand", fill="#8282d2", # name="refSeq Genes", collapseTranscripts=TRUE) # biomTrack <- BiomartGeneRegionTrack(genome="hg19", chromosome=chr, # start=start , end=end, name="Ensembl") ## ----loadAnnotation, eval=TRUE, echo=FALSE------------------------------- data(axisTrack) data(ideoTrack) data(cpgIslands) data(refGenes) ## ----plotAnnotation, eval=TRUE, echo=TRUE, fig.height=10, fig.width=7.5---- library(Gviz) plotTracks(list(axisTrack, ideoTrack, cpgIslands, refGenes), collapseTranscripts=TRUE, shape="arrow", showId=TRUE, transcriptAnnotation="symbol") ## ----CNEDensity, eval=TRUE, echo=TRUE------------------------------------ dbName <- file.path(system.file("extdata", package="CNEr"), "cne.sqlite") windowSize <- 300L minLength <- 50L cneHg19DanRer7_45_50 <- CNEDensity(dbName=dbName, tableName="danRer7_hg19_45_50", assembly1="hg19", chr=chr, start=start, end=end, windowSize=windowSize, minLength=minLength) cneHg19DanRer7_48_50 <- CNEDensity(dbName=dbName, tableName="danRer7_hg19_48_50", assembly1="hg19", chr=chr, start=start, end=end, windowSize=windowSize, minLength=minLength) cneHg19DanRer7_49_50 <- CNEDensity(dbName=dbName, tableName="danRer7_hg19_49_50", assembly1="hg19", chr=chr, start=start, end=end, windowSize=windowSize, minLength=minLength) ## ----GvizDataTrack, eval=TRUE, echo=TRUE--------------------------------- data(cneHg19DanRer7_45_50) data(cneHg19DanRer7_48_50) data(cneHg19DanRer7_49_50) genome <- "hg19" chr <- "chr11" start <- 31000000L end <- 33000000L #axisTrack = GenomeAxisTrack() #ideoTrack = IdeogramTrack(genome=genome, chromosome=chr) strand <- "+" dataMatrix <- cneHg19DanRer7_45_50 dTrack1 <- DataTrack(start=dataMatrix[ ,1], end=dataMatrix[ ,1], data=dataMatrix[ ,2], chromosome=chr, strand=strand, genome=genome, type="horiz", horizon.scale=0.1, fill.horizon=c("#B41414", "#E03231", "#F7A99C", "yellow", "orange", "red"), name="danRer7 45/50") dataMatrix <- cneHg19DanRer7_48_50 dTrack2 <- DataTrack(start=dataMatrix[ ,1], end=dataMatrix[ ,1], data=dataMatrix[ ,2], chromosome=chr, strand=strand, genome=genome, type="horiz", horizon.scale=0.1, fill.horizon=c("#B41414", "#E03231", "#F7A99C", "yellow", "orange", "red"), name="danRer7 48/50") dataMatrix <- cneHg19DanRer7_49_50 dTrack3 <- DataTrack(start=dataMatrix[ ,1], end=dataMatrix[ ,1], data=dataMatrix[ ,2], chromosome=chr, strand=strand, genome=genome, type="horiz", horizon.scale=0.1, fill.horizon=c("#B41414", "#E03231", "#F7A99C", "yellow", "orange", "red"), name="danRer7 49/50") ## ----plotCNE, eval=TRUE, echo=TRUE, fig.height=15, fig.width=7.5--------- plotTracks(list(axisTrack, ideoTrack, cpgIslands, refGenes, dTrack1, dTrack2, dTrack3), collapseTranscripts=TRUE, shape="arrow", showId=TRUE, transcriptAnnotation="symbol") ## ----sessionInfo, eval=TRUE, echo=TRUE----------------------------------- sessionInfo()