###################################################
### chunk number 1: setup
###################################################
#line 18 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
# Set the console output width to 64 characters.
options(width = 64)


###################################################
### chunk number 2: loadGenomicFeatures
###################################################
#line 69 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
# Attach GenomicFeatures, then show the number of rows of the
# supportedUCSCtables() table and preview its first ten rows.
library(GenomicFeatures)
nrow(supportedUCSCtables())
head(supportedUCSCtables(), n = 10)


###################################################
### chunk number 3: makeUCSC eval=FALSE
###################################################
## #line 81 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## mm9KG <-
##   makeTranscriptDbFromUCSC(genome = "mm9",
##                            tablename = "knownGene")


###################################################
### chunk number 4: saveFeatures eval=FALSE
###################################################
## #line 90 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## saveFeatures(mm9KG, file="mm9_knownGene.sqlite")


###################################################
### chunk number 5: loadFeatures
###################################################
#line 93 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
# Load the mm9 knownGene annotation database that ships in the extdata
# directory of the SeattleIntro2010 package.
txdb <- loadFeatures(
  system.file(
    "extdata", "mm9_knownGene.sqlite",
    package = "SeattleIntro2010"
  )
)


###################################################
### chunk number 6: TranscriptDb
###################################################
#line 105 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
# Auto-print the object loaded in the previous chunk.
txdb


###################################################
### chunk number 7: transcripts
###################################################
#line 124 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
# Extract the transcripts from the database, count them, and show the
# first five.
tx <- transcripts(txdb)
length(tx)
head(tx, n = 5)


###################################################
### chunk number 8: exonsBy
###################################################
#line 144 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
# Group exons and introns per feature. exonsBy() is called without a `by`
# argument, so the package default grouping is used
# (presumably by transcript, given the variable name -- TODO confirm).
txExons <- exonsBy(txdb)
txIntrons <- intronsByTranscript(txdb)
# Single-bracket subsetting: show element 6 only.
txExons[6]


###################################################
### chunk number 9: accessors
###################################################
#line 162 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
# Accessor examples: start positions of the transcripts, the ranges of the
# first exon group, and the first two rows of the transcript metadata.
head(start(tx))
head(ranges(txExons), n = 1)
head(elementMetadata(tx), n = 2)


###################################################
### chunk number 10: exer1
###################################################
#line 188 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
# Exercise 1: reload the mm9 knownGene database and group its transcripts
# by gene.
library(GenomicFeatures)
txdb <- loadFeatures(
  system.file(
    "extdata", "mm9_knownGene.sqlite",
    package = "SeattleIntro2010"
  )
)

myTranscripts <- transcriptsBy(txdb, by = "gene")


###################################################
### chunk number 11: findOverlaps-help eval=FALSE
###################################################
## #line 213 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## findOverlaps(query, subject, maxgap = 0L, minoverlap = 1L,
##              type = c("any", "start", "end"),
##              select = c("all", "first"))


###################################################
### chunk number 12: countOverlaps
###################################################
#line 222 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
# Take the gaps between the introns of element 7 of txIntrons, place them
# on the minus strand of chr1, and count the overlaps of each resulting
# range with the transcripts in `tx`.
# NOTE(review): "chr1" and "-" are hard-coded; presumably they match the
# chromosome and strand of that transcript -- confirm.
grngs <- GRanges(
  seqnames = "chr1",
  ranges = gaps(ranges(txIntrons[[7]])),
  strand = "-"
)
countOverlaps(grngs, tx)


###################################################
### chunk number 13: subselect
###################################################
#line 239 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
library(GenomicFeatures)
## Build the sacCer2 sgdGene database and save it to the working
## directory ...
txdb <- makeTranscriptDbFromUCSC(genome = "sacCer2", tablename = "sgdGene")
saveFeatures(txdb, "sacCer2_sgdGene.sqlite")
## ... then replace `txdb` with the copy shipped in the SeattleIntro2010
## package; that copy is the one used by the statements below.
txdb <- loadFeatures(
  system.file(
    "extdata", "sacCer2_sgdGene.sqlite",
    package = "SeattleIntro2010"
  )
)
## NOTE(review): the name `t` is also the name of base::t().
t <- transcripts(txdb)
seqlengths(t)
## length of chr3 and 5 is 316617 and 576869
chr3Len <- seqlengths(t)["chrIII"]
chr5Len <- seqlengths(t)["chrV"]
## Describe the sequences we want to read in: one range per chromosome,
## starting at position 1 and as wide as the chromosome is long.
gr <- GRanges(
  seqnames = Rle(c("chrIII", "chrV")),
  ranges = IRanges(start = 1, width = c(chr3Len, chr5Len)),
  seqlengths = c(chr3Len, chr5Len)
)
## And of course we also need a bam file
fl <- system.file(
  "extdata", "SRR002051.chrI-V.bam",
  package = "SeattleIntro2010"
)


###################################################
### chunk number 14: readGappedAlignments1
###################################################
#line 321 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## A convenient way: read every alignment in the BAM file, convert the
## result with grg(), and keep only the reads on chrIII and chrV.
library(ShortRead)
grGappedBam <- readGappedAlignments(fl)
grbam <- grg(grGappedBam)
grbam <- grbam[seqnames(grbam) %in% c("chrIII", "chrV")]
seqlengths(grbam) <- c(chr3Len, chr5Len)

## For details, see the help pages:
# ?readGappedAlignments
## and
# ?readBamGappedAlignments


###################################################
### chunk number 15: readGappedAlignments2
###################################################
#line 349 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## The most convenient way: let readGappedAlignments() restrict the import
## to the regions in `gr` (the BAM path is also passed as `index`).
grGappedBam <- readGappedAlignments(fl, index = fl, which = gr)
grbam <- grg(grGappedBam)
seqlengths(grbam) <- c(chr3Len, chr5Len)
## I saved this for you
save(grbam, file = "bamReadsChr3andChr5.rda")


###################################################
### chunk number 16: GetAnnotations
###################################################
#line 376 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## Load the saved reads (bamReadsChr3andChr5.rda) from the package's data
## directory.
load(
  system.file(
    "data", "bamReadsChr3andChr5.rda",
    package = "SeattleIntro2010"
  )
)
## First get the annotations: all transcripts grouped by gene
library(GenomicFeatures)
txdb <- loadFeatures(
  system.file(
    "extdata", "sacCer2_sgdGene.sqlite",
    package = "SeattleIntro2010"
  )
)

txs <- transcriptsBy(txdb, by = "gene")


###################################################
### chunk number 17: GetAnnotations2
###################################################
#line 419 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## Check whether the gene IDs "YEL021W" & "YEL020W-A" are present
c("YEL021W", "YEL020W-A") %in% names(txs)

## Let's extract only those two genes
ind <- names(txs) %in% c("YEL021W", "YEL020W-A")
txpair <- txs[ind]

## Drop the strand information (so we will match both + and - strands)
sgrbam <- grbam
strand(sgrbam) <- rep("*", times = length(strand(sgrbam)))


###################################################
### chunk number 18: GetAnnotations3
###################################################
#line 442 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## Find which (strand-less) reads overlap either of the two selected genes.
OL <- findOverlaps(sgrbam, txpair)
head(matchMatrix(OL))
## A read that overlaps both genes contributes one match row per gene, so
## keep each query index only once; otherwise that read would be duplicated
## in the subset and double-counted in any coverage computed from it.
ind <- unique(matchMatrix(OL)[, "query"])

## Subset to just the data that overlapped
subData <- grbam[ind]
seqlengths(subData) <- chr5Len


###################################################
### chunk number 19: GetAnnotations4
###################################################
#line 462 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## Starting with the data object
subData

## call findOverlaps(), this time against all genes in `txs`
OLrev <- findOverlaps(subData, txs)
matchMatrix(OLrev)

## Then extract the genes that overlapped (each subject index once)
ind <- unique(matchMatrix(OLrev)[, "subject"])
txs[ind]


###################################################
### chunk number 20: exer3a
###################################################
#line 490 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## Load nbTopTable.rda from the package's data directory (presumably this
## provides the `nbTopTable` object used in the next chunk -- confirm).
load(
  system.file(
    "data", "nbTopTable.rda",
    package = "SeattleIntro2010"
  )
)


###################################################
### chunk number 21: exer3b
###################################################
#line 505 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## First, let's see whether any of these genes is even named
ids <- head(nbTopTable)[, 1]
library(org.Sc.sgd.db)
mget(ids, org.Sc.sgdGENENAME, ifnotfound = NA)

## Now let's get the ranges
ind <- names(txs) %in% ids
txSub <- txs[ind]


###################################################
### chunk number 22: BrowseData1
###################################################
#line 534 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
library(rtracklayer)
## Construct a range that describes where you want to look.
## NOTE(review): `range` is also the name of base::range(); the name is kept
## because the export() call in the next chunk refers to it.
range <- GRangesForUCSCGenome(
  genome = "sacCer2",
  chrom = "chrV",
  ranges = ranges(subData),
  strand = strand(subData)
)
# browseGenome(range = range)


###################################################
### chunk number 23: BrowseData2
###################################################
#line 557 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## You can export your tracks (bed, gff and wig are supported)
export(range, "GeneRange.bed")
## We also saved this for you
loadedBedFile <- system.file(
  "extdata", "GeneRange.bed",
  package = "SeattleIntro2010"
)
loadedBedFile
## You can upload this file into the genome browser


###################################################
### chunk number 24: BrowseData3
###################################################
#line 578 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## Select the reads that overlap the gene "YEL022W".
## NOTE(review): this overwrites `tx` from the transcripts chunk, and the
## result is stored in `subDat` while the following chunks use `subData` --
## confirm that both are intended.
ind <- names(txs) %in% "YEL022W"
tx <- txs[ind]
OL <- findOverlaps(grbam, tx)
ind <- matchMatrix(OL)[, "query"]
subDat <- grbam[ind]


###################################################
### chunk number 25: BrowseData4
###################################################
#line 600 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## A shift solution: shift the chrV coverage by 1 - min(start(subData)).
## NOTE(review): `cov` is also the name of stats::cov(), and the `x`
## assigned here is overwritten by the slice solution below.
cov <- coverage(subData, shift = list(chrV = 1 - min(start(subData))))
x <- cov[["chrV"]]

## Or a slice solution: slice the coverage with 1L and take the first slice
## on chrV (presumably the first stretch with coverage >= 1 -- confirm).
x <- slice(coverage(subData), 1L)[["chrV"]][[1]]


###################################################
### chunk number 26: BrowseData5
###################################################
#line 624 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
#' Step plot of run values against run start positions.
#'
#' @param x Object for which start() and runValue() are defined; this
#'   script passes an element extracted from coverage() output.
#' @param xlab,ylab Axis labels, forwarded to plot().
#' @param ... Further arguments forwarded to plot().
#' @return Whatever plot() returns; called for its plotting side effect.
plotCoverage <- function(x, xlab = "Position", ylab = "Coverage", ...) {
  run_starts <- c(start(x))
  run_values <- c(runValue(x))
  plot(run_starts, run_values, type = "s", col = "blue",
       xlab = xlab, ylab = ylab, ...)
}

## then call our plot function
plotCoverage(x)


###################################################
### chunk number 27: BrowseData6
###################################################
#line 646 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## Wrap the ranges and strands of subData as chrV ranges for the sacCer2
## UCSC genome and export them to a BED file.
## NOTE(review): `sd` is also the name of stats::sd().
sd <- GRangesForUCSCGenome(
  genome = "sacCer2",
  chrom = "chrV",
  ranges = ranges(subData),
  strand = strand(subData)
)
export(sd, "subData.bed")


###################################################
### chunk number 28: exer4
###################################################
#line 678 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## Get the annotations for the two genes of interest
ind <- names(txs) %in% c("YCL018W", "YCL017C")
txpair <- txs[ind]

## Overlap and subset.
## A read that overlaps both genes contributes one match row per gene, so
## keep each query index only once; otherwise that read would be duplicated
## in subData and double-counted in the coverage plotted below.
OL <- findOverlaps(grbam, txpair)
ind <- unique(matchMatrix(OL)[, "query"])
subData <- grbam[ind]
seqlengths(subData) <- chr3Len

## Save as a track:
range <- GRangesForUCSCGenome(
  genome = "sacCer2",
  chrom = "chrIII",
  ranges = ranges(subData),
  strand = strand(subData)
)
export(range, "GeneRange2.bed")

## or just plot
x <- slice(coverage(subData), 1L)[["chrIII"]][[1]]
plotCoverage(x)


###################################################
### chunk number 29: retrieveTracksAsdataframes
###################################################
#line 712 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## Create a browser session object
session <- browserSession()
## ucscGenomes() lists the available genomes
head(ucscGenomes())
## Choose one and assign it to your session object
genome(session) <- "sacCer2"
## Now you can list the track names
head(trackNames(session))


###################################################
### chunk number 30: retrieveTracksAsdataframes
###################################################
#line 731 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## The session and a track name ("oreganno") are used to create a query object
query <- ucscTableQuery(session, "oreganno")
## For each track, you can list the tables it contains
tableNames(query)
## This information can be used to refine/modify your query object
tableName(query) <- "oreganno"
## All of this is then used by getTable() to retrieve the data
result <- getTable(query)
head(result)


###################################################
### chunk number 31: retrieveTracksAsdataframes
###################################################
#line 754 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## Convert the retrieved table into a GRanges object.
## UCSC tables store 0-based, half-open intervals, whereas IRanges/GRanges
## use 1-based, closed intervals: the start is shifted by one and the end
## is used unchanged.
## NOTE(review): `ranges` is also the name of the ranges() accessor; the
## variable name is kept so existing references to it still work.
ranges <- GRanges(seqnames = result[["chrom"]],
                  ranges = IRanges(start = result[["chromStart"]] + 1L,
                                   end = result[["chromEnd"]]),
                  strand = result[["strand"]],
                  name = result[["name"]])


###################################################
### chunk number 32: exer5
###################################################
#line 779 "H:/SeattleIntro2010/inst/doc/GenomicFeatures.Rnw"
## Query the "ensGene" track/table of the current session
query <- ucscTableQuery(session, "ensGene")
tableNames(query)
tableName(query) <- "ensGene"
ensGene <- getTable(query)

## Then make that into a GRanges object.
## UCSC tables store 0-based, half-open intervals, whereas IRanges/GRanges
## use 1-based, closed intervals: txStart is shifted by one and txEnd is
## used unchanged.
ensRanges <- GRanges(seqnames = ensGene[["chrom"]],
                     ranges = IRanges(start = ensGene[["txStart"]] + 1L,
                                      end = ensGene[["txEnd"]]),
                     strand = ensGene[["strand"]],
                     ensId = ensGene[["name"]])



