## http://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?study=SRP000227
library(SeattleIntro2010)
## Absolute locations on the original author's machine:
##   pkgroot - directory whose data/ and inst/extdata/ receive the outputs
##   datasrc - directory holding the input data used by this script
pkgroot <- "/home/mtmorgan/SeattleIntro2010"
datasrc <- "/home/mtmorgan/SeattleIntro2010/NagalakshmiEtAl"
## The sections below locate their inputs ("SRP000227", "aln") relative
## to the working directory, so switch to the data directory first.
setwd(datasrc)

## qa
## Build the quality-assessment summary for every fastq file found under
## SRP000227/ (relative to the working directory), or load it from the
## cached qa.rda when that file already exists. In interactive sessions
## the resulting report is opened in a browser.
##
## NOTE: the result is deliberately named `qa`, since that is the object
## name stored in (and restored from) qa.rda. It shadows the qa()
## function, but R still resolves `qa(...)` in call position to the
## function.
qaFile <- file.path(pkgroot, "data", "qa.rda")
if (!file.exists(qaFile)) {
    ## better: qa on the aligned reads?
    fls <- list.files("SRP000227", pattern="fastq", full.names=TRUE)
    ## Fail loudly: with no input, an empty result would be written to
    ## the cache and silently reused on every later run.
    if (length(fls) == 0L) {
        stop("no fastq files found in '",
             file.path(getwd(), "SRP000227"), "'", call.=FALSE)
    }
    ## One QA object per file, labelled by the file's base name.
    qalst <- Map(function(fl) {
        fq <- readFastq(fl)
        qa(fq, lane=basename(fl))
    }, fls)
    ## Combine the per-file summaries into a single object.
    qa <- do.call(rbind, qalst)
    save(qa, file=qaFile)
} else {
    load(qaFile)
}
if (interactive()) {
    browseURL(report(qa))
}

## hitspergene
## Count, per gene, the aligned reads overlapping that gene's exons in
## each BAM file under aln/ (relative to the working directory), attach
## per-sample annotations, order the samples, and cache the result in
## hitspergene.rda. When the cache already exists it is loaded instead.
countsFile <- file.path(pkgroot, "data", "hitspergene.rda")
txdbFile <- file.path(pkgroot, "inst", "extdata", "sacCer2_sgdGene.sqlite")
if (!file.exists(countsFile)) {

    ## transcript ranges
    ## The transcript database is itself cached: it is fetched from UCSC
    ## only when the sqlite file is absent.
    ## NOTE(review): makeTranscriptDbFromUCSC / saveFeatures /
    ## loadFeatures are the names this script was written against; newer
    ## GenomicFeatures releases may have renamed them -- confirm against
    ## the installed version.
    library(GenomicFeatures)
    if (!file.exists(txdbFile)) {
        txdb <- makeTranscriptDbFromUCSC(genome="sacCer2",
                                         tablename="sgdGene")
        saveFeatures(txdb, txdbFile)
    } else {
        txdb <- loadFeatures(txdbFile)
    }
    exons <- exonsBy(txdb, "gene")

    ## reads and counts
    ## Use one escaped, anchored pattern both to find the BAM files and
    ## to strip their suffix, so "." only ever matches a literal dot.
    bamSuffix <- "\\.fastq\\.bam$"
    fls <- list.files("aln", pattern=bamSuffix, full.names=TRUE)
    if (length(fls) == 0L) {
        stop("no '.fastq.bam' files found in '",
             file.path(getwd(), "aln"), "'", call.=FALSE)
    }
    cnt <- Map(function(fl, exons) {
        ga <- readGappedAlignments(fl)
        countOverlaps(exons, ga)
    }, fls, MoreArgs=list(exons=exons))
    ## One column per BAM file, one row per gene.
    hitspergene <- as(cnt, "DataFrame")
    dimnames(hitspergene) <-
        list(names(exons), sub(bamSuffix, "", basename(fls)))

    ## sample annotations
    df <- DataFrame(Sample=rep(c("RH", "dT"), each=3),
        Replicate=rep(c("Biological", "Original", "Technical"), 2),
        SRR=c("SRR002058", "SRR002059", "SRR002061",
              "SRR002062", "SRR002051", "SRR002064"))
    ## Align the annotation rows with the columns of the count table; a
    ## BAM file without an annotation row is reported explicitly rather
    ## than surfacing as an NA subscript.
    idx <- match(colnames(hitspergene), df$SRR)
    if (any(is.na(idx))) {
        stop("no sample annotation for: ",
             paste(colnames(hitspergene)[is.na(idx)], collapse=", "),
             call.=FALSE)
    }
    elementMetadata(hitspergene) <- df[idx,]
    ## Order columns by sample, then by replicate, within the table.
    o <- with(elementMetadata(hitspergene),
              order(Sample, Replicate))
    hitspergene <- hitspergene[,o]

    save(hitspergene, file=countsFile)
} else {
    load(countsFile)
}

## SRR002051.pluscvg and SRR002051.minuscvg
## Per-strand coverage of the SRR002051 alignments. Each strand's
## coverage is cached in its own .rda file; both are recomputed when
## either cache file is missing.
bamFile <- file.path("aln", "SRR002051.fastq.bam")
pluscvgFile <- file.path(pkgroot, "data", "SRR002051.pluscvg.rda")
minuscvgFile <- file.path(pkgroot, "data", "SRR002051.minuscvg.rda")
if (file.exists(pluscvgFile) && file.exists(minuscvgFile)) {
    ## Both caches present: restore the two coverage objects.
    load(pluscvgFile)
    load(minuscvgFile)
} else {
    bam <- readGappedAlignments(bamFile)
    library(BSgenome.Scerevisiae.UCSC.sacCer2)
    ## Take the sequence lengths from the genome package so they are set
    ## on the alignments before the ranges are extracted.
    bam@seqlengths <- seqlengths(Scerevisiae)
    ## Extract the ranges and strand once, then split by strand.
    alnRanges <- grg(bam)
    alnStrand <- strand(bam)
    SRR002051.pluscvg <- coverage(alnRanges[alnStrand == "+"])
    SRR002051.minuscvg <- coverage(alnRanges[alnStrand == "-"])
    save(SRR002051.pluscvg, file=pluscvgFile)
    save(SRR002051.minuscvg, file=minuscvgFile)
}

