library(GenomicFeatures)
library(multicore)

## Input and output locations, all rooted at the package source tree.
pkgroot <- "/home/mtmorgan/IWB2011"

## directory searched for the sorted BAM alignment files
datasrc <- file.path(
    pkgroot, "NagalakshmiEtAl", "aln"
)
## SQLite file from which the transcript annotation is loaded
txdbFile <- file.path(
    pkgroot, "inst", "extdata", "sacCer2_sgdGene.sqlite"
)
## destination of the saved count table
countsFile <- file.path(
    pkgroot, "data", "tscriptCounts.rda"
)

## Transcript coordinates, restricted to genes annotated with exactly
## one transcript.
txdb <- loadFeatures(txdbFile)
tscript <- transcripts(txdb, column="gene_id")

## keep the gene ids on the transcripts as a plain character vector
geneIds <- as(values(tscript)[["gene_id"]], "character")
values(tscript)[["gene_id"]] <- geneIds

## gene ids occurring exactly once across all transcripts
geneIdFreq <- table(geneIds)
ugeneIds <- names(geneIdFreq)[geneIdFreq == 1]
isSingleton <- geneIds %in% ugeneIds
tscript1 <- tscript[isSingleton]

## protocol doesn't distinguish strand, so match reads on either strand
strand(tscript1) <- "*"

## reads and counts
##
## For each sorted BAM file, count the reads overlapping each region in
## 'tscript1'. Reads overlapping anything other than exactly one region
## are dropped before counting. The result is a DataFrame with one row
## per region and one column per BAM file.
fls <- list.files(datasrc, pattern="\\.fastq\\.sorted\\.bam$",
                  full.names=TRUE)
if (length(fls) == 0L)
    stop("no '*.fastq.sorted.bam' files found in '", datasrc, "'")
counts <- mclapply(fls, function(fl, ts) {
    print(fl)
    ga <- readGappedAlignments(fl)
    hits <- countOverlaps(ga, ts)       # number of regions each read overlaps
    countOverlaps(ts, ga[hits==1])      # count only reads with a single hit
}, tscript1)
## mclapply() stores a worker's error as a "try-error" object instead of
## signalling it; fail here rather than coerce garbage into the table.
failed <- vapply(counts, function(elt) {
    is.null(elt) || inherits(elt, "try-error")
}, logical(1))
if (any(failed))
    stop("read counting failed for: ",
         paste(basename(fls)[failed], collapse=", "))
tscriptCounts <- as(counts, "DataFrame")
## rows are named by gene id, columns by file name up to ".fastq"
dimnames(tscriptCounts) <-
    list(as(values(tscript1)[["gene_id"]], "character"),
         sub("\\.fastq.*", "", basename(fls)))

## sample annotations
##
## Attach per-sample metadata (protocol, replicate type, SRR accession)
## to the columns of 'tscriptCounts', then order the columns by protocol
## and replicate.
df <- DataFrame(Protocol=rep(c("RH", "dT"), each=3),
                Replicate=rep(c("Biological", "Original", "Technical"), 2),
                SRR=c("SRR002058", "SRR002059", "SRR002061",
                  "SRR002062", "SRR002051", "SRR002064"))
## align annotation rows with the columns of the count table
idx <- match(colnames(tscriptCounts), df$SRR)
if (any(is.na(idx)))
    stop("no sample annotation for: ",
         paste(colnames(tscriptCounts)[is.na(idx)], collapse=", "))
elementMetadata(tscriptCounts) <- df[idx,]
o <- with(elementMetadata(tscriptCounts),
          order(Protocol, Replicate))
## Reorder columns only. The original indexed rows with 'ridx', which is
## never defined in this script; all rows are kept instead.
## NOTE(review): confirm no row filter was intended here.
tscriptCounts <- tscriptCounts[,o]

## write the count table to 'countsFile' under the name "tscriptCounts"
save(list="tscriptCounts", file=countsFile)
