## ----knitr, echo=FALSE, results="hide"-----------------------------------
library("knitr")
opts_chunk$set(
  tidy=FALSE,
  dev="png",
  #fig.show="hide",
  fig.width=4, fig.height=4.5,
  dpi = 300,
  cache=TRUE,
  message=FALSE,
  warning=FALSE)

## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------
BiocStyle::latex()

## ----loadGenoGAM, echo=FALSE---------------------------------------------
library("GenoGAM")

## ----options, results="hide", echo=FALSE---------------------------------
options(digits=3, width=80, prompt=" ", continue=" ")

## ----parallel------------------------------------------------------------
library(GenoGAM)

## On multicore machines by default the number of available cores - 2 are registered
BiocParallel::registered()[1]

## ----parallel-register---------------------------------------------------
BiocParallel::register(BiocParallel::MulticoreParam(workers = 4, progressbar = TRUE))

## ----check---------------------------------------------------------------
BiocParallel::registered()[1]

## ----expdesign-----------------------------------------------------------

folder <- system.file("extdata/Set1", package='GenoGAM')

expDesign <- read.delim(
  file.path(folder, "experimentDesign.txt")
)

expDesign

## ----ggd-----------------------------------------------------------------
bpk <- 20
chunkSize <- 1000
overhangSize <- 15*bpk

## build the GenoGAMDataSet
ggd <- GenoGAMDataSet(
  expDesign, directory = folder,
  chunkSize = chunkSize, overhangSize = overhangSize,
  design = ~ s(x) + s(x, by = genotype)
)
## restricts the GenoGAM dataset to the positions of interest
## (this step is only required for running this small example)
ggd <- subset(ggd, seqnames == "chrXIV" & pos >= 305000 & pos <= 308000)

## ----sizeFactor----------------------------------------------------------
ggd <- computeSizeFactors(ggd)
sizeFactors(ggd)

## ----fitwocv-------------------------------------------------------------
## fit model without parameters estimation
fit <- genogam(ggd, 
               lambda = 40954.1,
               family = mgcv::nb(theta = 6.927986),
               bpknots = bpk
)
fit

## ----fitwithcv, eval=FALSE-----------------------------------------------
## fit_CV <- genogam(ggd,bpknots = bpk)

## ----view----------------------------------------------------------------
# extract count data into a data frame 
df_data <- view(ggd)
head(df_data)

# extract fit into a data frame 
df_fit <- view(fit)
head(df_fit)

## ----plotfit,  fig.width=4.5, fig.height=5-------------------------------
## plot function for the count data
dataplot <- function(df, col, ...){
  x <- df[["pos"]]
  y <- df[[col]]
  plot(0, type='n',
       xlim=range(x),
       ylab=col,
       ...
  )
  points(x, y, pch=19, col="#00000015", cex=0.5)
}

# plot function for a fit with confidence band
fitplot <- function(df, smooth, ...){
  x <- df[["pos"]]
  y <- df[[smooth]]
  se.y <- df[[paste0("se.",smooth)]]
  plot(0, type='n',
       xlim=range(x),
       ylim=range(c(y  - 1.96*se.y, y  + 1.96*se.y)),
       ylab=smooth,
       ...
  )
  lines(x, y)
  lines(x, y+1.96*se.y, lty='dotted')
  lines(x, y-1.96*se.y, lty='dotted')
  abline(h=0)
}

## plot
par(mfrow=c(5,1))
par(mar=c(1,4,1,1))
for(id in expDesign$ID)
  dataplot(df_data, id, xlab="", main="", ylim=c(0,6))
par(mar=c(2,4,1,1))
fitplot(df_fit, 's(x):genotype', xlab="", main="")

## ----signif--------------------------------------------------------------
fit <- computeSignificance(fit)
df_fit <- view(fit)
df_fit[["fdr.s(x):genotype"]] <- p.adjust(df_fit[["pvalue.s(x):genotype"]], method="BH")
head(df_fit)

## ----sessInfo, results="asis", echo=FALSE--------------------------------
toLatex(sessionInfo())

## ----resetOptions, results="hide", echo=FALSE----------------------------
options(prompt="> ", continue="+ ")