### Create a NetCDF file holding the SNP genotype matrix
### Inputs  ncdf_file: name of NetCDF file to be created
###         snpData_file: name of CSV file containing the GWAS experimental data
###                       (comma separated, read from its first line on, i.e.
###                       no header line; each line is expected to hold one
###                       value per SNP)
###         subjectMetadata_file: name of CSV file containing the subject metadata
###                       (its number of data rows gives the number of subjects)
###         snpMetadata_file: name of CSV file containing the SNP metadata
###                       (first line is skipped; the number of remaining
###                       whitespace-delimited fields gives the number of SNPs)
###         blocksize: number of rows to be processed at a time
### Output  called for its side effect of writing ncdf_file; returns NULL
###         invisibly. The running count of written rows is printed after
###         every block.

### By default, function assumes that "snpData.csv", "subjectMetadata.csv" and
### "snpMetadata.csv" are present in the working directory

createNcdf <- function(ncdf_file,
                       snpData_file="snpData.csv",
                       subjectMetadata_file="subjectMetadata.csv",
                       snpMetadata_file="snpMetadata.csv", blocksize = 50)
{
    ## library() fails loudly when the package is missing, whereas require()
    ## only returns FALSE and lets the function die later on an unknown name
    library(ncdf)

    ## A block size below 1 would never advance the write loop
    if (!is.numeric(blocksize) || length(blocksize) != 1 ||
        is.na(blocksize) || blocksize < 1)
        stop("blocksize has to be a single positive number")

    snpMeta <- scan(snpMetadata_file, what = character(0), skip = 1,
                    quiet = TRUE)
    subjectMeta <- read.csv(subjectMetadata_file, sep = ",")
    NCOLS <- length(snpMeta)
    NROWS <- nrow(subjectMeta)
    if (NCOLS < 1 || NROWS < 1)
        stop("SNP and subject metadata must each describe at least one entry")

    ### Define ncdf dimensions
    snpDim <- dim.def.ncdf(name = "snpDim", units = "id", seq_len(NCOLS))
    subjDim <- dim.def.ncdf(name = "subjectDim", units = "id", seq_len(NROWS))

    ### define variables to be stored in netcdf
    ### (subjects along the first dimension, SNPs along the second)
    snpDat <- var.def.ncdf(name = "snpData",
                           units = "0: missing, 1: AA, 2: AB, 3: BB",
                           dim = list(subjDim, snpDim),
                           missval = 0, prec = "byte")

    ### create the NetCDF file
    nc <- create.ncdf(ncdf_file, list(snpDat))
    close.ncdf(nc)

    ### Write the snp Matrix
    nc <- open.ncdf(ncdf_file, write = TRUE)
    ## Release the handle even when reading or writing a block fails
    on.exit(close.ncdf(nc), add = TRUE)

    indx <- 0
    while (indx < NROWS) {
        ## Never request more rows than remain; this also covers the case
        ## where blocksize exceeds the total number of rows
        nlines <- min(blocksize, NROWS - indx)
        vals <- scan(snpData_file, what = character(0), sep = ",",
                     skip = indx, nlines = nlines, quiet = TRUE)
        if (length(vals) != nlines * NCOLS)
            stop(snpData_file, ": expected ", nlines * NCOLS,
                 " values in lines ", indx + 1, "-", indx + nlines,
                 ", found ", length(vals))
        dat <- matrix(vals, ncol = NCOLS, byrow = TRUE)
        put.var.ncdf(nc, varid = "snpData", vals = dat,
                     start = c(indx + 1, 1), count = c(nlines, NCOLS))
        indx <- indx + nlines
        cat(indx, "\n")
    }
    invisible(NULL)
}


### Checks selected rows (two by default) of the NetCDF file for consistency
### with respect to the original data file "snpData.csv"

### Inputs  ncdf_file: name of existing NetCDF file
###         snpData_file: name of CSV file containing the GWAS experimental data
###         subjectMetadata_file: name of CSV file containing the subject metadata
###         snpMetadata_file: name of CSV file containing the SNP metadata
###         check_rows: the 1-based index of the rows to check
### Output  returns TRUE invisibly when every checked row matches; a mismatch
###         is reported by RUnit's checkEquals
testNcdf <- function(ncdf_file,
                     snpData_file="snpData.csv",
                     subjectMetadata_file="subjectMetadata.csv",
                     snpMetadata_file="snpMetadata.csv",
                     check_rows=c(601, 1000))
{
    ## library() fails loudly when a package is missing, whereas require()
    ## only returns FALSE
    library(ncdf)
    library(RUnit)
    snpMeta <- scan(snpMetadata_file, what = character(0), skip = 1,
                    quiet = TRUE)
    subjectMeta <- read.csv(subjectMetadata_file, sep = ",")
    NCOLS <- length(snpMeta)
    NROWS <- nrow(subjectMeta)

    ## Reject row indices outside the subject range up front instead of
    ## failing inside the NetCDF read
    if (any(is.na(check_rows)) || any(check_rows < 1) ||
        any(check_rows > NROWS))
        stop("check_rows has to contain row indices between 1 and ", NROWS)

    ## Reopen the NetCDF file, and read in the variables
    ## Check if read values match those from the file
    nc <- open.ncdf(ncdf_file, write = FALSE)
    ## checkEquals raises an error on mismatch, so close the handle on exit
    on.exit(close.ncdf(nc), add = TRUE)

    for (rownb in check_rows) {
        dat <- get.var.ncdf(nc, varid = "snpData", start = c(rownb, 1),
                            count = c(1, NCOLS))
        ## Line rownb of the data file is reached by skipping rownb - 1 lines
        origDat <- as.numeric(scan(snpData_file, what = character(0),
                                   sep = ",", skip = rownb - 1, nlines = 1,
                                   quiet = TRUE))
        checkEquals(as.numeric(dat), origDat)
    }

    invisible(TRUE)
}

## Retrieves the columns in the range specified by rng, for every row of the
## "snpData" variable
## Inputs  ncdf_file: name of existing NetCDF file
##         rng: vector of length 2 giving the first and last 1-based column
##              index to read (order does not matter, it is sorted first)
## Output  the values read, converted to raw, carrying the dimensions that
##         get.var.ncdf returned
getCols <- function(ncdf_file, rng)
{
    ## sort() also drops NA entries, so the length check below rejects a
    ## range with a missing bound
    rng <- sort(rng)
    if (length(rng) != 2)
        stop("rng has to be a vector of length 2")
    nc <- open.ncdf(ncdf_file, write = FALSE)
    ## Release the handle even when the read fails
    on.exit(close.ncdf(nc), add = TRUE)
    ## A count of -1 reads the full extent of the first dimension rather
    ## than assuming the file holds exactly 1000 rows
    dat <- get.var.ncdf(nc, varid = "snpData", start = c(1, rng[1]),
                        count = c(-1, rng[2] - rng[1] + 1))
    structure(as.raw(dat), dim = dim(dat))
}

## Reads the complete "snpData" variable of a NetCDF file, converts it to a
## raw matrix and saves it under the name objname in "<objname>.rda"
## (a relative path, so the file lands in the working directory)
## Inputs  ncdf_file: name of existing NetCDF file
##         objname: name of the saved object; also the stem of the .rda file
## Output  the name of the written .rda file, invisibly
serializeNcdfAsRawMatrix <- function(ncdf_file="snpData.nc", objname="snpData")
{
    nc <- open.ncdf(ncdf_file, write = FALSE)
    ## Release the handle even when reading or saving fails
    on.exit(close.ncdf(nc), add = TRUE)

    ## Extent of both dimensions, taken from the first variable in the file
    firstVar <- nc$var[[1]]
    nRows <- firstVar$dim[[1]]$len
    nCols <- firstVar$dim[[2]]$len
    dat <- get.var.ncdf(nc, varid = "snpData", start = c(1, 1),
                        count = c(nRows, nCols))
    dat <- structure(as.raw(dat), dim = dim(dat))

    ## Bind the matrix in a dedicated environment: assigning into this
    ## function's own frame would overwrite a local variable whenever
    ## objname happens to equal one of their names (e.g. "nc")
    saveEnv <- new.env(parent = emptyenv())
    assign(objname, dat, envir = saveEnv)
    filename <- paste0(objname, ".rda")
    save(list = objname, file = filename, envir = saveEnv)
    invisible(filename)
}

