## ---- echo = FALSE, results = "asis"------------------------------------- options(width = 130) BiocStyle::markdown() set.seed(44) ## ---- message = FALSE---------------------------------------------------- library(ClassifyR) library(ggplot2) library(curatedOvarianData) data(GSE26712_eset) GSE26712_eset <- GSE26712_eset[1:1000, ] ## ---- results = "hold", tidy = FALSE------------------------------------- curatedClinical <- pData(GSE26712_eset) ovarPoor <- curatedClinical[, "vital_status"] == "deceased" & curatedClinical[, "days_to_death"] < 365 * 1 ovarGood <- curatedClinical[, "vital_status"] == "living" & curatedClinical[, "days_to_death"] > 365 * 5 sum(ovarPoor, na.rm = TRUE) sum(ovarGood, na.rm = TRUE) ## ------------------------------------------------------------------------ ovarExpression <- exprs(GSE26712_eset)[, c(which(ovarPoor), which(ovarGood))] ovarGroups <- factor(rep(c("Poor", "Good"), c(length(which(ovarPoor)), length(which(ovarGood)))), levels = c("Poor", "Good")) ## ---- fig.width = 18, fig.height = 10, tidy = FALSE---------------------- plotData <- data.frame(expression = as.numeric(ovarExpression), sample = factor(rep(1:ncol(ovarExpression), each = nrow(ovarExpression)))) ggplot(plotData, aes(x = sample, y = expression)) + geom_boxplot() + scale_y_continuous(limits = c(0, 15)) + xlab("Sample") + ylab("Expression Value") + ggtitle("Expression for All Arrays") ## ---- tidy = FALSE------------------------------------------------------- groupsTable <- data.frame(class = ovarGroups) rownames(groupsTable) <- colnames(ovarExpression) ovarSet <- ExpressionSet(ovarExpression, AnnotatedDataFrame(groupsTable)) featureNames(ovarSet) <- rownames(ovarExpression) dim(ovarSet) ## ---- tidy = FALSE------------------------------------------------------- library(sparsediscrim) DEresults <- runTests(ovarSet, "Ovarian Cancer", "Differential Expression", validation = "bootstrap", resamples = 5, folds = 3, params = list(SelectParams(limmaSelection, resubstituteParams = ResubstituteParams(nFeatures = c(25, 50, 75, 100), performanceType = "balanced", better = "lower")), TrainParams(dlda, TRUE, doesTests = FALSE), PredictParams(predict, TRUE, getClasses = function(result) result[["class"]])), parallelParams = bpparam(), verbose = 1) DEresults ## ---- fig.height = 12, fig.width = 12, results = "hold", message = FALSE---- DEplots <- plotFeatureClasses(ovarSet, features(DEresults)[[1]][[2]][1:5]) ## ------------------------------------------------------------------------ DEresults <- calcPerformance(DEresults, "balanced") DEresults performance(DEresults) ## ------------------------------------------------------------------------ DEresults <- calcPerformance(DEresults, "mat") DEresults performance(DEresults) ## ---- tidy = FALSE------------------------------------------------------- DVresults <- runTests(ovarSet, "Ovarian Cancer", "Differential Variability", validation = "bootstrap", resamples = 2, folds = 4, params = list(SelectParams(leveneSelection, resubstituteParams = ResubstituteParams(nFeatures = c(25, 50, 75, 100), performanceType = "balanced", better = "lower")), TransformParams(subtractFromLocation, location = "median"), TrainParams(fisherDiscriminant, FALSE, doesTests = TRUE), PredictParams(predictor = function(){}, FALSE, getClasses = function(result) result, returnType = "both")), verbose = 1) DVresults ## ---- fig.height = 12, fig.width = 12, results = "hold", message = FALSE---- DVplots <- plotFeatureClasses(ovarSet, features(DVresults)[[1]][[2]][1:5]) ## ------------------------------------------------------------------------ DVresults <- calcPerformance(DVresults, "balanced") DVresults performance(DVresults) ## ---- tidy = FALSE------------------------------------------------------- dParams <- list(bw = "nrd0", n = 4096, from = expression(min(featureValues)), to = expression(max(featureValues))) DDresults <- runTests(ovarSet, "Ovarian Cancer", "Differential Distribution", validation = "bootstrap", resamples = 2, folds = 2, params = list(SelectParams(KullbackLeiblerSelection, resubstituteParams = ResubstituteParams(nFeatures = c(25, 50, 75, 100), performanceType = "balanced", better = "lower")), TrainParams(naiveBayesKernel, FALSE, doesTests = TRUE), PredictParams(predictor = function(){}, FALSE, getClasses = function(result) result, weighted = "weighted", returnType = "both", densityParameters = dParams)), verbose = 1) DDresults ## ---- fig.height = 12, fig.width = 12, results = "hold", message = FALSE---- DDplots <- plotFeatureClasses(ovarSet, features(DDresults[[1]])[[1]][[1]][1:5]) ## ------------------------------------------------------------------------ DDresults[["weight=crossover distance"]] <- calcPerformance(DDresults[["weight=crossover distance"]], "balanced") DDresults[["weight=crossover distance"]] performance(DDresults[["weight=crossover distance"]]) ## ---- fig.width = 10, fig.height = 7------------------------------------- library(grid) resultsList <- list(Expression = DEresults, Variability = DVresults) errorPlot <- errorMap(resultsList) ## ---- fig.width = 6.8, fig.height = 5------------------------------------ errorBoxes <- performancePlot(list(DEresults, DVresults, DDresults[["weight=crossover distance"]]), performanceName = "Balanced Error Rate", boxFillColouring = "None", boxLineColouring = "None", title = "Errors Across Classification Types") ## ------------------------------------------------------------------------ trainingExpr <- matrix(rnorm(500 * 50, 9, 3), ncol = 50) trainingClasses <- factor(rep(c("Healthy", "Diseased"), each = 25), levels = c("Healthy", "Diseased")) trainingExpr[101:125, trainingClasses == "Diseased"] <- trainingExpr[101:125, trainingClasses == "Diseased"] - 2 testingExpr <- matrix(rnorm(500 * 50, 9, 3), ncol = 50) testingClasses <- factor(rep(c("Healthy", "Diseased"), each = 25), levels = c("Healthy", "Diseased")) testingExpr[111:135, testingClasses == "Diseased"] <- testingExpr[111:135, testingClasses == "Diseased"] - 2 ## ------------------------------------------------------------------------ allExpr <- cbind(trainingExpr, testingExpr) allClasses <- unlist(list(trainingClasses, testingClasses)) independentResult <- runTest(allExpr, allClasses, datasetName = "Simulation", classificationName = "DE", training = 1:50, testing = 51:100) independentResult ## ---- fig.height = 5, fig.width = 6-------------------------------------- ROCcurves <- ROCplot(list(DVresults, DDresults[["weight=crossover distance"]])) ## ------------------------------------------------------------------------ library(e1071) # Provides SVM functions. resubstituteParams = ResubstituteParams(nFeatures = c(25, 50, 75, seq(100, 1000, 100)), performanceType = "balanced", better = "lower") SVMresults <- suppressWarnings(runTests(ovarSet, "Ovarian Cancer", "Differential Expression", validation = "bootstrap", resamples = 5, folds = 3, params = list(SelectParams(limmaSelection, resubstituteParams = resubstituteParams), TrainParams(svm, TRUE, doesTests = FALSE, kernel = "linear", resubstituteParams = resubstituteParams, tuneParams = list(cost = c(0.01, 0.1, 1, 10))), PredictParams(predict, TRUE, getClasses = function(result) result)), parallelParams = bpparam(), verbose = 1)) ## ------------------------------------------------------------------------ length(tunedParameters(SVMresults)) tunedParameters(SVMresults)[[1]]