## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(roclab)

## -----------------------------------------------------------------------------
set.seed(123)
n_lin <- 1500
n_pos_lin <- round(0.2 * n_lin)
n_neg_lin <- n_lin - n_pos_lin

X_train_lin <- rbind(
  matrix(rnorm(2 * n_neg_lin, mean = -1), ncol = 2),
  matrix(rnorm(2 * n_pos_lin, mean = 1), ncol = 2)
)
y_train_lin <- c(rep(-1, n_neg_lin), rep(1, n_pos_lin))

# Fit a linear model
fit_lin <- roclearn(X_train_lin, y_train_lin, lambda = 0.1)

# Summary
summary(fit_lin)

n_test_lin <- 300
n_pos_test_lin <- round(0.2 * n_test_lin)
n_neg_test_lin <- n_test_lin - n_pos_test_lin

X_test_lin <- rbind(
  matrix(rnorm(2 * n_neg_test_lin, mean = -1), ncol = 2),
  matrix(rnorm(2 * n_pos_test_lin, mean = 1), ncol = 2)
)
y_test_lin <- c(rep(-1, n_neg_test_lin), rep(1, n_pos_test_lin))

# Predict decision scores
pred_score_lin <- predict(fit_lin, X_test_lin, type = "response")
head(pred_score_lin)

# Predict classes {-1, 1}
pred_class_lin <- predict(fit_lin, X_test_lin, type = "class")
head(pred_class_lin)

# AUC on the test set
auc(fit_lin, X_test_lin, y_test_lin)

## -----------------------------------------------------------------------------
set.seed(123)
n_ker <- 1500
r_train_ker <- sqrt(runif(n_ker, 0.05, 1))
theta_train_ker <- runif(n_ker, 0, 2 * pi)
X_train_ker <- cbind(r_train_ker * cos(theta_train_ker),
                     r_train_ker * sin(theta_train_ker))
y_train_ker <- ifelse(r_train_ker < 0.5, 1, -1)

# Fit a kernel model
fit_ker <- kroclearn(X_train_ker, y_train_ker, lambda = 0.1, kernel = "radial")

# Summary
summary(fit_ker)

n_test_ker <- 300
r_test_ker <- sqrt(runif(n_test_ker, 0.05, 1))
theta_test_ker <- runif(n_test_ker, 0, 2 * pi)
X_test_ker <- cbind(r_test_ker * cos(theta_test_ker),
                    r_test_ker * sin(theta_test_ker))
y_test_ker <- ifelse(r_test_ker < 0.5, 1, -1)

# Predict decision scores
pred_score_ker <- predict(fit_ker, X_test_ker, type = "response")
head(pred_score_ker)

# Predict classes {-1, 1}
pred_class_ker <- predict(fit_ker, X_test_ker, type = "class")
head(pred_class_ker)

# AUC on the test set
auc(fit_ker, X_test_ker, y_test_ker)

## -----------------------------------------------------------------------------
# 5-fold CV for linear models
cvfit_lin <- cv.roclearn(
  X_train_lin, y_train_lin,
  lambda.vec = exp(seq(log(0.01), log(5), length.out = 20)),
  nfolds = 5
)

# Summarize the cross-validation result
summary(cvfit_lin)

## ----fig.width=7, fig.height=6------------------------------------------------
# Plot the cross-validation AUC across lambda values
plot(cvfit_lin)

## -----------------------------------------------------------------------------
# 5-fold CV for kernel models
cvfit_ker <- cv.kroclearn(
  X_train_ker, y_train_ker,
  lambda.vec = exp(seq(log(0.01), log(5), length.out = 20)),
  kernel = "radial",
  nfolds = 5
)

# Summarize the cross-validation result
summary(cvfit_ker)

## ----fig.width=7, fig.height=6------------------------------------------------
# Plot the cross-validation AUC across lambda values
plot(cvfit_ker)
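## -----------------------------------------------------------------------------
# Illustrative cross-check, not part of roclab: the AUC reported by auc() can
# be compared against the pairwise (Mann-Whitney) definition -- the probability
# that a randomly chosen positive receives a higher score than a randomly
# chosen negative, with ties counted as one half. The helper name pairwise_auc
# is made up for this sketch; it reuses pred_score_lin and y_test_lin from the
# linear example above.
pairwise_auc <- function(score, y) {
  pos <- score[y == 1]
  neg <- score[y == -1]
  diff <- outer(pos, neg, "-")            # all positive-negative score pairs
  mean((diff > 0) + 0.5 * (diff == 0))    # wins plus half-credit for ties
}
pairwise_auc(as.numeric(pred_score_lin), y_test_lin)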
## -----------------------------------------------------------------------------
library(mlbench)
data(Ionosphere)

# Prepare data
X_iono <- Ionosphere[, -35]
y_iono <- ifelse(Ionosphere$Class == "bad", 1, -1)

set.seed(123)
train_idx <- sample(seq_len(nrow(X_iono)), size = 200)
X_train_iono <- X_iono[train_idx, ]
y_train_iono <- y_iono[train_idx]
X_test_iono <- X_iono[-train_idx, ]
y_test_iono <- y_iono[-train_idx]

# Fit a linear model
fit_iono_lin <- roclearn(X_train_iono, y_train_iono, lambda = 0.1, approx = TRUE)
summary(fit_iono_lin)

# Predict decision scores
pred_score_iono_lin <- predict(fit_iono_lin, X_test_iono, type = "response")
head(pred_score_iono_lin)

# Predict classes {-1, 1}
pred_class_iono_lin <- predict(fit_iono_lin, X_test_iono, type = "class")
head(pred_class_iono_lin)

# AUC on the test set
auc(fit_iono_lin, X_test_iono, y_test_iono)

## -----------------------------------------------------------------------------
# 5-fold CV for linear models
cvfit_iono_lin <- cv.roclearn(
  X_train_iono, y_train_iono,
  lambda.vec = exp(seq(log(0.01), log(5), length.out = 10)),
  approx = TRUE,
  nfolds = 5
)
summary(cvfit_iono_lin)

## ----fig.width=7, fig.height=6------------------------------------------------
# Plot the cross-validation AUC across lambda values
plot(cvfit_iono_lin)

## -----------------------------------------------------------------------------
# Fit a kernel model
fit_iono_ker <- kroclearn(X_train_iono, y_train_iono, lambda = 0.1,
                          kernel = "radial", approx = TRUE)
summary(fit_iono_ker)

# Predict decision scores
pred_score_iono_ker <- predict(fit_iono_ker, X_test_iono, type = "response")
head(pred_score_iono_ker)

# Predict classes {-1, 1}
pred_class_iono_ker <- predict(fit_iono_ker, X_test_iono, type = "class")
head(pred_class_iono_ker)

# AUC on the test set
auc(fit_iono_ker, X_test_iono, y_test_iono)

## -----------------------------------------------------------------------------
# 5-fold CV for kernel models
cvfit_iono_ker <- cv.kroclearn(
  X_train_iono, y_train_iono,
  lambda.vec = exp(seq(log(0.01), log(5), length.out = 10)),
  kernel = "radial",
  approx = TRUE,
  nfolds = 5
)
summary(cvfit_iono_ker)

## ----fig.width=7, fig.height=6------------------------------------------------
# Plot the cross-validation AUC across lambda values
plot(cvfit_iono_ker)

## -----------------------------------------------------------------------------
library(kernlab)
data(spam)

# Prepare data
X_spam <- spam[, -58]
y_spam <- ifelse(spam$type == "spam", 1, -1)

set.seed(123)
train_idx <- sample(seq_len(nrow(X_spam)), size = 3000)
X_train_spam <- X_spam[train_idx, ]
y_train_spam <- y_spam[train_idx]
X_test_spam <- X_spam[-train_idx, ]
y_test_spam <- y_spam[-train_idx]

# Fit a linear model
fit_spam_lin <- roclearn(X_train_spam, y_train_spam, lambda = 0.1)
summary(fit_spam_lin)

# Predict decision scores
pred_score_spam_lin <- predict(fit_spam_lin, X_test_spam, type = "response")
head(pred_score_spam_lin)

# Predict classes {-1, 1}
pred_class_spam_lin <- predict(fit_spam_lin, X_test_spam, type = "class")
head(pred_class_spam_lin)

# AUC on the test set
auc(fit_spam_lin, X_test_spam, y_test_spam)

## -----------------------------------------------------------------------------
# 5-fold CV for linear models
cvfit_spam_lin <- cv.roclearn(
  X_train_spam, y_train_spam,
  lambda.vec = exp(seq(log(0.01), log(5), length.out = 10)),
  nfolds = 5
)
summary(cvfit_spam_lin)

## ----fig.width=7, fig.height=6------------------------------------------------
# Plot the cross-validation AUC across lambda values
plot(cvfit_spam_lin)
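## -----------------------------------------------------------------------------
# Illustrative aside (base R only, not a roclab function): AUC summarizes
# ranking quality across all thresholds, while predict(..., type = "class")
# returns hard {-1, 1} labels. A plain contingency table of those labels
# against the test labels shows how the linear spam model classifies at its
# default cutoff.
table(predicted = pred_class_spam_lin, actual = y_test_spam)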
## -----------------------------------------------------------------------------
# Fit a kernel model
fit_spam_ker <- kroclearn(X_train_spam, y_train_spam, lambda = 0.1, kernel = "radial")
summary(fit_spam_ker)

# Predict decision scores
pred_score_spam_ker <- predict(fit_spam_ker, X_test_spam, type = "response")
head(pred_score_spam_ker)

# Predict classes {-1, 1}
pred_class_spam_ker <- predict(fit_spam_ker, X_test_spam, type = "class")
head(pred_class_spam_ker)

# AUC on the test set
auc(fit_spam_ker, X_test_spam, y_test_spam)

## -----------------------------------------------------------------------------
# 5-fold CV for kernel models
cvfit_spam_ker <- cv.kroclearn(
  X_train_spam, y_train_spam,
  kernel = "radial",
  lambda.vec = exp(seq(log(0.01), log(5), length.out = 10)),
  nfolds = 5
)
summary(cvfit_spam_ker)

## ----fig.width=7, fig.height=6------------------------------------------------
# Plot the cross-validation AUC across lambda values
plot(cvfit_spam_ker)
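## ----fig.width=7, fig.height=6------------------------------------------------
# Illustrative sketch (base R only, not part of roclab): trace the empirical
# ROC curve underlying the kernel spam model's test-set AUC by sweeping a
# threshold over the sorted decision scores. Each step down the sorted scores
# adds one test observation to the predicted-positive set.
scores <- as.numeric(pred_score_spam_ker)
ord <- order(scores, decreasing = TRUE)
tpr <- cumsum(y_test_spam[ord] == 1) / sum(y_test_spam == 1)
fpr <- cumsum(y_test_spam[ord] == -1) / sum(y_test_spam == -1)
plot(c(0, fpr), c(0, tpr), type = "l",
     xlab = "False positive rate", ylab = "True positive rate",
     main = "Empirical ROC, kernel model on the spam test set")
abline(0, 1, lty = 2)  # reference line for a random ranking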