# Script in knitr chunk format: fits random forests on iris (classification)
# and mtcars (regression), builds an e2tree for each, and then validates the
# trees against the dissimilarity matrices derived from the forests.

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)

## ----eval = FALSE-------------------------------------------------------------
# # Install from CRAN (when available):
# install.packages("e2tree")
#
# # Or install the development version from GitHub:
# # devtools::install_github("massimoaria/e2tree")

## ----classification-setup-----------------------------------------------------
library(e2tree)

data(iris)

# Fixed seed so the 75/25 train/validation split is reproducible.
set.seed(42)
smp_size <- floor(0.75 * nrow(iris))
train_ind <- sample(seq_len(nrow(iris)), size = smp_size)
training <- iris[train_ind, ]
validation <- iris[-train_ind, ]

## ----classification-ensemble--------------------------------------------------
# requireNamespace() only checks availability; unlike require() it neither
# attaches the package nor warns when it is missing. Attaching is left to the
# explicit library() call, which errors loudly if the install did not succeed.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest", repos = "https://cran.r-project.org")
}
library(randomForest)

ensemble <- randomForest(
  Species ~ .,
  data = training,
  importance = TRUE,
  proximity = TRUE
)
ensemble

## ----classification-dismatrix-------------------------------------------------
# Parallel execution is switched off (single core).
D <- createDisMatrix(
  ensemble,
  data = training,
  label = "Species",
  parallel = list(active = FALSE, no_cores = 1)
)
dim(D)

## ----classification-tree------------------------------------------------------
# Settings forwarded to e2tree(); see ?e2tree for the meaning of each entry.
setting <- list(impTotal = 0.1, maxDec = 0.01, n = 2, level = 5)
tree <- e2tree(Species ~ ., training, D, ensemble, setting)

## ----classification-print-----------------------------------------------------
print(tree)

## ----classification-summary---------------------------------------------------
summary(tree)

## ----classification-nodes-----------------------------------------------------
# Terminal nodes only
nodes(tree, terminal = TRUE)

## ----classification-plot, fig.width = 8, fig.height = 6-----------------------
plot(tree, ensemble = ensemble, main = "E2Tree - Iris Classification")

## ----classification-rpart, fig.width = 8, fig.height = 6----------------------
# Convert to rpart
rpart_obj <- as.rpart(tree, ensemble)

# Convert to partykit (if installed)
if (requireNamespace("partykit", quietly = TRUE)) {
  party_obj <- partykit::as.party(tree)
  plot(party_obj)
}

## ----classification-predict---------------------------------------------------
pred <- predict(tree, newdata = validation)
head(pred)

## ----classification-fitted----------------------------------------------------
head(fitted(tree))

## ----classification-vimp------------------------------------------------------
vi <- vimp(tree, data = training)
vi$vimp
vi$g_imp

## ----regression-setup---------------------------------------------------------
data(mtcars)

# Fixed seed so the 75/25 train/validation split is reproducible.
set.seed(123)
smp_size <- floor(0.75 * nrow(mtcars))
train_ind <- sample(seq_len(nrow(mtcars)), size = smp_size)
training_reg <- mtcars[train_ind, ]
validation_reg <- mtcars[-train_ind, ]

## ----regression-ensemble------------------------------------------------------
ensemble_reg <- randomForest(
  mpg ~ .,
  data = training_reg,
  ntree = 500,
  importance = TRUE,
  proximity = TRUE
)

## ----regression-dismatrix-----------------------------------------------------
D_reg <- createDisMatrix(
  ensemble_reg,
  data = training_reg,
  label = "mpg",
  parallel = list(active = FALSE, no_cores = 1)
)

## ----regression-tree----------------------------------------------------------
setting_reg <- list(impTotal = 0.1, maxDec = 1e-6, n = 2, level = 5)
tree_reg <- e2tree(mpg ~ ., training_reg, D_reg, ensemble_reg, setting_reg)
print(tree_reg)

## ----regression-plot, fig.width = 8, fig.height = 6---------------------------
plot(tree_reg, ensemble = ensemble_reg, main = "E2Tree - MPG Regression")

## ----regression-predict-------------------------------------------------------
pred_reg <- predict(tree_reg, newdata = validation_reg)
head(pred_reg)

## ----regression-residuals-----------------------------------------------------
res <- residuals(tree_reg)
summary(res)

## ----val-mantel---------------------------------------------------------------
val_mantel <- eValidation(
  training,
  tree,
  D,
  test = "mantel",
  graph = FALSE
)
print(val_mantel)

## ----val-measures-------------------------------------------------------------
val_meas <- eValidation(
  training,
  tree,
  D,
  test = "measures",
  graph = FALSE,
  n_perm = 499
)
print(val_meas)

## ----val-measures-table-------------------------------------------------------
measures(val_meas)

## ----val-plot, fig.width = 8, fig.height = 7----------------------------------
val_full <- eValidation(
  training,
  tree,
  D,
  test = "both",
  graph = FALSE,
  n_perm = 499
)
plot(val_full)

## ----loi-classification-------------------------------------------------------
# proximity() is read here as a list with `ensemble` and `e2tree` components.
prox <- proximity(val_full)
result <- loi(prox$ensemble, prox$e2tree)
summary(result)

## ----loi-plot-----------------------------------------------------------------
plot(result)

## ----loi-perm-----------------------------------------------------------------
perm <- loi_perm(prox$ensemble, prox$e2tree, n_perm = 499, seed = 42)
print(perm)
plot(perm)

## ----val-regression-----------------------------------------------------------
val_reg <- eValidation(
  training_reg,
  tree_reg,
  D_reg,
  test = "measures",
  graph = FALSE,
  n_perm = 499
)
print(val_reg)

## ----session-info-------------------------------------------------------------
sessionInfo()