# Example code for explaining boosted ensembles (XGBoost, gbm, LightGBM) with
# e2tree. The `## ----label----` lines are knitr chunk markers. Every example
# chunk below is kept commented out because the setup chunk sets `eval = FALSE`
# (the examples need optional packages); uncomment a chunk to run it locally.

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE # examples require optional packages; set eval=TRUE locally
)

## ----xgb-clf------------------------------------------------------------------
# library(e2tree)
# # requireNamespace() tests availability without attaching the package, so
# # the install step only runs when xgboost is really missing.
# if (!requireNamespace("xgboost", quietly = TRUE)) {
#   install.packages("xgboost", repos = "https://cran.r-project.org")
# }
# library(xgboost)
#
# data(iris)
# set.seed(42)
# n <- floor(0.75 * nrow(iris))
# # Keep the sampled row positions so the hold-out split does not rely on the
# # row names being the integers 1..nrow(iris).
# idx <- sample(nrow(iris), n)
# tr <- iris[idx, ]
# va <- iris[-idx, ]
#
# # XGBoost requires a numeric matrix and 0-indexed integer labels
# X_tr <- as.matrix(tr[, 1:4])
# y_tr <- as.integer(tr$Species) - 1L
# dm_tr <- xgb.DMatrix(data = X_tr, label = y_tr)
#
# ensemble <- xgb.train(
#   params = list(objective = "multi:softmax",
#                 num_class = 3,
#                 max_depth = 4,
#                 eta = 0.3),
#   data = dm_tr,
#   nrounds = 100,
#   verbose = 0
# )
#
# # Attach the response back to the data.frame so the formula in e2tree()
# # can find it; createDisMatrix() will use it to annotate the dissimilarity
# # matrix (in classification, `label` is optional but recommended).
# tr_xgb <- tr[, 1:4]
# tr_xgb$Species <- tr$Species
#
# D <- createDisMatrix(ensemble, data = tr_xgb, label = "Species",
#                      parallel = list(active = FALSE, no_cores = 1))
#
# setting <- list(impTotal = 0.1, maxDec = 0.01, n = 2, level = 5)
# tree_xgb <- e2tree(Species ~ ., data = tr_xgb, D = D,
#                    ensemble = ensemble, setting = setting)
# print(tree_xgb)

## ----xgb-reg------------------------------------------------------------------
# library(xgboost)
#
# data(mtcars)
# set.seed(42)
# n <- floor(0.75 * nrow(mtcars))
# tr <- mtcars[sample(nrow(mtcars), n), ]
#
# # Column 1 of mtcars is the response (mpg); the rest are the features.
# X_tr <- as.matrix(tr[, -1])
# y_tr <- tr$mpg
# dm_tr <- xgb.DMatrix(data = X_tr, label = y_tr)
#
# ensemble <- xgb.train(
#   params = list(objective = "reg:squarederror", max_depth = 4, eta = 0.3),
#   data = dm_tr,
#   nrounds = 100,
#   verbose = 0
# )
#
# # `data = tr` carries the response column too; the XGBoost adapter
# # automatically trims the matrix to the features used at training time.
# D <- createDisMatrix(ensemble, data = tr, label = "mpg",
#                      parallel = list(active = FALSE, no_cores = 1))
# tree <- e2tree(mpg ~ ., data = tr, D = D, ensemble = ensemble,
#                setting = list(impTotal = 0.1, maxDec = 1e-6, n = 2, level = 5))
# print(tree)

## ----gbm-clf------------------------------------------------------------------
# # requireNamespace() tests availability without attaching the package, so
# # the install step only runs when gbm is really missing.
# if (!requireNamespace("gbm", quietly = TRUE)) {
#   install.packages("gbm", repos = "https://cran.r-project.org")
# }
# library(gbm)
#
# data(iris)
# set.seed(42)
# df <- iris
# # The model is fitted on the 0/1 integer response, while the factor copy is
# # the one handed to createDisMatrix() and e2tree() below.
# df$is_setosa <- as.integer(df$Species == "setosa")
# df$is_setosa_fct <- factor(df$is_setosa, levels = c(0L, 1L))
# n <- floor(0.75 * nrow(df))
# tr <- df[sample(nrow(df), n), ]
#
# ensemble <- gbm(is_setosa ~ Sepal.Length + Sepal.Width +
#                   Petal.Length + Petal.Width,
#                 data = tr,
#                 distribution = "bernoulli",
#                 n.trees = 200,
#                 interaction.depth = 4,
#                 verbose = FALSE)
#
# D <- createDisMatrix(ensemble,
#                      data = tr[, c("Sepal.Length", "Sepal.Width",
#                                    "Petal.Length", "Petal.Width",
#                                    "is_setosa_fct")],
#                      label = "is_setosa_fct",
#                      parallel = list(active = FALSE, no_cores = 1))
# tree <- e2tree(is_setosa_fct ~ Sepal.Length + Sepal.Width +
#                  Petal.Length + Petal.Width,
#                data = tr, D = D, ensemble = ensemble,
#                setting = list(impTotal = 0.1, maxDec = 0.01, n = 2, level = 5))
# print(tree)

## ----gbm-reg------------------------------------------------------------------
# library(gbm)
#
# data(mtcars)
# set.seed(42)
# n <- floor(0.75 * nrow(mtcars))
# tr <- mtcars[sample(nrow(mtcars), n), ]
#
# ensemble <- gbm(mpg ~ ., data = tr,
#                 distribution = "gaussian",
#                 n.trees = 200,
#                 interaction.depth = 4,
#                 n.minobsinnode = 2,
#                 bag.fraction = 0.8,
#                 verbose = FALSE)
#
# D <- createDisMatrix(ensemble, data = tr, label = "mpg",
#                      parallel = list(active = FALSE, no_cores = 1))
# tree <- e2tree(mpg ~ ., data = tr, D = D, ensemble = ensemble,
#                setting = list(impTotal = 0.1, maxDec = 1e-6, n = 2, level = 5))
# print(tree)

## ----lgb-clf------------------------------------------------------------------
# # requireNamespace() tests availability without attaching the package, so
# # the install step only runs when lightgbm is really missing.
# if (!requireNamespace("lightgbm", quietly = TRUE)) {
#   install.packages("lightgbm", repos = "https://cran.r-project.org")
# }
# library(lightgbm)
#
# data(iris)
# set.seed(42)
# n <- floor(0.75 * nrow(iris))
# tr <- iris[sample(nrow(iris), n), ]
#
# # Numeric feature matrix plus 0-based integer class labels for the booster.
# X_tr <- as.matrix(tr[, 1:4])
# y_tr <- as.integer(tr$Species) - 1L
# ds <- lgb.Dataset(X_tr, label = y_tr)
#
# ensemble <- lgb.train(
#   params = list(objective = "multiclass",
#                 num_class = 3,
#                 num_leaves = 15,
#                 verbose = -1),
#   data = ds,
#   nrounds = 100
# )
#
# # Rebuild a data.frame holding the four features plus the Species response
# # used by the formula and the `label` argument below.
# tr_lgb <- tr[, 1:4]
# tr_lgb$Species <- tr$Species
#
# D <- createDisMatrix(ensemble, data = tr_lgb, label = "Species",
#                      parallel = list(active = FALSE, no_cores = 1))
#
# tree <- e2tree(Species ~ ., data = tr_lgb, D = D, ensemble = ensemble,
#                setting = list(impTotal = 0.1, maxDec = 0.01, n = 2, level = 5))
# print(tree)

## ----lgb-reg------------------------------------------------------------------
# library(lightgbm)
#
# data(mtcars)
# set.seed(42)
# n <- floor(0.75 * nrow(mtcars))
# tr <- mtcars[sample(nrow(mtcars), n), ]
#
# # Column 1 of mtcars is the response (mpg); the rest are the features.
# X_tr <- as.matrix(tr[, -1])
# y_tr <- tr$mpg
# ds <- lgb.Dataset(X_tr, label = y_tr)
#
# ensemble <- lgb.train(
#   params = list(objective = "regression",
#                 num_leaves = 8,
#                 min_data_in_leaf = 2,
#                 learning_rate = 0.1,
#                 verbose = -1),
#   data = ds,
#   nrounds = 200
# )
#
# # Pass the response column to createDisMatrix() via `label`. The
# # LightGBM adapter selects the columns it needs through the booster's
# # stored feature names, so any extra columns in `data` are ignored.
# D <- createDisMatrix(ensemble, data = tr, label = "mpg",
#                      parallel = list(active = FALSE, no_cores = 1))
# tree <- e2tree(mpg ~ ., data = tr, D = D, ensemble = ensemble,
#                setting = list(impTotal = 0.1, maxDec = 1e-6, n = 2, level = 5))
# print(tree)