## ----include = FALSE----------------------------------------------------------
# Document-wide knitr chunk defaults: figures are 7 x 5 (knitr measures these
# in inches), and source/output are collapsed into a single block whose output
# lines are prefixed with "#>".
knitr::opts_chunk$set(
  fig.width = 7,
  fig.height = 5,
  collapse = TRUE,
  comment = "#>"
)

## ----eval = FALSE-------------------------------------------------------------
# # Install from CRAN
# install.packages("tidylearn")
# 
# # Or install development version from GitHub
# # devtools::install_github("ces0491/tidylearn")

## ----setup--------------------------------------------------------------------
library(tidylearn)
library(dplyr)

## -----------------------------------------------------------------------------
# Supervised example (classification): model `Species` from every other column
# of `iris` using the "logistic" method, then print the fitted model.
# NOTE(review): `Species` has three levels -- confirm how the "logistic" method
# treats a multi-class outcome.
model_logistic <- tl_model(
  iris,
  Species ~ .,
  method = "logistic"
)
print(model_logistic)

## -----------------------------------------------------------------------------
# predict() is called without new data here; preview the first rows returned.
predictions <- predict(model_logistic)
head(predictions, n = 6L)

## -----------------------------------------------------------------------------
# Supervised example (regression): model `mpg` from `wt` and `hp` in `mtcars`
# using the "linear" method, then print the fitted model.
model_linear <- tl_model(
  mtcars,
  mpg ~ wt + hp,
  method = "linear"
)
print(model_linear)

## -----------------------------------------------------------------------------
# predict() is called without new data here; preview the first rows returned.
predictions_reg <- predict(model_linear)
head(predictions_reg, n = 6L)

## -----------------------------------------------------------------------------
# Unsupervised example (dimensionality reduction): no formula is supplied;
# the "pca" method is given only the first four columns of `iris`.
model_pca <- tl_model(
  iris[, 1:4],
  method = "pca"
)
print(model_pca)

## -----------------------------------------------------------------------------
# predict() on the PCA model yields the transformed data; preview it.
transformed <- predict(model_pca)
head(transformed, n = 6L)

## -----------------------------------------------------------------------------
# Unsupervised example (clustering): run the "kmeans" method with k = 3 on the
# first four columns of `iris`.
model_kmeans <- tl_model(
  iris[, 1:4],
  method = "kmeans",
  k = 3
)
print(model_kmeans)

## -----------------------------------------------------------------------------
# The cluster assignments are read from the `clusters` element of the fit.
clusters <- model_kmeans$fit$clusters
head(clusters, n = 6L)

## -----------------------------------------------------------------------------
# Cross-tabulate the assigned cluster (rows) against the true species (columns)
table(clusters$cluster, iris$Species)

## -----------------------------------------------------------------------------
# Preprocess `iris` in one call: "mean" imputation and "standardize" scaling
# are requested, and categorical encoding is switched off.
processed <- tl_prepare_data(
  iris, Species ~ .,
  impute_method = "mean", scale_method = "standardize",
  encode_categorical = FALSE
)

## -----------------------------------------------------------------------------
# List the names of the preprocessing steps recorded on the result
names(processed$preprocessing_steps)

## -----------------------------------------------------------------------------
# The prepared data frame (`processed$data`) feeds straight into tl_model()
model_processed <- tl_model(
  processed$data,
  Species ~ .,
  method = "forest"
)

## -----------------------------------------------------------------------------
# Random train/test split of `iris` with prop = 0.7; the seed makes the
# partition reproducible. The result exposes `$train` and `$test`.
split <- tl_split(
  iris,
  prop = 0.7,
  seed = 123
)

# Fit on the training partition only
model_train <- tl_model(
  split$train,
  Species ~ .,
  method = "logistic"
)

# Score the held-out partition and preview the predictions
predictions_test <- predict(
  model_train,
  new_data = split$test
)
head(predictions_test, n = 6L)

## -----------------------------------------------------------------------------
# Same split, but stratified on `Species` so class proportions are maintained
split_strat <- tl_split(
  iris,
  prop = 0.7,
  stratify = "Species",
  seed = 123
)

# Class proportions in the training partition, the test partition, and the
# full data set, for side-by-side comparison
prop.table(table(split_strat$train[["Species"]]))
prop.table(table(split_strat$test[["Species"]]))
prop.table(table(iris[["Species"]]))

## -----------------------------------------------------------------------------
# Example: the underlying fitted object is stored in the `fit` element.
model_forest <- tl_model(
  iris,
  Species ~ .,
  method = "forest"
)
# Inspect the class of the raw fit (this is the randomForest object)
class(model_forest$fit)

# Package-specific functions can then be applied to it if needed, e.g.:
# randomForest::varImpPlot(model_forest$fit)

## -----------------------------------------------------------------------------
# Quick example combining everything: preprocess -> split -> fit -> predict.
#
# The predictors are standardized BEFORE the split so that the training and
# test partitions share one transformation. Standardizing only the training
# partition and then predicting on raw test rows would feed the model values
# on a different scale from the one it was fitted on.
# Trade-off: the scaling statistics are computed from all rows, including the
# ones that end up in the test partition.
# Assumes `data_prep$data` keeps the `Species` column, as the modeling formula
# below already requires.
data_prep <- tl_prepare_data(
  iris,
  Species ~ .,
  scale_method = "standardize"
)
data_split <- tl_split(
  data_prep$data,
  prop = 0.7,
  stratify = "Species",
  seed = 42
)

# Fit on the (already standardized) training partition and score the
# (identically standardized) test partition
model_final <- tl_model(data_split$train, Species ~ ., method = "forest")
test_preds <- predict(model_final, new_data = data_split$test)

print(model_final)