## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(eval = TRUE)

## -----------------------------------------------------------------------------
# install.packages("ggmlR")  # once on CRAN
library(ggmlR)

## -----------------------------------------------------------------------------
# Iris classification demo: standardise features, one-hot encode the target,
# and hold out 30 samples for validation.
data(iris)
set.seed(42)
x <- as.matrix(iris[, 1:4])
x <- scale(x)  # standardise

# one-hot encode species
y <- model.matrix(~ Species - 1, iris)  # [150 x 3]

idx <- sample(nrow(x))
n_tr <- 120L
x_tr  <- x[idx[1:n_tr], ]
y_tr  <- y[idx[1:n_tr], ]
x_val <- x[idx[(n_tr + 1):150], ]
y_val <- y[idx[(n_tr + 1):150], ]

## -----------------------------------------------------------------------------
# Sequential (Keras-style) API: dense -> batch norm -> dropout -> dense -> softmax.
model <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_batch_norm() |>
  ggml_layer_dropout(0.3, stochastic = TRUE) |>
  ggml_layer_dense(16L, activation = "relu") |>
  ggml_layer_dense(3L, activation = "softmax") |>
  ggml_compile(
    optimizer = "adam",
    loss = "categorical_crossentropy",
    metrics = c("accuracy")
  )
print(model)

## -----------------------------------------------------------------------------
model <- ggml_fit(
  model, x_tr, y_tr,
  epochs = 100L,
  batch_size = 32L,
  validation_split = 0.0,  # we supply our own val set below
  verbose = 0L
)

## -----------------------------------------------------------------------------
# Evaluate on the held-out set and show a confusion matrix.
score <- ggml_evaluate(model, x_val, y_val, batch_size = 16L)
cat(sprintf("Val loss: %.4f  Val accuracy: %.4f\n", score$loss, score$accuracy))

probs <- ggml_predict(model, x_val, batch_size = 16L)
# probs is [samples x classes]: the predicted class is the max within each ROW.
classes <- apply(probs, 1, which.max)
true <- apply(y_val, 1, which.max)
cat("Confusion matrix:\n")
print(table(true = true, predicted = classes))

## -----------------------------------------------------------------------------
# Round-trip the fitted model through disk and confirm accuracy is unchanged.
path <- tempfile(fileext = ".rds")
ggml_save_model(model, path)
cat(sprintf("Saved: %s (%.1f KB)\n", path, file.size(path) / 1024))

model2 <- ggml_load_model(path)
score2 <- ggml_evaluate(model2, x_val, y_val, batch_size = 16L)
cat(sprintf("Reloaded model accuracy: %.4f\n", score2$accuracy))

## -----------------------------------------------------------------------------
# Functional API: the same architecture built from explicit input/output nodes.
inp <- ggml_input(shape = 4L)
h <- inp |>
  ggml_layer_dense(32L, activation = "relu") |>
  ggml_layer_batch_norm()
h <- h |>
  ggml_layer_dense(16L, activation = "relu")
out <- h |>
  ggml_layer_dense(3L, activation = "softmax")

model_fn <- ggml_model(inputs = inp, outputs = out) |>
  ggml_compile(
    optimizer = "adam",
    loss = "categorical_crossentropy",
    metrics = c("accuracy")
  )

model_fn <- ggml_fit(model_fn, x_tr, y_tr,
                     epochs = 100L, batch_size = 32L, verbose = 0L)
score_fn <- ggml_evaluate(model_fn, x_val, y_val, batch_size = 16L)
cat(sprintf("Functional model accuracy: %.4f\n", score_fn$accuracy))

## -----------------------------------------------------------------------------
# Multi-input regression on mtcars: two feature groups merged by element-wise add.
data(mtcars)
set.seed(7)

# Input group 1: engine (disp, hp, wt)
# Input group 2: transmission / gearbox (cyl, gear, carb, am)
engine <- as.matrix(scale(mtcars[, c("disp", "hp", "wt")]))
trans  <- as.matrix(scale(mtcars[, c("cyl", "gear", "carb", "am")]))
y_mpg  <- matrix(scale(mtcars$mpg), ncol = 1L)  # [32 x 1]

# small dataset -- use all for training, evaluate on same data for demo
x1 <- engine
x2 <- trans

inp1 <- ggml_input(shape = 3L, name = "engine")
inp2 <- ggml_input(shape = 4L, name = "transmission")

branch1 <- inp1 |> ggml_layer_dense(16L, activation = "relu")
branch2 <- inp2 |> ggml_layer_dense(16L, activation = "relu")
merged <- ggml_layer_add(list(branch1, branch2))  # element-wise add

out_reg <- merged |>
  ggml_layer_dense(8L, activation = "relu") |>
  ggml_layer_dense(1L)

model_reg <- ggml_model(inputs = list(inp1, inp2), outputs = out_reg) |>
  ggml_compile(optimizer = "adam", loss = "mse")

model_reg <- ggml_fit(model_reg, x = list(x1, x2), y = y_mpg,
                      epochs = 200L, batch_size = 16L, verbose = 0L)

preds <- ggml_predict(model_reg, x = list(x1, x2), batch_size = 16L)
cat(sprintf("Pearson r (scaled mpg): %.4f\n", cor(preds, y_mpg)))

## -----------------------------------------------------------------------------
# Early stopping on validation loss.
cb_stop <- ggml_callback_early_stopping(
  monitor = "val_loss",
  patience = 15L,
  min_delta = 1e-4
)

model_cb <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_dense(3L, activation = "softmax") |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy")

model_cb <- ggml_fit(model_cb, x_tr, y_tr,
                     epochs = 300L, batch_size = 32L,
                     validation_split = 0.1,
                     callbacks = list(cb_stop),
                     verbose = 0L)

## -----------------------------------------------------------------------------
# Learning-rate schedules (only the cosine schedule is used in the fit below;
# the others are shown for reference).
# Cosine annealing
cb_cosine <- ggml_schedule_cosine_decay(T_max = 100L, eta_min = 1e-5)

# Step decay: halve LR every 30 epochs
cb_step <- ggml_schedule_step_decay(step_size = 30L, gamma = 0.5)

# Reduce on plateau
cb_plateau <- ggml_schedule_reduce_on_plateau(
  monitor = "val_loss",
  factor = 0.5,
  patience = 10L,
  min_lr = 1e-6
)

model_lr <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_dense(3L, activation = "softmax") |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy")

model_lr <- ggml_fit(model_lr, x_tr, y_tr,
                     epochs = 150L, batch_size = 32L,
                     validation_split = 0.1,
                     callbacks = list(cb_cosine),
                     verbose = 0L)

## -----------------------------------------------------------------------------
# Autograd API uses a column-major layout:
# transpose: rows = features, cols = samples
x_tr_ag  <- t(x_tr)   # [4, 120]
y_tr_ag  <- t(y_tr)   # [3, 120]
x_val_ag <- t(x_val)  # [4, 30]
y_val_ag <- t(y_val)  # [3, 30]

## -----------------------------------------------------------------------------
ag_mod <- ag_sequential(
  ag_linear(4L, 32L, activation = "relu"),
  ag_batch_norm(32L),
  ag_dropout(0.3),
  ag_linear(32L, 16L, activation = "relu"),
  ag_linear(16L, 3L)
)
params <- ag_mod$parameters()
opt <- optimizer_adam(params, lr = 1e-3)

## -----------------------------------------------------------------------------
# Manual mini-batch training loop with gradient tape.
BS <- 32L
n <- ncol(x_tr_ag)
ag_train(ag_mod)
set.seed(42)
for (ep in seq_len(150L)) {
  perm <- sample(n)
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b - 1L) * BS + 1L, min(b * BS, n))]
    xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb <- y_tr_ag[, idx, drop = FALSE]
    with_grad_tape({
      loss <- ag_softmax_cross_entropy_loss(ag_mod$forward(xb), yb)
    })
    grads <- backward(loss)
    opt$step(grads)
    opt$zero_grad()
  }
  if (ep %% 50L == 0L) cat(sprintf("epoch %d loss %.4f\n", ep, loss$data[1]))
}

## -----------------------------------------------------------------------------
ag_eval(ag_mod)

# Forward in chunks and apply a numerically stable softmax manually.
# x_cm is column-major ([features x samples]); the returned matrix is
# [classes x samples].
ag_predict_cm <- function(model, x_cm, chunk = 64L) {
  n <- ncol(x_cm)
  # One probe forward pass to learn the output dimension.
  n_out <- nrow(model$forward(ag_tensor(x_cm[, 1, drop = FALSE]))$data)
  out <- matrix(0.0, n_out, n)
  for (s in seq(1L, n, by = chunk)) {
    e <- min(s + chunk - 1L, n)
    lg <- model$forward(ag_tensor(x_cm[, s:e, drop = FALSE]))$data
    # FIX: use sweep() for per-column operations. The previous
    # `lg - apply(lg, 2, max)` and `ev / colSums(ev)` recycled the
    # length-ncol vector DOWN the rows (column-major order), subtracting
    # and normalising against the wrong columns.
    ev <- exp(sweep(lg, 2L, apply(lg, 2L, max)))
    out[, s:e] <- sweep(ev, 2L, colSums(ev), "/")
  }
  out
}

probs_ag <- ag_predict_cm(ag_mod, x_val_ag)  # [3, 30]
pred_ag <- apply(probs_ag, 2, which.max)
# FIX: y_val_ag is [classes x samples], so the true class of each sample is
# the argmax within each COLUMN (MARGIN = 2). The previous MARGIN = 1 gave a
# length-3 vector that was silently recycled in the accuracy comparison.
true_ag <- apply(y_val_ag, 2, which.max)
acc_ag <- mean(pred_ag == true_ag)
cat(sprintf("Autograd val accuracy: %.4f\n", acc_ag))

## -----------------------------------------------------------------------------
# Same loop, larger net, plus cosine LR schedule and gradient clipping.
ag_mod2 <- ag_sequential(
  ag_linear(4L, 64L, activation = "relu"),
  ag_batch_norm(64L),
  ag_dropout(0.3),
  ag_linear(64L, 32L, activation = "relu"),
  ag_linear(32L, 3L)
)
params2 <- ag_mod2$parameters()
opt2 <- optimizer_adam(params2, lr = 1e-3)
sch2 <- lr_scheduler_cosine(opt2, T_max = 150L, lr_min = 1e-5)

ag_train(ag_mod2)
set.seed(42)
for (ep in seq_len(150L)) {
  perm <- sample(n)
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b - 1L) * BS + 1L, min(b * BS, n))]
    xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb <- y_tr_ag[, idx, drop = FALSE]
    with_grad_tape({
      loss2 <- ag_softmax_cross_entropy_loss(ag_mod2$forward(xb), yb)
    })
    grads2 <- backward(loss2)
    clip_grad_norm(params2, grads2, max_norm = 5.0)
    opt2$step(grads2)
    opt2$zero_grad()
  }
  sch2$step()
}

ag_eval(ag_mod2)
probs2 <- ag_predict_cm(ag_mod2, x_val_ag)
acc2 <- mean(apply(probs2, 2, which.max) == true_ag)
cat(sprintf("ag + cosine + clip val accuracy: %.4f\n", acc2))

## -----------------------------------------------------------------------------
# Raw ag_param API: a two-layer MLP with He-initialised weights, closing over
# its parameters.
make_net <- function(n_in, n_hidden, n_out) {
  W1 <- ag_param(matrix(rnorm(n_hidden * n_in) * sqrt(2 / n_in), n_hidden, n_in))
  b1 <- ag_param(matrix(0.0, n_hidden, 1L))
  W2 <- ag_param(matrix(rnorm(n_out * n_hidden) * sqrt(2 / n_hidden), n_out, n_hidden))
  b2 <- ag_param(matrix(0.0, n_out, 1L))
  list(
    # forward: W2 %*% relu(W1 %*% x + b1) + b2
    forward = function(x) {
      ag_add(ag_matmul(W2, ag_relu(ag_add(ag_matmul(W1, x), b1))), b2)
    },
    parameters = function() list(W1 = W1, b1 = b1, W2 = W2, b2 = b2)
  )
}

set.seed(1)
net <- make_net(4L, 32L, 3L)
opt_r <- optimizer_adam(net$parameters(), lr = 1e-3)

for (ep in seq_len(200L)) {
  perm <- sample(n)
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b - 1L) * BS + 1L, min(b * BS, n))]
    xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb <- y_tr_ag[, idx, drop = FALSE]
    with_grad_tape({
      loss_r <- ag_softmax_cross_entropy_loss(net$forward(xb), yb)
    })
    gr <- backward(loss_r)
    opt_r$step(gr)
    opt_r$zero_grad()
  }
}

probs_r <- ag_predict_cm(net, x_val_ag)
acc_r <- mean(apply(probs_r, 2, which.max) == true_ag)
cat(sprintf("Raw ag_param val accuracy: %.4f\n", acc_r))

## -----------------------------------------------------------------------------
# Use Vulkan GPU if available, fall back to CPU
device <- tryCatch(
  {
    ag_device("gpu")
    "gpu"
  },
  error = function(e) "cpu"
)
cat("Running on:", device, "\n")

# Mixed precision (f16 on GPU, f32 on CPU)
ag_dtype(if (device == "gpu") "f16" else "f32")