## ----setup, include=FALSE----------------------------------------------------- hasFNN <- requireNamespace("FNN", quietly = TRUE) knitr::opts_chunk$set(echo = TRUE, fig.align = "center") ## ----install, eval=FALSE------------------------------------------------------ # devtools::install_github("yinqiaoyan/spARI", dependencies = TRUE) ## ----install_FNN, eval=FALSE-------------------------------------------------- # install.packages("FNN") # find k-nearest neighbors ## ----------------------------------------------------------------------------- library(spARI) ## ----------------------------------------------------------------------------- data("spARI_example_data") true_labels <- spARI_example_data$true_labels c1_labels <- spARI_example_data$c1_labels c2_labels <- spARI_example_data$c2_labels coords <- spARI_example_data$coords ## ----------------------------------------------------------------------------- res_value1 <- spARI(r_labels=true_labels, c_labels=c1_labels, coords=coords) res_value2 <- spARI(r_labels=true_labels, c_labels=c2_labels, coords=coords) print(res_value1) print(res_value2) ## ----------------------------------------------------------------------------- cols <- c("#D27786","#A5D7F1","#c599f3","#ffb610") par(mfrow=c(1,3), mar=c(3,3,1,1)) plot(coords[, 1], coords[, 2], col=cols[true_labels], pch=16, main="Reference") plot(coords[, 1], coords[, 2], col=cols[c1_labels], pch=16, main="Clustering i") plot(coords[, 1], coords[, 2], col=cols[c2_labels], pch=16, main="Clustering ii") ## ----------------------------------------------------------------------------- set.seed(1) coords <- data.frame(x = c(runif(30,0,1), runif(30,1.2,2.2)), y = runif(60,0,1)) # reference ref <- c(rep(1,30), rep(2,30)) # clustering A cluA <- sample(ref) # clustering B cluB <- ref par(mfrow=c(1,2), mar=c(3,3,1,1)) plot(coords$x, coords$y, col=cluA, pch=16, main="Clustering A (lower spRI/spARI)") plot(coords$x, coords$y, col=cluB, pch=16, main="Clustering B (higher spRI/spARI)") # compute spRI and spARI res_A <- spARI(ref, cluA, coords = as.matrix(coords)) res_B <- spARI(ref, cluB, coords = as.matrix(coords)) print(res_A) print(res_B) ## ----------------------------------------------------------------------------- library(spARI) data("spARI_example_data") true_labels <- spARI_example_data$true_labels c1_labels <- spARI_example_data$c1_labels c2_labels <- spARI_example_data$c2_labels coords <- spARI_example_data$coords ## Compute the distance matrix coords_norm <- coords coords_norm[,1] <- (coords[,1] - min(coords[,1])) / (max(coords[,1]) - min(coords[,1])) coords_norm[,2] <- (coords[,2] - min(coords[,2])) / (max(coords[,2]) - min(coords[,2])) dist_mat <- as.matrix(stats::dist(coords_norm)) ## ----------------------------------------------------------------------------- res_value1 <- spARI(r_labels=true_labels, c_labels=c1_labels, dist_mat=dist_mat) res_value2 <- spARI(r_labels=true_labels, c_labels=c2_labels, dist_mat=dist_mat) print(res_value1) print(res_value2) ## ----message=FALSE, warning=FALSE, eval=hasFNN-------------------------------- library(FNN) library(Matrix) ## Define sparse distance matrix generation function build_symmetric_knn_distance_matrix <- function(coord_mat, k = 5) { N <- nrow(coord_mat) # Find k nearest neighbors for each object (exclude itself) knn_res <- FNN::get.knn(coord_mat, k = k) i_idx <- rep(1:N, each = k) j_idx <- as.vector(t(knn_res$nn.index)) d_val <- as.vector(t(knn_res$nn.dist)) # Generate sparse matrix (symmetric) D_temp <- sparseMatrix( i = i_idx, j = j_idx, x = d_val, dims = c(N, N) ) D1 <- as(D_temp, "dgTMatrix") D2 <- as(t(D_temp), "dgTMatrix") i_all <- c(D1@i, D2@i) j_all <- c(D1@j, D2@j) x_all <- c(D1@x, D2@x) # Combine and aggregate max df <- data.frame(i = i_all + 1, j = j_all + 1, x = x_all) # +1 for R indexing df_agg <- aggregate(x ~ i + j, data = df, FUN = max) # Create symmetric sparse matrix D_sym <- sparseMatrix(i = df_agg$j, j = df_agg$i, x = df_agg$x) return(D_sym) } ## Generate sparse distance matrix set.seed(123) N <- 1e5 # 100,000 objects coords <- matrix(runif(N*2), N, 2) # coordinates ranging between 0 and 1 dist_mat <- build_symmetric_knn_distance_matrix(coords, k = 5) dim(dist_mat) # 100000 100000 length(dist_mat@x) # 595850 ## ----eval=hasFNN-------------------------------------------------------------- K <- 15 set.seed(123) # reference partition true_labels <- sample(1:K, N, replace = TRUE) # clustering partition c_labels <- true_labels ids <- sample(which(true_labels == 1), 5000) c_labels[ids] <- 2 ids <- sample(which(true_labels == 3), 5000) c_labels[ids] <- 4 ## ----eval=hasFNN-------------------------------------------------------------- library(spARI) stime <- Sys.time() res <- spARI(true_labels, c_labels, dist_mat=dist_mat) etime <- Sys.time() print(res) # spRI spARI # 0.9834065 0.8752946 print(etime-stime) # Execution time is about 4.8 seconds # at a MacBook Air powered by Apple M4 CPU with 16GB of RAM ## ----------------------------------------------------------------------------- set.seed(12) ## Generate adjacency matrix n <- 10 p <- 0.4 adj_mat <- matrix(0L, n, n) up_tri <- upper.tri(adj_mat) adj_mat[up_tri] <- rbinom(sum(up_tri), size = 1, prob = p) adj_mat <- adj_mat + t(adj_mat) diag(adj_mat) <- 0 print(adj_mat) ## Generate synthetic reference and clustering partitions ref <- sample(1:3, n, replace = TRUE) clu <- ref clu[c(6,7)] <- 1 ## ----------------------------------------------------------------------------- library(spARI) res <- spARI(r_labels = ref, c_labels = clu, dist_mat = adj_mat) print(res) ## ----------------------------------------------------------------------------- library(spARI) data("spARI_example_data") true_labels <- spARI_example_data$true_labels c1_labels <- spARI_example_data$c1_labels c2_labels <- spARI_example_data$c2_labels coords <- spARI_example_data$coords ## ----------------------------------------------------------------------------- set.seed(42) perm_test(r_labels=true_labels, c_labels=c1_labels, coords=coords, use_parallel=FALSE) perm_test(r_labels=true_labels, c_labels=c2_labels, coords=coords, use_parallel=FALSE) ## ----message=FALSE, warning=FALSE--------------------------------------------- library(SpatialExperiment) library(S4Vectors) set.seed(123) count_matrix <- matrix( sample(0:10, 100, replace = TRUE), nrow = 10, # 10 genes ncol = 10 # 10 spots ) rownames(count_matrix) <- paste0("gene", 1:10) colnames(count_matrix) <- paste0("spot", 1:10) # Construct gene annotations (rowData) gene_annotation <- DataFrame( gene_id = rownames(count_matrix), gene_name = paste0("Gene_", 1:10) ) # Construct spot metadata (colData) spot_metadata <- DataFrame( spot_id = colnames(count_matrix), sample_id = rep("sample1", 10), cell_type = c(2, 3, 2, 2, 1, 3, 1, 3, 1, 3), cluster = c(2, 3, 2, 3, 1, 3, 3, 3, 1, 3) ) # Construct spatial coordinates (spatialCoords) coords_matrix <- cbind( x = runif(10, min = 0, max = 100), y = runif(10, min = 0, max = 100) ) rownames(coords_matrix) <- colnames(count_matrix) # Construct the SpatialExperiment object spe <- SpatialExperiment( assays = list(counts = count_matrix), rowData = gene_annotation, colData = spot_metadata, spatialCoords = coords_matrix ) ## ----------------------------------------------------------------------------- library(spARI) spARI(spe=spe) set.seed(42) perm_test(spe=spe, use_parallel=FALSE) ## ----session-info, echo=FALSE------------------------------------------------- sessionInfo()