## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----check-packages, echo=FALSE, message=FALSE, warning=FALSE----------------- required_packages <- c("proxy", "ppclust", "blockcluster", "cluster", "factoextra", "ggplot2", "tidyr") missing_packages <- required_packages[!vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)] if (length(missing_packages) > 0) { message("❌ The following required packages are not installed:\n") message(paste0("- ", missing_packages, collapse = "\n"), "\n") message("📦 To install them, run the following in R:\n") message("```r") message(sprintf("install.packages(c(%s))", paste(sprintf('\"%s\"', missing_packages), collapse = ", "))) message("```") knitr::knit_exit() } ## ----setup,echo=FALSE, include=FALSE------------------------------------------ library(Silhouette) library(proxy) library(ppclust) library(cluster) library(factoextra) library(blockcluster) library(ggplot2) library(drclust) set.seed(123) ## ----kmeans------------------------------------------------------------------- data(iris) km <- kmeans(iris[, -5], centers = 3) ## ----crisp-silhouette1, fig.width=7, fig.height=4, fig.alt = "fig1.1"--------- library(proxy) dist_matrix <- proxy::dist(iris[, -5], km$centers) sil <- Silhouette(dist_matrix) head(sil) summary(sil) plot(sil) ## ----crisp-silhouette2, fig.width=7, fig.height=4, fig.alt = "fig1.2"--------- sil_pac <- Silhouette(dist_matrix, method = "pac", sort = TRUE) head(sil_pac) summary(sil_pac) plot(sil_pac) ## ----crisp-silhouette3-------------------------------------------------------- s <- summary(sil_pac,print.summary = TRUE) # summary table s$sil.sum # cluster wise silhouette widths s$clus.avg.widths # Overall average silhouette width s$avg.width ## ----fm----------------------------------------------------------------------- library(ppclust) data(iris) fm <- ppclust::fcm(x = iris[, -5], centers = 3) ## ----crisp-silhouette4, fig.width=7, fig.height=4, fig.alt = "fig1.2"--------- sil_fm <- Silhouette(fm$d) plot(sil_fm) ## ----crisp-silhouette5, fig.width=7, fig.height=4, fig.alt = "fig1.3"--------- sil_fcm <- Silhouette(prox_matrix = "d", clust_fun = fcm, x = iris[, -5], centers = 3) plot(sil_fcm) ## ----fuzzy-silhouette4.1, fig.width=7, fig.height=4--------------------------- data(iris) fm1 <- ppclust::fcm(x = iris[, -5], centers = 3) ## ----fuzzy-silhouette4, fig.width=7, fig.height=4, fig.alt = "fig1.6"--------- sil_fm1 <- Silhouette(prox_matrix = fm1$d, prob_matrix = fm1$u, average = "fuzzy") plot(sil_fm1) ## ----fuzzy-silhouette5, fig.width=7, fig.height=4, fig.alt = "fig1.3"--------- library(ppclust) sil_fcm1 <- Silhouette(prox_matrix = "d", prob_matrix = "u", average = "fuzzy", clust_fun = fcm, x = iris[, -5], centers = 3) plot(sil_fcm1) ## ----fcm---------------------------------------------------------------------- data(iris) # FCM clustering fcm_result <- ppclust::fcm(iris[, 1:4], 3) # FCM2 clustering fcm2_result <- ppclust::fcm2(iris[, 1:4], 3) ## ----softSilhouette, fig.width=7, fig.height=4, fig.alt = "fig2.1"------------ # Soft silhouette for FCM sil_fcm <- softSilhouette(prob_matrix = fcm_result$u) plot(sil_fcm) # Soft silhouette for FCM2 sil_fcm2 <- softSilhouette(prob_matrix = fcm2_result$u) plot(sil_fcm2) ## ----softSilhouette1, fig.width=7, fig.height=4, fig.alt = "fig2.2"----------- sfcm <- summary(sil_fcm, print.summary = FALSE) sfcm2 <- summary(sil_fcm2, print.summary = FALSE) cat("FCM average silhouette width:", sfcm$avg.width, "\n", "FCM2 average silhouette width:", sfcm2$avg.width) ## ----cer-db-silhouette, fig.width=7, fig.height=4, fig.alt = "fig2.3"--------- # Certainty-based silhouette for FCM and FCM2 cer_fcm <- cerSilhouette(prob_matrix = fcm_result$u, print.summary = TRUE) plot(cer_fcm) cer_fcm2 <- cerSilhouette(prob_matrix = fcm2_result$u, print.summary = TRUE) plot(cer_fcm2) # Density-based silhouette for FCM and FCM2 db_fcm <- dbSilhouette(prob_matrix = fcm_result$u, print.summary = TRUE) plot(db_fcm) db_fcm2 <- dbSilhouette(prob_matrix = fcm2_result$u, print.summary = TRUE) plot(db_fcm2) ## ----cer-db-silhouette-summary------------------------------------------------ # Compare average silhouette widths across all methods # Summary for FCM cer_sfcm <- summary(cer_fcm, print.summary = FALSE) db_sfcm <- summary(db_fcm, print.summary = FALSE) # Summary for FCM2 cer_sfcm2 <- summary(cer_fcm2, print.summary = FALSE) db_sfcm2 <- summary(db_fcm2, print.summary = FALSE) # Print comparison cat("FCM - Soft silhouette:", sfcm$avg.width, "\n", "FCM - Certainty silhouette:", cer_sfcm$avg.width, "\n", "FCM - Density-based silhouette:", db_sfcm$avg.width, "\n\n","FCM2 - Soft silhouette:", sfcm2$avg.width, "\n","FCM2 - Certainty silhouette:", cer_sfcm2$avg.width, "\n","FCM2 - Density-based silhouette:", db_sfcm2$avg.width, "\n") ## ----screeplot1--------------------------------------------------------------- data(iris) avg_sil_width <- rep(NA,7) for (k in 2:7) { sil_out <- Silhouette( prox_matrix = "d", method = "pac", clust_fun = ppclust::fcm, x = iris[, 1:4], centers = k) avg_sil_width[k] <- summary(sil_out, print.summary = FALSE)$avg.width } ## ----screeplot2, fig.width=7, fig.height=4, fig.alt = "fig3.1"---------------- plot(avg_sil_width, type = "o", ylab = "Overall Silhouette Width", xlab = "Number of Clusters", main = "Silhouette Scree Plot" ) ## ----plot0, fig.width=6, fig.height=4, fig.alt = "fig4.0"--------------------- data(iris) km_out <- kmeans(iris[, -5], 3) dist_mat <- proxy::dist(iris[, -5], km_out$centers) sil_obj <- Silhouette(dist_mat) plot(sil_obj) # S3 method auto-dispatch plotSilhouette(sil_obj) # explicit call (identical output) ## ----plot1, fig.width=6, fig.height=4, fig.alt = "fig4.1"--------------------- library(cluster) pam_result <- pam(iris[, 1:4], k = 3) plotSilhouette(pam_result) # for cluster::pam object clara_result <- clara(iris[, 1:4], k = 3) plotSilhouette(clara_result) fanny_result <- fanny(iris[, 1:4], k = 3) plotSilhouette(fanny_result) ## ----plot2, fig.width=6, fig.height=4, fig.alt = "fig4.2"--------------------- sil_base <- cluster::silhouette(pam_result) plotSilhouette(sil_base) ## ----plot3, fig.width=6, fig.height=4, fig.alt = "fig4.3"--------------------- library(factoextra) eclust_result <- eclust(iris[, 1:4], "kmeans", k = 3, graph = FALSE) plotSilhouette(eclust_result) hcut_result <- hcut(iris[, 1:4], k = 3) plotSilhouette(hcut_result) ## ----plot3.1, fig.width=7, fig.height=6, fig.alt = "fig4.3.1"----------------- library(drclust) # Loading the numeric in matrix iris_mat <- as.matrix(iris[,-5]) #applying a clustering algorithm drclust_out <- dpcakm(iris_mat, 20, 3) #silhouette based on the data and the output of the clustering algorithm d <- silhouette(iris_mat, drclust_out) plotSilhouette(d$cl.silhouette) ## ----plot4, fig.width=6, fig.height=4, fig.alt = "fig4.4"--------------------- data(iris) fcm_out <- ppclust::fcm(iris[, 1:4], 3) sil_fuzzy <- Silhouette( prox_matrix = "d", prob_matrix = "u", clust_fun = fcm, x = iris[, 1:4], centers = 3, sort = TRUE ) plot(sil_fuzzy, summary.legend = FALSE, grayscale = TRUE) ## ----plot5, fig.width=6, fig.height=4, fig.alt = "fig4.5"--------------------- plotSilhouette(sil_fuzzy, grayscale = TRUE) # Use grayscale palette plotSilhouette(sil_fuzzy, summary.legend = TRUE) # Include size + avg silhouette in legend plotSilhouette(sil_fuzzy, label = TRUE) # Label bars with row index ## ----custom1, fig.width=7, fig.height=4, fig.alt = "fig5.1"------------------- # Create a custom Silhouette object cluster_assignments <- c(1, 1, 2, 2, 3, 3) neighbor_clusters <- c(2, 2, 1, 1, 1, 1) silhouette_widths <- c(0.8, 0.7, 0.6, 0.9, 0.5, 0.4) weights <- c(0.9, 0.8, 0.7, 0.95, 0.6, 0.5) sil_custom <- getSilhouette( cluster = cluster_assignments, neighbor = neighbor_clusters, sil_width = silhouette_widths, weight = weights, proximity_type = "similarity", method = "pac", average = "fuzzy" ) # Validate the object is.Silhouette(sil_custom) # Basic class check: TRUE is.Silhouette(sil_custom, strict = TRUE) # Strict structural validation: TRUE is.Silhouette(data.frame(a = 1:6)) # Non-Silhouette object: FALSE # Visualize the custom Silhouette object plotSilhouette(sil_custom, summary.legend = TRUE) ## ----calSil1------------------------------------------------------------------ library(ppclust) data(iris) # Compute all silhouette methods using FCM clustering summary_result <- calSilhouette( prox_matrix = "d", prob_matrix = "u", proximity_type = "dissimilarity", clust_fun = ppclust::fcm, x = iris[, -5], centers = 3, print.summary = TRUE ) # View the results head(summary_result) ## ----calSil2------------------------------------------------------------------ # Perform clustering first fcm_result <- ppclust::fcm(iris[, -5], centers = 3) # Compute all silhouette methods using the clustering output summary_direct <- calSilhouette( prox_matrix = fcm_result$d, prob_matrix = fcm_result$u, proximity_type = "dissimilarity", a = 2, print.summary = TRUE ) # Access specific results head(summary_direct) ## ----calSil3------------------------------------------------------------------ # Compare FCM and FCM2 algorithms fcm_summary <- calSilhouette( prox_matrix = "d", prob_matrix = "u", proximity_type = "dissimilarity", clust_fun = ppclust::fcm, x = iris[, -5], centers = 3, print.summary = FALSE ) fcm2_summary <- calSilhouette( prox_matrix = "d", prob_matrix = "u", proximity_type = "dissimilarity", clust_fun = ppclust::fcm2, x = iris[, -5], centers = 3, print.summary = FALSE ) # Create comparison data frame comparison <- data.frame( Method = fcm_summary$Method, FCM_Crisp = fcm_summary$Crisp, FCM2_Crisp = fcm2_summary$Crisp, FCM_Fuzzy = fcm_summary$Fuzzy, FCM2_Fuzzy = fcm2_summary$Fuzzy, stringsAsFactors = FALSE ) print(comparison) ## ----calSil4, fig.width=8, fig.height=5, fig.alt = "fig7.1"------------------- library(ggplot2) library(tidyr) # Reshape data for plotting comparison_long <- tidyr::pivot_longer( comparison, cols = -Method, names_to = "Algorithm_Type", values_to = "Silhouette_Width" ) # Create grouped bar plot ggplot(comparison_long, aes(x = Method, y = Silhouette_Width, fill = Algorithm_Type)) + geom_bar(stat = "identity", position = "dodge") + theme_minimal() + theme( axis.text.x = element_text(angle = 45, hjust = 1, size = 10), legend.position = "bottom" ) + labs( title = "Comparison of Silhouette Methods: FCM vs FCM2", x = "Silhouette Method", y = "Average Silhouette Width", fill = "Algorithm & Type" ) + scale_fill_brewer(palette = "Set2") + geom_hline(yintercept = 0, linetype = "dashed", color = "gray40") ## ----calSil5, fig.width=8, fig.height=5, fig.alt = "fig7.2"------------------- # Compute silhouette summaries for k = 2 to 6 k_range <- 2:6 results_list <- list() for (k in k_range) { results_list[[as.character(k)]] <- calSilhouette( prox_matrix = "d", prob_matrix = "u", proximity_type = "dissimilarity", clust_fun = ppclust::fcm, x = iris[, -5], centers = k, print.summary = FALSE ) } # Extract crisp pac method silhouette widths for comparison pac_widths <- sapply(results_list, function(x) x$Crisp[x$Method == "pac"]) # Plot optimal k selection plot(k_range, pac_widths, type = "o", pch = 19, xlab = "Number of Clusters (k)", ylab = "Average Silhouette Width (PAC method)", main = "Optimal Cluster Selection using calSilhouette()", col = "steelblue", lwd = 2, ylim = c(min(pac_widths) * 0.95, max(pac_widths) * 1.05) ) grid() abline(h = max(pac_widths), lty = 2, col = "red") text(k_range[which.max(pac_widths)], max(pac_widths), labels = paste("Optimal k =", k_range[which.max(pac_widths)]), pos = 3, col = "red") ## ----calSil6------------------------------------------------------------------ # Get all pac-based methods pac_methods <- summary_result[grep("pac", summary_result$Method), ] cat("PAC-based methods:\n") print(pac_methods, row.names = FALSE) # Get all medoid-based methods medoid_methods <- summary_result[grep("medoid", summary_result$Method), ] cat("\nMedoid-based methods:\n") print(medoid_methods, row.names = FALSE) # Get probability-based methods (cer, db) prob_methods <- summary_result[summary_result$Method %in% c("cer", "db"), ] cat("\nProbability-based methods (cer, db):\n") print(prob_methods, row.names = FALSE) # Compare crisp vs fuzzy vs median averaging cat("\n=== Best Methods by Averaging Type ===\n") cat("Best method by crisp averaging:", summary_result$Method[which.max(summary_result$Crisp)], "(", round(max(summary_result$Crisp, na.rm = TRUE), 4), ")\n") cat("Best method by fuzzy averaging:", summary_result$Method[which.max(summary_result$Fuzzy)], "(", round(max(summary_result$Fuzzy, na.rm = TRUE), 4), ")\n") cat("Best method by median averaging:", summary_result$Method[which.max(summary_result$Median)], "(", round(max(summary_result$Median, na.rm = TRUE), 4), ")\n") ## ----calSil7------------------------------------------------------------------ library(proxy) data(iris) # K-means clustering (crisp clustering) km <- kmeans(iris[, -5], centers = 3) # Compute distance matrix dist_matrix <- proxy::dist(iris[, -5], km$centers) # Compute only proximity-based silhouettes (medoid and pac) crisp_summary <- calSilhouette( prox_matrix = dist_matrix, proximity_type = "dissimilarity", print.summary = TRUE ) # View results (note: no Fuzzy column since prob_matrix not provided) print(crisp_summary) ## ----calSil8, fig.width=8, fig.height=6, fig.alt = "fig7.3"------------------- library(ggplot2) library(tidyr) # Reshape data for heatmap heatmap_data <- tidyr::pivot_longer( summary_result, cols = c(Crisp, Fuzzy, Median), names_to = "Average_Type", values_to = "Silhouette_Width" ) # Create heatmap ggplot(heatmap_data, aes(x = Average_Type, y = Method, fill = Silhouette_Width)) + geom_tile(color = "white") + geom_text(aes(label = round(Silhouette_Width, 3)), color = "black", size = 3) + scale_fill_gradient2( low = "red", mid = "yellow", high = "green", midpoint = median(heatmap_data$Silhouette_Width, na.rm = TRUE), na.value = "gray90" ) + theme_minimal() + theme( axis.text.x = element_text(angle = 0, hjust = 0.5), axis.text.y = element_text(size = 10), legend.position = "right" ) + labs( title = "Silhouette Width Heatmap Across Methods and Averaging Types", x = "Averaging Type", y = "Silhouette Method", fill = "Silhouette\nWidth" ) ## ----ext1--------------------------------------------------------------------- library(blockcluster) data(iris) result <- coclusterContinuous(as.matrix(iris[, -5]), nbcocluster = c(3, 2)) ## ----ext2--------------------------------------------------------------------- sil_mode1 <- softSilhouette( prob_matrix = result@rowposteriorprob, method = "pac", print.summary = FALSE ) sil_mode2 <- softSilhouette( prob_matrix = result@colposteriorprob, method = "pac", print.summary = FALSE ) ## ----ext3--------------------------------------------------------------------- ext_sil <- extSilhouette( sil_list = list(sil_mode1, sil_mode2), dim_names = c("Rows", "Columns"), print.summary = TRUE )