## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 6, fig.height = 4 ) ## ----------------------------------------------------------------------------- library(TopicTestlet) set.seed(1234) ## ----------------------------------------------------------------------------- # Simulation parameters N_students <- 100 J_items <- 4 K_topics_true <- 2 # A. Simulate Numeric Scores (0-2) score_matrix <- matrix( sample(0:2, N_students * J_items, replace = TRUE), nrow = N_students, ncol = J_items ) # B. Simulate Textual Essays # Define vocabularies for two topics vocab_topic1 <- c("logic", "reasoning", "evidence", "fact", "analysis", "data") vocab_topic2 <- c("feeling", "story", "character", "plot", "narrative", "mood") # Helper function to generate a random essay generate_essay <- function() { words <- sample(c(vocab_topic1, vocab_topic2), size = 20, replace = TRUE) paste(words, collapse = " ") } # Create matrix of essays essay_matrix <- matrix( replicate(N_students * J_items, generate_essay()), nrow = N_students, ncol = J_items ) # Preview the data head(score_matrix, 3) substr(essay_matrix[1,1], 1, 50) # First 50 chars of first essay ## ----------------------------------------------------------------------------- text_vector <- aggregate_responses(essay_matrix) # Check the first student's aggregated text substr(text_vector[1], 1, 60) ## ----------------------------------------------------------------------------- # In a real analysis, you might check a wider range (e.g., 2:10) perp_results <- ttm_perplexity(text_vector, k_range = 2:3) print(perp_results) # Select the K with the lowest perplexity best_k <- perp_results$k[which.min(perp_results$perplexity)] cat("Optimal number of topics:", best_k) ## ----------------------------------------------------------------------------- delta_matrix <- ttm_lda(text_vector, k = best_k) # The result is an N x K matrix head(delta_matrix) ## ----------------------------------------------------------------------------- # We use max_iter = 50 for speed in this vignette. # For operational use, allow more iterations for convergence. ttm_results <- ttm_est( scores = score_matrix, delta = delta_matrix, max_iter = 50 ) # Model Fit Statistics print(paste("AIC:", round(ttm_results$AIC, 2))) print(paste("BIC:", round(ttm_results$BIC, 2))) ## ----------------------------------------------------------------------------- plot(ttm_results$theta, ttm_results$gamma, xlab = "Student Ability (Theta)", ylab = "Testlet Effect (Gamma)", main = "Relationship between Ability and Testlet Effect", pch = 19, col = rgb(0, 0, 1, 0.6)) grid() abline(lm(ttm_results$gamma ~ ttm_results$theta), col = "red", lwd = 2) ## ----------------------------------------------------------------------------- hist(ttm_results$theta, main = "Distribution of Estimated Abilities", xlab = "Theta", col = "lightblue", border = "white")