## -----------------------------------------------------------------------------
# Packages used for data manipulation, plotting and building tibbles.
library(dplyr)
library(ggplot2)
library(tibble)

## -----------------------------------------------------------------------------
# Provides match_stopwords(); presumably also the datasets used below
# (reviews, stopwords_tr, phrases, reviews_test) -- verify against the package.
library(shoppingwords)

## -----------------------------------------------------------------------------
# Compact column-by-column overview of the `reviews` data.
glimpse(reviews)

## -----------------------------------------------------------------------------
# Abridged sample of what the overview of `reviews` looks like:
# $ rating 5, 5, 5, 5, 5, 1, 5, 5, 5, 5, 2, 3, 1, 1, 3, 3, 1, 1, 1, 1, 1, 3, 1, 4,...
# $ comment "I gave 5 stars so that the comment would be visible I ordered a 5-pack and ...
# $ id 3573, 3975, 4910, 4950, 5908, 6144, 6192, 6335, 6370, 6371,...

## -----------------------------------------------------------------------------
# Rows 37 to 39 of the `stopwords_tr` table.
stopwords_tr |>
  slice(37:39)

## -----------------------------------------------------------------------------
# For comparison: the first five "tr" entries from the stopwords-iso source
# of the stopwords package.
stopwords::stopwords("tr", source = "stopwords-iso") |>
  head(n = 5)

## -----------------------------------------------------------------------------
# Rows 7 and 8 of the `phrases` table.
phrases |>
  slice(7:8)

## -----------------------------------------------------------------------------
# Remove stopwords
cleaned_reviews <- match_stopwords(reviews)

# Mean number of characters in `cleaned_text` for each rating value.
cleaned_reviews |>
  group_by(rating) |>
  summarise(
    avg_text_length = mean(nchar(cleaned_text))
  )

## -----------------------------------------------------------------------------
# A two-row toy input with the `comment` and `rating` columns, run through
# the same stopword matching step.
reviews_sample <- tibble(
  comment = c(
    "Bu ürün xs ancak fiyatı yüksek gibi",
    "Fiyat çok pahalı ama kaliteli iyi"
  ),
  rating = c(4.5, 3.0)
)

cleaned_sample <- match_stopwords(reviews_sample)

## -----------------------------------------------------------------------------
# First three rows of `reviews_test`.
reviews_test |>
  slice_head(n = 3)

## -----------------------------------------------------------------------------
# Number of reviews per `emotion` value, most frequent first.
reviews_test |>
  count(emotion, sort = TRUE)

## -----------------------------------------------------------------------------
# Counts per rating/emotion pair, then ordered from the highest rating down.
reviews_test |>
  count(rating, emotion, sort = TRUE) |>
  arrange(desc(rating))

## ----plot-example, fig.alt = "The distribution of the reviews across all ratings"----
# Side-by-side bars: one bar per emotion code within each rating.
# The legend relabels the codes "p" and "n" as Positive and Negative.
reviews_test |>
  count(rating, emotion) |>
  ggplot(
    mapping = aes(x = factor(rating), y = n, fill = emotion)
  ) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c("p" = "lightblue", "n" = "darkred"),
    labels = c("p" = "Positive", "n" = "Negative")
  ) +
  labs(
    x = "User Ratings",
    y = "Number of Reviews",
    fill = "Polarity"
  ) +
  theme_minimal() +
  theme(legend.position = "right")