## ---- include = FALSE---------------------------------------------------------
# Document-wide chunk defaults: set the knitr chunk options `collapse` and
# `comment` once here instead of repeating them on every chunk.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(matchmaker)

# Load the example data set located via matchmaker_example(), keeping text
# columns as character vectors rather than factors
dat <- read.csv(
  file = matchmaker_example("coded-data.csv"),
  stringsAsFactors = FALSE
)
# Convert the `date` column to class Date
dat$date <- as.Date(dat$date)

# Load the example spelling dictionary the same way
dict <- read.csv(
  file = matchmaker_example("spelling-dictionary.csv"),
  stringsAsFactors = FALSE
)

## ----show_data, echo = FALSE--------------------------------------------------
# Render the first rows of the data set as a table
knitr::kable(x = head(dat))

## ----show_dictionary, echo = FALSE--------------------------------------------
# Render the complete dictionary as a table
knitr::kable(x = dict)

## ----example------------------------------------------------------------------
# Apply the dictionary to the data; `from`, `to` and `by` name the dictionary
# columns that match_df() should use
cleaned <- match_df(
  dat,
  dictionary = dict,
  from = "options",
  to = "values",
  by = "grp"
)
# Peek at the first rows of the result
head(cleaned)

## ----keys-example-------------------------------------------------------------
# Eleven names and one free-text answer per name (including a missing value
# and an empty string), stored together as a named character vector
who <- c(
  "Anakin", "Darth", "R2-D2", "Leia", "C-3PO", "Rey",
  "Obi-Wan", "Luke", "Chewy", "Owen", "Lando"
)
icecream <- setNames(
  c("a", "b", "c", "NO", "N", "yes", "Y", "n", "n", NA, ""),
  who
)
icecream

## ----mydict-1, echo = FALSE, results = 'asis'---------------------------------
# Lookup table for the answers: each entry of `keys` is paired with the entry
# at the same position in `values`
my_dict1 <- data.frame(
  keys   = c("yes", "Y",   "n",  "N",  "NO", ".missing", ".default"),
  values = c("Yes", "Yes", "No", "No", "No", ".na",      "(invalid)"),
  stringsAsFactors = FALSE
)
# Render the lookup table with a caption naming the object
knitr::kable(x = my_dict1, caption = "my_dict1")

## ----key-value-change---------------------------------------------------------
# Translate the answers using the `keys` -> `values` columns of my_dict1
match_vec(
  icecream,
  dictionary = my_dict1,
  from = "keys",
  to = "values"
)

## ----luke-no-like-------------------------------------------------------------
# Overwrite Luke's answer with a spelling that is not listed in my_dict1,
# then run the same translation again
icecream[["Luke"]] <- "NOOOOOOO"
match_vec(
  icecream,
  dictionary = my_dict1,
  from = "keys",
  to = "values"
)

## ----mydict-2, echo = FALSE, results = 'asis'---------------------------------
# Display version of the regex dictionary. The two ".regex" keys carry a
# literal backslash in front of "^" (presumably so the caret renders verbatim
# in the printed table -- confirm); the keys are replaced right after the
# table is printed.
my_dict2 <- data.frame(
  keys = c(
    ".regex \\^[Yy][Ee]?[Ss]*$",
    ".regex \\^[Nn][Oo]*$",
    ".missing",
    ".default"
  ),
  values = c("Yes", "No", ".na", "(invalid)"),
  stringsAsFactors = FALSE
)

# Print the table while the keys still hold the backslash-prefixed carets
knitr::kable(x = my_dict2, caption = "my_dict2", escape = TRUE)
# Replace the keys with the patterns that have no backslash before "^"; these
# are the ones used by the code that follows
my_dict2[["keys"]] <- c(
  ".regex ^[Yy][Ee]?[Ss]*$",
  ".regex ^[Nn][Oo]*$",
  ".missing",
  ".default"
)

## ----luke-match---------------------------------------------------------------
# Translate the answers again, this time with the regex-based dictionary
match_vec(
  icecream,
  dictionary = my_dict2,
  from = "keys",
  to = "values"
)

## ----regex-df-----------------------------------------------------------------
# Collect and print the names of the lab_result columns
labs <- grep("^lab_result_", names(dat), value = TRUE)
print(labs)
# Structure of those columns before cleaning
str(dat[labs])
# Dictionary rows whose `grp` entry starts with ".regex"
print(dict[grepl("^[.]regex", dict$grp), ])
# Clean the data, additionally passing the `orders` column as `order`, and
# inspect the same columns afterwards
cleaned <- match_df(
  dat,
  dict,
  from = "options",
  to = "values",
  by = "grp",
  order = "orders"
)
str(cleaned[labs])

## ----global-df----------------------------------------------------------------
# Dictionary rows whose `grp` entry starts with ".regex"
print(dict[grepl("^[.]regex", dict$grp), ])
# lab_result columns as read from the file
str(dat[labs])
# the same columns after match_df()
str(cleaned[labs])

## ----global-keys--------------------------------------------------------------
# Dictionary rows whose `grp` entry starts with ".regex" or ".global"
print(
  dict[grepl("^[.](regex|global)", dict$grp), ]
)

## ----the_warning, message = TRUE----------------------------------------------
# Repeat the cleaning call with `warn = TRUE` set
cleaned <- match_df(
  dat,
  dict,
  from = "options",
  to = "values",
  by = "grp",
  order = "orders",
  warn = TRUE
)