## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.height = 5, fig.width = 7, out.width = "80%", fig.show = "hold", fig.align = "center" ) ## ----reallib, include = FALSE------------------------------------------------- library(streamsampler) df <- streamsampler::streamdat ## ----fakelib, eval = FALSE---------------------------------------------------- # library(streamsampler) # library(dataRetrieval) ## ----eval=FALSE--------------------------------------------------------------- # daily <- dataRetrieval::readNWISdv( # siteNumbers = "01481500", # parameterCd = "00060", # startDate = "2007-10-01", # endDate = "2023-09-30" # ) # # qw <- dataRetrieval::readNWISdv( # siteNumbers = "01481500", # parameterCd = "00095", # startDate = "2007-10-01", # endDate = "2023-09-30" # ) # # daily <- daily[, -c(1,2,5)] # colnames(daily) <- c("date", "q") # # qw <- qw[, -c(1,2,5)] # colnames(qw) <- c("date", "sc") # # df <- merge(daily, qw, by = "date", all.x = TRUE, all.y = TRUE) ## ----dim---------------------------------------------------------------------- dim(df) ## ----evalq-------------------------------------------------------------------- # Subset for discharge record q_dates <- df[!is.na(df$q), "date"] results <- eval_dates( dates = q_dates, rec_start = as.Date("2007-10-01"), rec_end = as.Date("2023-09-30"), by = "day" ) print(results) ## ----evalqw------------------------------------------------------------------- # Subset for sc record qw_dates <- df[!is.na(df$sc), "date"] sc_completeness <- eval_dates( dates = qw_dates, rec_start = as.Date("2007-10-01"), rec_end = as.Date("2023-09-30"), by = "day" ) print(sc_completeness) ## ----evalex------------------------------------------------------------------- ex_sample_dates <- seq.Date( from = as.Date("2020-01-01"), to = as.Date("2022-12-31"), by = "9 days" ) eval_dates( dates = ex_sample_dates, rec_start = as.Date("2020-01-01"), rec_end = as.Date("2022-12-31"), by = "week" ) ## ----gapsqw------------------------------------------------------------------- qw_dates <- as.Date(df[!is.na(df$sc), "date"]) qw_gaps <- find_gaps(dates = qw_dates) head(qw_gaps) ## ----viewgaps----------------------------------------------------------------- gap_start <- which( df$date == qw_gaps[1, "start"] ) gap_end <- which( df$date == qw_gaps[1, "end"] ) df[(gap_start - 1):(gap_end + 1), ] ## ----evalsign----------------------------------------------------------------- q_data <- df[!is.na(df$q), "q"] eval_sign(values = q_data) ## ----statsqw------------------------------------------------------------------ df_stats <- qw_stats( dates = df$date, values = df$sc, rec_start = as.Date("2007-10-01"), rec_end = as.Date("2023-09-30"), by = "day" ) df_stats ## ----summmonth---------------------------------------------------------------- season_summary <- summarize_seasons( dates = df$date, values = df$q, season_start = 10, n_seasons = 4 ) head(season_summary$monthly) ## ----summseason--------------------------------------------------------------- head(season_summary$seasonal) ## ----sznavgplot--------------------------------------------------------------- df_seasons <- season_summary$seasonal df_seasons <- df_seasons[order(df_seasons$adj_year, df_seasons$season), ] plot( df_seasons$avg_value, type = "l", xaxt = "n", xlab = "Water Year", ylim = c(0, 1200) ) axis( 1, at = seq(1, length(df_seasons$adj_year), by = 4), labels = unique(df_seasons$adj_year), las = 2 ) ## ----rollmeanq---------------------------------------------------------------- roll_q <- rollmean_date( dates = df$date, values = df$q, look_behind = 29, look_units = "days" ) df[["rollmean_q"]] <- roll_q df_rollq_seasons <- summarize_seasons( dates = df$date, values = df$rollmean_q, season_start = 10, n_seasons = 4 )$seasonal df_rollq_seasons <- df_rollq_seasons[order(df_rollq_seasons$adj_year, df_rollq_seasons$season), ] plot( df_seasons$avg_value, type = "l", col = "darkgray", xaxt = "n", xlab = "Water Year", ylab = "Discharge (cfs)", ylim = c(0, 1200) ) lines( df_rollq_seasons$avg_value, col = "black" ) axis( 1, at = seq(1, length(df_rollq_seasons$adj_year), by = 4), labels = unique(df_rollq_seasons$adj_year), las = 2 ) legend( "topleft", c("Q", "30-day mean (cfs)"), col = c("darkgray", "black"), lty = 1 ) ## ----defthresh---------------------------------------------------------------- rollq_thresh <- thresholds( dates = df$date, values = df$rollmean_q, season_start = 10, n_seasons = 4, half_win = 2, threshold = 0.8 ) head(rollq_thresh) ## ----subsamp------------------------------------------------------------------ ss_sc <- subsample( dates = df$date, values = df$sc, thresh_ref = df$rollmean_q ) head(ss_sc) ## ----plotss------------------------------------------------------------------- not_selected <- ss_sc[ss_sc$selection_type == "not_selected", ] blw_thresh <- ss_sc[ss_sc$selection_type == "below_threshold", ] excd_thresh <- ss_sc[ss_sc$selection_type == "exceeds_threshold", ] # Sampling across dates plot( not_selected$date, not_selected$thresh_ref, col = "gray", log = "y", ylim = c(50, 5000), xlab = "Date", ylab = "Q (cfs)" ) points( blw_thresh$date, blw_thresh$thresh_ref, col = "blue", pch = 16 ) points( excd_thresh$date, excd_thresh$thresh_ref, col = "purple", pch = 16 ) legend("topright", c("not_selected", "below_threshold", "exceeds_threshold"), fill = c("gray", "blue", "purple") ) # Sampling across the threshold reference plot( not_selected$thresh_ref, not_selected$value, log = "x", ylim = c(0, max(ss_sc$value, na.rm = TRUE)), xlim = c(50, 2000), col = "gray", xlab = "Q (cfs)", ylab = "SC (uS/cm)" ) points( blw_thresh$thresh_ref, blw_thresh$value, col = "blue", pch = 16 ) points( excd_thresh$thresh_ref, excd_thresh$value, col = "purple", pch = 16 ) legend("topleft", c("not_selected", "below_threshold", "exceeds_threshold"), fill = c("gray", "blue", "purple") ) # Compare spread ss_sc$q_lab <- "Discharge" ss_sc$sc_lab <- "SC" boxplot( thresh_ref ~ selection_type + q_lab, data = ss_sc, at = 1:3, xlim = c(0.5, 7.0), log = "y", col = "#7fc97f", ylab = "", xlab = "", xaxt = "n" ) boxplot( value ~ selection_type + sc_lab, data = ss_sc, add = TRUE, at = 5:7 - 0.5, xaxt = "n", col = "#beaed4" ) axis( 1, at = c(1:3, 5:7 - 0.5), labels = rep(c("below", "exceeds", "not\nsampled"), 2), lwd = 0 ) legend( "topright", c("Discharge (cfs)", "SC (uS/cm)"), fill = c("#7fc97f", "#beaed4") ) ## ----plotssnew---------------------------------------------------------------- ss_sc_peaks <- subsample( dates = df$date, values = df$sc, n_samples = 1, freq = "quarter", thresh_ref = df$sc, threshold = 0.9, n_et_samples = 10, look_behind = 14, look_ahead = 14, look_units = "days", season_weights = c(1, 1, 3), season_start = 1, n_seasons = 3 ) ss_df <- merge(df, ss_sc_peaks[, c("date", "selection_type")]) not_selected <- ss_df[ss_df$selection_type == "not_selected", ] blw_thresh <- ss_df[ss_df$selection_type == "below_threshold", ] excd_thresh <- ss_df[ss_df$selection_type == "exceeds_threshold", ] # Sampling across dates plot( not_selected$date, not_selected$sc, col = "gray", log = "y", xlab = "Date", ylab = "SC (uS/cm)" ) points( blw_thresh$date, blw_thresh$sc, col = "blue", pch = 16 ) points( excd_thresh$date, excd_thresh$sc, col = "purple", pch = 16 ) legend("topleft", c("not_selected", "below_threshold", "exceeds_threshold"), fill = c("gray", "blue", "purple") ) # Sampling across the threshold reference plot( not_selected$q, not_selected$sc, col = "gray", log = "x", ylim = c(0, max(ss_sc$value, na.rm = TRUE)), xlab = "Q (cfs)", ylab = "SC (uS/cm)" ) points( blw_thresh$q, blw_thresh$sc, col = "blue", pch = 16 ) points( excd_thresh$q, excd_thresh$sc, col = "purple", pch = 16 ) legend("topleft", c("not_selected", "below_threshold", "exceeds_threshold"), fill = c("gray", "blue", "purple") ) # Compare spread ss_df$q_lab <- "Discharge" ss_df$sc_lab <- "SC" boxplot( q ~ selection_type + q_lab, data = ss_df, # at = 1:3 - 0.2, at = 1:3, # boxwex = 0.25, xlim = c(0.5, 7.0), log = "y", col = "#7fc97f", ylab = "", xlab = "", xaxt = "n" # names = c("below", "exceeds", "not\nsampled") ) boxplot( sc ~ selection_type + sc_lab, data = ss_df, add = TRUE, at = 5:7 - 0.5, xaxt = "n", col = "#beaed4" # names = c("below", "exceeds", "not\nsampled") ) axis( 1, at = c(1:3, 5:7 - 0.5), labels = rep(c("below", "exceeds", "not\nsampled"), 2), lwd = 0 ) legend( "topright", c("Discharge (cfs)", "SC (uS/cm)"), fill = c("#7fc97f", "#beaed4") ) ## ----------------------------------------------------------------------------- sroutine <- subsample_routine( dates = df$date, values = df$sc, day = 15, freq = "month" ) sroutine <- merge(df[, -c(3, 4)], sroutine) plot( sroutine[sroutine$selection_type == "not_selected", "date"], sroutine[sroutine$selection_type == "not_selected", "value"], col = "gray", log = "y", xlab = "Date", ylab = "SC (uS/cm)" ) points( sroutine[sroutine$selection_type == "routine", "date"], sroutine[sroutine$selection_type == "routine", "value"], col = "blue", pch = 16 ) legend("topleft", c("Not Selected", "Routine"), fill = c("gray", "blue") ) plot( sroutine$q[sroutine$selection_type == "not_selected"], sroutine$value[sroutine$selection_type == "not_selected"], pch = 21, col = "gray", xlab = "Q (cfs)", ylab = "SC (uS/cm)", main = "Subsampled Daily Data", log = "x" ) points( sroutine$q[sroutine$selection_type != "not_selected"], sroutine$value[sroutine$selection_type != "not_selected"], pch = 16, cex = 1.5, col = c( "routine" = "blue" )[sroutine$selection_type[sroutine$selection_type != "not_selected"]] ) legend( "topright", legend = c("Not Selected", "Routine"), col = c("gray", "blue"), pch = c(21, 16), bty = "n" )