Skip to content

Commit

Permalink
Merge pull request #542 from USEPA/11.6.24review-cm
Browse files Browse the repository at this point in the history
data retrieval updates
  • Loading branch information
cristinamullin authored Nov 8, 2024
2 parents a8b4428 + 5deb5cf commit 3cd90e2
Show file tree
Hide file tree
Showing 28 changed files with 670 additions and 377 deletions.
4 changes: 2 additions & 2 deletions R/Figures.R
Original file line number Diff line number Diff line change
Expand Up @@ -588,8 +588,8 @@ TADA_OverviewMap <- function(.data) {
#'
TADA_FlaggedSitesMap <- function(.data) {
invalid <- TADA_FlagCoordinates(.data, flaggedonly = TRUE)
lowres <- invalid[invalid$TADA.InvalidCoordinates.Flag == "Imprecise_lessthan3decimaldigits", ]
outsideusa <- invalid[invalid$TADA.InvalidCoordinates.Flag %in% c("LAT_OutsideUSA", "LONG_OutsideUSA"), ]
lowres <- invalid[invalid$TADA.SuspectCoordinates.Flag == "Imprecise_lessthan3decimaldigits", ]
outsideusa <- invalid[invalid$TADA.SuspectCoordinates.Flag %in% c("LAT_OutsideUSA", "LONG_OutsideUSA"), ]
nearby <- TADA_FindNearbySites(.data)
print(colnames(nearby))
nearby <- TADA_GetUniqueNearbySites(nearby)
Expand Down
23 changes: 12 additions & 11 deletions R/RequiredCols.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,7 @@ require.cols <- c(
"LongitudeMeasure",
"TADA.LongitudeMeasure", # generated
"HorizontalCoordinateReferenceSystemDatumName",
"geometry",
"TADA.InvalidCoordinates.Flag", # generated
"TADA.SuspectCoordinates.Flag", # generated
"HUCEightDigitCode",
"MonitoringLocationIdentifier", # required
"TADA.MonitoringLocationIdentifier",
Expand All @@ -200,7 +199,7 @@ extra.cols <- c(
"ActivityEndDate",
"ActivityEndTime.Time",
"ActivityEndTime.TimeZoneCode",
"ActivityEndDateTime", # generated by USGS DR
"ActivityEndDateTime", # originally generated by USGS DR, # no longer in default dataRetrieval profile? 11/7/24
"ActivityConductingOrganizationText",
"SampleAquifer",
"ActivityLocation.LatitudeMeasure",
Expand All @@ -215,8 +214,9 @@ extra.cols <- c(
"AnalysisStartDate",
"ResultDetectionQuantitationLimitUrl",
"LabSamplePreparationUrl",
"timeZoneStart",
"timeZoneEnd",
"timeZoneStart", # no longer in default dataRetrieval profile? 11/7/24
"timeZoneEnd", # no longer in default dataRetrieval profile? 11/7/24
"ActivityStartTime.TimeZoneCode_offset", # new column from default dataRetrieval profile? 11/7/24
"SourceMapScaleNumeric",
"HorizontalAccuracyMeasure.MeasureValue",
"HorizontalAccuracyMeasure.MeasureUnitCode",
Expand All @@ -232,7 +232,9 @@ extra.cols <- c(
"DrainageAreaMeasure.MeasureValue",
"DrainageAreaMeasure.MeasureUnitCode",
"ContributingDrainageAreaMeasure.MeasureValue",
"ContributingDrainageAreaMeasure.MeasureUnitCode"
"ContributingDrainageAreaMeasure.MeasureUnitCode",
"ProviderName",
"LastUpdated"
)

attains.cols <- c(
Expand All @@ -256,11 +258,10 @@ attains.cols <- c(

# Only used in TADA Shiny or should be at the end
last.cols <- c(
"ProviderName",
"LastUpdated",
"TADA.Remove",
"TADA.RemovalReason",
"TADAShiny.tab"
"TADAShiny.tab",
"geometry"
)


Expand Down Expand Up @@ -329,7 +330,7 @@ TADA_OrderCols <- function(.data) {
#'
TADA_GetTemplate <- function() {
# remove names with TADA. string from require.cols
template_cols <- c(require.cols, last.cols)
template_cols <- require.cols
template_cols <- Filter(function(x) !any(grepl("TADA.", x)), template_cols)
templatedata <- data.frame()
templatedata <- data.frame(matrix(nrow = 0, ncol = length(template_cols)))
Expand Down Expand Up @@ -526,7 +527,7 @@ TADA_RetainRequired <- function(.data) {
print("TADA_RetainRequired: removing columns not required for TADA workflow including original columns that have been replaced with TADA prefix duplicates.")

# Create list of all columns to be retained
keep.cols <- c(require.cols, last.cols)
keep.cols <- c(require.cols, attains.cols, last.cols)

# create list of all columns in original data set
original.cols <- .data %>% names()
Expand Down
225 changes: 111 additions & 114 deletions R/ResultFlagsDependent.R

Large diffs are not rendered by default.

112 changes: 56 additions & 56 deletions R/ResultFlagsIndependent.R

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions R/TADARefTables.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ TADA_GetNutrientSummationRef <- function() {
#' input data relates to target synonyms for TADA.CharacteristicName, TADA.ResultSampleFractionText,
#' and TADA.MethodSpeciationName. Once the synonym table is created, users may optionally edit the
#' target columns in the reference table to meet their needs. Additionally, the function assumes
#' the user has already removed any data containing invalid characteristic-unit-fraction-speciation
#' the user has already removed any data containing suspect characteristic-unit-fraction-speciation
#' combinations (i.e. user has already run TADA_FlagFraction, TADA_FlagSpeciation, TADA_FlagResultUnit,
#' etc.).
#'
Expand All @@ -44,8 +44,8 @@ TADA_GetNutrientSummationRef <- function() {
#' # Create a synonym reference table for flagged, cleaned dataframe:
#' Data_6Tribes_5yClean <- subset(Data_6Tribes_5y, !is.na(Data_6Tribes_5y$TADA.ResultMeasureValue))
#' Data_6Tribes_5yClean <- TADA_FlagFraction(Data_6Tribes_5yClean, clean = TRUE)
#' Data_6Tribes_5yClean <- TADA_FlagResultUnit(Data_6Tribes_5yClean, clean = "invalid_only")
#' Data_6Tribes_5yClean <- TADA_FlagSpeciation(Data_6Tribes_5yClean, clean = "invalid_only")
#' Data_6Tribes_5yClean <- TADA_FlagResultUnit(Data_6Tribes_5yClean, clean = "suspect_only")
#' Data_6Tribes_5yClean <- TADA_FlagSpeciation(Data_6Tribes_5yClean, clean = "suspect_only")
#' Data_6Tribes_5yClean <- TADA_FlagMethod(Data_6Tribes_5yClean, clean = TRUE)
#' CreateRefTable <- TADA_GetSynonymRef(Data_6Tribes_5yClean)
#'
Expand All @@ -69,20 +69,20 @@ TADA_GetSynonymRef <- function(.data) {
TADA_CheckColumns(.data, expected_cols)

if (!any(c("TADA.MethodSpeciation.Flag", "TADA.SampleFraction.Flag", "TADA.ResultUnit.Flag") %in% names(.data))) {
print("Warning: This dataframe is missing TADA QC flagging columns, indicating that you have not yet run the TADA_FlagResultUnit, TADA_FlagFraction, or TADA_FlagSpeciation functions. It is highly recommended you run these flagging functions and remove Invalid combinations before proceeding to this step.")
print("Warning: This dataframe is missing TADA QC flagging columns, indicating that you have not yet run the TADA_FlagResultUnit, TADA_FlagFraction, or TADA_FlagSpeciation functions. It is highly recommended you run these flagging functions and remove Suspect combinations before proceeding to this step.")
}

# check to see if any invalid data flags exist
# check to see if any suspect data flags exist
check_inv <- .data[, names(.data) %in% c("TADA.MethodSpeciation.Flag", "TADA.SampleFraction.Flag", "TADA.ResultUnit.Flag")]
check_inv <- check_inv %>%
tidyr::pivot_longer(cols = names(check_inv), names_to = "Flag_Column") %>%
dplyr::filter(value == "Invalid")
dplyr::filter(value == "Suspect")

if (dim(check_inv)[1] > 0) {
check_inv <- check_inv %>%
dplyr::group_by(Flag_Column) %>%
dplyr::summarise("Result Count" = length(value))
print("Warning: Your dataframe contains invalid metadata combinations in the following flag columns:")
print("Warning: Your dataframe contains suspect metadata combinations in the following flag columns:")
print(as.data.frame(check_inv))
}

Expand Down
4 changes: 2 additions & 2 deletions R/Transformations.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@
#' # Create a synonym reference table for flagged, cleaned dataframe:
#' Data_6Tribes_5yClean <- subset(Data_6Tribes_5y, !is.na(Data_6Tribes_5y$TADA.ResultMeasureValue))
#' Data_6Tribes_5yClean <- TADA_FlagFraction(Data_6Tribes_5yClean, clean = TRUE)
#' Data_6Tribes_5yClean <- TADA_FlagResultUnit(Data_6Tribes_5yClean, clean = "invalid_only")
#' Data_6Tribes_5yClean <- TADA_FlagSpeciation(Data_6Tribes_5yClean, clean = "invalid_only")
#' Data_6Tribes_5yClean <- TADA_FlagResultUnit(Data_6Tribes_5yClean, clean = "suspect_only")
#' Data_6Tribes_5yClean <- TADA_FlagSpeciation(Data_6Tribes_5yClean, clean = "suspect_only")
#' Data_6Tribes_5yClean <- TADA_FlagMethod(Data_6Tribes_5yClean, clean = TRUE)
#' CreateRefTable <- TADA_GetSynonymRef(Data_6Tribes_5yClean)
#'
Expand Down
35 changes: 23 additions & 12 deletions R/Utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ utils::globalVariables(c(
"QAPPApprovalAgencyName", "QAPPApprovedIndicator",
"ResultDetectionConditionText", "ResultMeasureValue",
"SamplingDesignTypeCode", "Source", "Status", "TADA.ContinuousData.Flag",
"TADA.InvalidCoordinates.Flag", "TADA.PotentialDupRowIDs.Flag", "TADA.QAPPDocAvailable",
"TADA.SuspectCoordinates.Flag", "TADA.PotentialDupRowIDs.Flag", "TADA.QAPPDocAvailable",
"Target.Unit", "Type", "Value.Unit", "TADA.AnalyticalMethod.Flag",
"TADA.MethodSpeciation.Flag", "TADA.ResultUnit.Flag",
"TADA.SampleFraction.Flag", "YearSummarized", "where", "TADA.CharacteristicName",
Expand Down Expand Up @@ -490,7 +490,7 @@ TADA_CheckColumns <- function(.data, expected_cols) {
#'
TADA_ConvertSpecialChars <- function(.data, col, percent.ave = TRUE) {
if (!col %in% names(.data)) {
stop("Invalid column name specified for input dataset.")
stop("Suspect column name specified for input dataset.")
}

# Define new column names
Expand Down Expand Up @@ -621,7 +621,7 @@ TADA_ConvertSpecialChars <- function(.data, col, percent.ave = TRUE) {
#' Substitute Preferred Characteristic Name for Deprecated Names
#'
#' This function uses the WQX Characteristic domain table to substitute
#' deprecated (i.e. retired and/or invalid) Characteristic Names with the new
#' deprecated (i.e. retired and/or suspect) Characteristic Names with the new
#' name in the TADA.CharacteristicName column. TADA_SubstituteDeprecatedChars is
#' run within TADA_AutoClean, which runs within TADA_DataRetrieval and (if autoclean = TRUE)
#' in TADA_BigDataRetrieval. Therefore, deprecated characteristic names are
Expand Down Expand Up @@ -756,13 +756,24 @@ TADA_FormatDelimitedString <- function(delimited_string, delimiter = ",") {
#' group.
#'
#' @param .data TADA dataframe OR TADA sites dataframe
#' @param dist_buffer Numeric. The maximum distance (in meters) two sites can be
#' from one another to be considered "nearby" and grouped together.
#'
#' @return Input dataframe with a TADA.MonitoringLocationIdentifier column that indicates
#' the nearby site groups each monitoring location belongs to.
#' @param dist_buffer Numeric. The maximum distance (in meters) two sites can
#' be from one another to be considered "nearby" and grouped together.
#' The default is 100m.
#'
#' @return Input dataframe with a TADA.MonitoringLocationIdentifier column that
#' indicates the nearby site groups each monitoring location belongs to. Grouped
#' sites are concatenated in the TADA.MonitoringLocationIdentifier column
#' (e.g. "USGS-10010025","USGS-10010026" enclosed in square brackets []).
#' This JSON array is the new TADA monitoring location ID for the grouped sites.
#' TADA.MonitoringLocationIdentifier can be leveraged to analyze data from
#' nearby sites together (as the same general location).
#'
#' @export
#'
#' @examples
#' GroupNearbySites_100m <- TADA_FindNearbySites(Data_Nutrients_UT)
#' GroupNearbySites_10m <- TADA_FindNearbySites(Data_Nutrients_UT, dist_buffer = 10)
#'
#'
TADA_FindNearbySites <- function(.data, dist_buffer = 100) {
# check .data is data.frame
Expand Down Expand Up @@ -1081,7 +1092,7 @@ TADA_AggregateMeasurements <- function(.data, grouping_cols = c("ActivityStartDa
#' @param .data A TADA dataframe
#' @param remove_na Boolean. Determines whether to keep TADA.ResultMeasureValues that are NA.
#' Defaults to TRUE.
#' @param clean Boolean. Determines whether to keep the Invalid rows in the dataset following each
#' @param clean Boolean. Determines whether to keep the Suspect rows in the dataset following each
#' flagging function. Defaults to TRUE.
#'
#' @return A TADA dataframe with the following flagging columns: TADA.ResultUnit.Flag,
Expand All @@ -1095,17 +1106,17 @@ TADA_AggregateMeasurements <- function(.data, grouping_cols = c("ActivityStartDa
#' # Run flagging functions, keeping all rows
#' Data_6Tribes_5y_ALL <- TADA_RunKeyFlagFunctions(Data_6Tribes_5y, remove_na = FALSE, clean = FALSE)
#'
#' # Run flagging functions, removing NA's and Invalid rows
#' # Run flagging functions, removing NA's and Suspect rows
#' Data_6Tribes_5y_CLEAN <- TADA_RunKeyFlagFunctions(Data_6Tribes_5y, remove_na = TRUE, clean = TRUE)
TADA_RunKeyFlagFunctions <- function(.data, remove_na = TRUE, clean = TRUE) {
if (remove_na == TRUE) {
.data <- .data %>% dplyr::filter(!is.na(TADA.ResultMeasureValue))
}

if (clean == TRUE) {
.data <- TADA_FlagResultUnit(.data, clean = "invalid_only")
.data <- TADA_FlagResultUnit(.data, clean = "suspect_only")
.data <- TADA_FlagFraction(.data, clean = TRUE)
.data <- TADA_FlagSpeciation(.data, clean = "invalid_only")
.data <- TADA_FlagSpeciation(.data, clean = "suspect_only")
.data <- TADA_FindQCActivities(.data, clean = TRUE)
} else {
.data <- TADA_FlagResultUnit(.data, clean = "none")
Expand Down
4 changes: 2 additions & 2 deletions R/WQXRefTables.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ TADA_GetWQXCharValRef <- function() {
WQXcharValRef <- raw.data %>%
dplyr::mutate(TADA.WQXVal.Flag = dplyr::case_when(
Status %in% notreviewed ~ "Not Reviewed",
Status %in% valid ~ "Valid",
Status %in% invalid ~ "Invalid",
Status %in% valid ~ "Pass",
Status %in% invalid ~ "Suspect",
Status %in% nonstandard ~ "NonStandardized",
Status %in% NA ~ "Not Reviewed",
TRUE ~ as.character("Not Reviewed")
Expand Down
4 changes: 2 additions & 2 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ StringA
StringB
SubstituteDeprecatedChars
SummarizeColumn
SuspectCoordinates
TADA
TADADataRetrieval
TADAGeospatialRefLayers
Expand Down Expand Up @@ -281,9 +282,8 @@ WQXMonitoringLocationTypeName
WQXRefTables
WQXResultDetectionConditionRef
WQXResultUnitConversion
WQXResultUnitCoversion
WQXTargetUnit
WQXUnitConversionFactor
WQXVal
WQXcharValRef
WQXunitRef
WaterSciCon
Expand Down
20 changes: 16 additions & 4 deletions man/TADA_FindNearbySites.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/TADA_FlagAboveThreshold.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 14 additions & 14 deletions man/TADA_FlagCoordinates.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3cd90e2

Please sign in to comment.