diff --git a/.Rbuildignore b/.Rbuildignore index bb231a66a..e2018abba 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -73,3 +73,7 @@ vignettes/Reference_Lists.Rmd environment.yml ^\.positai$ ^\.claude$ +requirements.txt +^\.venv$ +.env + diff --git a/.gitignore b/.gitignore index 892ca673e..684336f1a 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,5 @@ vignettes/*.R /.quarto/ **/*.quarto_ipynb .positai +/.venv/ +/.env/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3dfcf3492..afbf6bea1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -8,7 +8,6 @@ workflow: default: tags: - chs-shared - - dind stages: - build @@ -36,9 +35,12 @@ variables: build-image: stage: build cache: [] - image: ${DEVOPS_REGISTRY}usgs/docker:20 + tags: + - chs-shared + - dind + image: ${DEVOPS_REGISTRY}usgs/docker:29 services: - - name: ${DEVOPS_REGISTRY}usgs/docker:20-dind + - name: ${DEVOPS_REGISTRY}usgs/docker:29-dind alias: docker script: - echo ${CI_REGISTRY_PASSWORD} | docker login -u ${CI_REGISTRY_USER} --password-stdin $CI_REGISTRY diff --git a/DESCRIPTION b/DESCRIPTION index e7ad2af72..f9ba7fdf4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dataRetrieval Type: Package Title: Retrieval Functions for USGS and EPA Hydrology and Water Quality Data -Version: 2.7.24.9001 +Version: 2.7.25 Authors@R: c( person("Laura", "DeCicco", role = c("aut","cre"), email = "ldecicco@usgs.gov", diff --git a/NEWS b/NEWS index 7daff6a98..4ea55cd3d 100644 --- a/NEWS +++ b/NEWS @@ -17,7 +17,10 @@ Because it is impossible to tell if they intended Dates or POSIX, we cannot know for sure and therefore could add incorrect filters to the query. 
* The "id" column that comes back from read_waterdata_fiel_meta was changed to field_measurements_series_id to match the expectation of `read_waterdata_field_measurements` - +* New argument added to read_waterdata_stats_por: "normal_type" accepts +"DOY" and "MOY" +* New argument added to read_waterdata_stats_daterange: "interval_type" accepts +"M" (month), "CY" (calendar year), and "WY" (water year). dataRetrieval 2.7.24 diff --git a/R/construct_api_requests.R b/R/construct_api_requests.R index c0469f64f..937f80b0f 100644 --- a/R/construct_api_requests.R +++ b/R/construct_api_requests.R @@ -67,16 +67,6 @@ construct_api_requests <- function( skipGeometry = full_list[["skipGeometry"]] ) - time_periods <- c( - "last_modified", - "datetime", - "time", - "begin", - "end", - "begin_utc", - "end_utc" - ) - full_list <- switch_arg_id( full_list, id_name = output_id, @@ -655,3 +645,14 @@ add_api_token <- function(req) { } req } + +# Treat these columns as time: +time_periods <- c( + "last_modified", + "datetime", + "time", + "begin", + "end", + "begin_utc", + "end_utc" +) diff --git a/R/read_waterdata_combined_meta.R b/R/read_waterdata_combined_meta.R index beb3f1189..de173e1db 100644 --- a/R/read_waterdata_combined_meta.R +++ b/R/read_waterdata_combined_meta.R @@ -121,6 +121,14 @@ #' monitoring_location_id = hucs$monitoring_location_id #' ) #' +#' # Query for instantaneous gage height data for a site in Iowa +#' sites_inst <- read_waterdata_combined_meta( +#' monitoring_location_id = "USGS-05418400", +#' parameter_code = "00065" +#' ) +#' +#' # parse individual thresholds lists: +#' threshold_1 <- jsonlite::fromJSON(sites_inst$thresholds[3]) #' #' } read_waterdata_combined_meta <- function( diff --git a/R/read_waterdata_peaks.R b/R/read_waterdata_peaks.R index 3837a4c9a..b647432b2 100644 --- a/R/read_waterdata_peaks.R +++ b/R/read_waterdata_peaks.R @@ -27,6 +27,16 @@ #' `r dataRetrieval:::get_properties_for_docs("peaks", "peak_id")`. 
#' The default (`NA`) will return all columns of the data. #' +#' @param allow_incomplete_dates Specifically in the peaks data, exact peak dates +#' are not always known. Sometimes peaks are known just for the year, sometimes +#' they are known to the year and month, and sometimes to the exact date. +#' This argument determines if incomplete dates + fake month/day values are +#' allowed in the "time" column so that it can be a Date object (`TRUE`), or whether +#' to use only the available year, month, day to get a character value (`FALSE`). +#' Default is `FALSE`. If set to `FALSE` but all dates are +#' complete, the "time" column will be returned as a Date object. +#' If this argument is set to `TRUE`, fake days or months are injected into the +#' "time" column. #' @inheritParams check_arguments_api #' @inheritParams check_arguments_non_api #' @@ -45,6 +55,10 @@ #' monitoring_location_id = wi_peaks$monitoring_location_id[1], #' parameter_code = "00060") #' +#' incomplete_dates <- read_waterdata_peaks( +#' monitoring_location_id = "USGS-06334330", +#' parameter_code = "00060") +#' #' } read_waterdata_peaks <- function( monitoring_location_id = NA_character_, @@ -64,6 +78,7 @@ read_waterdata_peaks <- function( time = NA_character_, bbox = NA, ..., + allow_incomplete_dates = FALSE, convertType = getOption("dataRetrieval.convertType"), no_paging = getOption("dataRetrieval.no_paging"), chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), @@ -75,7 +90,27 @@ read_waterdata_peaks <- function( rlang::check_dots_empty() args <- mget(names(formals())) + args[["allow_incomplete_dates"]] <- NULL return_list <- get_ogc_data(args, output_id, service) + if (anyNA(return_list[, c("year", "month", "day")])) { + if (allow_incomplete_dates) { + warning("Incomplete dates are included in time column.") + } else { + parse_time <- as.character(return_list$year) + parse_time[!is.na(return_list$month)] <- paste( + parse_time[!is.na(return_list$month)], + 
zeroPad(return_list$month[!is.na(return_list$month)], 2), + sep = "-" + ) + parse_time[!is.na(return_list$day)] <- paste( + parse_time[!is.na(return_list$day)], + zeroPad(return_list$day[!is.na(return_list$day)], 2), + sep = "-" + ) + return_list$time <- parse_time + } + } + return(return_list) } diff --git a/R/rejigger_cols.R b/R/rejigger_cols.R index acde9bfeb..70cf651b6 100644 --- a/R/rejigger_cols.R +++ b/R/rejigger_cols.R @@ -64,15 +64,17 @@ rejigger_cols <- function(df, properties, output_id) { #' cleanup_cols <- function(df, service) { if ("time" %in% names(df)) { - if (service == "daily") { + if (service %in% c("daily", "peaks")) { df$time <- as.Date(df$time) } else { attr(df$time, "tzone") <- "UTC" } } - if ("last_modified" %in% names(df)) { - attr(df$last_modified, "tzone") <- "UTC" + for (time_period_columns in time_periods) { + if (time_period_columns %in% names(df)) { + attr(df[[time_period_columns]], "tzone") <- "UTC" + } } df diff --git a/docker/Dockerfile b/docker/Dockerfile index 93422fde0..34cf659fb 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,8 +1,4 @@ -FROM code.chs.usgs.gov:5001/ctek/docker/r-lang/r-base:4.5 - -# Disable the annoying bell on WSL2 -RUN sed -i 's/^# set bell-style none$/set bell-style none/' /etc/inputrc -RUN echo 'set visualbell' >> /root/.vimrc +FROM code.chs.usgs.gov:5001/ctek/docker/r-lang/r-base:4.6 # Change the name of this environment to something which pleases you, if you # so please. But the name will not be relevant for most cases, as reticulate diff --git a/inst/CITATION b/inst/CITATION index 0a45adf0b..a77feb353 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -37,9 +37,9 @@ bibentry(bibtype = "Manual", title = "dataRetrieval: R packages for discovering and retrieving water data available from U.S. federal hydrologic web services", publisher = "U.S. Geological Survey", address="Reston, VA", - version = "2.7.24", + version = "2.7.25", institution = "U.S. 
Geological Survey", year = 2026, doi = "10.5066/P9X4L3GE", - textVersion = "De Cicco, L.A., Hirsch, R.M., Lorenz, D., Watkins, W.D., Johnson, M., Blodgett, D.L., Hinman, E.D., Zemmels, J., 2026, dataRetrieval: R packages for discovering and retrieving water data available from Federal hydrologic web services, v.2.7.24, doi:10.5066/P9X4L3GE" + textVersion = "De Cicco, L.A., Hirsch, R.M., Lorenz, D., Watkins, W.D., Johnson, M., Blodgett, D.L., Hinman, E.D., Zemmels, J., 2026, dataRetrieval: R packages for discovering and retrieving water data available from Federal hydrologic web services, v.2.7.25, doi:10.5066/P9X4L3GE" ) diff --git a/man/get_nldi_sources.Rd b/man/get_nldi_sources.Rd index ceabebd11..928271eff 100644 --- a/man/get_nldi_sources.Rd +++ b/man/get_nldi_sources.Rd @@ -4,7 +4,7 @@ \alias{get_nldi_sources} \title{Get current NLDI offerings} \usage{ -get_nldi_sources(url = pkg.env$nldi_base) +get_nldi_sources(url = getOption("dataRetrieval.nldi_base")) } \arguments{ \item{url}{URL for NLDI sources. 
Default is supplied by package environment.} diff --git a/man/read_waterdata_combined_meta.Rd b/man/read_waterdata_combined_meta.Rd index efab47c84..4e7a3519a 100644 --- a/man/read_waterdata_combined_meta.Rd +++ b/man/read_waterdata_combined_meta.Rd @@ -336,6 +336,14 @@ site_list <- read_waterdata_combined_meta( monitoring_location_id = hucs$monitoring_location_id ) +# Query for instantaneous gage height data for a site in Iowa +sites_inst <- read_waterdata_combined_meta( + monitoring_location_id = "USGS-05418400", + parameter_code = "00065" +) + +# parse individual thresholds lists: +threshold_1 <- jsonlite::fromJSON(sites_inst$thresholds[3]) } \dontshow{\}) # examplesIf} diff --git a/man/read_waterdata_peaks.Rd b/man/read_waterdata_peaks.Rd index b809a24c6..0009702d3 100644 --- a/man/read_waterdata_peaks.Rd +++ b/man/read_waterdata_peaks.Rd @@ -22,6 +22,7 @@ read_waterdata_peaks( time = NA_character_, bbox = NA, ..., + allow_incomplete_dates = FALSE, convertType = getOption("dataRetrieval.convertType"), no_paging = getOption("dataRetrieval.no_paging"), chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), @@ -103,6 +104,17 @@ Southern-most latitude, Eastern-most longitude, Northern-most longitude).} \item{...}{Not used. Included to help differentiate official Water Data API arguments from more seldom used, optional dataRetrieval-specific arguments.} +\item{allow_incomplete_dates}{Specifically in the peaks data, exact peak dates +are not always known. Sometimes peaks are known just for the year, sometimes +they are known to the year and month, and sometimes to the exact date. +This argument determines if incomplete dates + fake month/day values are +allowed in the "time" column so that it can be a Date object (\code{TRUE}), or whether +to use only the available year, month, day to get a character value (\code{FALSE}). +Default is \code{FALSE}. If set to \code{FALSE} but all dates are +complete, the "time" column will be returned as a Date object. 
+If this argument is set to \code{TRUE}, fake days or months are injected into the +"time" column.} + \item{convertType}{logical, defaults to TRUE. If \code{TRUE}, the function will convert the data to dates, any qualifiers to string vector and reorder the returned data frame.} @@ -163,6 +175,10 @@ dv_data_sf <- read_waterdata_peaks( monitoring_location_id = wi_peaks$monitoring_location_id[1], parameter_code = "00060") +incomplete_dates <- read_waterdata_peaks( + monitoring_location_id = "USGS-06334330", + parameter_code = "00060") + } \dontshow{\}) # examplesIf} } diff --git a/tutorials/basic_slides_deck.qmd b/tutorials/basic_slides_deck.qmd index 5981d5c52..990c450a4 100644 --- a/tutorials/basic_slides_deck.qmd +++ b/tutorials/basic_slides_deck.qmd @@ -201,7 +201,7 @@ or ```{bash} #| echo: true #| eval: false -conda -c conda-forge install dataretrieval +conda install conda-forge::dataretrieval ``` Then each time you open Python, you'll need to load the library: @@ -734,8 +734,6 @@ qw_data <- read_waterdata_samples( ncol(qw_data) ``` -R generates a few POSIXct columns to combine date, time, timezone information. 
- ### Python ```{python} @@ -1323,8 +1321,10 @@ ts_available <- read_waterdata_ts_meta(monitoring_location_id = "USGS-04183500") ### Python ```{python} -#| eval: !expr evaluate_python -ts_avail, ts_me = waterdata.get_time_series_metadata(monitoring_location_id="USGS-04183500") +#| eval: false +ts_avail, ts_me = waterdata.get_time_series_metadata( + monitoring_location_id="USGS-04183500" +) ``` ::: diff --git a/tutorials/quick_intro_deck.qmd b/tutorials/quick_intro_deck.qmd index f80367aa3..771b61091 100644 --- a/tutorials/quick_intro_deck.qmd +++ b/tutorials/quick_intro_deck.qmd @@ -157,7 +157,7 @@ pip install dataretrieval ```{bash} #| echo: true #| eval: false -conda -c conda-forge install dataretrieval +conda install conda-forge::dataretrieval ``` Then each time you open Python, you'll need to load the library: diff --git a/vignettes/Contributing.Rmd b/vignettes/Contributing.Rmd index 823be0d58..1c528253f 100644 --- a/vignettes/Contributing.Rmd +++ b/vignettes/Contributing.Rmd @@ -400,6 +400,31 @@ conda activate pyclass ``` The slides will use the R package `reticulate` to manage flipping back and forth between R and Python. To help `reticulate` know where Python is installed, you will need to add an environment variable "RETICULATE_PYTHON" to your .Renviron file. Run `usethis::edit_r_environ()`, then add the path to your Python installation, and restart R. RStudio can render both the R and Python in the Quarto slides. However, if you want to do troubleshooting on individual code chunks, you might want to switch to Positron which allows seamless transition between R and Python consoles. +Let's say you need to update to a new version of dataretrieval: + +1. Open the Miniforge prompt + +2. Navigate to dataRetrieval directory + +3. Activate pyclass: +``` +conda activate pyclass +``` + +4. Update package: +``` +conda install conda-forge::dataretrieval +``` + +5. Restart your python session. + + +Let's say you want to update all packages. 
In step 4 above, run: +``` +mamba update --all +``` + + # References