1 Background

USGS streamflow observations are a primary source of hydrologic information and often used for validation and calibration of hydrologic models. Web services have been developed at NWIS and the dataRetrieval R package has emerged to make it easy to get USGS data using R.

This vignette demonstrates how to download USGS streamflow observations using the dataRetrieval package, import modeled streamflow from WRF-Hydro output files using functionality from rwrfhydro, and plot both the observed and modeled time series. Note that there are different ways to read the simulated streamflow depending on which option have been turned on in the hydro.namelist.

1.1 Load packages

Load required packages.

library(dataRetrieval)
library(data.table)
library(ggplot2)
library(foreach)
library(rwrfhydro)
## To check rwrfhydro updates run: CheckForUpdates()

2 Download USGS streamflow observations

Download USGS streamflow observations using a function from the dataRetrieval package.

gageID <- c("01374559", "01374581", "0137462010")
obsDF <- readNWISuv(siteNumbers=gageID,
                    parameterCd="00060",
                    startDate="2011-08-25",
                    endDate="2011-09-03")
head(obsDF)
##   agency_cd  site_no            dateTime X_00060_00000 X_00060_00000_cd
## 1      USGS 01374559 2011-08-25 05:00:00          4.88                A
## 2      USGS 01374559 2011-08-25 05:15:00          4.54                A
## 3      USGS 01374559 2011-08-25 05:30:00          4.71                A
## 4      USGS 01374559 2011-08-25 05:45:00          4.71                A
## 5      USGS 01374559 2011-08-25 06:00:00          4.54                A
## 6      USGS 01374559 2011-08-25 06:15:00          4.54                A
##   tz_cd
## 1   UTC
## 2   UTC
## 3   UTC
## 4   UTC
## 5   UTC
## 6   UTC

Rename columns and create a column for streamflow with units of cubic meters per second.

colnames(obsDF) <- c("agency","site_no","dateTime","streamflow_cfs","quality_flag", "time_zone")
obsDF$streamflow <- obsDF$streamflow_cfs/35.31
head(obsDF)
##   agency  site_no            dateTime streamflow_cfs quality_flag
## 1   USGS 01374559 2011-08-25 05:00:00           4.88            A
## 2   USGS 01374559 2011-08-25 05:15:00           4.54            A
## 3   USGS 01374559 2011-08-25 05:30:00           4.71            A
## 4   USGS 01374559 2011-08-25 05:45:00           4.71            A
## 5   USGS 01374559 2011-08-25 06:00:00           4.54            A
## 6   USGS 01374559 2011-08-25 06:15:00           4.54            A
##   time_zone streamflow
## 1       UTC  0.1382045
## 2       UTC  0.1285755
## 3       UTC  0.1333900
## 4       UTC  0.1333900
## 5       UTC  0.1285755
## 6       UTC  0.1285755

Read in the reference file, and add the feature_id to the observations

referenceFile <- "~/wrf-hydro-training/example_case/Gridded/croton_frxst_pts_csv.csv"
refDF <- read.csv(referenceFile, colClasses = c("integer", "numeric", "numeric", "character", "character"))
print(refDF)
##   FID       LON      LAT STATION    Site_No
## 1   1 -73.75900 41.46811       A   01374559
## 2   2 -73.73707 41.44807       B   01374581
## 3   3 -73.69425 41.41023       C 0137462010
obsDF <- merge(obsDF,  refDF[, c("FID", "Site_No")], by.x = "site_no", by.y = "Site_No")
obsDF$feature_id <- obsDF$FID  ## adding the feature_id

3 Import WRF-Hydro simulated streamflow at the gage location

One can read the simulated streamflow values different ways depending on what output files are turned on in the hydro.namelist. The easiest option is to read it from frxst_pts_out.txt file.

# specify the path to the frxst_pts_out.txt file
modelOutputPath <- "~/wrf-hydro-training/output/lesson4/run_gridded_baseline/frxst_pts_out.txt"

simQ <- read.csv(modelOutputPath, 
                 col.names =  c("TimeFromStart", "Time", "station", "longitude", "latitude", "q_cms", "q_cfs", "stage"),
                 colClasses = c("integer", "character", "character", rep("numeric", 5)), header = FALSE)
head(simQ)
##   TimeFromStart                Time      station longitude latitude q_cms
## 1          3600 2011-08-26 01:00:00            3 -73.69508 41.41042 2.879
## 2          3600 2011-08-26 01:00:00            2 -73.73784 41.44910 1.341
## 3          3600 2011-08-26 01:00:00            1 -73.75716 41.46555 0.699
## 4          7200 2011-08-26 02:00:00            3 -73.69508 41.41042 2.882
## 5          7200 2011-08-26 02:00:00            2 -73.73784 41.44910 1.328
## 6          7200 2011-08-26 02:00:00            1 -73.75716 41.46555 0.693
##     q_cfs stage
## 1 101.678 0.457
## 2  47.343 0.467
## 3  24.690 0.437
## 4 101.766 0.457
## 5  46.892 0.465
## 6  24.490 0.435

The other option is to read it from the CHANOBS files like what you did in the training lessons.

chnObsFiles <- list.files("~/wrf-hydro-training/output/lesson4/run_gridded_baseline/",
                          glob2rx("2011*CHANOBS*"), full.names = TRUE)

simQ <- foreach(f = chnObsFiles, .combine = rbind.data.frame) %do% {
  a <- rwrfhydro::GetNcdfFile(f, variables = c("feature_id", "streamflow"), quiet = TRUE)
  a$dateTime <- as.POSIXct(substr(basename(f), 1, 12), format = "%Y%m%d%H%M", tz = "UTC")
  return(a)
}
head(simQ)
##   feature_id streamflow            dateTime
## 1          3  2.8792028 2011-08-26 01:00:00
## 2          2  1.3406059 2011-08-26 01:00:00
## 3          1  0.6991324 2011-08-26 01:00:00
## 4          3  2.8816965 2011-08-26 02:00:00
## 5          2  1.3278302 2011-08-26 02:00:00
## 6          1  0.6934885 2011-08-26 02:00:00

You could also read this information from the CHRTOUT_DOMAIN files and CHRTOUT_GRID, refer to the manual page of ReadChrtout

3.1 Plot hydrographs

Plot observed and simulated hydrographs using ggplot2.

obsDF$run <- "Observation"
simQ$run <- "Gridded Baseline"

merged <- rbind(simQ, obsDF[, names(simQ)])

ggplot(data = merged) + geom_line(aes(dateTime, streamflow, color = run)) + facet_wrap(~feature_id, ncol = 1)

4 Import WRF-Hydro simulated streamflow at the gage location for NWM

The simulated streamflow can be found in CHANOBS, CHRTOUT_DOMAN and CHRTOUT_GRID files. Here is an example on how to read it from CHANOBS files.

chnObsFiles <- list.files("~/wrf-hydro-training/output/lesson5/run_nwm",
                          glob2rx("2011*CHANOBS*"), full.names = TRUE)

simQnwm <- foreach(f = chnObsFiles, .combine = rbind.data.frame) %do% {
  a <- rwrfhydro::GetNcdfFile(f, variables = c("feature_id", "streamflow"), quiet = TRUE)
  a$dateTime <- as.POSIXct(substr(basename(f), 1, 12), format = "%Y%m%d%H%M", tz = "UTC")
  return(a)
}

The relationship between the links/feature_id and the gage ID can be found in the routelink file.

routelinkFile <- "~/wrf-hydro-training/example_case/NWM/DOMAIN/Route_Link.nc"

rtlink <- rwrfhydro::GetNcdfFile(routelinkFile, quiet = TRUE)
head(rtlink)
##     BtmWdth     ChSlp Kchan Length MusK MusX NHDWaterbodyComID Qi    So
## 1 1.9666173 0.6912134     0   2233 3600  0.2             -9999  0 0.089
## 2 2.8226683 0.5893509     0   2339 3600  0.2             -9999  0 0.028
## 3 0.9797607 0.9399659     0    431 3600  0.2             -9999  0 0.057
## 4 1.5312428 0.7718934     0   1829 3600  0.2             -9999  0 0.071
## 5 0.1349000 2.2542832     0     43 3600  0.2             -9999  0 0.001
## 6 0.8428194 1.0045201     0    224 3600  0.2             -9999  0 0.001
##    TopWdth TopWdthCC    alt ascendingIndex from           gages      lat
## 1 3.277696  9.833087 283.30            126    0                 41.51133
## 2 4.704447 14.113342 159.00            118    0                 41.50933
## 3 1.632935  4.898804 288.57            119    0                 41.50097
## 4 2.552071  7.656214 365.63            134    0                 41.47364
## 5 0.224800  0.674400 235.79            128    0                 41.50355
## 6 1.404699  4.214097 295.35            130    0                 41.52832
##      link       lon    n  nCC order      to
## 1 6212272 -73.78367 0.06 0.12     1 6212262
## 2 6212276 -73.79778 0.06 0.12     1 6212262
## 3 6212914 -73.82017 0.06 0.12     1 6215698
## 4 6215748 -73.83207 0.06 0.12     1 6215730
## 5 6224248 -73.70467 0.06 0.12     1 6234638
## 6 6224454 -73.68591 0.06 0.12     1 6224206
# let s subset to only those that have a gage on them
rtlink <- subset(rtlink, trimws(gages) != "")
rtlink
##      BtmWdth     ChSlp Kchan Length MusK MusX NHDWaterbodyComID Qi    So
## 143 4.605017 0.4748894     0    397 3600  0.2             -9999  0 0.001
## 144 3.404725 0.5425658     0   2468 3600  0.2             -9999  0 0.011
## 161 5.819620 0.4282930     0    505 3600  0.2             -9999  0 0.002
## 180 7.312880 0.3872389     0   3166 3600  0.2             -9999  0 0.015
##       TopWdth TopWdthCC    alt ascendingIndex from           gages
## 143  7.675028  23.02508 180.48             39    0        01374559
## 144  5.674541  17.02362 183.15             58    0        01374598
## 161  9.699367  29.09810 164.32             44    0        01374581
## 180 12.188132  36.56440 147.61             61    0      0137462010
##          lat    link       lon     n  nCC order      to
## 143 41.47079 6226948 -73.76059 0.055 0.11     3 6228462
## 144 41.47361 6226964 -73.69085 0.060 0.12     2 6226998
## 161 41.44981 6227008 -73.73565 0.055 0.11     4 6227012
## 180 41.40192 6227150 -73.68741 0.055 0.11     4       0

Now you can merge the name of the gage to th simulated streamflow.

simQnwm <- merge(simQnwm, rtlink[, c("link", "gages")], by.x = "feature_id", by.y = "link")

Concatenate the observation and plot it.

names(simQnwm)[which(names(simQnwm) == "gages")] <- "site_no"
simQnwm$run <- "NWM"

merged <- rbind(simQnwm, obsDF[, names(simQnwm)])

ggplot(data = merged) + geom_line(aes(dateTime, streamflow, color = run)) + facet_wrap(~trimws(site_no), ncol = 1)