### Optionally: specify GitHub Personal Access Token (GITHUB_PAT) ### See here why this might be important for you: ### https://kwb-r.github.io/kwb.pkgbuild/articles/install.html#set-your-github_pat # Sys.setenv(GITHUB_PAT = "mysecret_access_token") # Install package "remotes" from CRAN if (! require("remotes")) { install.packages("remotes", repos = "https://cloud.r-project.org") } # Install KWB package 'kwb.heatsine' from GitHub remotes::install_github("KWB-R/kwb.heatsine")
The sinus fit optimisation works for for daily time series of a surface water and a groundwater monitoring point, which follow the following requirements:
The time series must be provided in a .csv
(comma separated value) file, which follows the naming convention:
temperature_<type>
_<monitoring_id>
.csv
where <type>
needs to be exactly groundwater
or surface-water
. In case any other value for <type>
is chosen the program will not work (e.g SURFACE WATER
). The value of the <monitoring_id>
will be used for labeling the monitoring points in the plots and can be almost any value but must not contain _
(underscore) or " "
(space).
The data structure of each .csv
file is very simple as it contains only the two columns date
(formatted: YYYY-MM-DD
, e.g. 2020-06-23
) and value
(temperature data in degree Celsius) as shown exemplary below:
Please make sure that both columns date
and value
are all lowercase and correctly spelled as the program will not work if this is not the case.
The R package includes the following temporal times series data for testing:
list.files(kwb.heatsine::extdata_file()) #> [1] "temperature_groundwater_TEG343.csv" #> [2] "temperature_groundwater_TEGbaum06.csv" #> [3] "temperature_groundwater_TEGhzk-10.csv" #> [4] "temperature_groundwater_TEGsaat20.csv" #> [5] "temperature_groundwater_TEGwest13.csv" #> [6] "temperature_surface-water_TEGsee-mikrosieb.csv"
In this the example the files temperature_surface-water_TEGsee-mikrosieb.csv
and temperature_groundwater_TEG343.csv
are used for testing the functionality and imported by using the code below:
## Load the R package library(kwb.heatsine) load_temp <- function(base_name) { kwb.heatsine::load_temperature_from_csv( kwb.heatsine::extdata_file(base_name) ) } data_sw <- load_temp("temperature_surface-water_TEGsee-mikrosieb.csv") data_gw <- load_temp("temperature_groundwater_TEGbaum06.csv")
Plot time series for whole time period (moveover on data points to select)
kwb.heatsine::plot_temperature_interactive(data_sw)
kwb.heatsine::plot_temperature_interactive(data_gw)
Reduce time period to a full sinus period which is definde by the user input (date_end
is optional, if not given it is set to ‘date_start’ + 365.25 days).
data_sw_selected <- kwb.heatsine::select_timeperiod( data_sw, date_start = "2015-10-10", date_end = "2016-10-14" ) data_gw_selected <- kwb.heatsine::select_timeperiod( data_gw, date_start = "2015-12-10", date_end = "2017-01-15" )
Plot selected datasets:
kwb.heatsine::plot_temperature_interactive(df = data_sw_selected)
kwb.heatsine::plot_temperature_interactive(df = data_gw_selected)
Sinus fit operation is performed by minimizing the RMSE error between observed and simulated temperature curves.
limits <- c(100, 500) # minimum/maximum period length tolerance <- 0.001 # the desired accuracy () debug <- TRUE # Helper function to do the sinus optimisation do_sinus_optimisation <- function(temp_df) { kwb.heatsine::optimise_sinus_variablePeriod( temp_df = temp_df, opt_limits = limits, opt_tolerance = tolerance, opt_debug = debug ) } sinusfit_sw <- do_sinus_optimisation(temp_df = data_sw_selected) #> Ran opt_func with period length 252.79 days: 6.27 'RMSE' for Temp #> Ran opt_func with period length 347.21 days: 1.89 'RMSE' for Temp #> Ran opt_func with period length 405.57 days: 1.78 'RMSE' for Temp #> Ran opt_func with period length 379.63 days: 1.54 'RMSE' for Temp #> Ran opt_func with period length 379.14 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 366.94 days: 1.55 'RMSE' for Temp #> Ran opt_func with period length 373.51 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 376.33 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 375.25 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 374.59 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 374.18 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.92 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.77 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.67 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.61 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.57 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.55 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.54 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.53 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.51 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.51 days: 1.53 'RMSE' for Temp #> Ran opt_func with period length 373.51 days: 1.53 'RMSE' for Temp sinusfit_gw <- do_sinus_optimisation(temp_df = data_gw_selected) #> Ran opt_func with period length 252.79 days: 0.64 'RMSE' for Temp #> Ran opt_func with period length 347.21 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 405.57 days: 0.37 'RMSE' for Temp #> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 346.50 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 346.23 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 346.06 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.96 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.89 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.85 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.83 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.82 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.81 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.80 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.80 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp #> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp # Generate data frame with simulated and observed values predictions <- kwb.heatsine::get_predictions( sinusfit_sw = sinusfit_sw, sinusfit_gw = sinusfit_gw, retardation_factor = 1.8 )
kwb.heatsine::plot_prediction_interactive(predictions) #> Warning: `group_by_()` is deprecated as of dplyr 0.7.0. #> Please use `group_by()` instead. #> See vignette('programming') for more help #> This warning is displayed once every 8 hours. #> Call `lifecycle::last_warnings()` to see where this warning was generated.
knitr::kable(predictions$traveltimes)
point_type | surface-water (TEGsee-mikrosieb) | groundwater (TEGbaum06) | traveltime_thermal_days | retardation_factor | traveltime_hydraulic_days |
---|---|---|---|---|---|
turning-point_1 | 2015-10-21 | 2016-02-06 | 107.93081 | 1.8 | 59.96156 |
min | 2016-01-23 | 2016-05-03 | 101.00000 | 1.8 | 56.11111 |
turning-point_2 | 2016-04-25 | 2016-07-28 | 94.06919 | 1.8 | 52.26066 |
max | 2016-07-28 | 2016-10-22 | 86.00000 | 1.8 | 47.77778 |
turning-point_3 | 2016-10-29 | 2017-01-16 | 79.06919 | 1.8 | 43.92733 |
While only RMSE is used for optimising the sinus fit, a number of goodness-of-fit parameters is also calculated and shown in the table below. For details on each of these parameters the use is referred to the function hydroGOF::gof()
which is used for calculating all these values.
knitr::kable(predictions$gof)
type | ME | MAE | MSE | RMSE | NRMSE % | PBIAS % | RSR | rSD | NSE | mNSE | rNSE | d | md | rd | cp | r | R2 | bR2 | KGE | VE |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
surface-water | 0 | 1.24 | 2.33 | 1.53 | 20.1 | 0 | 0.2 | 0.98 | 0.96 | 0.82 | -0.17 | 0.99 | 0.91 | 0.70 | -10.49 | 0.98 | 0.96 | 0.95 | 0.97 | 0.90 |
groundwater | 0 | 0.14 | 0.03 | 0.18 | 19.6 | 0 | 0.2 | 0.98 | 0.96 | 0.82 | 0.96 | 0.99 | 0.91 | 0.99 | -9.53 | 0.98 | 0.96 | 0.96 | 0.97 | 0.99 |
knitr::kable(predictions$paras)
type | period_length | alpha | beta | y0 | a | x0 |
---|---|---|---|---|---|---|
surface-water | 373.5143 | -10.278478 | 2.0304625 | 12.72494 | 10.477113 | 2.946559 |
groundwater | 345.7910 | -1.071289 | 0.8094449 | 11.51617 | 1.342707 | 2.494530 |
type | monitoring_id | label | date | observed | simulated | residuals | simulated_pi_lower | simulated_pi_upper |
---|---|---|---|---|---|---|---|---|
surface-water | TEGsee-mikrosieb | surface-water (TEGsee-mikrosieb) | 2015-10-10 | 13.50958 | 14.75541 | -1.245822 | 11.72751 | 17.78331 |
surface-water | TEGsee-mikrosieb | surface-water (TEGsee-mikrosieb) | 2015-10-11 | 12.88167 | 14.58222 | -1.700557 | 11.55433 | 17.61012 |
surface-water | TEGsee-mikrosieb | surface-water (TEGsee-mikrosieb) | 2015-10-12 | 12.17833 | 14.40852 | -2.230183 | 11.38062 | 17.43642 |
surface-water | TEGsee-mikrosieb | surface-water (TEGsee-mikrosieb) | 2015-10-13 | 11.68292 | 14.23433 | -2.551416 | 11.20643 | 17.26223 |
surface-water | TEGsee-mikrosieb | surface-water (TEGsee-mikrosieb) | 2015-10-14 | 11.23500 | 14.05972 | -2.824722 | 11.03182 | 17.08762 |
surface-water | TEGsee-mikrosieb | surface-water (TEGsee-mikrosieb) | 2015-10-15 | 11.14667 | 13.88473 | -2.738067 | 10.85684 | 16.91263 |
Export results to csv
:
write_to_csv <- function(df, file_base) { readr::write_csv(df, path = file.path( kwb.utils::createDirectory("csv"), paste0(file_base, ".csv") )) } write_to_csv(predictions$data, "sinus-fit_predictions") write_to_csv(predictions$paras, "sinus-fit_parameters") write_to_csv(predictions$gof, "sinus-fit_goodness-of-fit") write_to_csv(predictions$traveltimes, "sinus-fit_traveltimes") write_to_csv(predictions$residuals, "sinus-fit_residuals") ## Export plots: save_to_html <- function(p, file_base) { htmlwidgets::saveWidget(p, paste0(file_base, ".html")) } withr::with_dir(new = kwb.utils::createDirectory("plots"), code = { save_to_html( plot_temperature_interactive(data_sw), "temperature_surface-water_time-series_full" ) save_to_html( plot_temperature_interactive(data_sw_selected), "temperature_surface-water_time-series_selected" ) save_to_html( plot_temperature_interactive(data_gw), "temperature_groundwater_time-series_full" ) save_to_html( plot_temperature_interactive(data_gw_selected), "temperature_groundwater_time-series_selected" ) save_to_html( plot_prediction_interactive(predictions), "temperature_prediction" ) # save_to_html( # plot_residuals_interactive(prediction_df, binwidth = 0.5), # "temperature_prediction_residuals" # ) })