Installation

### Optionally: specify GitHub Personal Access Token (GITHUB_PAT)
### See here why this might be important for you:
### https://kwb-r.github.io/kwb.pkgbuild/articles/install.html#set-your-github_pat

# Sys.setenv(GITHUB_PAT = "mysecret_access_token")

# Install package "remotes" from CRAN
if (! require("remotes")) {
  install.packages("remotes", repos = "https://cloud.r-project.org")
}

# Install KWB package 'kwb.heatsine' from GitHub
remotes::install_github("KWB-R/kwb.heatsine")

Data prerequisites

The sinus fit optimisation works for for daily time series of a surface water and a groundwater monitoring point, which follow the following requirements:

File naming

The time series must be provided in a .csv (comma separated value) file, which follows the naming convention:

temperature_<type>_<monitoring_id>.csv

where <type> needs to be exactly groundwater or surface-water. In case any other value for <type> is chosen the program will not work (e.g SURFACE WATER). The value of the <monitoring_id> will be used for labeling the monitoring points in the plots and can be almost any value but must not contain _ (underscore) or " " (space).

Data structure

The data structure of each .csv file is very simple as it contains only the two columns date (formatted: YYYY-MM-DD, e.g. 2020-06-23) and value (temperature data in degree Celsius) as shown exemplary below:

date,value
2020-06-23,12.3
2020-06-24,12.3

Please make sure that both columns date and value are all lowercase and correctly spelled as the program will not work if this is not the case.

Data preparation

Import

The R package includes the following temporal times series data for testing:

list.files(kwb.heatsine::extdata_file())
#> [1] "temperature_groundwater_Txxbxxxx6.csv"         
#> [2] "temperature_groundwater_Txxhxxx10.csv"         
#> [3] "temperature_groundwater_Txxsxxx20.csv"         
#> [4] "temperature_groundwater_Txxwxxx13.csv"         
#> [5] "temperature_groundwater_Txxxx3.csv"            
#> [6] "temperature_surface-water_Txxsxx-mxxxxsxxx.csv"

In this the example the files temperature_surface-water_Txxsxx-mxxxxsxxx.csv and temperature_groundwater_Txxbxxxx6.csv are used for testing the functionality and imported by using the code below:

## Load the R package
library(kwb.heatsine)

load_temp <- function(base_name) {
  kwb.heatsine::load_temperature_from_csv(
    kwb.heatsine::extdata_file(base_name)
  )
}

data_sw <- load_temp("temperature_surface-water_Txxsxx-mxxxxsxxx.csv")
data_gw <- load_temp("temperature_groundwater_Txxbxxxx6.csv")

Visualise

Plot time series for whole time period (moveover on data points to select)

Surface water

kwb.heatsine::plot_temperature_interactive(data_sw)

Groundwater

kwb.heatsine::plot_temperature_interactive(data_gw)

Select

Reduce time period to a full sinus period which is definde by the user input (date_end is optional, if not given it is set to ‘date_start’ + 365.25 days).

data_sw_selected <- kwb.heatsine::select_timeperiod(
  data_sw, 
  date_start = "2015-10-10",
  date_end = "2016-10-14"
)

data_gw_selected <- kwb.heatsine::select_timeperiod(
  data_gw,
  date_start = "2015-12-10",
  date_end = "2017-01-15"
)

Plot selected datasets:

Surface Water (selected)

kwb.heatsine::plot_temperature_interactive(df = data_sw_selected)

Grundwater (selected)

kwb.heatsine::plot_temperature_interactive(df = data_gw_selected)

Sinus optimisation

Run

Sinus fit operation is performed by minimizing the RMSE error between observed and simulated temperature curves.

limits <- c(100, 500) # minimum/maximum period length
tolerance <- 0.001 # the desired accuracy ()
debug <- TRUE

# Helper function to do the sinus optimisation
do_sinus_optimisation <- function(temp_df) {
  kwb.heatsine::optimise_sinus_variablePeriod(
    temp_df = temp_df,
    opt_limits = limits,
    opt_tolerance = tolerance,
    opt_debug = debug
  )
}

sinusfit_sw <- do_sinus_optimisation(temp_df = data_sw_selected)
#> Ran opt_func with period length 252.79 days: 6.27 'RMSE' for Temp
#> Ran opt_func with period length 347.21 days: 1.89 'RMSE' for Temp
#> Ran opt_func with period length 405.57 days: 1.78 'RMSE' for Temp
#> Ran opt_func with period length 379.63 days: 1.54 'RMSE' for Temp
#> Ran opt_func with period length 379.14 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 366.94 days: 1.55 'RMSE' for Temp
#> Ran opt_func with period length 373.51 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 376.33 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 375.25 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 374.59 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 374.18 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.92 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.77 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.67 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.61 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.57 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.55 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.54 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.53 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.51 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.51 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.51 days: 1.53 'RMSE' for Temp
sinusfit_gw <- do_sinus_optimisation(temp_df = data_gw_selected)
#> Ran opt_func with period length 252.79 days: 0.64 'RMSE' for Temp
#> Ran opt_func with period length 347.21 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 405.57 days: 0.37 'RMSE' for Temp
#> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 346.50 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 346.23 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 346.06 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.96 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.89 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.85 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.83 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.82 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.81 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.80 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.80 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp
#> Ran opt_func with period length 345.79 days: 0.18 'RMSE' for Temp

# Generate data frame with simulated and observed values
predictions <- kwb.heatsine::get_predictions(
  sinusfit_sw = sinusfit_sw, 
  sinusfit_gw = sinusfit_gw, 
  retardation_factor = 1.8
)

Analyse

Interactive Plot

kwb.heatsine::plot_prediction_interactive(predictions)
#> Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
#> Please use `group_by()` instead.
#> See vignette('programming') for more help
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_warnings()` to see where this warning was generated.

Table: Traveltimes

knitr::kable(predictions$traveltimes)
point_type surface-water (Txxsxx-mxxxxsxxx) groundwater (Txxbxxxx6) traveltime_thermal_days retardation_factor traveltime_hydraulic_days
turning-point_1 2015-10-21 2016-02-06 107.93081 1.8 59.96156
min 2016-01-23 2016-05-03 101.00000 1.8 56.11111
turning-point_2 2016-04-25 2016-07-28 94.06919 1.8 52.26066
max 2016-07-28 2016-10-22 86.00000 1.8 47.77778
turning-point_3 2016-10-29 2017-01-16 79.06919 1.8 43.92733

Table: Goodness of Fit

While only RMSE is used for optimising the sinus fit, a number of goodness-of-fit parameters is also calculated and shown in the table below. For details on each of these parameters the use is referred to the function hydroGOF::gof() which is used for calculating all these values.

knitr::kable(predictions$gof)
type ME MAE MSE RMSE NRMSE % PBIAS % RSR rSD NSE mNSE rNSE d md rd cp r R2 bR2 KGE VE
surface-water 0 1.24 2.33 1.53 20.1 0 0.2 0.98 0.96 0.82 -0.17 0.99 0.91 0.70 -10.49 0.98 0.96 0.95 0.97 0.90
groundwater 0 0.14 0.03 0.18 19.6 0 0.2 0.98 0.96 0.82 0.96 0.99 0.91 0.99 -9.53 0.98 0.96 0.96 0.97 0.99

Table: Optimisation Parameters

knitr::kable(predictions$paras)
type period_length alpha beta y0 a x0
surface-water 373.5143 -10.278478 2.0304625 12.72494 10.477113 2.946559
groundwater 345.7910 -1.071289 0.8094449 11.51617 1.342707 2.494530

Table: Data

# only first entries
knitr::kable(head(predictions$data))
type monitoring_id label date observed simulated residuals simulated_pi_lower simulated_pi_upper
surface-water Txxsxx-mxxxxsxxx surface-water (Txxsxx-mxxxxsxxx) 2015-10-10 13.50958 14.75541 -1.245822 11.72751 17.78331
surface-water Txxsxx-mxxxxsxxx surface-water (Txxsxx-mxxxxsxxx) 2015-10-11 12.88167 14.58222 -1.700557 11.55433 17.61012
surface-water Txxsxx-mxxxxsxxx surface-water (Txxsxx-mxxxxsxxx) 2015-10-12 12.17833 14.40852 -2.230183 11.38062 17.43642
surface-water Txxsxx-mxxxxsxxx surface-water (Txxsxx-mxxxxsxxx) 2015-10-13 11.68292 14.23433 -2.551416 11.20643 17.26223
surface-water Txxsxx-mxxxxsxxx surface-water (Txxsxx-mxxxxsxxx) 2015-10-14 11.23500 14.05972 -2.824722 11.03182 17.08762
surface-water Txxsxx-mxxxxsxxx surface-water (Txxsxx-mxxxxsxxx) 2015-10-15 11.14667 13.88473 -2.738067 10.85684 16.91263

Export

Export results to csv:

write_to_csv <- function(df, file_base) {
  readr::write_csv(df, path = file.path(
    kwb.utils::createDirectory("csv"), paste0(file_base, ".csv")
  ))
}

write_to_csv(predictions$data, "sinus-fit_predictions")
write_to_csv(predictions$paras, "sinus-fit_parameters")
write_to_csv(predictions$gof, "sinus-fit_goodness-of-fit")
write_to_csv(predictions$traveltimes, "sinus-fit_traveltimes")
write_to_csv(predictions$residuals, "sinus-fit_residuals")

## Export plots:

save_to_html <- function(p, file_base) {
  htmlwidgets::saveWidget(p, paste0(file_base, ".html"))
}

withr::with_dir(new = kwb.utils::createDirectory("plots"), code = {
  
  save_to_html(
    plot_temperature_interactive(data_sw), 
    "temperature_surface-water_time-series_full"
  )
  
  save_to_html(
    plot_temperature_interactive(data_sw_selected),
    "temperature_surface-water_time-series_selected"
  )
  
  save_to_html(
    plot_temperature_interactive(data_gw), 
    "temperature_groundwater_time-series_full"
  )
  
  save_to_html(
    plot_temperature_interactive(data_gw_selected),
    "temperature_groundwater_time-series_selected"
  )
  
  save_to_html(
    plot_prediction_interactive(predictions), 
    "temperature_prediction"
  )
  
  # save_to_html(
  #   plot_residuals_interactive(prediction_df, binwidth = 0.5),
  #   "temperature_prediction_residuals"
  # )
})