Install R Package
# Enable repository from kwb-r
options(repos = c(
kwbr = 'https://kwb-r.r-universe.dev',
CRAN = 'https://cloud.r-project.org'))
# Download and install kwb.impetus in R
install.packages('kwb.impetus')
Define Helper Functions
library(kwb.impetus)
get_master <- function(stations_df) {
if("Betreiber" %in% names(stations_df)) {
stations_df <- stations_df %>%
dplyr::filter(.data$Betreiber == "Land Berlin")
}
urls <- stations_df %>% dplyr::pull(.data$stammdaten_link)
wasserportal::get_wasserportal_masters_data(master_urls = urls) %>%
dplyr::rename(Messstellennummer = .data$Nummer) %>%
dplyr::mutate(Messstellennummer = as.double(.data$Messstellennummer))
}
get_data_name <- function(para_name) {
prefix <- ""
if(startsWith(para_name, "surface")) {
prefix <- "daily_"
}
paste0(prefix, para_name)
}
get_master_name <- function(para_name) {
paste0("stations_", para_name)
}
Surface Water
stations <- wasserportal::get_stations()
variables <- wasserportal::get_surfacewater_variables()
variables <- variables[1:2]
# Downloads pre-compiled data (daily updated at 5am UTC)
# from https://kwb-r.github.io/wasserportal/daily_surface_water.<parameter_name>.csv
# see also: https://kwb-r.github.io/wasserportal/articles/surface-water.html
overview_names <- names(stations$overview_list)
wp_master <- wasserportal::wp_masters_data_to_list(overview_names)
wp_data <- wasserportal::wp_timeseries_data_to_list(overview_names)
### Alternatively: download latest data from Wasserportal Berlin (takes a while)
#wp_sw_data <- wasserportal::get_daily_surfacewater_data(stations, variables)
#wp_gw_data <- wasserportal::get_groundwater_data(stations)
#wp_data <- c(wp_sw_data, wp_gw_data)
Surface Water Level
para_name <- "surface-water_water-level"
surfacewater_level_master <- wp_data[[get_data_name(para_name)]] %>%
dplyr::filter(.data$Parameter == "Wasserstand") %>%
dplyr::mutate(Datum = as.Date(.data$Datum),
Year = as.integer(format(.data$Datum, "%Y")),
Decade = kwb.impetus::floor_decade(.data$Year),
Decade_Label = sprintf("%d - %d",
.data$Decade,
.data$Decade + 9),
Month = as.factor(format(.data$Datum, "%m"))) %>%
dplyr::left_join(get_master(wp_master[[get_master_name(para_name)]])) %>%
dplyr::mutate(Tagesmittelwert_Pegelstand_mNN = as.numeric(.data$Pegelnullpunkt_m) + .data$Tagesmittelwert/100) %>%
### remove -777 for messstellennummer 5867000 (few values in 2000) resulted by step above
dplyr::filter(.data$Tagesmittelwert_Pegelstand_mNN != -777) %>%
dplyr::select(- .data$Pegelnullpunkt_m) %>%
dplyr::mutate(Label = sprintf("%s (%s, id: %s, fluss-km: %2.2f)",
.data$Name,
.data$Gewaesser,
.data$Messstellennummer,
as.numeric(.data$Flusskilometer)))
surfacewater_level_master_agg <- surfacewater_level_master %>%
dplyr::group_by(.data$Messstellennummer,
.data$Decade,
.data$Decade_Label,
.data$Month,
.data$Label) %>%
dplyr::summarise(Monatsmittelwert_Pegelstand_mNN = mean(.data$Tagesmittelwert_Pegelstand_mNN),
q05 = as.numeric(quantile(.data$Tagesmittelwert_Pegelstand_mNN, probs = 0.05)),
q95 = as.numeric(quantile(.data$Tagesmittelwert_Pegelstand_mNN, probs = 0.95)),
)
para_name_export <- "surface-water_level"
readr::write_csv(surfacewater_level_master_agg, sprintf("%s.csv", para_name_export ))
decades <- tibble::tibble(names = unique(surfacewater_level_master_agg$Decade_Label)[order(unique(surfacewater_level_master_agg$Decade_Label))],
values = c('darkblue', 'blue', 'darkgreen', 'lightgreen', 'orange', 'red'))
kwb.utils::preparePdf(sprintf("%s.pdf", para_name_export),
landscape = TRUE,
width.cm = 29.7,
height.cm = 21)
ids <- unique(surfacewater_level_master_agg$Messstellennummer)
for(id in ids) {
p1 <- surfacewater_level_master_agg %>%
dplyr::filter(.data$Messstellennummer == id)
p2 <- surfacewater_level_master %>%
dplyr::filter(.data$Year == max(.data$Year)) %>%
dplyr::group_by(.data$Messstellennummer,
.data$Year,
.data$Month,
.data$Label) %>%
dplyr::summarise(Monatsmittelwert_Pegelstand_mNN = mean(.data$Tagesmittelwert_Pegelstand_mNN),
q05 = quantile(.data$Tagesmittelwert_Pegelstand_mNN, probs = 0.05),
q95 = quantile(.data$Tagesmittelwert_Pegelstand_mNN, probs = 0.95)) %>%
dplyr::filter(.data$Messstellennummer == id)
n_decades <- length(unique(p1$Decade_Label))
gg <- p1 %>%
ggplot2::ggplot(mapping = ggplot2::aes(x = as.integer(.data$Month),
y = .data$Monatsmittelwert_Pegelstand_mNN,
col = as.factor(.data$Decade_Label))) +
#ggplot2::geom_point(alpha = 0.5) +
ggplot2::geom_ribbon(ggplot2::aes_string(ymin = "q05",
ymax = "q95",
fill = "Decade_Label"),
alpha = 0.1) +
kwb.impetus::scale_fill_decades(decades) +
kwb.impetus::scale_color_decades(decades) +
ggplot2::geom_point() +
ggplot2::geom_point(ggplot2::aes(x = as.integer(.data$Month),
y = .data$Monatsmittelwert_Pegelstand_mNN),
data = p2,
col = "darkgrey",
alpha = 0.5,
inherit.aes = FALSE,
show.legend = FALSE) +
ggplot2::scale_x_continuous(breaks = 1:12, labels = 1:12, minor_breaks = NULL) +
#ggplot2::geom_boxplot() +
ggplot2::facet_wrap( ~ .data$Decade_Label,
nrow = 1,
ncol = n_decades) +
ggplot2::theme_bw() +
ggplot2::theme(legend.position="bottom") +
ggplot2::labs(y = "Monthly Mean Water Level (m NN)",
x = "Month Number",
col = "Mean",
fill = "5%/95% Conf.-Interval",
title = unique(p1$Label))
plot(gg)
}
dev.off()
Surface Water Flow
para_name <- "surface-water_flow"
surfacewater_flow_master <- wp_data[[get_data_name(para_name)]] %>%
dplyr::filter(.data$Parameter == "Durchfluss") %>%
dplyr::mutate(Datum = as.Date(.data$Datum),
Year = as.integer(format(.data$Datum, "%Y")),
Decade = kwb.impetus::floor_decade(.data$Year),
Decade_Label = sprintf("%d - %d",
.data$Decade,
.data$Decade + 9),
Month = as.factor(format(.data$Datum, "%m"))) %>%
dplyr::left_join(get_master(wp_master[[get_master_name(para_name)]])) %>%
dplyr::mutate(Label = sprintf("%s (%s, id: %s, fluss-km: %2.2f)",
.data$Name,
.data$Gewaesser,
.data$Messstellennummer,
as.numeric(.data$Flusskilometer)))
surfacewater_flow_master_agg <- surfacewater_flow_master %>%
dplyr::group_by(.data$Messstellennummer,
.data$Decade,
.data$Decade_Label,
.data$Month,
.data$Label) %>%
dplyr::summarise(Monatsmittelwert = mean(.data$Tagesmittelwert),
q05 = as.numeric(quantile(.data$Tagesmittelwert, probs = 0.05)),
q95 = as.numeric(quantile(.data$Tagesmittelwert, probs = 0.95)),
)
readr::write_csv(surfacewater_flow_master_agg, sprintf("%s.csv", para_name))
decades <- tibble::tibble(names = unique(surfacewater_flow_master_agg$Decade_Label)[order(unique(surfacewater_flow_master_agg$Decade_Label))],
values = c('darkblue', 'blue', 'darkgreen', 'lightgreen', 'orange', 'red'))
kwb.utils::preparePdf(sprintf("%s.pdf", para_name),
landscape = TRUE,
width.cm = 29.7,
height.cm = 21)
ids <- unique(surfacewater_flow_master_agg$Messstellennummer)
for(id in ids) {
p1 <- surfacewater_flow_master_agg %>%
dplyr::filter(.data$Messstellennummer == id)
p2 <- surfacewater_flow_master %>%
dplyr::filter(.data$Year == max(.data$Year)) %>%
dplyr::group_by(.data$Messstellennummer,
.data$Year,
.data$Month,
.data$Label) %>%
dplyr::summarise(Monatsmittelwert = mean(.data$Tagesmittelwert),
q05 = quantile(.data$Tagesmittelwert, probs = 0.05),
q95 = quantile(.data$Tagesmittelwert, probs = 0.95)) %>%
dplyr::filter(.data$Messstellennummer == id)
n_decades <- length(unique(p1$Decade_Label))
gg <- p1 %>%
ggplot2::ggplot(mapping = ggplot2::aes(x = as.integer(.data$Month),
y = .data$Monatsmittelwert,
col = as.factor(.data$Decade_Label))) +
#ggplot2::geom_point(alpha = 0.5) +
ggplot2::geom_ribbon(ggplot2::aes_string(ymin = "q05",
ymax = "q95",
fill = "Decade_Label"),
alpha = 0.1) +
kwb.impetus::scale_fill_decades(decades) +
kwb.impetus::scale_color_decades(decades) +
ggplot2::geom_point() +
ggplot2::geom_point(ggplot2::aes(x = as.integer(.data$Month),
y = .data$Monatsmittelwert),
data = p2,
col = "darkgrey",
alpha = 0.5,
inherit.aes = FALSE,
show.legend = FALSE) +
ggplot2::scale_x_continuous(breaks = 1:12, labels = 1:12, minor_breaks = NULL) +
#ggplot2::geom_boxplot() +
ggplot2::facet_wrap( ~ .data$Decade_Label,
nrow = 1,
ncol = n_decades) +
ggplot2::theme_bw() +
ggplot2::theme(legend.position="bottom") +
ggplot2::labs(y = "Monthly mean Surface Water Flow (m\u00B3/s)",
x = "Month Number",
col = "Mean",
fill = "5%/95% Conf.-Interval",
title = unique(p1$Label))
plot(gg)
}
dev.off()
Groundwater
Grundwater Level
para_name <- "groundwater_level"
groundwater_level_master <- wp_data[[get_data_name(para_name)]] %>%
dplyr::filter(.data$Parameter == "GW-Stand") %>%
dplyr::mutate(Datum = as.Date(.data$Datum),
Year = as.integer(format(.data$Datum, "%Y")),
Decade = kwb.impetus::floor_decade(.data$Year),
Decade_Label = sprintf("%d - %d",
.data$Decade,
.data$Decade + 9),
Month = as.factor(format(.data$Datum, "%m"))) %>%
dplyr::left_join(get_master(wp_master[[get_master_name(para_name)]])) %>%
dplyr::mutate(Label = sprintf("%s (%s, %s, gok: %.1f m, fok-fuk: %.1f-%.1f m)",
.data$Messstellennummer,
.data$Bezirk,
.data$Grundwasserleiter,
as.double(.data$Gelaendeoberkante_GOK_m_ue_NHN),
as.double(.data$Filteroberkante_m_u_GOK),
as.double(.data$Filterunterkante_m_u_GOK)))
groundwater_level_master_agg <- groundwater_level_master %>%
dplyr::group_by(.data$Messstellennummer,
.data$Decade,
.data$Decade_Label,
.data$Month,
.data$Label) %>%
dplyr::summarise(Monatsmittelwert = mean(.data$Messwert),
q05 = as.numeric(quantile(.data$Messwert, probs = 0.05)),
q95 = as.numeric(quantile(.data$Messwert, probs = 0.95)),
)
readr::write_csv(groundwater_level_master_agg, sprintf("%s.csv", para_name))
decades <- tibble::tibble(names = unique(groundwater_level_master_agg$Decade_Label)[order(unique(groundwater_level_master_agg$Decade_Label))],
values = c('darkblue', 'blue', 'darkgreen', 'lightgreen', 'orange', 'red'))
kwb.utils::preparePdf(sprintf("%s.pdf", para_name),
landscape = TRUE,
width.cm = 29.7,
height.cm = 21)
ids <- unique(groundwater_level_master_agg$Messstellennummer)
for(id in ids) {
p1 <- groundwater_level_master_agg %>%
dplyr::filter(.data$Messstellennummer == id)
p2 <- groundwater_level_master %>%
dplyr::filter(.data$Year == max(.data$Year)) %>%
dplyr::group_by(.data$Messstellennummer,
.data$Year,
.data$Month,
.data$Label) %>%
dplyr::summarise(Monatsmittelwert = mean(.data$Messwert),
q05 = quantile(.data$Messwert, probs = 0.05),
q95 = quantile(.data$Messwert, probs = 0.95)) %>%
dplyr::filter(.data$Messstellennummer == id)
n_decades <- length(unique(p1$Decade_Label))
gg <- p1 %>%
ggplot2::ggplot(mapping = ggplot2::aes(x = as.integer(.data$Month),
y = .data$Monatsmittelwert,
col = as.factor(.data$Decade_Label))) +
#ggplot2::geom_point(alpha = 0.5) +
ggplot2::geom_ribbon(ggplot2::aes_string(ymin = "q05",
ymax = "q95",
fill = "Decade_Label"),
alpha = 0.1) +
kwb.impetus::scale_fill_decades(decades) +
kwb.impetus::scale_color_decades(decades) +
ggplot2::geom_point() +
ggplot2::geom_point(ggplot2::aes(x = as.integer(.data$Month),
y = .data$Monatsmittelwert),
data = p2,
col = "darkgrey",
alpha = 0.5,
inherit.aes = FALSE,
show.legend = FALSE) +
ggplot2::scale_x_continuous(breaks = 1:12, labels = 1:12, minor_breaks = NULL) +
#ggplot2::geom_boxplot() +
ggplot2::facet_wrap( ~ .data$Decade_Label,
nrow = 1,
ncol = n_decades) +
ggplot2::theme_bw() +
ggplot2::theme(legend.position="bottom") +
ggplot2::labs(y = "Monthly Mean Groundwater Level (m NN)",
x = "Month Number",
col = "Month",
fill = "5%/95% Conf.-Interval",
title = unique(p1$Label))
plot(gg)
}
dev.off()