PROMISCES: Reach (UBA)

Install R Package

# Register the kwb-r r-universe next to CRAN as package repositories
options(
  repos = c(
    kwbr = "https://kwb-r.r-universe.dev",
    CRAN = "https://cloud.r-project.org"
  )
)

# Install the wasserportal package from the repositories configured above
install.packages("wasserportal")

Get GW Quality from Wasserportal

# Attach the wasserportal package
library(wasserportal)

# Contents of "categories.csv" as shipped with the package
categories <- wasserportal::readPackageFile(file = "categories.csv")

# Contents of "cas_reach.csv", extended by the matching columns of `categories`
cas_reach <- dplyr::left_join(
  wasserportal::readPackageFile(file = "cas_reach.csv"),
  categories
)
#> Joining, by = "category"

# Wasserportal parameters (file read as UTF-8), restricted to the rows whose
# CAS number also occurs in `cas_reach`
cas_wasserportal <- dplyr::inner_join(
  wasserportal::readPackageFile(
    file = "cas_wasserportal.csv",
    encoding = "UTF-8"
  ),
  cas_reach,
  by = "cas_number"
)

### Remove duplicated Wasserportal substances (same CAS number but different
### names!): keep each non-missing CAS number once before joining `cas_reach`
cas_wasserportal_clean <- wasserportal::readPackageFile(
  file = "cas_wasserportal.csv"
) %>%
  dplyr::filter(!is.na(cas_number)) %>%
  dplyr::count(cas_number) %>%
  dplyr::select(-n) %>%
  dplyr::inner_join(cas_reach, by = "cas_number")


### For details see:
### https://kwb-r.github.io/wasserportal/articles/groundwater.html
### The JSON files read below are rebuilt automatically every day at 5 a.m. by
### continuous integration; for the build status see:
### https://github.com/KWB-R/wasserportal/actions/workflows/pkgdown.yaml

### GW quality (all available parameters!)
gwq_master <- jsonlite::fromJSON(
  "https://kwb-r.github.io/wasserportal/stations_gwq_master.json"
)

### Measurements, restricted to the parameters listed in `cas_wasserportal`:
### the inner join drops every row whose `Parameter` has no match there, so a
### separate `Parameter %in% cas_wasserportal$Parameter` filter is not needed
gwq_data <- jsonlite::fromJSON(
  "https://kwb-r.github.io/wasserportal/stations_gwq_data.json"
) %>%
  dplyr::inner_join(cas_wasserportal, by = "Parameter") %>%
  dplyr::mutate(
    Messstellennummer = as.character(Messstellennummer),
    ## CensorCode: either "lt" (less than) for a concentration below the
    ## detection limit (value is the detection limit) or "nc" (not censored)
    ## for a concentration above the detection limit.
    ## It must be derived before `Messwert` is overwritten below, because it
    ## relies on the sign of the raw value.
    CensorCode = dplyr::case_when(
      Messwert <= 0 ~ "lt",
      TRUE ~ "nc"
    ),
    ### Only two decimal places are exported by Wasserportal, but some
    ### substances have a lower detection limit, e.g. 0.002, which results in
    ### an export of -0.00. Thus the dummy detection limit 0.009999 was
    ### introduced (until fixed by Senate: Christoph will send an email to
    ### Matthias Schröder)
    Messwert = dplyr::case_when(
      Messwert < 0 ~ abs(Messwert),
      Messwert == 0 ~ 0.009999,
      TRUE ~ Messwert
    )
  ) %>%
  dplyr::left_join(gwq_master, by = c("Messstellennummer" = "Nummer"))

# Number of samples per substance (CAS number), split by censor code into one
# column per code, plus the share of samples above the detection limit ("nc")
gwq_subs <- gwq_data %>%
  dplyr::count(cas_number, CensorCode) %>%
  tidyr::pivot_wider(names_from = "CensorCode", values_from = "n") %>%
  dplyr::mutate(
    # A code that never occurs for a substance is NA after pivoting: count as 0
    lt = ifelse(is.na(lt), 0, lt),
    nc = ifelse(is.na(nc), 0, nc),
    n_total = lt + nc,
    percent_nc = 100 * nc / n_total
  ) %>%
  dplyr::rename(n_lt = lt, n_nc = nc) %>%
  # Attach category and substance name from the `cas_reach` table
  dplyr::left_join(
    cas_reach[, c("category", "category_name", "name", "cas_number")]
  ) %>%
  dplyr::select(
    category, category_name, name_uba = name, cas_number,
    n_lt, n_nc, n_total, percent_nc
  )
#> Joining, by = "cas_number"

# Export the summary as CSV and show it as an interactive table
readr::write_csv(gwq_subs, "gwq_subs.csv")
DT::datatable(gwq_subs, filter = "top", rownames = FALSE)

Show entries

Search:

category	category_name	name_uba	cas_number	n_lt	n_nc	n_total	percent_nc
1–7				2–5950	0–852	2–5968	0–26.2773722627737
7	Detected Non PMT/vPvM substances	Ethylbenzene	100-41-4	5943	23	5966	0.385517934964801
7	Detected Non PMT/vPvM substances	Styrene	100-42-5	446	0	446	0
7	Detected Non PMT/vPvM substances	1,4-dichlorobenzene	106-46-7	446	0	446	0
2	Established PMT/vPvM substances	1,2-dichloroethane	107-06-2	5141	247	5388	4.58426132145509
7	Detected Non PMT/vPvM substances	Toluene	108-88-3	5721	245	5966	4.10660408984244
1	Prioritized PMT/vPvM substances	Chlorobenzene	108-90-7	446	0	446	0
2	Established PMT/vPvM substances	Tris(2-chloroethyl)phosphate	115-96-8	2	0	2	0
2	Established PMT/vPvM substances	Anthracene	120-12-7	5933	29	5962	0.486413955048641
2	Established PMT/vPvM substances	Tetrachloroethylene	127-18-4	5551	417	5968	6.9872654155496
7	Detected Non PMT/vPvM substances	Nitrilotriacetic acid	139-13-9	1609	60	1669	3.59496704613541

Showing 1 to 10 of 33 entries

Previous 1 2 3 4 Next

# NOTE(review): this chunk was reconstructed. In the extracted text the whole
# chunk was collapsed onto three lines and every `<- <source> %>` sequence had
# been stripped, leaving only a bare `%` after each assigned name. The
# right-hand sides below were inferred from the columns each pipeline uses;
# confirm them against the original vignette source.

### One row per sample, reduced to the columns needed for the export
samples <- gwq_data %>%
  dplyr::rename(name_uba = name) %>%
  dplyr::select(category, category_name, cas_number, name_uba,
                Messstellennummer, Datum, CensorCode, Messwert, Einheit)

### Sample counts per substance and station, split by censor code
### NOTE(review): the input may have been `samples` instead of `gwq_data`;
### both contain the same rows and the three columns counted here
samples_by_para_and_station <- gwq_data %>%
  dplyr::count(.data$cas_number,
               .data$Messstellennummer,
               .data$CensorCode) %>%
  tidyr::pivot_wider(names_from = CensorCode, values_from = n) %>%
  dplyr::mutate(lt = ifelse(is.na(lt), 0, lt),
                nc = ifelse(is.na(nc), 0, nc),
                n_total = lt + nc,
                percent_nc = 100*nc/n_total) %>%
  dplyr::rename(n_lt = lt,
                n_nc = nc) %>%
  dplyr::left_join(
    cas_reach[, c("category", "category_name", "name", "cas_number")]
  ) %>%
  dplyr::rename(name_uba = name) %>%
  dplyr::select(category, category_name, name_uba, cas_number,
                Messstellennummer, n_lt, n_nc, n_total, percent_nc) %>%
  dplyr::left_join(gwq_master, by = c(Messstellennummer = "Nummer")) %>%
  dplyr::arrange(dplyr::desc(percent_nc))
#> Joining, by = "cas_number"

### Sample counts per category and station
samples_by_category_and_station <- samples_by_para_and_station %>%
  dplyr::group_by(.data$category,
                  .data$category_name,
                  .data$Messstellennummer) %>%
  dplyr::summarise(n_lt = sum(n_lt),
                   n_nc = sum(n_nc),
                   n_total = sum(n_total)) %>%
  dplyr::mutate(percent_nc = 100*n_nc/n_total) %>%
  dplyr::arrange(dplyr::desc(percent_nc))
#> `summarise()` has grouped output by 'category', 'category_name'. You can
#> override using the `.groups` argument.

### Per substance: number of stations with at least one sample above the
### detection limit
gwq_subs_stations_n_abovedetection <- samples_by_para_and_station %>%
  dplyr::filter(n_nc > 0) %>%
  dplyr::group_by(.data$cas_number) %>%
  dplyr::summarise(n_stations_abovedetection = dplyr::n())

### Per category and station: number of substances with at least one sample
### above the detection limit
gwq_subs_stations_n_paras_abovedetection <- samples_by_para_and_station %>%
  dplyr::filter(n_nc > 0) %>%
  dplyr::group_by(.data$category,
                  .data$category_name,
                  .data$Messstellennummer) %>%
  dplyr::summarise(n_paras_abovedetection = dplyr::n()) %>%
  dplyr::left_join(gwq_master, by = c("Messstellennummer" = "Nummer"))
#> `summarise()` has grouped output by 'category', 'category_name'. You can
#> override using the `.groups` argument.

### Same counts in wide format: one row per station, one "cat_<category>"
### column per category
gwq_subs_stations_n_paras_abovedetection_wide <-
  gwq_subs_stations_n_paras_abovedetection %>%
  dplyr::ungroup() %>%
  dplyr::select(Messstellennummer, category, n_paras_abovedetection) %>%
  tidyr::pivot_wider(names_from = "category",
                     names_prefix = "cat_",
                     values_from = "n_paras_abovedetection") %>%
  dplyr::left_join(gwq_master, by = c("Messstellennummer" = "Nummer"))

### Per substance: station and sample counts plus the derived percentages.
### The result stays grouped by category, category_name and name_uba (see the
### summarise() message below).
samples_by_para_and_station_n <- samples_by_para_and_station %>%
  dplyr::group_by(category, category_name, name_uba, cas_number) %>%
  dplyr::summarise(n_stations_sampled = dplyr::n(),
                   n_stations_total = length(unique(gwq_master$Nummer)),
                   n_lt = sum(n_lt),
                   n_nc = sum(n_nc),
                   n_total = sum(n_total)) %>%
  dplyr::left_join(gwq_subs_stations_n_abovedetection) %>%
  dplyr::mutate(
    n_stations_abovedetection = ifelse(is.na(n_stations_abovedetection),
                                       0,
                                       n_stations_abovedetection),
    n_abovedetection = ifelse(is.na(n_nc), 0, n_nc),
    n_belowdetection = ifelse(is.na(n_lt), 0, n_lt),
    percent_samples_abovedetection = 100*n_nc/n_total,
    percent_stations_abovedetection =
      100*n_stations_abovedetection/n_stations_total,
    percent_stations_sampled = 100*n_stations_sampled/n_stations_total
  ) %>%
  dplyr::select(category, category_name, name_uba, cas_number,
                n_stations_abovedetection, n_stations_sampled,
                n_stations_total, percent_stations_abovedetection,
                percent_stations_sampled, n_belowdetection, n_abovedetection,
                n_total, percent_samples_abovedetection) %>%
  dplyr::arrange(dplyr::desc(percent_stations_abovedetection),
                 dplyr::desc(percent_samples_abovedetection))
#> `summarise()` has grouped output by 'category', 'category_name', 'name_uba'.
#> You can override using the `.groups` argument.
#> Joining, by = "cas_number"

### Export data to EXCEL
gwq_data_list <- list(
  # NOTE(review): the name and source table of this first element were lost in
  # extraction; `cas_reach = cas_reach` is assumed because the element needs a
  # `name` column and the next element is named after its source table
  cas_reach = cas_reach %>%
    dplyr::rename(name_uba = name),
  cas_wasserportal = cas_wasserportal %>%
    dplyr::rename(name_uba = name,
                  name_wasserportal = Parameter),
  samples = samples,
  samples_by_para = gwq_subs %>%
    dplyr::arrange(dplyr::desc(percent_nc)),
  samples_by_para_and_station = samples_by_para_and_station,
  samples_by_para_and_station_n = samples_by_para_and_station_n,
  samples_by_stations_para_above =
    gwq_subs_stations_n_paras_abovedetection_wide,
  samples_by_category_and_station = samples_by_category_and_station)

openxlsx::write.xlsx(x = gwq_data_list,
                     file = "wasserportal_gwq_reach_data_v2.1.0.xlsx",
                     overwrite = TRUE)

Reach Substances in Wasserportal

Total

# Bar chart of the number of substances per category, comparing the rows of
# `cas_reach` (labelled "UBA") with those of `cas_wasserportal_clean`
# (labelled "Wasserportal"). The row count of each table is part of its
# legend label.
g <- cas_reach %>%
  dplyr::mutate(source = sprintf("UBA (n = %d)", nrow(cas_reach))) %>%
  dplyr::bind_rows(
    cas_wasserportal_clean %>%
      dplyr::mutate(
        source = sprintf("Wasserportal (n = %d)", nrow(cas_wasserportal_clean))
      )
  ) %>%
  ggplot2::ggplot(
    mapping = ggplot2::aes(
      x = forcats::as_factor(.data$category),
      fill = .data$source,
      col = .data$source
    )
  ) +
  # geom_bar() counts the rows per x value and stacks the bars by default.
  # The former geom_histogram(stat = "count") drew the same bars but warned
  # "Ignoring unknown parameters: binwidth, bins, pad".
  ggplot2::geom_bar(alpha = 0.5) +
  # Print each count above its (stacked) bar segment; after_stat(count)
  # replaces the deprecated `..count..` notation
  ggplot2::geom_text(
    stat = "count",
    ggplot2::aes(label = ggplot2::after_stat(count)),
    vjust = -0.5,
    position = "stack"
  ) +
  ggplot2::scale_x_discrete() +
  ggplot2::theme_bw() +
  ggplot2::theme(legend.position = "top") +
  ggplot2::labs(y = "Number of Substances", x = "Category")

# Show the plot
g

# Save the plot as a 14 cm x 11 cm JPEG file
ggplot2::ggsave(
  filename = "wasserportal_number-of-reach-substances.jpeg",
  plot = g,
  width = 14,
  height = 11,
  units = "cm"
)

# Optional interactive version:
# plotly::ggplotly(g)

By Station


# NOTE: `samples_by_para_and_station_n` is still grouped at this point, so
# dplyr::select() re-adds the grouping columns that are not selected
# explicitly (see the messages below). The grouping is deliberately left
# untouched here so that the resulting columns stay as they were.
#
# Columns are selected by bare name: using `.data$col` inside select() is
# deprecated in tidyselect contexts (it remains valid in arrange()).

# Number of sampled stations per substance
by_stations <- samples_by_para_and_station_n %>%
  dplyr::select(name_uba, n_stations_sampled)
#> Adding missing grouping variables: `category`, `category_name`

# Substance overview, ordered by category, then by descending number of
# samples and sampled stations, then by substance name
wasserportal_substances <- samples_by_para_and_station_n %>%
  dplyr::arrange(.data$category,
                 dplyr::desc(.data$n_total),
                 dplyr::desc(.data$n_stations_sampled),
                 .data$name_uba) %>%
  dplyr::select(category,
                name_uba,
                cas_number,
                n_total,
                n_stations_sampled)
#> Adding missing grouping variables: `category_name`

# Show the overview as an interactive table
DT::datatable(wasserportal_substances, filter = "top", rownames = FALSE)