Topics


library(jsonld)
library(jsonlite)
library(magrittr)
library(codemetar)
library(purrr)
#> 
#> Attaching package: 'purrr'
#> The following object is masked from 'package:magrittr':
#> 
#>     set_names
#> The following object is masked from 'package:jsonlite':
#> 
#>     flatten
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(printr)
#> Registered S3 method overwritten by 'printr':
#>   method                from     
#>   knit_print.data.frame rmarkdown
library(tibble)
##################################################################
### Importing "codemetar.json" 
##################################################################

### Option 1:
### Not working correctly (-> gives a list that is twice as long as the one
### returned by jsonlite::fromJSON in Option 2 below)

# frame <- system.file("schema/frame_schema.json", package="codemetar")
# 
# corpus <- jsonld::jsonld_frame("codemetar.json", frame) %>%
#  jsonlite::fromJSON("codemetar.json",
#                              simplifyVector = FALSE) %>%
#   getElement("@graph")


#"https://kwb-r.github.io/pkgmeta/codemetar.json" %>%  
#  jsonlite::fromJSON() %>% 
#  jsonlite::write_json("codemetar.json")

### Option 2: working as expected

# Parse the codemeta file into nested R lists; simplifyVector = FALSE keeps
# every JSON array as a list instead of collapsing it to a vector/data frame.
corpus <- "codemetar.json" %>%
  jsonlite::fromJSON(simplifyVector = FALSE)

### One row per package: its identifier plus all of its keywords
### collapsed into a single comma-separated string
pkg_topics <- tibble::tibble(
  name = purrr::map_chr(corpus, "identifier"),
  topic = purrr::map_chr(
    corpus,
    function(pkg) paste(unlist(pkg[["keywords"]]), collapse = ", ")
  )
)
pkg_topics
name topic
aquanes.report r, rstats, data-import, data-visualisation, automated-reporting, data-aggregation, data-export, project-aquanes
kwb.base r, rstats
kwb.code r, rstats, project-fakin
kwb.datetime r, rstats
kwb.db r, rstats, data-import
kwb.default r, rstats
kwb.demeau r, rstats, groundwater-modelling, project-demeau
kwb.dwa.m150 r, rstats
kwb.en13508.2 r, rstats
kwb.endnote data-management, project-fakin, knowledge-repo, publication, literature-data-management
kwb.event r, rstats
kwb.fakin r, rstats, project-fakin, research-data-management
kwb.file r, rstats, project-fakin
kwb.geosalz r, rstats, data-import, data-cleaning, data-visualization, project-fakin, project-geosalz
kwb.hantush r, rstats, infiltration-basin, groundwater-mounding, groundwater-modelling, project-demeau
kwb.lca r, rstats, data-import, data-aggregation, data-visualisation, data-export, project-fakin, life-cycle-assessment
kwb.logger r, rstats, data-import
kwb.monitoring r, rstats
kwb.orcid r, rstats, data-management, publication, project-fakin
kwb.pkgbuild r, rstats, r-package-automation, data-management, publication, project-fakin
kwb.pkgstatus r, rstats, data-management, publication, project-fakin
kwb.plot r, rstats
kwb.pubs r, rstats, hugo-academic, knowledge-repo, project-fakin, publications, publication
kwb.qmra r, rstats, project-demoware, project-aquanes, qmra-webapp-backend-engine
kwb.readxl r, rstats, data-import, project-fakin
kwb.resilience r, rstats, project-fakin, project-networks4
kwb.test r, rstats
kwb.umberto r, rstats, data-import, data-aggregation, data-visualisation, lca-modelling, modelling, project-fakin, life-cycle-assessment
kwb.utils r, rstats
kwb.vs2dh r, rstats, groundwater-modelling, project-demeau
kwb.wtaq r, rstats, groundwater-modelling, drawdown-model, project-optiwells2
pkgmeta r, rstats, project-fakin, knowledge-repo
# Split the comma-separated topic strings into one row per topic, drop the
# generic "r" / "rstats" tags and count how often each remaining topic occurs
# (most frequent first). Columns are named word/freq for wordcloud2.
topics <- pkg_topics %>%
  tidyr::separate_rows(topic, sep = ",\\s+") %>%
  dplyr::filter(!topic %in% c("r", "rstats")) %>%
  dplyr::count(topic, name = "freq") %>%
  dplyr::arrange(dplyr::desc(freq)) %>%
  dplyr::rename(word = topic)


knitr::kable(topics)
word freq
project-fakin 14
data-import 7
publication 5
data-management 4
groundwater-modelling 4
data-aggregation 3
data-visualisation 3
knowledge-repo 3
project-demeau 3
data-export 2
life-cycle-assessment 2
project-aquanes 2
automated-reporting 1
data-cleaning 1
data-visualization 1
drawdown-model 1
groundwater-mounding 1
hugo-academic 1
infiltration-basin 1
lca-modelling 1
literature-data-management 1
modelling 1
project-demoware 1
project-geosalz 1
project-networks4 1
project-optiwells2 1
publications 1
qmra-webapp-backend-engine 1
r-package-automation 1
research-data-management 1
# Word cloud of the topic frequencies (word/freq table built above)
wordcloud2::wordcloud2(data = topics)

Authors


## Extract every "name" entry; compact() drops the NULLs produced by
## records that have no name before coercing to character
pkgs <- as.character(purrr::compact(purrr::map(corpus, "name")))

# Restrict the corpus to records that carry a non-empty identifier
keep <- purrr::map_lgl(corpus, function(pkg) length(pkg$identifier) > 0)
corpus <- corpus[keep]

## With the filtered corpus the typed extractor can be used directly
all_pkgs <- purrr::map_chr(corpus, "name")
head(all_pkgs)
#> [1] "aquanes.report: Automated Reporting Tool for Water Suppliers"
#> [2] "kwb.base: Functions supporting data analysis with R at KWB"  
#> [3] "kwb.code: Analyse Your R Code!"                              
#> [4] "kwb.datetime: Functions for date/time objects"               
#> [5] "kwb.db: Functions supporting data base access"               
#> [6] "kwb.default: Get and Set Function Argument Defaults"

## Number of distinct family names among the first-listed maintainers
## (nested extractor: maintainer -> first entry -> familyName)
first_maintainer_name <- list("maintainer", 1, "familyName")
length(unique(purrr::map(corpus, first_maintainer_name)))
#> [1] 3

Maintainer

## Packages per maintainer: family name of the first-listed maintainer of
## each package, tallied and sorted by descending frequency.
## Columns are named word/freq for wordcloud2.
maintainer <- corpus %>%
  purrr::map(list("maintainer", 1, "familyName")) %>%
  unlist() %>%
  tibble::enframe(name = NULL) %>%
  dplyr::count(value, sort = TRUE, name = "freq") %>%
  dplyr::rename(word = value)

knitr::kable(maintainer)
word freq
Sonnenberg 18
Rustler 13
Matzinger 1
# Word cloud of the maintainer frequencies
wordcloud2::wordcloud2(
  data = maintainer,
  size = 0.6,
  minSize = 0.4
)
## Distribution of the number of "author" entries per package
corpus %>%
  purrr::map_int(~ length(.x$author)) %>%
  tibble::enframe() %>%
  dplyr::count(value, sort = TRUE)
value n
1 32

## Distribution of the number of "contributor" entries per package
corpus %>%
  purrr::map_int(~ length(.x$contributor)) %>%
  tibble::enframe() %>%
  dplyr::count(value, sort = TRUE)
value n
0 23
1 8
2 1

Package Dependencies

Depends Imports

# Distribution of the number of "softwareRequirements" entries per package
corpus %>%
  purrr::map_int(~ length(.x$softwareRequirements)) %>%
  tibble::enframe() %>%
  dplyr::count(value, sort = TRUE)
value n
2 4
3 4
4 3
6 3
0 2
1 2
8 2
11 2
13 2
15 2
5 1
7 1
9 1
10 1
18 1
26 1

# Long table with one row per (package identifier, dependency name) pair.
#
# Three shapes of `softwareRequirements` are handled:
#   * a single, unboxed requirement record (the list itself has a `name`),
#   * a list of requirement records,
#   * missing / empty (the package gets one row with dep = NA).
#
# The previous version tested the same condition in both the `if` and the
# `else if`, so the multi-requirement branch could never run, and the first
# branch stored the package's own identifier as its dependency.
corpus %>%
  map_df(function(x) {
    reqs <- x$softwareRequirements

    if ("name" %in% names(reqs)) {
      ## single, unboxed dep: take the requirement's own name
      dep <- reqs[["name"]]
    } else if (length(reqs) > 0) {
      ## list of requirement records; records without a name become NA
      dep <- map_chr(reqs, "name", .default = NA_character_)
    } else {
      ## No requirements
      dep <- NA_character_
    }

    tibble(identifier = x$identifier, dep = dep)
  })
identifier dep
aquanes.report NA
kwb.base NA
kwb.code NA
kwb.datetime NA
kwb.db NA
kwb.default NA
kwb.demeau NA
kwb.dwa.m150 NA
kwb.en13508.2 NA
kwb.endnote NA
kwb.event NA
kwb.fakin NA
kwb.file NA
kwb.geosalz NA
kwb.hantush NA
kwb.lca NA
kwb.logger NA
kwb.monitoring NA
kwb.orcid NA
kwb.pkgbuild NA
kwb.pkgstatus NA
kwb.plot NA
kwb.pubs NA
kwb.qmra NA
kwb.readxl NA
kwb.resilience NA
kwb.test NA
kwb.umberto NA
kwb.utils NA
kwb.vs2dh NA
kwb.wtaq NA
pkgmeta NA

# One row per package: its identifier and (as a list-column) the names of all
# its software requirements joined by ","; packages without any requirement
# get NA instead of an empty string.
deps_df <- tibble(
  identifier = purrr::map_chr(corpus, "identifier"),
  deps = lapply(corpus, function(pkg) {
    dep_names <- purrr::map_chr(pkg[["softwareRequirements"]], "name")
    joined <- paste(dep_names, collapse = ",")

    if (stringr::str_length(joined) > 0) joined else NA_character_
  })
)

# Expand the comma-joined dependency strings to one row per
# (dependency, package) pair, sorted by dependency and then by package
dep_df <- deps_df %>%
  tidyr::separate_rows(deps, sep = ",") %>%
  dplyr::arrange(deps, identifier) %>%
  dplyr::select(dependancy = deps, used_in_r_package = identifier)




# How often is each dependency used across the packages?
# (most frequent first; columns named word/freq for wordcloud2)
dep_count <- dep_df %>%
  dplyr::count(dependancy, sort = TRUE, name = "freq") %>%
  dplyr::rename(word = dependancy)

knitr::kable(dep_count)
word freq
kwb.utils 21
dplyr 12
magrittr 11
stringr 11
R 7
tidyr 7
fs 6
ggplot2 6
data.table 5
lattice 5
kwb.datetime 4
kwb.event 4
kwb.plot 4
lubridate 4
readxl 4
xml2 4
digest 3
hydroGOF 3
janitor 3
kwb.db 3
openxlsx 3
plyr 3
readr 3
rlang 3
yaml 3
cellranger 2
crayon 2
forcats 2
ggforce 2
gh 2
httr 2
jsonlite 2
kwb.fakin 2
kwb.file 2
rmarkdown 2
rvest 2
shiny 2
tibble 2
usethis 2
withr 2
xts 2
NA 2
assertthat 1
blogdown 1
callr 1
codemetar 1
colorRamps 1
dbplyr 1
desc 1
dygraphs 1
EnvStats 1
fasttime 1
fst 1
glue 1
gplots 1
gridExtra 1
kwb.base 1
kwb.code 1
kwb.pkgstatus 1
kwb.read 1
kwb.readxl 1
kwb.vs2dh 1
latticeExtra 1
leaflet 1
magick 1
manipulate 1
openssl 1
pkgdown 1
plotrix 1
purrr 1
qmrparser 1
RCurl 1
rematch2 1
remotes 1
reshape 1
RMySQL 1
RODBC 1
rorcid 1
scales 1
sessioninfo 1
sfsmisc 1
shapefiles 1
shinythemes 1
sp 1
stringi 1
travis 1
treemap 1
wordcloud2 1
XML 1
# Word cloud of the dependency frequencies
wordcloud2::wordcloud2(
  data = dep_count,
  size = 1,
  minSize = 0.4
)

Depends Imports and Suggests

# Alternate approach: use a JSON-LD frame instead of purrr functions to
# subset the data. Note that this picks up every node of type
# SoftwareApplication mentioned in the file, i.e. Depends/Imports as well as
# Suggests.
dep_frame <- '{
  "@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld",
"@explicit": "true",
"name": {}
}'

# Frame the file, keep the SoftwareApplication nodes of the resulting graph
# and count how often each name occurs (most frequent first; columns named
# word/freq for wordcloud2).
dep_sug_count <- jsonld::jsonld_frame("codemetar.json", dep_frame) %>%
  jsonlite::fromJSON() %>%
  getElement("@graph") %>%
  dplyr::filter(type == "SoftwareApplication") %>%
  dplyr::group_by(name) %>%
  dplyr::tally(sort = TRUE, name = "freq") %>%
  dplyr::rename(word = name)

knitr::kable(dep_sug_count)
word freq
testthat 28
kwb.utils 22
knitr 18
rmarkdown 18
covr 15
dplyr 12
magrittr 11
stringr 11
ggplot2 8
R 7
tidyr 7
fs 6
kwb.datetime 6
data.table 5
lattice 5
lubridate 5
kwb.event 4
kwb.plot 4
readxl 4
xml2 4
digest 3
hydroGOF 3
janitor 3
jsonlite 3
kwb.db 3
openxlsx 3
plyr 3
readr 3
rlang 3
tibble 3
withr 3
yaml 3
cellranger 2
crayon 2
devtools 2
forcats 2
ggforce 2
gh 2
httr 2
kwb.fakin 2
kwb.file 2
rvest 2
shiny 2
usethis 2
xts 2
assertthat 1
blogdown 1
callr 1
codemetar 1
colorRamps 1
compare 1
dbplyr 1
desc 1
dygraphs 1
EnvStats 1
fasttime 1
fst 1
ggrepel 1
glue 1
gplots 1
gridExtra 1
jsonld 1
kwb.base 1
kwb.code 1
kwb.endnote 1
kwb.logger 1
kwb.pkgstatus 1
kwb.read 1
kwb.readxl 1
kwb.vs2dh 1
latticeExtra 1
leaflet 1
magick 1
manipulate 1
networkD3 1
openssl 1
pkgdown 1
pkgnet 1
PKI 1
plotrix 1
printr 1
purrr 1
qmrparser 1
RCurl 1
rematch2 1
remotes 1
reshape 1
RMySQL 1
RODBC 1
rorcid 1
scales 1
sessioninfo 1
sfsmisc 1
shapefiles 1
shinythemes 1
sp 1
stringi 1
travis 1
treemap 1
wordcloud2 1
XML 1
# Word cloud of the dependency + suggestion frequencies
wordcloud2::wordcloud2(
  data = dep_sug_count,
  size = 1,
  minSize = 0.4
)

7 Session Info

Platform

name value
version R version 4.1.0 (2021-05-18)
os macOS Catalina 10.15.7
system x86_64, darwin17.0
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz UTC
date 2021-07-12

Packages

#>  package     * version date       lib source                              
#>  assertthat    0.2.1   2019-03-21 [1] CRAN (R 4.1.0)                      
#>  bslib         0.2.5.1 2021-05-18 [1] CRAN (R 4.1.0)                      
#>  cachem        1.0.5   2021-05-15 [1] CRAN (R 4.1.0)                      
#>  cli           3.0.0   2021-06-30 [1] CRAN (R 4.1.0)                      
#>  codemetar   * 0.3.0   2021-07-12 [1] Github (ropensci/codemetar@5d8a3c5) 
#>  crayon        1.4.1   2021-02-08 [1] CRAN (R 4.1.0)                      
#>  curl          4.3.2   2021-06-23 [1] CRAN (R 4.1.0)                      
#>  DBI           1.1.1   2021-01-15 [1] CRAN (R 4.1.0)                      
#>  desc          1.3.0   2021-03-05 [1] CRAN (R 4.1.0)                      
#>  digest        0.6.27  2020-10-24 [1] CRAN (R 4.1.0)                      
#>  dplyr       * 1.0.7   2021-06-18 [1] CRAN (R 4.1.0)                      
#>  ellipsis      0.3.2   2021-04-29 [1] CRAN (R 4.1.0)                      
#>  evaluate      0.14    2019-05-28 [1] CRAN (R 4.1.0)                      
#>  fansi         0.5.0   2021-05-25 [1] CRAN (R 4.1.0)                      
#>  fastmap       1.1.0   2021-01-25 [1] CRAN (R 4.1.0)                      
#>  fs            1.5.0   2020-07-31 [1] CRAN (R 4.1.0)                      
#>  generics      0.1.0   2020-10-31 [1] CRAN (R 4.1.0)                      
#>  glue          1.4.2   2020-08-27 [1] CRAN (R 4.1.0)                      
#>  highr         0.9     2021-04-16 [1] CRAN (R 4.1.0)                      
#>  htmltools     0.5.1.1 2021-01-22 [1] CRAN (R 4.1.0)                      
#>  htmlwidgets   1.5.3   2020-12-10 [1] CRAN (R 4.1.0)                      
#>  jquerylib     0.1.4   2021-04-26 [1] CRAN (R 4.1.0)                      
#>  jsonld      * 2.2     2020-05-27 [1] CRAN (R 4.1.0)                      
#>  jsonlite    * 1.7.2   2020-12-09 [1] CRAN (R 4.1.0)                      
#>  knitr         1.33    2021-04-24 [1] CRAN (R 4.1.0)                      
#>  lifecycle     1.0.0   2021-02-15 [1] CRAN (R 4.1.0)                      
#>  magrittr    * 2.0.1   2020-11-17 [1] CRAN (R 4.1.0)                      
#>  memoise       2.0.0   2021-01-26 [1] CRAN (R 4.1.0)                      
#>  pillar        1.6.1   2021-05-16 [1] CRAN (R 4.1.0)                      
#>  pingr         2.0.1   2020-06-22 [1] CRAN (R 4.1.0)                      
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.1.0)                      
#>  pkgdown       1.6.1   2020-09-12 [1] CRAN (R 4.1.0)                      
#>  printr      * 0.1.1   2021-01-27 [1] CRAN (R 4.1.0)                      
#>  processx      3.5.2   2021-04-30 [1] CRAN (R 4.1.0)                      
#>  ps            1.6.0   2021-02-28 [1] CRAN (R 4.1.0)                      
#>  purrr       * 0.3.4   2020-04-17 [1] CRAN (R 4.1.0)                      
#>  R6            2.5.0   2020-10-28 [1] CRAN (R 4.1.0)                      
#>  ragg          1.1.3   2021-06-09 [1] CRAN (R 4.1.0)                      
#>  Rcpp          1.0.7   2021-07-07 [1] CRAN (R 4.1.0)                      
#>  rlang         0.4.11  2021-04-30 [1] CRAN (R 4.1.0)                      
#>  rmarkdown     2.9     2021-06-15 [1] CRAN (R 4.1.0)                      
#>  rprojroot     2.0.2   2020-11-15 [1] CRAN (R 4.1.0)                      
#>  sass          0.4.0   2021-05-12 [1] CRAN (R 4.1.0)                      
#>  sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 4.1.0)                      
#>  stringi       1.6.2   2021-05-17 [1] CRAN (R 4.1.0)                      
#>  stringr       1.4.0   2019-02-10 [1] CRAN (R 4.1.0)                      
#>  systemfonts   1.0.2   2021-05-11 [1] CRAN (R 4.1.0)                      
#>  textshaping   0.3.5   2021-06-09 [1] CRAN (R 4.1.0)                      
#>  tibble      * 3.1.2   2021-05-16 [1] CRAN (R 4.1.0)                      
#>  tidyr         1.1.3   2021-03-03 [1] CRAN (R 4.1.0)                      
#>  tidyselect    1.1.1   2021-04-30 [1] CRAN (R 4.1.0)                      
#>  utf8          1.2.1   2021-03-12 [1] CRAN (R 4.1.0)                      
#>  V8            3.4.2   2021-05-01 [1] CRAN (R 4.1.0)                      
#>  vctrs         0.3.8   2021-04-29 [1] CRAN (R 4.1.0)                      
#>  withr         2.4.2   2021-04-18 [1] CRAN (R 4.1.0)                      
#>  wordcloud2    0.2.2   2021-07-12 [1] Github (Lchiffon/wordcloud2@8a12a3b)
#>  xfun          0.24    2021-06-15 [1] CRAN (R 4.1.0)                      
#>  yaml          2.2.1   2020-02-01 [1] CRAN (R 4.1.0)                      
#> 
#> [1] /Users/runner/work/_temp/Library
#> [2] /Library/Frameworks/R.framework/Versions/4.1/Resources/library

Pandoc

pandoc_directory pandoc_version
/usr/local/bin/pandoc 2.7.3