Skip to contents

Provide some example R scripts locally

In order to test the functions of this package some R scripts are downloaded from GitHub and provided locally:

`%>%` <- magrittr::`%>%`

# Create a temporary folder
root <- kwb.utils::tempSubdirectory("test")

# Function to download a script file from a KWB package on GitHub
download_kwb_script <- function(repo, script)
{
  url <- sprintf(
    "https://raw.githubusercontent.com/%s/master/R/%s", 
    repo, script
  )
  destfile <- file.path(root, basename(url))
  download.file(url, destfile = destfile, mode = "wt")
  destfile
}

# Download three scripts to the temporary folder
scripts <- c(
  download_kwb_script("KWB-R/kwb.utils", "log.R"),
  download_kwb_script("KWB-R/kwb.utils", "main.R"),
  download_kwb_script("KWB-R/kwb.fakin", "plot_file_distribution.R")
)

Exported functions

analyse()

The parse tree is analysed. Each node of the tree is given the following attributes:

  • type
  • mode
  • class
  • length
  • text
  • is
  • n_modes
  • n_classes
  • path
  • fulltype
  • children

The idea probably was to use these information to extract objects of special interest from the parse tree (see below: get_elements_by_type())

x <- parse(scripts[1L])

result <- kwb.code::analyse(x)

arg_names()

This function returns the names of the arguments of a function:

kwb.code::arg_names(kwb.utils::selectColumns)
#> [1] "x"       "columns" "pattern" "drop"    "do.stop"

find_string_constants()

This function requires a directory of R scripts. All scripts are parsed. String constants that are used in the script are returned.

string_constants <- kwb.code::find_string_constants(root)
#> Splitting paths ... ok. (0.00 secs) 
#> Removing the first 8 path segments ... ok. (0.00 secs) 
#> Reading /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/log.R ... ok. (0.00 secs) 
#> Reading /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/main.R ... ok. (0.00 secs) 
#> Reading /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/plot_file_distribution.R ... ok. (0.00 secs)

knitr::kable(string_constants)
file_id folder_path file_name string count
file_01 test log.R 1
file_01 test log.R 1
file_01 test log.R *** 3
file_01 test log.R *** ok. 1
file_02 test main.R 2
file_02 test main.R *** The object is empty! 1
file_02 test main.R as requested but 1
file_02 test main.R _()$ 1
file_02 test main.R ! 1
file_02 test main.R . 1
file_02 test main.R ‘assign(x, value, envir = .GlobalEnv)’ 1
file_02 test main.R (returned in attribute ‘invalid’) 1
file_02 test main.R %s_%d 1
file_02 test main.R %s_1 1
file_02 test main.R $ 1
file_02 test main.R assignGlobally() 1
file_02 test main.R Bug in randomValuesWithSum(): The sum of generated values is not 1
file_02 test main.R Division by zero. Using substitute value of 1
file_02 test main.R invalid 1
file_02 test main.R parName 1
file_02 test main.R parVal 1
file_02 test main.R stringsAsFactors 1
file_02 test main.R stringsAsFactors must be TRUE or FALSE 1
file_02 test main.R The first element must not be NA 1
file_02 test main.R There are differences in parallel non-NA values 1
file_02 test main.R There are duplicate values: 1
file_03 test plot_file_distribution.R lr 1
file_03 test plot_file_distribution.R v 1

find_weaknesses_in_scripts()

Check for expressions in scripts that can be improved.

x <- parse(text = c(
  "texts <- c(", 
  "  paste('this is a very long', 'text'),",
  "  paste('this is a very long', 'string')",
  ")",
  "",
  "indices <- 1:length(texts)"
))

weaknesses <- kwb.code::find_weaknesses_in_scripts(
  x = list(test = x), 
  min_duplicate_frequency = 2L
)

knitr::kable(weaknesses)
file expression frequency recommendation
1 test 1:length(texts) 1 use seq_along()
3 test “this is a very long” 2 check for duplicated strings

get_elements_by_type

This function groups similar elements that are found in a parse tree.

# Parse an R script file (here, a file from kwb.utils)
x <- parse(scripts[1L])

# For each "type" of code segment, extract all occurrences
elements <- kwb.code::get_elements_by_type(x, result = result)

# Show all code blocks in curly braces
elements[["language|call|{|2|"]]
#> NULL

get_full_function_info()

This function analyses a list of parse trees each of which has been read from an R script.

It provides information on the functions that are defined in the scripts:

  • script: script name
  • functionName: function name
  • n.def: always 1 (?)
  • bodyClass: always “{” (?)
  • n.args: number of arguments
  • n.defaults: number of arguments with a default
  • n.expr: number of expressions
trees <- kwb.code::parse_scripts(root, dbg = FALSE)
function_info <- kwb.code::get_full_function_info(trees)
knitr::kable(function_info)
script functionName n.def bodyClass n.args n.defaults n.expr
log.R .log 1 { 1 0 1
log.R .logline 1 { 1 0 1
log.R .logok 1 { 1 1 1
log.R .logstart 1 { 2 1 1
plot_file_distribution.R arrange_file_in_depth_plots 1 { 3 0 5
main.R assignAll 1 { 3 1 2
main.R assignGlobally 1 { 2 0 2
main.R assignPackageObjects 1 { 1 0 1
main.R breakInSequence 1 { 2 1 2
main.R callWithStringsAsFactors 1 { 3 0 6
main.R extendLimits 1 { 4 2 3
main.R getEvenNumbers 1 { 1 0 1
main.R getGlobally 1 { 3 2 2
main.R getOddNumbers 1 { 1 0 1
main.R hsMatrixToListForm 1 { 6 4 4
main.R hsSafeName 1 { 2 0 3
main.R makeUnique 1 { 4 3 3
main.R naToLastNonNa 1 { 2 1 6
main.R parallelNonNA 1 { 2 0 12
plot_file_distribution.R plot_file_distribution 1 { 5 1 11
main.R quotient 1 { 4 2 6
main.R randomValuesWithSum 1 { 3 1 5
main.R recursiveNames 1 { 2 1 4
main.R warnIfEmpty 1 { 1 0 2

get_names_of_used_packages()

What packages are used in the scripts?

kwb.code::get_names_of_used_packages(root)
#> Analysing /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/log.R ... ok. (0.00 secs) 
#> Analysing /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/main.R ... ok. (0.00 secs) 
#> Analysing /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/plot_file_distribution.R ... ok. (0.00 secs)
#> character(0)

This function simply looks for calls to library(). It does not take into account functions that are called with :: as the following simple grep() reveals:

pattern <- "[^A-Za-z_.]([A-Za-z_.]+::[A-Za-z_.]+)[^A-Za-z_.]"

text <- grep(pattern, readLines(scripts[3L]), value = TRUE)

unique(kwb.utils::extractSubstring(pattern, text, index = 1))
#> [1] "kwb.pathdict::random_paths"        "kwb.utils::noFactorDataFrame"     
#> [3] "kwb.fakin::plot_file_distribution" "kwb.utils::assignPackageObjects"  
#> [5] "kwb.utils::preparePdfIf"           "kwb.utils::finishAndShowPdfIf"    
#> [7] "cowplot::plot_grid"

TODO: Use another function instead…

get_package_function_usage()

tree <- kwb.code::parse_scripts(root, dbg = FALSE)
function_usage <- kwb.code::get_package_function_usage(
  tree, 
  package = "kwb.utils"
)

knitr::kable(function_usage)
package name count explicit implicit
kwb.utils catIf 2 0 2
kwb.utils finishAndShowPdfIf 1 1 0
kwb.utils hsRestoreAttributes 1 0 1
kwb.utils isEvenNumber 1 0 1
kwb.utils isNullOrEmpty 1 0 1
kwb.utils isOddNumber 1 0 1
kwb.utils lastElement 1 0 1
kwb.utils preparePdfIf 1 1 0
kwb.utils rbindAll 1 0 1
kwb.utils recursiveNames 1 0 1
kwb.utils stringList 1 0 1
kwb.utils warningDeprecated 1 0 1

get_package_usage_per_script()

package_usage <- kwb.code::get_package_usage_per_script(
  root, 
  packages = "kwb.utils"
)
#> Reading /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/log.R ... ok. (0.00 secs) 
#> Reading /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/main.R ... ok. (0.00 secs) 
#> Reading /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/plot_file_distribution.R ... ok. (0.00 secs)
#> 3 scripts have been parsed.
#> Checking usage of kwb.utils ... 
#> ok. (0.01 secs)
knitr::kable(package_usage)
package script name count explicit implicit
kwb.utils log.R catIf 2 0 2
kwb.utils main.R hsRestoreAttributes 1 0 1
kwb.utils main.R isEvenNumber 1 0 1
kwb.utils main.R isNullOrEmpty 1 0 1
kwb.utils main.R isOddNumber 1 0 1
kwb.utils main.R lastElement 1 0 1
kwb.utils main.R rbindAll 1 0 1
kwb.utils main.R recursiveNames 1 0 1
kwb.utils main.R stringList 1 0 1
kwb.utils main.R warningDeprecated 1 0 1
kwb.utils plot_file_distribution.R finishAndShowPdfIf 1 1 0
kwb.utils plot_file_distribution.R preparePdfIf 1 1 0

get_string_constants_in_scripts()

string_constants <- kwb.code::get_string_constants_in_scripts(root)
#> Splitting paths ... ok. (0.00 secs) 
#> Removing the first 8 path segments ... ok. (0.00 secs) 
#> Reading /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/log.R ... ok. (0.00 secs) 
#> Reading /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/main.R ... ok. (0.00 secs) 
#> Reading /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/plot_file_distribution.R ... ok. (0.00 secs)
knitr::kable(string_constants)
file_id string count
file_01 1
file_01 1
file_01 *** 3
file_01 *** ok. 1
file_02 2
file_02 *** The object is empty! 1
file_02 as requested but 1
file_02 _()$ 1
file_02 ! 1
file_02 . 1
file_02 ‘assign(x, value, envir = .GlobalEnv)’ 1
file_02 (returned in attribute ‘invalid’) 1
file_02 %s_%d 1
file_02 %s_1 1
file_02 $ 1
file_02 assignGlobally() 1
file_02 Bug in randomValuesWithSum(): The sum of generated values is not 1
file_02 Division by zero. Using substitute value of 1
file_02 invalid 1
file_02 parName 1
file_02 parVal 1
file_02 stringsAsFactors 1
file_02 stringsAsFactors must be TRUE or FALSE 1
file_02 The first element must not be NA 1
file_02 There are differences in parallel non-NA values 1
file_02 There are duplicate values: 1
file_03 lr 1
file_03 v 1

parse_scripts()

x <- kwb.code::parse_scripts(root)
#> Reading /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/log.R ... ok. (0.00 secs) 
#> Reading /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/main.R ... ok. (0.00 secs) 
#> Reading /var/folders/3m/p59k4qdj0f17st0gn2cmj3640000gn/T//RtmpppsUPZ/test/plot_file_distribution.R ... ok. (0.00 secs)

str(x, 2)
#> List of 3
#>  $ log.R                   :length 4 expression(.log <- function(...) {  cat("***", ...) }, .logline <- function(...) {  cat("***", ..., "\n") }, .log| __truncated__ ...
#>   ..- attr(*, "n.lines")= int 50
#>  $ main.R                  :length 18 expression(randomValuesWithSum <- function(n, sumOfValues, names = seq_len(n)) {  breaks <- sort(sample(sumOfValu| __truncated__ ...
#>   ..- attr(*, "n.lines")= int 575
#>  $ plot_file_distribution.R:length 2 expression(plot_file_distribution <- function(file_data, start_path, n_root_parts,  ..., to_pdf = TRUE) {  data_s| __truncated__
#>   ..- attr(*, "n.lines")= int 93

to_full_script_info()

This function creates statistics about R scripts.

script_statistics <- kwb.code::to_full_script_info(trees)

knitr::kable(script_statistics)
script errors rows expr rpe <- fun
log.R 50 4 12.5 4 4
main.R 575 18 31.9 18 18
plot_file_distribution.R 93 2 46.5 2 2

walk_tree()

This function walks along a parse tree.

x <- parse(scripts[1L])

result <- kwb.code::walk_tree(x, dbg = FALSE)

Interesting base functions

getParseData()

"sum(kwb.utils::selectColumns(df, col))" %>% 
  parse(text = ., keep.source = TRUE) %>% 
  getParseData()
#>    line1 col1 line2 col2 id parent                token terminal          text
#> 26     1    1     1   38 26      0                 expr    FALSE              
#> 1      1    1     1    3  1      3 SYMBOL_FUNCTION_CALL     TRUE           sum
#> 3      1    1     1    3  3     26                 expr    FALSE              
#> 2      1    4     1    4  2     26                  '('     TRUE             (
#> 21     1    5     1   37 21     26                 expr    FALSE              
#> 7      1    5     1   28  7     21                 expr    FALSE              
#> 4      1    5     1   13  4      7       SYMBOL_PACKAGE     TRUE     kwb.utils
#> 5      1   14     1   15  5      7               NS_GET     TRUE            ::
#> 6      1   16     1   28  6      7 SYMBOL_FUNCTION_CALL     TRUE selectColumns
#> 8      1   29     1   29  8     21                  '('     TRUE             (
#> 9      1   30     1   31  9     11               SYMBOL     TRUE            df
#> 11     1   30     1   31 11     21                 expr    FALSE              
#> 10     1   32     1   32 10     21                  ','     TRUE             ,
#> 15     1   34     1   36 15     17               SYMBOL     TRUE           col
#> 17     1   34     1   36 17     21                 expr    FALSE              
#> 16     1   37     1   37 16     21                  ')'     TRUE             )
#> 22     1   38     1   38 22     26                  ')'     TRUE             )

"do.call(kwb.utils::selectColumns, list(df, col))" %>% 
  parse(text = ., keep.source = TRUE) %>% 
  getParseData()
#>    line1 col1 line2 col2 id parent                token terminal          text
#> 32     1    1     1   48 32      0                 expr    FALSE              
#> 1      1    1     1    7  1      3 SYMBOL_FUNCTION_CALL     TRUE       do.call
#> 3      1    1     1    7  3     32                 expr    FALSE              
#> 2      1    8     1    8  2     32                  '('     TRUE             (
#> 7      1    9     1   32  7     32                 expr    FALSE              
#> 4      1    9     1   17  4      7       SYMBOL_PACKAGE     TRUE     kwb.utils
#> 5      1   18     1   19  5      7               NS_GET     TRUE            ::
#> 6      1   20     1   32  6      7               SYMBOL     TRUE selectColumns
#> 8      1   33     1   33  8     32                  ','     TRUE             ,
#> 27     1   35     1   47 27     32                 expr    FALSE              
#> 12     1   35     1   38 12     14 SYMBOL_FUNCTION_CALL     TRUE          list
#> 14     1   35     1   38 14     27                 expr    FALSE              
#> 13     1   39     1   39 13     27                  '('     TRUE             (
#> 15     1   40     1   41 15     17               SYMBOL     TRUE            df
#> 17     1   40     1   41 17     27                 expr    FALSE              
#> 16     1   42     1   42 16     27                  ','     TRUE             ,
#> 21     1   44     1   46 21     23               SYMBOL     TRUE           col
#> 23     1   44     1   46 23     27                 expr    FALSE              
#> 22     1   47     1   47 22     27                  ')'     TRUE             )
#> 28     1   48     1   48 28     32                  ')'     TRUE             )