This package provides example files containing artificial path data. The files are located in the `extdata` folder of the package. The function `extdata_file()` returns the full path to a file in this folder when given the filename:
(example_file <- fakin.path.app:::extdata_file("example_file_info_1.csv"))
#> [1] "/Users/runner/work/_temp/Library/fakin.path.app/extdata/example_file_info_1.csv"
The package contains functions named `read_*`. These functions are described below.
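`read_csv()` reads a csv file either with `read.table()` (`version = 1`) or with `data.table::fread()` (`version = 2`). It reports what it does and how long it takes. Note that the two versions do not return identical results (compare the `last_access` column in the output below).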
data_1 <- kwb.fakin::read_csv(example_file, version = 1)
#> Reading '/Users/runner/work/_temp/Library/fakin.path.app/extdata/example_file_info_1.csv' with utils::read.table() ... ok. (0.00s)
data_2 <- kwb.fakin::read_csv(example_file, version = 2)
#> Reading '/Users/runner/work/_temp/Library/fakin.path.app/extdata/example_file_info_1.csv' with data.table::fread() ... ok. (0.00s)
head(data_1, 3)
#> path size last_access type
#> 1 radio 0 2016-12-31T12:46:27Z directory
#> 2 radio/effect.png 729340270 2009-08-08T05:41:27Z file
#> 3 radio/person.pdf 812027222 2011-01-17T18:35:28Z file
head(data_2, 3)
#> path size last_access type
#> 1 radio 0 2016-12-31 12:46:27 directory
#> 2 radio/effect.png 729340270 2009-08-08 05:41:27 file
#> 3 radio/person.pdf 812027222 2011-01-17 18:35:28 file
identical(data_1, data_2)
#> [1] FALSE
`read_file_paths()` reads a text file that contains any kind of file path information. The function is intended for files containing file paths only, one path per line, as well as for files that contain additional information such as file size, creation time or last modification time. File sizes are assumed to be given in bytes and are converted to mebibytes (MiB, 1 MiB = 2^20 bytes) in order to avoid very large numbers that require a non-standard data type.
file_info <- fakin.path.app::read_file_paths(example_file)
#> Encodings guessed by readr:
#> [1] "ASCII"
#> Selected encoding: ascii
#> Reading file with read_file_info() ... Reading '/Users/runner/work/_temp/Library/fakin.path.app/extdata/example_file_info_1.csv' with data.table::fread() ... ok. (0.00s)
#> Converting file size to MiB ... ok. (0.00s)
#> ok. (0.00s)
head(file_info)
#> path size last_access type
#> 1 radio 0.0000 2016-12-31 12:46:27 directory
#> 2 radio/effect.png 695.5531 2009-08-08 05:41:27 file
#> 3 radio/person.pdf 774.4095 2011-01-17 18:35:28 file
#> 4 radio/please.R 111.6977 2016-05-21 02:07:34 file
#> 5 radio/provide 0.0000 2012-10-24 05:32:42 directory
#> 6 radio/provide/double 0.0000 2018-04-05 02:02:54 directory
When reading files that only contain file paths (without any additional columns), the function adds the columns `type` and `size`. The `type` is guessed from the filename extension, whereas the `size` is set to 0 for directories and to 2^20 for files. You can specify the `fileEncoding` assumed for the file. When it is set to `NULL`, the function uses `utils::localeToCharset()` to guess an encoding. The encoding is passed to the function `file()` that is used to open an explicit connection. This connection is then given to `readLines()`. The lines read are assumed to be full file paths. Backslashes are converted to slashes. By default, all paths are sorted.
# Helper: read one of the example files in the extdata folder of kwb.fakin,
# given only its file name
read_example <- function(x) {
  example_path <- kwb.fakin::extdata_file(x)
  fakin.path.app::read_file_paths(example_path)
}
file_paths <- read_example("example_file_paths.csv")
#> Encodings guessed by readr:
#> [1] "ASCII"
#> Selected encoding: ascii
#> Reading file with read_paths_only() ... Selected encoding: 'UTF-8'
#> Reading paths from '/Users/runner/work/_temp/Library/kwb.fakin/extdata/example_file_paths.csv' ... ok. (0.00s)
#> 546 lines have been read.
#> Sorting paths ... ok. (0.00s)
#> Guessing file path type ... ok. (0.00s)
#> ok. (0.01s)
folder_paths <- read_example("example_folder_paths.csv")
#> Encodings guessed by readr:
#> [1] "ASCII"
#> Selected encoding: ascii
#> Reading file with read_paths_only() ... Selected encoding: 'UTF-8'
#> Reading paths from '/Users/runner/work/_temp/Library/kwb.fakin/extdata/example_folder_paths.csv' ... ok. (0.00s)
#> 113 lines have been read.
#> Sorting paths ... ok. (0.00s)
#> Guessing file path type ... ok. (0.00s)
#> ok. (0.00s)
head(file_paths)
#> path type size
#> 1 radio/effect.png file 1048576
#> 2 radio/person.pdf file 1048576
#> 3 radio/please.R file 1048576
#> 4 radio/provide/double/between.xls file 1048576
#> 5 radio/provide/double/corner/describe.png file 1048576
#> 6 radio/provide/double/corner/select/final.pdf file 1048576
head(folder_paths)
#> path type size
#> 1 radio directory 0
#> 2 radio/provide directory 0
#> 3 radio/provide/double directory 0
#> 4 radio/provide/double/corner directory 0
#> 5 radio/provide/double/corner/select directory 0
#> 6 radio/provide/double/design directory 0
`read_path_information()` reads file information files specified by a name pattern matching the files to be read. It calls `read_file_info()` in a loop over the files and returns a list whose elements are named according to the file names. The content of a file matching `path-info_<yyyy-mm-dd_HHMM>_<name>.csv` appears in the list as element `<name>`.
file_infos <- kwb.fakin::read_path_information(
file_info_dir = kwb.fakin::extdata_file(""),
pattern = "^example_file_info.*\\.csv$",
sep = ";"
)
#> No files matching '^example_file_info.*\.csv$' in
#> '/Users/runner/work/_temp/Library/kwb.fakin/extdata/'
#> Available files:
#> 'example_file_paths.csv'
#> 'example_folder_paths.csv'
#> 'example_sizeable_sankey.R'
#> 'folder_rules'
#> 'main.c'
#> 'makefile'
#> 'powershell_template_search_query.txt'
#> 'property_names.txt'
#> 'testcalls_1.R'
#> 'testcalls_2.R'
#> 'testcalls_3.R'
The functions treated so far have in common that they read file information from files. The following functions can be used to create these files. They use `dir()` or more advanced functions from the fs package.
kwb.fakin::list_files
#> function (root, file, use_batch = TRUE)
#> {
#> kwb.utils::safePath(dirname(file))
#> cat_time("Start")
#> if (use_batch) {
#> batchfile <- write_batch_list_files(root, file)
#> system2(batchfile)
#> }
#> else {
#> locale_all <- strsplit(Sys.getlocale("LC_ALL"), ";")[[1]]
#> Sys.setlocale("LC_ALL", "C")
#> on.exit(for (locale_one in locale_all) {
#> parts <- strsplit(locale_one, "=")[[1]]
#> Sys.setlocale(parts[1], parts[2])
#> })
#> paths <- kwb.utils::catAndRun(paste("Scanning all files in",
#> root), dir(root, all.files = TRUE, full.names = TRUE,
#> recursive = TRUE, no.. = TRUE))
#> kwb.utils::writeText(paths, file, "paths to")
#> }
#> cat_time("End")
#> }
#> <bytecode: 0x7ff53f6d14a0>
#> <environment: namespace:kwb.fakin>
`get_recursive_file_info()` is just a wrapper around `fs::dir_info()` with `recurse = TRUE`. The function is run inside a call to `kwb.utils::catAndRun()`.
root <- system.file(package = "kwb.fakin")
file_info <- fakin.path.app::get_recursive_file_info(root)
#> Getting file information on files below /Users/runner/work/_temp/Library/kwb.fakin ... ok. (0.01s)
head(file_info[, 1:3])
#> # A tibble: 6 × 3
#> path type size
#> <fs::path> <fct> <fs::b>
#> 1 /Users/runner/work/_temp/Library/kwb.fakin/DESCRIPTION file 1.98K
#> 2 /Users/runner/work/_temp/Library/kwb.fakin/INDEX file 2.24K
#> 3 /Users/runner/work/_temp/Library/kwb.fakin/LICENSE file 1.08K
#> 4 /Users/runner/work/_temp/Library/kwb.fakin/Meta directory 256
#> 5 /Users/runner/work/_temp/Library/kwb.fakin/Meta/Rd.rds file 1.56K
#> 6 /Users/runner/work/_temp/Library/kwb.fakin/Meta/features.rds file 121