Extract Pattern-Defined Properties from Strings

extract_properties(x, patterns, replacements, as_data_frame = FALSE)

Arguments

x

character vector of strings in which to look for properties and their values

patterns

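character vector of regular expression patterns to be matched against the elements of x. Parts of a pattern enclosed in parentheses can be referred to in the corresponding replacement with \1, \2, ...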

replacements

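character vector of property:value pairs (or combinations of the form prop1:value1+prop2:value2+...), each corresponding to the pattern at the same position in patterns. Values may refer to parts of the pattern enclosed in parentheses with \1, \2, ...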

as_data_frame

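logical. If TRUE, a data frame is returned with one row per element of x, a column name holding the original string, and one column per property. If FALSE (the default), a character vector of property strings, named by the elements of x, is returned.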

Examples

# Define patterns to be matched against
patterns <- c(
  "[Bb]ericht",
  "[- ](\\d+)$",
  "Abschluss",
  "Zwischen",
  "_HS$"
)

# Define property:value pairs (or even prop1:value1+prop2:value2+...)
# referring to parts of the pattern enclosed in parentheses with \1, \2, ...
replacements <- c(
  "type:report",
  "number:\\1",
  "stage:final",
  "stage:intermediate",
  "author:Sonnenberg+reviewed:true"
)

# Define strings in which to look for properties and their values
x <- c("Bericht", "Bericht 1", "Abschlussbericht", "Zwischenbericht_HS")

# Extract property values as strings
extract_properties(x = x, patterns, replacements)
#>                                                          Bericht 
#>                                                    "type:report" 
#>                                                        Bericht 1 
#>                                           "type:report+number:1" 
#>                                                 Abschlussbericht 
#>                                        "type:report+stage:final" 
#>                                               Zwischenbericht_HS 
#> "type:report+stage:intermediate+author:Sonnenberg+reviewed:true" 

# Arrange the properties in a data frame
extract_properties(x = x, patterns, replacements, as_data_frame = TRUE)
#>                 name     author number reviewed        stage   type
#> 1            Bericht       <NA>   <NA>     <NA>         <NA> report
#> 2          Bericht 1       <NA>      1     <NA>         <NA> report
#> 3   Abschlussbericht       <NA>   <NA>     <NA>        final report
#> 4 Zwischenbericht_HS Sonnenberg   <NA>     true intermediate report