5  String Functions

6 String Functions in Base R

Strings (or character vectors) are a fundamental data type in R, and base R provides a variety of functions to manipulate and analyze strings. Here are some commonly used string functions in base R:

# Create a character vector
text <- c("Hello, World!", "R is great", "Data Science")

6.1 Basic String Functions

# nchar(): Returns the number of characters in a string
nchar(text)
[1] 13 10 12
# tolower(): Converts a string to lowercase
tolower(text)
[1] "hello, world!" "r is great"    "data science" 
# toupper(): Converts a string to uppercase
toupper(text)
[1] "HELLO, WORLD!" "R IS GREAT"    "DATA SCIENCE" 
# substr(): Extracts a substring from a string
substr(text, start = 1, stop = 5)
[1] "Hello" "R is " "Data "
# paste(): Concatenates strings together
paste("Hello", "R", sep = " ")
[1] "Hello R"
# paste0(): Concatenates strings without any separator
paste0("Hello", "R")
[1] "HelloR"
# trimws(): Trims leading and trailing whitespace from a string
trimws("   Hello, World!   ")
[1] "Hello, World!"

6.2 Pattern Matching and Replacement

# grep(): Searches for patterns in a character vector and returns the indices of matches
grep("R", text)
[1] 2
# grepl(): Searches for patterns and returns a logical vector indicating matches
grepl("R", text)
[1] FALSE  TRUE FALSE
# sub(): Replaces the first occurrence of a pattern in a string
sub("great", "awesome", text)
[1] "Hello, World!" "R is awesome"  "Data Science" 
# gsub(): Replaces all occurrences of a pattern in a string
gsub(" ", "_", text)
[1] "Hello,_World!" "R_is_great"    "Data_Science" 

6.3 String Splitting and Joining

# strsplit(): Splits a string into substrings based on a specified delimiter
strsplit(text, split = " ")
[[1]]
[1] "Hello," "World!"

[[2]]
[1] "R"     "is"    "great"

[[3]]
[1] "Data"    "Science"
# unlist(): Converts a list to a vector (useful after strsplit)
unlist(strsplit(text, split = " "))
[1] "Hello,"  "World!"  "R"       "is"      "great"   "Data"    "Science"

6.4 Regular Expressions

# regexpr(): Finds the position of the first match of a pattern in a string
regexpr("R", text)
[1] -1  1 -1
attr(,"match.length")
[1] -1  1 -1
attr(,"index.type")
[1] "chars"
attr(,"useBytes")
[1] TRUE
# gregexpr(): Finds the positions of all matches of a pattern in a string
gregexpr(" ", text)
[[1]]
[1] 7
attr(,"match.length")
[1] 1
attr(,"index.type")
[1] "chars"
attr(,"useBytes")
[1] TRUE

[[2]]
[1] 2 5
attr(,"match.length")
[1] 1 1
attr(,"index.type")
[1] "chars"
attr(,"useBytes")
[1] TRUE

[[3]]
[1] 5
attr(,"match.length")
[1] 1
attr(,"index.type")
[1] "chars"
attr(,"useBytes")
[1] TRUE
# regmatches(): Extracts the matched substrings based on the positions returned by regexpr or gregexpr
regmatches(text, gregexpr(" ", text))
[[1]]
[1] " "

[[2]]
[1] " " " "

[[3]]
[1] " "

6.5 Example Usage

# Example: Convert text to lowercase and replace spaces with underscores
cleaned_text <- tolower(gsub(" ", "_", text))
cleaned_text
[1] "hello,_world!" "r_is_great"    "data_science" 

These functions provide a solid foundation for string manipulation in R. For more advanced string operations, consider using the stringr package, which offers a more consistent and user-friendly interface for string handling.

7 Overall differences

We’ll begin with a lookup table between the most important stringr functions and their base R equivalents.

library(stringr)
data_stringr_base_diff <- tibble::tribble(
  ~stringr,                                        ~base_r,
  "str_detect(string, pattern)",                   "grepl(pattern, x)",
  "str_dup(string, times)",                        "strrep(x, times)",
  "str_extract(string, pattern)",                  "regmatches(x, m = regexpr(pattern, text))",
  "str_extract_all(string, pattern)",              "regmatches(x, m = gregexpr(pattern, text))",
  "str_length(string)",                            "nchar(x)",
  "str_locate(string, pattern)",                   "regexpr(pattern, text)",
  "str_locate_all(string, pattern)",               "gregexpr(pattern, text)",
  "str_match(string, pattern)",                    "regmatches(x, m = regexec(pattern, text))",
  "str_order(string)",                             "order(...)",
  "str_replace(string, pattern, replacement)",     "sub(pattern, replacement, x)",
  "str_replace_all(string, pattern, replacement)", "gsub(pattern, replacement, x)",
  "str_sort(string)",                              "sort(x)",
  "str_split(string, pattern)",                    "strsplit(x, split)",
  "str_sub(string, start, end)",                   "substr(x, start, stop)",
  "str_subset(string, pattern)",                   "grep(pattern, x, value = TRUE)",
  "str_to_lower(string)",                          "tolower(x)",
  "str_to_title(string)",                          "tools::toTitleCase(text)",
  "str_to_upper(string)",                          "toupper(x)",
  "str_trim(string)",                              "trimws(x)",
  "str_which(string, pattern)",                    "grep(pattern, x)",
  "str_wrap(string)",                              "strwrap(x)"
)

# create MD table, arranged alphabetically by stringr fn name
data_stringr_base_diff |> 
  dplyr::mutate(dplyr::across(
      .cols = everything(),
      .fns = ~ paste0("`", .x, "`"))
  ) |> 
  dplyr::arrange(stringr) |> 
  dplyr::rename(`base R` = base_r) |> 
  gt::gt() |> 
  gt::fmt_markdown(columns = everything()) |> 
  gt::tab_options(column_labels.font.weight = "bold")
stringr base R
str_detect(string, pattern) grepl(pattern, x)
str_dup(string, times) strrep(x, times)
str_extract(string, pattern) regmatches(x, m = regexpr(pattern, text))
str_extract_all(string, pattern) regmatches(x, m = gregexpr(pattern, text))
str_length(string) nchar(x)
str_locate(string, pattern) regexpr(pattern, text)
str_locate_all(string, pattern) gregexpr(pattern, text)
str_match(string, pattern) regmatches(x, m = regexec(pattern, text))
str_order(string) order(...)
str_replace(string, pattern, replacement) sub(pattern, replacement, x)
str_replace_all(string, pattern, replacement) gsub(pattern, replacement, x)
str_sort(string) sort(x)
str_split(string, pattern) strsplit(x, split)
str_sub(string, start, end) substr(x, start, stop)
str_subset(string, pattern) grep(pattern, x, value = TRUE)
str_to_lower(string) tolower(x)
str_to_title(string) tools::toTitleCase(text)
str_to_upper(string) toupper(x)
str_trim(string) trimws(x)
str_which(string, pattern) grep(pattern, x)
str_wrap(string) strwrap(x)

Overall the main differences between base R and stringr are:

  1. stringr functions start with str_ prefix; base R string functions have no consistent naming scheme.

  2. The order of inputs is usually different between base R and stringr. In base R, the pattern to match usually comes first; in stringr, the string to manipulate always comes first. This makes stringr easier to use in pipes, and with lapply() or purrr::map().

  3. Functions in stringr tend to do less, where many of the string processing functions in base R have multiple purposes.

  4. The inputs and outputs of stringr functions have been carefully designed. For example, the output of str_locate() can be fed directly into str_sub(); the same is not true of regexpr() and substr().

  5. Base functions use arguments (like perl, fixed, and ignore.case) to control how the pattern is interpreted. To avoid dependence between arguments, stringr instead uses helper functions (like fixed(), regex(), and coll()).

Next we’ll walk through each of the functions, noting the similarities and important differences. These examples are adapted from the stringr documentation and here they are contrasted with the analogous base R operations.

8 Detect matches

8.1 str_detect(): Detect the presence or absence of a pattern in a string

Suppose you want to know whether each word in a vector of fruit names contains an “a”.

fruit <- c("apple", "banana", "pear", "pineapple")

# base
grepl(pattern = "a", x = fruit)
[1] TRUE TRUE TRUE TRUE
#stringr
stringr::str_detect(fruit, pattern = "a")
[1] TRUE TRUE TRUE TRUE

In base you would use grepl() (see the “l” and think logical) while in stringr you use str_detect() (see the verb “detect” and think of a yes/no action).

8.2 str_which(): Find positions matching a pattern

Now you want to identify the positions of the words in a vector of fruit names that contain an “a”.

# base
grep(pattern = "a", x = fruit)
[1] 1 2 3 4
# stringr
str_which(fruit, pattern = "a")
[1] 1 2 3 4

In base you would use grep() while in stringr you use str_which() (by analogy to which()).

8.3 str_count(): Count the number of matches in a string

How many “a”s are in each fruit?

# base
# Count the matches in each string. gregexpr() signals "no match" with a
# single -1 (match.length is also -1), so count the positive match lengths
# instead of taking the vector's length, which would report 1 for no match.
loc <- gregexpr(pattern = "a", text = fruit, fixed = TRUE)
vapply(loc, function(x) sum(attr(x, "match.length") > 0L), integer(1))
[1] 1 3 1 1
# stringr
str_count(fruit, pattern = "a")
[1] 1 3 1 1

This information can be gleaned from gregexpr() in base, but you need to look at the match.length attribute as the vector uses a length-1 integer vector (-1) to indicate no match.

8.4 str_locate(): Locate the position of patterns in a string

Within each fruit, where does the first “p” occur? Where are all of the “p”s?

fruit3 <- c("papaya", "lime", "apple")

# base
str(gregexpr(pattern = "p", text = fruit3))
List of 3
 $ : int [1:2] 1 3
  ..- attr(*, "match.length")= int [1:2] 1 1
  ..- attr(*, "index.type")= chr "chars"
  ..- attr(*, "useBytes")= logi TRUE
 $ : int -1
  ..- attr(*, "match.length")= int -1
  ..- attr(*, "index.type")= chr "chars"
  ..- attr(*, "useBytes")= logi TRUE
 $ : int [1:2] 2 3
  ..- attr(*, "match.length")= int [1:2] 1 1
  ..- attr(*, "index.type")= chr "chars"
  ..- attr(*, "useBytes")= logi TRUE
# stringr
str_locate(fruit3, pattern = "p")
     start end
[1,]     1   1
[2,]    NA  NA
[3,]     2   2
str_locate_all(fruit3, pattern = "p")
[[1]]
     start end
[1,]     1   1
[2,]     3   3

[[2]]
     start end

[[3]]
     start end
[1,]     2   2
[2,]     3   3

8.5 str_subset(): Keep strings matching a pattern, or find positions

We may want to retrieve strings that contain a pattern of interest:

# base
grep(pattern = "g", x = fruit, value = TRUE)
character(0)
# stringr
str_subset(fruit, pattern = "g")
character(0)

8.6 str_extract(): Extract matching patterns from a string

We may want to pick out certain patterns from a string, for example, the digits in a shopping list:

shopping_list <- c("apples x4", "bag of flour", "10", "milk x2")

# base
matches <- regexpr(pattern = "\\d+", text = shopping_list) # digits
regmatches(shopping_list, m = matches)
[1] "4"  "10" "2" 
matches <- gregexpr(pattern = "[a-z]+", text = shopping_list) # words
regmatches(shopping_list, m = matches)
[[1]]
[1] "apples" "x"     

[[2]]
[1] "bag"   "of"    "flour"

[[3]]
character(0)

[[4]]
[1] "milk" "x"   
# stringr
str_extract(shopping_list, pattern = "\\d+") 
[1] "4"  NA   "10" "2" 
str_extract_all(shopping_list, "[a-z]+")
[[1]]
[1] "apples" "x"     

[[2]]
[1] "bag"   "of"    "flour"

[[3]]
character(0)

[[4]]
[1] "milk" "x"   

Base R requires the combination of regexpr() with regmatches(); but note that the strings without matches are dropped from the output. stringr provides str_extract() and str_extract_all(), and the output is always the same length as the input.

8.7 str_match(): Extract matched groups from a string

We may also want to extract groups from a string. Here I’m going to use the scenario from Section 14.4.3 in R for Data Science.

head(sentences)
[1] "The birch canoe slid on the smooth planks." 
[2] "Glue the sheet to the dark blue background."
[3] "It's easy to tell the depth of a well."     
[4] "These days a chicken leg is a rare dish."   
[5] "Rice is often served in round bowls."       
[6] "The juice of lemons makes fine punch."      
# Match an article ("a"/"A" or "the"/"The") followed by a space and the next
# run of non-space characters. The character class must be [Aa]; "[A]a" would
# only ever match the literal text "Aa".
noun <- "([Aa]|[Tt]he) ([^ ]+)"

# base
matches <- regexec(pattern = noun, text = head(sentences))
do.call("rbind", regmatches(x = head(sentences), m = matches))
     [,1]        [,2]  [,3]     
[1,] "The birch" "The" "birch"  
[2,] "the sheet" "the" "sheet"  
[3,] "the depth" "the" "depth"  
[4,] "a chicken" "a"   "chicken"
[5,] "The juice" "The" "juice"  
# stringr
str_match(head(sentences), pattern = noun)
     [,1]        [,2]  [,3]     
[1,] "The birch" "The" "birch"  
[2,] "the sheet" "the" "sheet"  
[3,] "the depth" "the" "depth"  
[4,] "a chicken" "a"   "chicken"
[5,] NA          NA    NA       
[6,] "The juice" "The" "juice"  

As with extracting the full match, base R requires the combination of two functions, and inputs with no matches are dropped from the output.

9 Manage lengths

9.1 str_length(): The length of a string

To determine the length of a string, base R uses nchar() (not to be confused with length() which gives the length of vectors, etc.) while stringr uses str_length().

# base
nchar(letters)
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# stringr
str_length(letters)
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

There are some subtle differences between base and stringr here. nchar() requires a character vector, so it will return an error if used on a factor. str_length() can handle a factor input.

# base
nchar(factor("abc")) 
Error in nchar(factor("abc")): 'nchar()' requires a character vector
# stringr
str_length(factor("abc"))
[1] 3

Note that “characters” is a poorly defined concept, and technically both nchar() and str_length() return the number of code points. This is usually the same as what you’d consider to be a character, but not always:

x <- c("\u00fc", "u\u0308")
x
[1] "ü" "ü"
nchar(x)
[1] 1 2
str_length(x)
[1] 1 2

9.2 str_pad(): Pad a string

To pad a string to a certain width, use stringr’s str_pad(). In base R you could use sprintf(), but unlike str_pad(), sprintf() has many other functionalities.

# base
sprintf("%30s", "Sriram")
[1] "                        Sriram"
sprintf("%-30s", "Sriram")
[1] "Sriram                        "
# "both" is not as straightforward

# stringr
rbind(
  str_pad("Sriram", 30, "left"),
  str_pad("Sriram", 30, "right"),
  str_pad("Sriram", 30, "both")
)
     [,1]                            
[1,] "                        Sriram"
[2,] "Sriram                        "
[3,] "            Sriram            "

9.3 str_trunc(): Truncate a character string

The stringr package provides an easy way to truncate a character string: str_trunc(). Base R has no function to do this directly.

x <- "This string is moderately long"

# stringr
rbind(
  str_trunc(x, 20, "right"),
  str_trunc(x, 20, "left"),
  str_trunc(x, 20, "center")
)
     [,1]                  
[1,] "This string is mo..."
[2,] "...s moderately long"
[3,] "This stri...ely long"

9.4 str_trim(): Trim whitespace from a string

Similarly, stringr provides str_trim() to trim whitespace from a string. This is analogous to base R’s trimws(), added in R 3.2.0.

# base
trimws(" String with trailing and leading white space\t")
[1] "String with trailing and leading white space"
trimws("\n\nString with trailing and leading white space\n\n")
[1] "String with trailing and leading white space"
# stringr
str_trim(" String with trailing and leading white space\t")
[1] "String with trailing and leading white space"
str_trim("\n\nString with trailing and leading white space\n\n")
[1] "String with trailing and leading white space"

The stringr function str_squish() allows for extra whitespace within a string to be trimmed (in contrast to str_trim() which removes whitespace at the beginning and/or end of string). In base R, one might take advantage of gsub() to accomplish the same effect.

# stringr
str_squish(" String with trailing, middle,   and leading white space\t")
[1] "String with trailing, middle, and leading white space"
str_squish("\n\nString with excess, trailing and leading white space\n\n")
[1] "String with excess, trailing and leading white space"

9.5 str_wrap(): Wrap strings into nicely formatted paragraphs

strwrap() and str_wrap() use different algorithms. str_wrap() uses the famous Knuth-Plass algorithm.

gettysburg <- "Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal."

# base
cat(strwrap(gettysburg, width = 60), sep = "\n")
Four score and seven years ago our fathers brought forth on
this continent, a new nation, conceived in Liberty, and
dedicated to the proposition that all men are created
equal.
# stringr
cat(str_wrap(gettysburg, width = 60), "\n")
Four score and seven years ago our fathers brought forth
on this continent, a new nation, conceived in Liberty, and
dedicated to the proposition that all men are created equal. 

Note that strwrap() returns a character vector with one element for each line; str_wrap() returns a single string containing line breaks.

10 Mutate strings

10.1 str_replace(): Replace matched patterns in a string

To replace certain patterns within a string, stringr provides the functions str_replace() and str_replace_all(). The base R equivalents are sub() and gsub(). Note the difference in default input order again.

fruits <- c("apple", "banana", "pear", "pineapple")

# base
sub("[aeiou]", "-", fruits)
[1] "-pple"     "b-nana"    "p-ar"      "p-neapple"
gsub("[aeiou]", "-", fruits)
[1] "-ppl-"     "b-n-n-"    "p--r"      "p-n--ppl-"
# stringr
str_replace(fruits, "[aeiou]", "-")
[1] "-pple"     "b-nana"    "p-ar"      "p-neapple"
str_replace_all(fruits, "[aeiou]", "-")
[1] "-ppl-"     "b-n-n-"    "p--r"      "p-n--ppl-"

10.2 case: Convert case of a string

Both stringr and base R have functions to convert to upper and lower case. Title case is also provided in stringr.

dog <- "The quick brown dog"

# base
toupper(dog)
[1] "THE QUICK BROWN DOG"
tolower(dog)
[1] "the quick brown dog"
tools::toTitleCase(dog)
[1] "The Quick Brown Dog"
# stringr
str_to_upper(dog)
[1] "THE QUICK BROWN DOG"
str_to_lower(dog)
[1] "the quick brown dog"
str_to_title(dog)
[1] "The Quick Brown Dog"

In stringr we can control the locale, while in base R locale distinctions are controlled with global variables. Therefore, the output of your base R code may vary across different computers with different global settings.

# stringr
str_to_upper("i") # English
[1] "I"
str_to_upper("i", locale = "tr") # Turkish
[1] "İ"

11 Join and split

11.1 str_flatten(): Flatten a string

If we want to take elements of a string vector and collapse them to a single string we can use the collapse argument in paste() or use stringr’s str_flatten().

# base
paste0(letters, collapse = "-")
[1] "a-b-c-d-e-f-g-h-i-j-k-l-m-n-o-p-q-r-s-t-u-v-w-x-y-z"
# stringr
str_flatten(letters, collapse = "-")
[1] "a-b-c-d-e-f-g-h-i-j-k-l-m-n-o-p-q-r-s-t-u-v-w-x-y-z"

The advantage of str_flatten() is that it always returns a single string (a character vector of length one); to predict the return length of paste() you must carefully read all arguments.

11.2 str_dup(): duplicate strings within a character vector

To duplicate strings within a character vector use strrep() (in R 3.3.0 or greater) or str_dup():

fruit <- c("apple", "pear", "banana")

# base
strrep(fruit, 2)
[1] "appleapple"   "pearpear"     "bananabanana"
strrep(fruit, 1:3)
[1] "apple"              "pearpear"           "bananabananabanana"
# stringr
str_dup(fruit, 2)
[1] "appleapple"   "pearpear"     "bananabanana"
str_dup(fruit, 1:3)
[1] "apple"              "pearpear"           "bananabananabanana"

11.3 str_split(): Split up a string into pieces

To split a string into pieces with breaks based on a particular pattern match, stringr uses str_split() and base R uses strsplit(). Unlike most other base R string functions, strsplit() starts with the character vector to modify.

fruits <- c(
  "apples and oranges and pears and bananas",
  "pineapples and mangos and guavas"
)
# base
strsplit(fruits, " and ")
[[1]]
[1] "apples"  "oranges" "pears"   "bananas"

[[2]]
[1] "pineapples" "mangos"     "guavas"    
# stringr
str_split(fruits, " and ")
[[1]]
[1] "apples"  "oranges" "pears"   "bananas"

[[2]]
[1] "pineapples" "mangos"     "guavas"    

The stringr package’s str_split() allows for more control over the split, including restricting the number of possible matches.

# stringr
str_split(fruits, " and ", n = 3)
[[1]]
[1] "apples"            "oranges"           "pears and bananas"

[[2]]
[1] "pineapples" "mangos"     "guavas"    
str_split(fruits, " and ", n = 2)
[[1]]
[1] "apples"                        "oranges and pears and bananas"

[[2]]
[1] "pineapples"        "mangos and guavas"

11.4 str_glue(): Interpolate strings

It’s often useful to interpolate varying values into a fixed string. In base R, you can use sprintf() for this purpose; stringr provides a wrapper for the more general purpose glue package.

name <- "Fred"
age <- 50
anniversary <- as.Date("1991-10-12")

# base
sprintf(
  "My name is %s my age next year is %s and my anniversary is %s.", 
  name,
  age + 1,
  format(anniversary, "%A, %B %d, %Y")
)
[1] "My name is Fred my age next year is 51 and my anniversary is Saturday, October 12, 1991."
# stringr
str_glue(
  "My name is {name}, ",
  "my age next year is {age + 1}, ",
  "and my anniversary is {format(anniversary, '%A, %B %d, %Y')}."
)
My name is Fred, my age next year is 51, and my anniversary is Saturday, October 12, 1991.

12 Order strings

12.1 str_order(): Order or sort a character vector

Both base R and stringr have separate functions to order and sort strings.

# base
order(letters)
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
[26] 26
sort(letters)
 [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
[20] "t" "u" "v" "w" "x" "y" "z"
# stringr
str_order(letters)
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
[26] 26
str_sort(letters)
 [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
[20] "t" "u" "v" "w" "x" "y" "z"

Some options in str_order() and str_sort() don’t have analogous base R options. For example, the stringr functions have a locale argument to control how to order or sort. In base R the locale is a global setting, so the outputs of sort() and order() may differ across different computers. For example, in the Norwegian alphabet, å comes after z:

x <- c("å", "a", "z")
str_sort(x)
[1] "a" "å" "z"
str_sort(x, locale = "no")
[1] "a" "z" "å"

The stringr functions also have a numeric argument to sort digits numerically instead of treating them as strings.

# stringr
x <- c("100a10", "100a5", "2b", "2a")
str_sort(x)
[1] "100a10" "100a5"  "2a"     "2b"    
str_sort(x, numeric = TRUE)
[1] "2a"     "2b"     "100a5"  "100a10"
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Read the AE dataset from the SAS file, then keep only the USUBJID, AEDECOD
# and AEBODSYS columns for the string examples that follow.
ae <- haven::read_sas("./data/sdtm/ae.sas7bdat")
ae1 <- select(ae, USUBJID, AEDECOD, AEBODSYS)

# SAS FIND() counterpart: str_detect() returns TRUE for rows whose AEDECOD
# matches the regular expression. Each assignment below overwrites ae2, so
# only the result of the last filter remains afterwards.

# "HER" anywhere in the string
ae2 <- filter(ae1, str_detect(AEDECOD, "HER"))

# "HIATUS " at the start of the string (^ anchors the beginning)
ae2 <- filter(ae1, str_detect(AEDECOD, "^HIATUS "))

# "HERNIA" at the end of the string ($ anchors the end)
ae2 <- filter(ae1, str_detect(AEDECOD, "HERNIA$"))

# SAS SUBSTR() counterpart: str_sub() extracts by character position, and a
# negative start counts back from the end of the string.
ae2 <- mutate(
  ae1,
  newvar1 = str_sub(AEDECOD, 1, 6),  # characters 1 to 6
  newvar2 = str_sub(AEDECOD, 2, 6),  # characters 2 to 6
  newvar3 = str_sub(AEDECOD, -3),    # last three characters
  newvar4 = str_length(AEDECOD)      # number of characters in the term
)

# SAS CAT-style concatenation: join AEDECOD and AEBODSYS with "/" using
# stringr's str_c() and, for comparison, base R's paste().
ae2 <- mutate(
  ae1,
  newvar1 = str_c(AEDECOD, AEBODSYS, sep = "/"),
  newvar2 = paste(AEDECOD, AEBODSYS, sep = "/")
)

# SAS SCAN() counterpart: stringr's word() picks out the n-th word of each
# AEDECOD value; here the first and the second word.
ae2 <- mutate(
  ae1,
  newvar1 = word(AEDECOD, 1),
  newvar2 = word(AEDECOD, 2)
)

# Case conversion of AEDECOD: lower, upper, title and sentence case.
ae2 <- mutate(
  ae1,
  newvar1 = str_to_lower(AEDECOD),
  newvar2 = str_to_upper(AEDECOD),
  newvar3 = str_to_title(AEDECOD),
  newvar4 = str_to_sentence(AEDECOD)
)

# Whitespace handling: trim the ends, remove all spaces, collapse repeats.

a <- "  this is    my String   "
# Drop leading and trailing whitespace only.
b <- str_trim(a)
# Remove every space character.
# NOTE(review): this previously replaced " " with " ", which left the string
# unchanged; removing all spaces (as SAS COMPRESS does) is the assumed intent.
# Confirm with the author.
c <- str_replace_all(a, " ", "")
# Trim the ends and collapse internal runs of whitespace to a single space.
d <- str_squish(a)