When doing data cleaning, we often have to clean up strings of text. Doing this in R used to be a pain until the birth of the stringr package. For example, it contains a function called str_trim() that allows you to easily remove any leading and trailing whitespace from a string. It also contains a function called str_sub() that allows you to easily extract substrings from any string. Run the following code to see these two functions in action.
library(stringr)
# Demo of stringr::str_trim() and stringr::str_sub() on a sample string.
# Lines starting with `##` show the console output of the call above them.

# sample string padded with a tab/space on the left and a space/newline on
# the right
gmlang <- "\t Guangming Lang \n"

# trim whitespace on both sides
str_trim(gmlang)
## [1] "Guangming Lang"

# trim whitespace on the left side
str_trim(gmlang, side = "left")
## [1] "Guangming Lang \n"

# trim whitespace on the right side
str_trim(gmlang, side = "right")
## [1] "\t Guangming Lang"

# re-assign trimmed value to gmlang
gmlang <- str_trim(gmlang)

# extract first name (omitting `start` gives the same result)
str_sub(gmlang, start = 1, end = 9)
## [1] "Guangming"
str_sub(gmlang, end = 9)
## [1] "Guangming"

# extract last name (omitting `end` gives the same result)
str_sub(gmlang, start = 11, end = 14)
## [1] "Lang"
str_sub(gmlang, start = 11)
## [1] "Lang"

# extract first and last name at the same time
# (`start` and `end` are vectorised, one substring per start/end pair)
str_sub(gmlang, start = c(1, 11), end = c(9, 14))
## [1] "Guangming" "Lang"

# something fun :)
# one substring per start position: every suffix of the string
str_sub(gmlang, start = seq_len(str_length(gmlang)))
##  [1] "Guangming Lang" "uangming Lang"  "angming Lang"   "ngming Lang"
##  [5] "gming Lang"     "ming Lang"      "ing Lang"       "ng Lang"
##  [9] "g Lang"         " Lang"          "Lang"           "ang"
## [13] "ng"             "g"

# one substring per end position: every prefix of the string
str_sub(gmlang, end = seq_len(str_length(gmlang)))
##  [1] "G"              "Gu"             "Gua"            "Guan"
##  [5] "Guang"          "Guangm"         "Guangmi"        "Guangmin"
##  [9] "Guangming"      "Guangming "     "Guangming L"    "Guangming La"
## [13] "Guangming Lan"  "Guangming Lang"