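This function creates a data.frame with features extracted from a text vector, such as character length and counts of capital letters, vowels, punctuation marks, digits, and words.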
See also
Other Data Wrangling:
balance_data(),
categ_reducer(),
cleanText(),
date_cuts(),
date_feats(),
file_name(),
formatHTML(),
holidays(),
impute(),
left(),
normalize(),
num_abbr(),
ohe_commas(),
ohse(),
quants(),
removenacols(),
replaceall(),
replacefactor(),
textTokenizer(),
vector2text(),
year_month(),
zerovar()
Other Text Mining:
cleanText(),
ngrams(),
remove_stopwords(),
replaceall(),
sentimentBreakdown(),
textCloud(),
textTokenizer(),
topics_rake()
Examples
textFeats("Bernardo Lares")
#> # A tibble: 1 × 12
#> text length ncap nvoc nexcl nquest nats npunct ndig nword nsymb nsmile
#> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 Bernard… 14 2 5 0 0 0 0 0 2 0 0
textFeats("Bernardo Lares 123!", prc = TRUE)
#> # A tibble: 1 × 23
#> text length ncap nvoc nexcl nquest nats npunct ndig nword nsymb nsmile
#> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 Bernard… 19 2 5 1 0 0 1 3 3 0 0
#> # ℹ 11 more variables: length_pct <dbl>, ncap_pct <dbl>, nvoc_pct <dbl>,
#> # nexcl_pct <dbl>, nquest_pct <dbl>, nats_pct <dbl>, npunct_pct <dbl>,
#> # ndig_pct <dbl>, nword_pct <dbl>, nsymb_pct <dbl>, nsmile_pct <dbl>
textFeats("I'm 100% Lares...", contains = c("Lares", "lares"))
#> # A tibble: 1 × 14
#> text length ncap nvoc nexcl nquest nats npunct ndig nword nsymb nsmile
#> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 I'm 100… 17 2 3 0 0 0 5 3 3 1 0
#> # ℹ 2 more variables: Lares <int>, lares <int>
textFeats(c("GREAT library!!", "Have you tried this 2?", "Happy faces :D :-)"))
#> # A tibble: 3 × 12
#> text length ncap nvoc nexcl nquest nats npunct ndig nword nsymb nsmile
#> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 GREAT l… 15 5 4 2 0 0 2 0 2 0 0
#> 2 Have yo… 22 1 7 0 1 0 1 1 5 0 0
#> 3 Happy f… 18 2 3 0 0 0 4 0 4 0 2
