This function creates a data.frame with features based on a text vector.
See also
Other Data Wrangling: balance_data(), categ_reducer(), cleanText(), date_cuts(), date_feats(), file_name(), formatHTML(), holidays(), impute(), left(), normalize(), num_abbr(), ohe_commas(), ohse(), quants(), removenacols(), replaceall(), replacefactor(), textTokenizer(), vector2text(), year_month(), zerovar()
Other Text Mining: cleanText(), ngrams(), remove_stopwords(), replaceall(), sentimentBreakdown(), textCloud(), textTokenizer(), topics_rake()
Examples
textFeats("Bernardo Lares")
#> # A tibble: 1 × 12
#> text length ncap nvoc nexcl nquest nats npunct ndig nword nsymb nsmile
#> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 Bernard… 14 2 5 0 0 0 0 0 2 0 0
textFeats("Bernardo Lares 123!", prc = TRUE)
#> # A tibble: 1 × 23
#> text length ncap nvoc nexcl nquest nats npunct ndig nword nsymb nsmile
#> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 Bernard… 19 2 5 1 0 0 1 3 3 0 0
#> # ℹ 11 more variables: length_pct <dbl>, ncap_pct <dbl>, nvoc_pct <dbl>,
#> # nexcl_pct <dbl>, nquest_pct <dbl>, nats_pct <dbl>, npunct_pct <dbl>,
#> # ndig_pct <dbl>, nword_pct <dbl>, nsymb_pct <dbl>, nsmile_pct <dbl>
textFeats("I'm 100% Lares...", contains = c("Lares", "lares"))
#> # A tibble: 1 × 14
#> text length ncap nvoc nexcl nquest nats npunct ndig nword nsymb nsmile
#> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 I'm 100… 17 2 3 0 0 0 5 3 3 1 0
#> # ℹ 2 more variables: Lares <int>, lares <int>
textFeats(c("GREAT library!!", "Have you tried this 2?", "Happy faces :D :-)"))
#> # A tibble: 3 × 12
#> text length ncap nvoc nexcl nquest nats npunct ndig nword nsymb nsmile
#> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 GREAT l… 15 5 4 2 0 0 2 0 2 0 0
#> 2 Have yo… 22 1 7 0 1 0 1 1 5 0 0
#> 3 Happy f… 18 2 3 0 0 0 4 0 4 0 2