Skip to content

Latest commit

 

History

History
20 lines (15 loc) · 655 Bytes

EXAMPLE.md

File metadata and controls

20 lines (15 loc) · 655 Bytes

R code used to build the `obama` example dataset shipped with the cleanNLP package:

library(cleanNLP)
library(sotu)

# Initialise the spaCy backend with the en_core_web_sm model and entity_flag
# switched on.
cnlp_init_spacy(model_name = "en_core_web_sm", entity_flag = TRUE)

# Logical mask over sotu_meta rows whose president is Barack Obama; the same
# mask selects the matching elements of sotu_text.
is_obama <- sotu_meta$president == "Barack Obama"
obama_meta <- sotu_meta[is_obama, ]
obama_text <- sotu_text[is_obama]

# Annotate the selected texts, passed as raw strings (as_strings = TRUE), with
# document ids doc2009 through doc2016 and the matching metadata rows attached.
obama <- cnlp_annotate(
  obama_text,
  as_strings = TRUE,
  doc_ids = sprintf("doc%d", 2009:2016),
  meta = obama_meta
)

# Work around an encoding issue attributed to BOMs coming from spaCy: declare
# the lemma strings as latin1, then convert them to ASCII. With sub = "", any
# bytes that cannot be converted are replaced by the empty string (dropped).
lemma <- obama$token$lemma
Encoding(lemma) <- "latin1"
obama$token$lemma <- iconv(lemma, from = "latin1", to = "ASCII", sub = "")

# Store the annotation under the object name "obama" in the package data
# directory.
save(list = "obama", file = "cleanNLP/data/obama.rda")