#########################################
#            TOPIC MODELLING            #
#########################################
install.packages("topicmodels")
install.packages("SnowballC")
library(SnowballC)     # wordStem() for French stemming
library(topicmodels)   # LDA()
library(tm)            # DocumentTermMatrix(), TermDocumentMatrix()
library(RWeka)         # NGramTokenizer()
library(wordcloud)     # wordcloud()
library(RColorBrewer)  # brewer.pal()

path  <- "C:/Users/JM/Documents/project"
quest <- read.csv("C:/Users/JM/Documents/questionnaire.csv", sep = ";")

# Keep the non-missing answers to the first open question
q1       <- quest["question1"]
question <- q1[!is.na(q1[, 1]), 1]
# Project-specific stopwords (empty for now)
my.stopwords <- c()

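# generateCorpus() is used below but is not defined in this file; it is
# assumed to live elsewhere in the project. A minimal sketch of what such a
# helper could look like, assuming it builds a tm corpus, lowercases the text
# and removes punctuation, numbers, French stopwords and the custom stopwords:
generateCorpus <- function(text, extra.stopwords = c()) {
  corpus <- VCorpus(VectorSource(text))
  corpus <- tm_map(corpus, content_transformer(tolower))
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, removeNumbers)
  corpus <- tm_map(corpus, removeWords, c(stopwords("french"), extra.stopwords))
  corpus <- tm_map(corpus, stripWhitespace)
  corpus
}
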
#########################################
# Generation of the document-term matrix
#########################################

# Build a DTM with n-gram tokenisation and French stemming
DTM <- DocumentTermMatrix(generateCorpus(question, my.stopwords),
                          control = list(tokenize = NGramTokenizer,
                                         stemming = function(x) wordStem(x, language = "french")))
# Drop empty documents (rows with no remaining terms); LDA() cannot handle them
rowTotals <- apply(DTM, 1, sum)
DTM.new   <- DTM[rowTotals > 0, ]

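# Quick sanity check on the matrix (illustrative only; the 5-occurrence
# threshold is an arbitrary example value):
dim(DTM.new)
findFreqTerms(DTM.new, lowfreq = 5)
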
#########################################
# Latent Dirichlet allocation
#########################################

# Fit an LDA model with 7 topics and list the 8 most probable terms per topic
lda <- LDA(DTM.new, k = 7)
terms(lda, 8)

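# Further inspection of the fitted model (a sketch): most likely topic for
# each response, and a slice of the per-topic term probabilities returned by
# posterior()
topics(lda, 1)
round(posterior(lda)$terms[, 1:10], 3)
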
#########################################
# Display using wordcloud
#########################################
wordcloud.generate <- function(corpus, min.freq = 3, minWordLength = 3, dictionnaire) {
  doc.m <- TermDocumentMatrix(corpus,
                              control = list(wordLengths = c(minWordLength, Inf),
                                             tokenize = NGramTokenizer))
  dm <- as.matrix(doc.m)
  # Calculate the frequency of each word
  v <- sort(rowSums(dm), decreasing = TRUE)
  d <- data.frame(word = names(v), freq = v)
  # Keep only the words that belong to the dictionary
  d <- d[d$word %in% dictionnaire, ]
  # Create the palette (Dark2 provides at most 8 colours)
  pal <- brewer.pal(8, "Dark2")
  # Generate the wordcloud
  wc <- wordcloud(d$word,
                  d$freq,
                  min.freq = min.freq,
                  scale = c(2, 0.5),
                  colors = pal,
                  max.words = 100,
                  rot.per = 0.2,
                  random.order = FALSE)
  # Display the wordcloud
  wc
}
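
# Example call (a sketch): reuse the questionnaire corpus and restrict the
# cloud to the top LDA terms; `lda.terms` is an illustrative name, not part
# of the original script.
lda.terms <- unique(as.vector(terms(lda, 8)))
wordcloud.generate(generateCorpus(question, my.stopwords),
                   min.freq = 3, minWordLength = 3,
                   dictionnaire = lda.terms)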