# Packages used to read, tokenise and annotate the corpus.
library(udpipe)
library(readxl)
library(tidyverse)
library(tidytext)
library(stopwords)

# Download the model first if it is not available locally:
# udpipe_download_model(language = "english")
udmodel_english <- udpipe_load_model(
  file = "C:/Users/Dave/Documents/english-ewt-ud-2.5-191206.udpipe"
)

# Raw spreadsheet; the running text is taken from its `Content` column.
salience_raw <- read_excel("salience_texts.xlsx")
stop_words_en <- get_stopwords(language = "en", source = "stopwords-iso")

# Lower-cased, stopword-free word table, kept under its original name.
salience_texts <- unnest_tokens(
  tbl = salience_raw,
  token = "words",
  input = Content,
  output = Content,
  to_lower = TRUE
)
salience_texts <- anti_join(
  salience_texts,
  stop_words_en,
  by = c("Content" = "word")
)

# Annotate the running text rather than the isolated words above. Feeding
# single words to udpipe makes every word its own one-token document, so the
# tagger and lemmatiser have no sentence context to work with. The frequency
# listings pasted further down as `##` comments show the effect: lemmas such
# as "processe", "processis" or "observ", and verbs like "involve" or "occur"
# counted as nouns.
# NOTE: those pasted listings were produced by the word-by-word annotation and
# need to be regenerated after re-running this script.
texts <- as.character(salience_raw$Content)
texts <- texts[!is.na(texts)]
annotated_data <- as.data.frame(udpipe_annotate(udmodel_english, x = texts))

# Remove stopwords after annotation, matching on the lower-cased surface form
# (the same form the word table above is filtered on).
is_stopword <- tolower(annotated_data$token) %in% stop_words_en$word
annotated_data <- annotated_data[!is_stopword, ]

# The text is no longer lower-cased before annotation, so fold lemmas to lower
# case to keep the frequency tables case-insensitive as before.
annotated_data$lemma <- tolower(annotated_data$lemma)
# Frequency table of the lemmas of all tokens tagged NOUN.
is_noun <- annotated_data$upos %in% "NOUN"
nouns <- annotated_data[is_noun, ]
nouns_frequency <- txt_freq(x = nouns$lemma)
# To drop the first row ("salience") from the table, uncomment:
# nouns_frequency <- nouns_frequency[-1, ]
head(nouns_frequency, 200)
## key freq freq_pct
## 1 salience 493 6.4571054
## 2 language 153 2.0039293
## 3 attention 152 1.9908317
## 4 salient 98 1.2835625
## 5 concept 82 1.0740013
## 6 context 81 1.0609037
## 7 form 64 0.8382449
## 8 structure 64 0.8382449
## 9 feature 63 0.8251473
## 10 term 61 0.7989522
## 11 item 55 0.7203667
## 12 linguistic 52 0.6810740
## 13 study 49 0.6417813
## 14 category 48 0.6286837
## 15 relation 47 0.6155861
## 16 expectation 46 0.6024885
## 17 source 46 0.6024885
## 18 frequency 45 0.5893910
## 19 question 45 0.5893910
## 20 type 43 0.5631958
## 21 effect 43 0.5631958
## 22 cue 42 0.5500982
## 23 dimension 41 0.5370007
## 24 situation 41 0.5370007
## 25 processe 40 0.5239031
## 26 sentence 40 0.5239031
## 27 meaning 39 0.5108055
## 28 role 39 0.5108055
## 29 property 38 0.4977079
## 30 event 37 0.4846103
## 31 definition 36 0.4715128
## 32 word 36 0.4715128
## 33 hand 36 0.4715128
## 34 memory 35 0.4584152
## 35 speaker 35 0.4584152
## 36 choice 35 0.4584152
## 37 focus 34 0.4453176
## 38 target 33 0.4322200
## 39 knowledge 33 0.4322200
## 40 factor 32 0.4191225
## 41 perception 31 0.4060249
## 42 level 30 0.3929273
## 43 perspective 29 0.3798297
## 44 instance 29 0.3798297
## 45 time 28 0.3667322
## 46 mechanism 28 0.3667322
## 47 account 27 0.3536346
## 48 theory 27 0.3536346
## 49 processis 27 0.3536346
## 50 variation 26 0.3405370
## 51 finding 26 0.3405370
## 52 discipline 25 0.3274394
## 53 phenomena 25 0.3274394
## 54 difference 25 0.3274394
## 55 experience 25 0.3274394
## 56 scene 24 0.3143418
## 57 action 24 0.3143418
## 58 field 23 0.3012443
## 59 distinction 23 0.3012443
## 60 subject 23 0.3012443
## 61 view 23 0.3012443
## 62 influence 22 0.2881467
## 63 english 21 0.2750491
## 64 issue 21 0.2750491
## 65 participant 21 0.2750491
## 66 involve 21 0.2750491
## 67 entrenchment 21 0.2750491
## 68 expression 21 0.2750491
## 69 sentences 21 0.2750491
## 70 function 20 0.2619515
## 71 relationship 20 0.2619515
## 72 selection 20 0.2619515
## 73 chapter 20 0.2619515
## 74 linguist 19 0.2488540
## 75 discourse 19 0.2488540
## 76 position 19 0.2488540
## 77 range 19 0.2488540
## 78 eye 19 0.2488540
## 79 movement 19 0.2488540
## 80 space 19 0.2488540
## 81 tomlin 18 0.2357564
## 82 occur 18 0.2357564
## 83 signal 18 0.2357564
## 84 semantic 17 0.2226588
## 85 aspect 17 0.2226588
## 86 domain 17 0.2226588
## 87 cueing 17 0.2226588
## 88 task 17 0.2226588
## 89 input 17 0.2226588
## 90 model 17 0.2226588
## 91 representation 17 0.2226588
## 92 evidence 17 0.2226588
## 93 distractor 17 0.2226588
## 94 geeraert 17 0.2226588
## 95 referent 17 0.2226588
## 96 beer 17 0.2226588
## 97 landmark 17 0.2226588
## 98 review 16 0.2095612
## 99 overview 16 0.2095612
## 100 paradigm 16 0.2095612
## 101 probability 16 0.2095612
## 102 analysis 16 0.2095612
## 103 investigation 16 0.2095612
## 104 picture 16 0.2095612
## 105 production 16 0.2095612
## 106 interaction 15 0.1964637
## 107 giora 15 0.1964637
## 108 variant 15 0.1964637
## 109 voice 15 0.1964637
## 110 entity 15 0.1964637
## 111 constitute 15 0.1964637
## 112 constructions 15 0.1964637
## 113 agent 15 0.1964637
## 114 snow 15 0.1964637
## 115 sociolinguistic 14 0.1833661
## 116 expressions 14 0.1833661
## 117 change 14 0.1833661
## 118 stimuli 14 0.1833661
## 119 future 14 0.1833661
## 120 condition 14 0.1833661
## 121 activation 14 0.1833661
## 122 sense 14 0.1833661
## 123 interact 14 0.1833661
## 124 perspectival 14 0.1833661
## 125 understand 13 0.1702685
## 126 schmid 13 0.1702685
## 127 günther 13 0.1702685
## 128 psychology 13 0.1702685
## 129 goal 13 0.1702685
## 130 müller 13 0.1702685
## 131 pattern 13 0.1702685
## 132 reality 13 0.1702685
## 133 element 13 0.1702685
## 134 levels 13 0.1702685
## 135 application 13 0.1702685
## 136 surprise 12 0.1571709
## 137 models 12 0.1571709
## 138 basis 12 0.1571709
## 139 cognition 12 0.1571709
## 140 process 12 0.1571709
## 141 discussion 12 0.1571709
## 142 response 12 0.1571709
## 143 subset 12 0.1571709
## 144 idea 12 0.1571709
## 145 location 12 0.1571709
## 146 observer 12 0.1571709
## 147 behavior 12 0.1571709
## 148 prediction 12 0.1571709
## 149 operationalization 11 0.1440733
## 150 usage 11 0.1440733
## 151 william 11 0.1440733
## 152 zarcone 11 0.1440733
## 153 contrast 11 0.1440733
## 154 read 11 0.1440733
## 155 speech 11 0.1440733
## 156 langacker 11 0.1440733
## 157 grammar 11 0.1440733
## 158 notion 11 0.1440733
## 159 patient 11 0.1440733
## 160 surface 11 0.1440733
## 161 name 11 0.1440733
## 162 trajector 11 0.1440733
## 163 kerswill 10 0.1309758
## 164 markedness 10 0.1309758
## 165 phenomenon 10 0.1309758
## 166 jaeger 10 0.1309758
## 167 result 10 0.1309758
## 168 degree 10 0.1309758
## 169 extent 10 0.1309758
## 170 table 10 0.1309758
## 171 line 10 0.1309758
## 172 example 10 0.1309758
## 173 complex 10 0.1309758
## 174 user 10 0.1309758
## 175 prototypicality 10 0.1309758
## 176 regard 10 0.1309758
## 177 combination 10 0.1309758
## 178 weight 10 0.1309758
## 179 dutch 10 0.1309758
## 180 reference 9 0.1178782
## 181 framework 9 0.1178782
## 182 literature 9 0.1178782
## 183 science 9 0.1178782
## 184 environment 9 0.1178782
## 185 note 9 0.1178782
## 186 lexicon 9 0.1178782
## 187 approach 9 0.1178782
## 188 step 9 0.1178782
## 189 addition 9 0.1178782
## 190 increase 9 0.1178782
## 191 bock 9 0.1178782
## 192 comprehension 9 0.1178782
## 193 formulation 9 0.1178782
## 194 preference 9 0.1178782
## 195 possibility 9 0.1178782
## 196 sla 8 0.1047806
## 197 reason 8 0.1047806
## 198 literal 8 0.1047806
## 199 marker 8 0.1047806
## 200 trudgill 8 0.1047806
library(RColorBrewer)
library(wordcloud)
library(wordcloud2)

# Qualitative palette shared by both noun clouds.
noun_palette <- brewer.pal(8, "Dark2")

# Static cloud of the 150 most frequent noun lemmas. The argument is spelled
# `colors` in full instead of relying on partial matching of `color`.
wordcloud(
  words = nouns_frequency$key,
  freq = nouns_frequency$freq,
  max.words = 150,
  random.color = TRUE,
  colors = noun_palette
)

# Interactive cloud of all noun lemmas. wordcloud2 takes a colour vector one
# entry per word, so the 8-colour palette is recycled to one colour per row;
# otherwise only the first eight words would receive a palette colour.
wordcloud2(
  data = nouns_frequency,
  color = rep_len(noun_palette, nrow(nouns_frequency)),
  gridSize = 10,
  minSize = 6,
  size = 2
)
# Frequency table of the lemmas of all tokens tagged ADJ.
is_adjective <- annotated_data$upos %in% "ADJ"
adjectives <- annotated_data[is_adjective, ]
adjectives_frequency <- txt_freq(x = adjectives$lemma)
# To drop the first row of the table, uncomment:
# adjectives_frequency <- adjectives_frequency[-1, ]
head(adjectives_frequency, 200)
## key freq freq_pct
## 1 linguistic 130 5.72435051
## 2 cognitive 83 3.65477763
## 3 visual 66 2.90620872
## 4 lexical 60 2.64200793
## 5 onomasiological 52 2.28974020
## 6 semantic 50 2.20167327
## 7 perceptual 47 2.06957288
## 8 specific 42 1.84940555
## 9 attentional 37 1.62923822
## 10 surprisal 36 1.58520476
## 11 social 35 1.54117129
## 12 structural 33 1.45310436
## 13 active 32 1.40907089
## 14 passive 31 1.36503743
## 15 major 30 1.32100396
## 16 syntactic 28 1.23293703
## 17 individual 27 1.18890357
## 18 current 24 1.05680317
## 19 relevant 21 0.92470277
## 20 psychological 20 0.88066931
## 21 external 18 0.79260238
## 22 mental 18 0.79260238
## 23 semasiological 18 0.79260238
## 24 central 17 0.74856891
## 25 sensory 17 0.74856891
## 26 involve 15 0.66050198
## 27 sociolinguistic 14 0.61646852
## 28 contextual 14 0.61646852
## 29 human 14 0.61646852
## 30 grammatical 14 0.61646852
## 31 conceptual 13 0.57243505
## 32 potential 12 0.52840159
## 33 lexicological 12 0.52840159
## 34 syntagmatic 12 0.52840159
## 35 situational 11 0.48436812
## 36 alternative 11 0.48436812
## 37 typical 11 0.48436812
## 38 terminological 10 0.44033465
## 39 internal 10 0.44033465
## 40 variable 10 0.44033465
## 41 initial 10 0.44033465
## 42 prominent 10 0.44033465
## 43 actual 10 0.44033465
## 44 joint 10 0.44033465
## 45 formal 10 0.44033465
## 46 communicative 10 0.44033465
## 47 basic 10 0.44033465
## 48 single 9 0.39630119
## 49 intrinsic 9 0.39630119
## 50 multiple 9 0.39630119
## 51 theoretical 9 0.39630119
## 52 distinguish 9 0.39630119
## 53 phonological 9 0.39630119
## 54 pragmatic 9 0.39630119
## 55 direct 9 0.39630119
## 56 surprising 9 0.39630119
## 57 frequent 9 0.39630119
## 58 crucial 9 0.39630119
## 59 nonlinguistic 9 0.39630119
## 60 previous 9 0.39630119
## 61 accessible 8 0.35226772
## 62 inherent 8 0.35226772
## 63 local 8 0.35226772
## 64 real 8 0.35226772
## 65 distinctive 8 0.35226772
## 66 common 7 0.30823426
## 67 implicit 7 0.30823426
## 68 limit 7 0.30823426
## 69 complete 7 0.30823426
## 70 prior 7 0.30823426
## 71 easier 7 0.30823426
## 72 characteristic 7 0.30823426
## 73 short 7 0.30823426
## 74 spatial 7 0.30823426
## 75 global 7 0.30823426
## 76 russian 7 0.30823426
## 77 distinguished 7 0.30823426
## 78 dimensional 7 0.30823426
## 79 scientific 6 0.26420079
## 80 exogenous 6 0.26420079
## 81 unexpected 6 0.26420079
## 82 narrow 6 0.26420079
## 83 main 6 0.26420079
## 84 interdisciplinary 6 0.26420079
## 85 experimental 6 0.26420079
## 86 strong 6 0.26420079
## 87 construal 6 0.26420079
## 88 semiotic 6 0.26420079
## 89 variational 6 0.26420079
## 90 natural 5 0.22016733
## 91 endogenous 5 0.22016733
## 92 paradigmatic 5 0.22016733
## 93 physical 5 0.22016733
## 94 stronger 5 0.22016733
## 95 relate 5 0.22016733
## 96 considerable 5 0.22016733
## 97 psycholinguistic 5 0.22016733
## 98 observ 5 0.22016733
## 99 accurate 5 0.22016733
## 100 irrelevant 5 0.22016733
## 101 sensitive 5 0.22016733
## 102 systematic 5 0.22016733
## 103 dependent 5 0.22016733
## 104 independent 5 0.22016733
## 105 functional 5 0.22016733
## 106 equivalent 5 0.22016733
## 107 prototypical 5 0.22016733
## 108 absolute 4 0.17613386
## 109 signifier 4 0.17613386
## 110 organize 4 0.17613386
## 111 concerned 4 0.17613386
## 112 standard 4 0.17613386
## 113 unmarked 4 0.17613386
## 114 simple 4 0.17613386
## 115 determine 4 0.17613386
## 116 lectal 4 0.17613386
## 117 extrinsic 4 0.17613386
## 118 figurative 4 0.17613386
## 119 illustrate 4 0.17613386
## 120 valid 4 0.17613386
## 121 empirical 4 0.17613386
## 122 selective 4 0.17613386
## 123 interrelate 4 0.17613386
## 124 saccadic 4 0.17613386
## 125 interactive 4 0.17613386
## 126 intentional 4 0.17613386
## 127 special 4 0.17613386
## 128 conventional 4 0.17613386
## 129 experience 4 0.17613386
## 130 signifiant 4 0.17613386
## 131 collective 4 0.17613386
## 132 equal 4 0.17613386
## 133 syllable 4 0.17613386
## 134 canonical 4 0.17613386
## 135 logical 4 0.17613386
## 136 transitive 4 0.17613386
## 137 key 4 0.17613386
## 138 predictable 4 0.17613386
## 139 statistical 4 0.17613386
## 140 sic 4 0.17613386
## 141 belgian 4 0.17613386
## 142 netherlandic 4 0.17613386
## 143 white 3 0.13210040
## 144 unified 3 0.13210040
## 145 fresh 3 0.13210040
## 146 regular 3 0.13210040
## 147 additional 3 0.13210040
## 148 aware 3 0.13210040
## 149 female 3 0.13210040
## 150 mere 3 0.13210040
## 151 perceive 3 0.13210040
## 152 faster 3 0.13210040
## 153 familiar 3 0.13210040
## 154 neurocognitive 3 0.13210040
## 155 obvious 3 0.13210040
## 156 integrative 3 0.13210040
## 157 robust 3 0.13210040
## 158 covert 3 0.13210040
## 159 neutral 3 0.13210040
## 160 predictive 3 0.13210040
## 161 larger 3 0.13210040
## 162 essential 3 0.13210040
## 163 traditional 3 0.13210040
## 164 flexible 3 0.13210040
## 165 holistic 3 0.13210040
## 166 multidimensional 3 0.13210040
## 167 biological 3 0.13210040
## 168 rich 3 0.13210040
## 169 constituent 3 0.13210040
## 170 uninformative 3 0.13210040
## 171 prime 3 0.13210040
## 172 informative 3 0.13210040
## 173 incremental 3 0.13210040
## 174 commercial 3 0.13210040
## 175 mutual 3 0.13210040
## 176 double 3 0.13210040
## 177 denotational 3 0.13210040
## 178 taxonomical 3 0.13210040
## 179 preponderant 3 0.13210040
## 180 unclear 2 0.08806693
## 181 diverse 2 0.08806693
## 182 acoustic 2 0.08806693
## 183 circular 2 0.08806693
## 184 black 2 0.08806693
## 185 evident 2 0.08806693
## 186 schematic 2 0.08806693
## 187 phonetic 2 0.08806693
## 188 prosodic 2 0.08806693
## 189 extralinguistic 2 0.08806693
## 190 classic 2 0.08806693
## 191 generic 2 0.08806693
## 192 oriented 2 0.08806693
## 193 noticeable 2 0.08806693
## 194 feature 2 0.08806693
## 195 infrequent 2 0.08806693
## 196 contrary 2 0.08806693
## 197 worth 2 0.08806693
## 198 personal 2 0.08806693
## 199 fast 2 0.08806693
## 200 peripheral 2 0.08806693
# Interactive cloud of all adjective lemmas. wordcloud2 takes a colour vector
# one entry per word, so the 8-colour palette is recycled to one colour per
# row; otherwise only the first eight words would receive a palette colour.
wordcloud2(
  data = adjectives_frequency,
  color = rep_len(brewer.pal(8, "Dark2"), nrow(adjectives_frequency)),
  gridSize = 10,
  minSize = 6,
  size = 2
)
# Frequency table of the lemmas of all content words (adjectives, verbs,
# nouns and adverbs).
content_tags <- c("ADJ", "VERB", "NOUN", "ADV")
content_words <- annotated_data[annotated_data$upos %in% content_tags, ]
content_words_frequency <- txt_freq(content_words$lemma)
# To drop the first row ("salience") from the table, uncomment:
# content_words_frequency <- content_words_frequency[-1, ]
head(content_words_frequency, n = 200)
## key freq freq_pct
## 1 salience 493 4.0710157
## 2 linguistic 182 1.5028902
## 3 language 153 1.2634187
## 4 attention 152 1.2551610
## 5 salient 98 0.8092486
## 6 concept 83 0.6853840
## 7 cognitive 83 0.6853840
## 8 context 81 0.6688687
## 9 semantic 67 0.5532618
## 10 structure 67 0.5532618
## 11 form 66 0.5450041
## 12 visual 66 0.5450041
## 13 feature 65 0.5367465
## 14 term 61 0.5037159
## 15 lexical 60 0.4954583
## 16 focus 59 0.4872007
## 17 item 55 0.4541701
## 18 study 54 0.4459125
## 19 mean 52 0.4293972
## 20 cue 52 0.4293972
## 21 onomasiological 52 0.4293972
## 22 processe 49 0.4046243
## 23 category 48 0.3963666
## 24 referent 48 0.3963666
## 25 perceptual 47 0.3881090
## 26 relation 47 0.3881090
## 27 expectation 46 0.3798514
## 28 source 46 0.3798514
## 29 frequency 45 0.3715937
## 30 question 45 0.3715937
## 31 base 44 0.3633361
## 32 type 43 0.3550784
## 33 effect 43 0.3550784
## 34 involve 42 0.3468208
## 35 specific 42 0.3468208
## 36 speaker 41 0.3385632
## 37 dimension 41 0.3385632
## 38 situation 41 0.3385632
## 39 sentence 41 0.3385632
## 40 meaning 39 0.3220479
## 41 define 39 0.3220479
## 42 role 39 0.3220479
## 43 property 38 0.3137903
## 44 approach 38 0.3137903
## 45 attentional 37 0.3055326
## 46 event 37 0.3055326
## 47 definition 36 0.2972750
## 48 word 36 0.2972750
## 49 surprisal 36 0.2972750
## 50 hand 36 0.2972750
## 51 memory 35 0.2890173
## 52 social 35 0.2890173
## 53 choice 35 0.2890173
## 54 figure 34 0.2807597
## 55 target 33 0.2725021
## 56 knowledge 33 0.2725021
## 57 structural 33 0.2725021
## 58 refer 32 0.2642444
## 59 factor 32 0.2642444
## 60 object 32 0.2642444
## 61 active 32 0.2642444
## 62 individual 31 0.2559868
## 63 perception 31 0.2559868
## 64 passive 31 0.2559868
## 65 level 30 0.2477291
## 66 major 30 0.2477291
## 67 perspective 29 0.2394715
## 68 experience 29 0.2394715
## 69 instance 29 0.2394715
## 70 prim 29 0.2394715
## 71 sociolinguistic 28 0.2312139
## 72 syntactic 28 0.2312139
## 73 express 28 0.2312139
## 74 time 28 0.2312139
## 75 mechanism 28 0.2312139
## 76 account 27 0.2229562
## 77 theory 27 0.2229562
## 78 processis 27 0.2229562
## 79 include 26 0.2146986
## 80 variation 26 0.2146986
## 81 finding 26 0.2146986
## 82 discipline 25 0.2064410
## 83 phenomena 25 0.2064410
## 84 difference 25 0.2064410
## 85 scene 24 0.1981833
## 86 subject 24 0.1981833
## 87 represent 24 0.1981833
## 88 action 24 0.1981833
## 89 current 24 0.1981833
## 90 drive 24 0.1981833
## 91 field 23 0.1899257
## 92 direct 23 0.1899257
## 93 relative 23 0.1899257
## 94 distinction 23 0.1899257
## 95 influence 23 0.1899257
## 96 view 23 0.1899257
## 97 english 21 0.1734104
## 98 relevant 21 0.1734104
## 99 issue 21 0.1734104
## 100 function 21 0.1734104
## 101 participant 21 0.1734104
## 102 learn 21 0.1734104
## 103 occur 21 0.1734104
## 104 position 21 0.1734104
## 105 entrenchment 21 0.1734104
## 106 expression 21 0.1734104
## 107 sentences 21 0.1734104
## 108 consider 20 0.1651528
## 109 psychological 20 0.1651528
## 110 relationship 20 0.1651528
## 111 tomlin 20 0.1651528
## 112 selection 20 0.1651528
## 113 investigate 20 0.1651528
## 114 chapter 20 0.1651528
## 115 name 20 0.1651528
## 116 identify 19 0.1568951
## 117 understand 19 0.1568951
## 118 linguist 19 0.1568951
## 119 discourse 19 0.1568951
## 120 range 19 0.1568951
## 121 signal 19 0.1568951
## 122 eye 19 0.1568951
## 123 movement 19 0.1568951
## 124 space 19 0.1568951
## 125 external 18 0.1486375
## 126 input 18 0.1486375
## 127 alternative 18 0.1486375
## 128 mental 18 0.1486375
## 129 highly 18 0.1486375
## 130 human 18 0.1486375
## 131 semasiological 18 0.1486375
## 132 review 17 0.1403799
## 133 discuss 17 0.1403799
## 134 aspect 17 0.1403799
## 135 domain 17 0.1403799
## 136 central 17 0.1403799
## 137 stand 17 0.1403799
## 138 cueing 17 0.1403799
## 139 task 17 0.1403799
## 140 model 17 0.1403799
## 141 representation 17 0.1403799
## 142 sensory 17 0.1403799
## 143 constitute 17 0.1403799
## 144 observer 17 0.1403799
## 145 evidence 17 0.1403799
## 146 distractor 17 0.1403799
## 147 geeraert 17 0.1403799
## 148 beer 17 0.1403799
## 149 landmark 17 0.1403799
## 150 overview 16 0.1321222
## 151 trigger 16 0.1321222
## 152 search 16 0.1321222
## 153 compare 16 0.1321222
## 154 variant 16 0.1321222
## 155 paradigm 16 0.1321222
## 156 probability 16 0.1321222
## 157 mention 16 0.1321222
## 158 analysis 16 0.1321222
## 159 investigation 16 0.1321222
## 160 condition 16 0.1321222
## 161 set 16 0.1321222
## 162 induce 16 0.1321222
## 163 picture 16 0.1321222
## 164 referential 16 0.1321222
## 165 production 16 0.1321222
## 166 provide 15 0.1238646
## 167 interaction 15 0.1238646
## 168 propose 15 0.1238646
## 169 giora 15 0.1238646
## 170 variable 15 0.1238646
## 171 change 15 0.1238646
## 172 voice 15 0.1238646
## 173 entity 15 0.1238646
## 174 constructions 15 0.1238646
## 175 tend 15 0.1238646
## 176 agent 15 0.1238646
## 177 snow 15 0.1238646
## 178 schmid 14 0.1156069
## 179 start 14 0.1156069
## 180 expressions 14 0.1156069
## 181 stimuli 14 0.1156069
## 182 contextual 14 0.1156069
## 183 activate 14 0.1156069
## 184 future 14 0.1156069
## 185 activation 14 0.1156069
## 186 characteristic 14 0.1156069
## 187 sense 14 0.1156069
## 188 interact 14 0.1156069
## 189 grammatical 14 0.1156069
## 190 perspectival 14 0.1156069
## 191 surprise 13 0.1073493
## 192 address 13 0.1073493
## 193 günther 13 0.1073493
## 194 depend 13 0.1073493
## 195 speak 13 0.1073493
## 196 psychology 13 0.1073493
## 197 goal 13 0.1073493
## 198 expect 13 0.1073493
## 199 müller 13 0.1073493
## 200 pattern 13 0.1073493
# Interactive cloud of all content-word lemmas. wordcloud2 takes a colour
# vector one entry per word, so the 8-colour palette is recycled to one colour
# per row; otherwise only the first eight words would receive a palette colour.
wordcloud2(
  data = content_words_frequency,
  color = rep_len(brewer.pal(8, "Dark2"), nrow(content_words_frequency)),
  gridSize = 10,
  minSize = 6,
  size = 2
)