# Packages needed to load the language model and the input spreadsheet.
library(udpipe)
library(readxl)

# Pre-trained English UDPipe model, loaded from a local file.
# If the model file has not been downloaded yet, fetch it once with:
#   udpipe_download_model(language = "english")
udmodel_english <- udpipe_load_model(
  file = "C:/Users/Dave/Documents/english-ewt-ud-2.5-191206.udpipe"
)

# Texts to analyse; the relative path is resolved against the working directory.
salience_texts <- read_excel("salience_texts.xlsx")
# Normalise text before it is passed to the annotator.
#
# Args:
#   string: character vector (or a value tolower() can coerce to character).
#
# Returns:
#   The input converted to lower case. No other cleaning is applied.
#
# A stricter clean-up is currently disabled. The steps are kept here for
# reference (they operate on the lower-cased value, called `temp`):
#   temp <- stringr::str_replace_all(temp, "[^a-zA-Z\\s]", " ")  # keep letters and whitespace only
#   temp <- stringr::str_replace_all(temp, "[\\s]+", " ")        # collapse runs of whitespace
#   temp <- stringr::str_split(temp, " ")[[1]]                   # split into tokens
#   indexes <- which(temp == "")                                 # drop empty tokens
#   if (length(indexes) > 0) temp <- temp[-indexes]
Clean_String <- function(string) {
  tolower(string)
}
# Lower-case every text in the Content column before annotation.
# Clean_String() is a thin wrapper around tolower(), which is vectorised, so
# the whole column is processed in a single call. This also behaves correctly
# for a sheet with zero rows, where a `1:nrow(...)` loop would still run for
# the indices 1 and 0.
salience_texts$Content <- Clean_String(salience_texts$Content)
# Run the English UDPipe model over every text and convert the annotation
# into a data frame. The steps below use its `upos` and `lemma` columns.
annotated_data <- data.frame(
  udpipe_annotate(udmodel_english, salience_texts$Content)
)

# Noun lemmas: keep the rows tagged NOUN and tabulate their lemmas.
nouns <- annotated_data[annotated_data$upos %in% "NOUN", , drop = FALSE]
nouns_frequency <- txt_freq(nouns[["lemma"]])

# Optional: drop the first row of the table ("salience" in the output below).
# nouns_frequency <- nouns_frequency[-1, ]

# Show the first 200 rows of the frequency table.
head(nouns_frequency, 200)
## key freq freq_pct
## 1 salience 487 5.7273903
## 2 language 153 1.7993649
## 3 attention 150 1.7640833
## 4 concept 78 0.9173233
## 5 context 76 0.8938022
## 6 research 73 0.8585205
## 7 word 68 0.7997177
## 8 meaning 66 0.7761966
## 9 effect 63 0.7409150
## 10 term 61 0.7173939
## 11 sentence 61 0.7173939
## 12 structure 60 0.7056333
## 13 feature 59 0.6938728
## 14 information 58 0.6821122
## 15 form 58 0.6821122
## 16 item 55 0.6468305
## 17 linguistic 54 0.6350700
## 18 example 52 0.6115489
## 19 up 49 0.5762672
## 20 category 48 0.5645066
## 21 study 46 0.5409855
## 22 expectation 46 0.5409855
## 23 source 46 0.5409855
## 24 relation 46 0.5409855
## 25 way 45 0.5292250
## 26 bottom 45 0.5292250
## 27 question 44 0.5174644
## 28 cue 44 0.5174644
## 29 level 43 0.5057039
## 30 frequency 42 0.4939433
## 31 type 42 0.4939433
## 32 case 41 0.4821828
## 33 situation 41 0.4821828
## 34 dimension 40 0.4704222
## 35 role 39 0.4586616
## 36 referent 39 0.4586616
## 37 property 38 0.4469011
## 38 speaker 38 0.4469011
## 39 event 37 0.4351405
## 40 hand 36 0.4233800
## 41 process 36 0.4233800
## 42 salient 35 0.4116194
## 43 memory 35 0.4116194
## 44 down 35 0.4116194
## 45 expression 35 0.4116194
## 46 choice 35 0.4116194
## 47 definition 32 0.3763378
## 48 use 32 0.3763378
## 49 factor 32 0.3763378
## 50 perception 31 0.3645772
## 51 target 31 0.3645772
## 52 point 30 0.3528167
## 53 instance 29 0.3410561
## 54 knowledge 29 0.3410561
## 55 model 28 0.3292955
## 56 line 28 0.3292955
## 57 name 28 0.3292955
## 58 section 27 0.3175350
## 59 surprisal 27 0.3175350
## 60 time 27 0.3175350
## 61 focus 27 0.3175350
## 62 finding 27 0.3175350
## 63 perspective 26 0.3057744
## 64 variation 26 0.3057744
## 65 mechanism 26 0.3057744
## 66 view 26 0.3057744
## 67 figure 25 0.2940139
## 68 processing 25 0.2940139
## 69 difference 25 0.2940139
## 70 phenomena 24 0.2822533
## 71 scene 24 0.2822533
## 72 experience 24 0.2822533
## 73 approach 24 0.2822533
## 74 action 24 0.2822533
## 75 theory 23 0.2704928
## 76 object 23 0.2704928
## 77 one 22 0.2587322
## 78 account 22 0.2587322
## 79 distinction 22 0.2587322
## 80 eye 22 0.2587322
## 81 field 21 0.2469717
## 82 result 21 0.2469717
## 83 participant 21 0.2469717
## 84 myachykov 21 0.2469717
## 85 stimulus 21 0.2469717
## 86 chapter 21 0.2469717
## 87 issue 20 0.2352111
## 88 part 20 0.2352111
## 89 input 20 0.2352111
## 90 entrenchment 20 0.2352111
## 91 other 19 0.2234505
## 92 relationship 19 0.2234505
## 93 discourse 19 0.2234505
## 94 tomlin 19 0.2234505
## 95 selection 19 0.2234505
## 96 position 19 0.2234505
## 97 range 19 0.2234505
## 98 movement 19 0.2234505
## 99 space 19 0.2234505
## 100 function 18 0.2116900
## 101 task 18 0.2116900
## 102 semantic 17 0.1999294
## 103 rácz 17 0.1999294
## 104 system 17 0.1999294
## 105 representation 17 0.1999294
## 106 construction 17 0.1999294
## 107 geeraert 17 0.1999294
## 108 value 17 0.1999294
## 109 beer 17 0.1999294
## 110 landmark 17 0.1999294
## 111 overview 16 0.1881689
## 112 search 16 0.1881689
## 113 aspect 16 0.1881689
## 114 sense 16 0.1881689
## 115 paradigm 16 0.1881689
## 116 top 16 0.1881689
## 117 probability 16 0.1881689
## 118 analysis 16 0.1881689
## 119 investigation 16 0.1881689
## 120 evidence 16 0.1881689
## 121 picture 16 0.1881689
## 122 production 16 0.1881689
## 123 interaction 15 0.1764083
## 124 review 15 0.1764083
## 125 variant 15 0.1764083
## 126 cueing 15 0.1764083
## 127 voice 15 0.1764083
## 128 entity 15 0.1764083
## 129 number 15 0.1764083
## 130 set 15 0.1764083
## 131 distractor 15 0.1764083
## 132 kind 15 0.1764083
## 133 notion 15 0.1764083
## 134 agent 15 0.1764083
## 135 order 14 0.1646478
## 136 sociolinguistic 14 0.1646478
## 137 text 14 0.1646478
## 138 giora 14 0.1646478
## 139 linguist 14 0.1646478
## 140 change 14 0.1646478
## 141 fact 14 0.1646478
## 142 condition 14 0.1646478
## 143 activation 14 0.1646478
## 144 signal 14 0.1646478
## 145 perspectival 14 0.1646478
## 146 günther 13 0.1528872
## 147 domain 13 0.1528872
## 148 psychology 13 0.1528872
## 149 influence 13 0.1528872
## 150 müller 13 0.1528872
## 151 reality 13 0.1528872
## 152 element 13 0.1528872
## 153 application 13 0.1528872
## 154 snow 13 0.1528872
## 155 surprise 12 0.1411267
## 156 sub-discipline 12 0.1411267
## 157 schmid 12 0.1411267
## 158 individual 12 0.1411267
## 159 work 12 0.1411267
## 160 basis 12 0.1411267
## 161 cognition 12 0.1411267
## 162 stimuli 12 0.1411267
## 163 goal 12 0.1411267
## 164 discussion 12 0.1411267
## 165 degree 12 0.1411267
## 166 response 12 0.1411267
## 167 idea 12 0.1411267
## 168 location 12 0.1411267
## 169 behavior 12 0.1411267
## 170 operationalization 11 0.1293661
## 171 william 11 0.1293661
## 172 english 11 0.1293661
## 173 speech 11 0.1293661
## 174 subset 11 0.1293661
## 175 observer 11 0.1293661
## 176 user 11 0.1293661
## 177 grammar 11 0.1293661
## 178 patient 11 0.1293661
## 179 surface 11 0.1293661
## 180 trajector 11 0.1293661
## 181 discipline 10 0.1176056
## 182 kerswill 10 0.1176056
## 183 markedness 10 0.1176056
## 184 phenomenon 10 0.1176056
## 185 shift 10 0.1176056
## 186 alternative 10 0.1176056
## 187 approaches 10 0.1176056
## 188 table 10 0.1176056
## 189 trial 10 0.1176056
## 190 prediction 10 0.1176056
## 191 usage 10 0.1176056
## 192 langacker 10 0.1176056
## 193 pattern 10 0.1176056
## 194 prototypicality 10 0.1176056
## 195 interact 10 0.1176056
## 196 member 10 0.1176056
## 197 subject 10 0.1176056
## 198 regard 10 0.1176056
## 199 priming 10 0.1176056
## 200 combination 10 0.1176056
# Plotting packages used for the word clouds.
library(RColorBrewer)
library(wordcloud)
library(wordcloud2)

# wordcloud() rendering of the noun lemmas, limited to at most 150 words.
# The palette argument of wordcloud() is named `colors`; it is spelled in full
# here instead of relying on partial argument matching of `color`.
wordcloud(
  words = nouns_frequency$key,
  freq = nouns_frequency$freq,
  max.words = 150,
  random.color = TRUE,
  colors = brewer.pal(8, "Dark2")
)

# wordcloud2() rendering of the full noun frequency table, same Dark2 palette.
wordcloud2(
  data = nouns_frequency,
  color = brewer.pal(8, "Dark2"),
  gridSize = 10,
  minSize = 6,
  size = 2
)
# Adjective lemmas: keep the rows tagged ADJ and tabulate their lemmas.
adjectives <- annotated_data[annotated_data$upos %in% "ADJ", , drop = FALSE]
adjectives_frequency <- txt_freq(adjectives[["lemma"]])

# Optional: drop the first row of the table.
# adjectives_frequency <- adjectives_frequency[-1, ]

# Show the first 200 rows of the frequency table.
head(adjectives_frequency, 200)
## key freq freq_pct
## 1 linguistic 119 3.64248546
## 2 other 86 2.63238445
## 3 different 83 2.54055709
## 4 cognitive 79 2.41812060
## 5 such 72 2.20385675
## 6 visual 66 2.02020202
## 7 particular 61 1.86715641
## 8 lexical 60 1.83654729
## 9 onomasiological 52 1.59167432
## 10 semantic 50 1.53045608
## 11 perceptual 45 1.37741047
## 12 salient 41 1.25497398
## 13 specific 38 1.16314662
## 14 attentional 35 1.07131925
## 15 social 34 1.04071013
## 16 structural 33 1.01010101
## 17 same 32 0.97949189
## 18 active 32 0.97949189
## 19 major 30 0.91827365
## 20 passive 29 0.88766452
## 21 syntactic 28 0.85705540
## 22 top 25 0.76522804
## 23 general 25 0.76522804
## 24 many 24 0.73461892
## 25 high 23 0.70400979
## 26 likely 23 0.70400979
## 27 current 23 0.70400979
## 28 possible 22 0.67340067
## 29 related 21 0.64279155
## 30 relevant 21 0.64279155
## 31 more 19 0.58157331
## 32 individual 19 0.58157331
## 33 psychological 18 0.55096419
## 34 external 18 0.55096419
## 35 mental 18 0.55096419
## 36 various 18 0.55096419
## 37 semasiological 18 0.55096419
## 38 first 17 0.52035507
## 39 central 17 0.52035507
## 40 sensory 17 0.52035507
## 41 clear 15 0.45913682
## 42 sociolinguistic 14 0.42852770
## 43 important 14 0.42852770
## 44 grammatical 14 0.42852770
## 45 referential 14 0.42852770
## 46 certain 13 0.39791858
## 47 conceptual 13 0.39791858
## 48 human 12 0.36730946
## 49 similar 12 0.36730946
## 50 contextual 12 0.36730946
## 51 bottom 12 0.36730946
## 52 lexicological 12 0.36730946
## 53 syntagmatic 12 0.36730946
## 54 situational 11 0.33670034
## 55 much 11 0.33670034
## 56 whole 11 0.33670034
## 57 typical 11 0.33670034
## 58 terminological 10 0.30609122
## 59 low 10 0.30609122
## 60 initial 10 0.30609122
## 61 subject 10 0.30609122
## 62 higher 10 0.30609122
## 63 actual 10 0.30609122
## 64 future 10 0.30609122
## 65 available 10 0.30609122
## 66 formal 10 0.30609122
## 67 communicative 10 0.30609122
## 68 basic 10 0.30609122
## 69 single 9 0.27548209
## 70 long 9 0.27548209
## 71 intrinsic 9 0.27548209
## 72 multiple 9 0.27548209
## 73 pragmatic 9 0.27548209
## 74 surprisal 9 0.27548209
## 75 frequent 9 0.27548209
## 76 useful 9 0.27548209
## 77 crucial 9 0.27548209
## 78 nonlinguistic 9 0.27548209
## 79 previous 9 0.27548209
## 80 potential 9 0.27548209
## 81 complex 9 0.27548209
## 82 prominent 9 0.27548209
## 83 present 9 0.27548209
## 84 literal 8 0.24487297
## 85 latter 8 0.24487297
## 86 phonological 8 0.24487297
## 87 recent 8 0.24487297
## 88 accessible 8 0.24487297
## 89 second 8 0.24487297
## 90 alternative 8 0.24487297
## 91 local 8 0.24487297
## 92 real 8 0.24487297
## 93 distinctive 8 0.24487297
## 94 common 7 0.21426385
## 95 internal 7 0.21426385
## 96 implicit 7 0.21426385
## 97 theoretical 7 0.21426385
## 98 complete 7 0.21426385
## 99 prior 7 0.21426385
## 100 joint 7 0.21426385
## 101 own 7 0.21426385
## 102 short 7 0.21426385
## 103 overall 7 0.21426385
## 104 global 7 0.21426385
## 105 less 7 0.21426385
## 106 referent 7 0.21426385
## 107 english 7 0.21426385
## 108 next 6 0.18365473
## 109 scientific 6 0.18365473
## 110 variable 6 0.18365473
## 111 involve 6 0.18365473
## 112 direct 6 0.18365473
## 113 down 6 0.18365473
## 114 easier 6 0.18365473
## 115 main 6 0.18365473
## 116 several 6 0.18365473
## 117 interdisciplinary 6 0.18365473
## 118 experimental 6 0.18365473
## 119 inherent 6 0.18365473
## 120 strong 6 0.18365473
## 121 spatial 6 0.18365473
## 122 most 6 0.18365473
## 123 semiotic 6 0.18365473
## 124 russian 6 0.18365473
## 125 variational 6 0.18365473
## 126 natural 5 0.15304561
## 127 exogenous 5 0.15304561
## 128 endogenous 5 0.15304561
## 129 narrow 5 0.15304561
## 130 physical 5 0.15304561
## 131 stronger 5 0.15304561
## 132 further 5 0.15304561
## 133 necessary 5 0.15304561
## 134 third 5 0.15304561
## 135 considerable 5 0.15304561
## 136 psycholinguistic 5 0.15304561
## 137 accurate 5 0.15304561
## 138 irrelevant 5 0.15304561
## 139 sensitive 5 0.15304561
## 140 characteristic 5 0.15304561
## 141 smaller 5 0.15304561
## 142 relative 5 0.15304561
## 143 systematic 5 0.15304561
## 144 functional 5 0.15304561
## 145 absolute 4 0.12243649
## 146 concerned 4 0.12243649
## 147 standard 4 0.12243649
## 148 unmarked 4 0.12243649
## 149 simple 4 0.12243649
## 150 lectal 4 0.12243649
## 151 unexpected 4 0.12243649
## 152 surprising 4 0.12243649
## 153 paradigmatic 4 0.12243649
## 154 extrinsic 4 0.12243649
## 155 figurative 4 0.12243649
## 156 valid 4 0.12243649
## 157 empirical 4 0.12243649
## 158 selective 4 0.12243649
## 159 gaze 4 0.12243649
## 160 intentional 4 0.12243649
## 161 like 4 0.12243649
## 162 special 4 0.12243649
## 163 conventional 4 0.12243649
## 164 good 4 0.12243649
## 165 construal 4 0.12243649
## 166 equivalent 4 0.12243649
## 167 interested 4 0.12243649
## 168 equal 4 0.12243649
## 169 canonical 4 0.12243649
## 170 logical 4 0.12243649
## 171 transitive 4 0.12243649
## 172 finnish 4 0.12243649
## 173 key 4 0.12243649
## 174 predictable 4 0.12243649
## 175 distinguished 4 0.12243649
## 176 statistical 4 0.12243649
## 177 sic 4 0.12243649
## 178 prototypical 4 0.12243649
## 179 belgian 4 0.12243649
## 180 netherlandic 4 0.12243649
## 181 feature 3 0.09182736
## 182 sure 3 0.09182736
## 183 white 3 0.09182736
## 184 non-salient 3 0.09182736
## 185 unified 3 0.09182736
## 186 explicit 3 0.09182736
## 187 fresh 3 0.09182736
## 188 regular 3 0.09182736
## 189 additional 3 0.09182736
## 190 aware 3 0.09182736
## 191 explanatory 3 0.09182736
## 192 large 3 0.09182736
## 193 female 3 0.09182736
## 194 mere 3 0.09182736
## 195 medium 3 0.09182736
## 196 auditory 3 0.09182736
## 197 familiar 3 0.09182736
## 198 limited 3 0.09182736
## 199 few 3 0.09182736
## 200 neurocognitive 3 0.09182736
# wordcloud2() rendering of the adjective frequency table, Dark2 palette.
wordcloud2(
  data = adjectives_frequency,
  size = 2,
  minSize = 6,
  gridSize = 10,
  color = brewer.pal(8, "Dark2")
)
# Content words: keep adjectives, verbs, nouns and adverbs, then tabulate
# their lemmas.
content_tags <- c("ADJ", "VERB", "NOUN", "ADV")
content_words <- annotated_data[
  annotated_data$upos %in% content_tags, ,
  drop = FALSE
]
rm(content_tags)
content_words_frequency <- txt_freq(content_words[["lemma"]])

# Optional: drop the first row of the table ("salience" in the output below).
# content_words_frequency <- content_words_frequency[-1, ]

# Show the first 200 rows of the frequency table.
head(content_words_frequency, 200)
## key freq freq_pct
## 1 salience 492 3.0084383
## 2 linguistic 173 1.0578452
## 3 language 153 0.9355509
## 4 attention 151 0.9233215
## 5 more 132 0.8071420
## 6 other 105 0.6420448
## 7 use 101 0.6175859
## 8 salient 94 0.5747829
## 9 be 93 0.5686682
## 10 different 83 0.5075211
## 11 cognitive 82 0.5014064
## 12 have 79 0.4830622
## 13 concept 78 0.4769475
## 14 how 78 0.4769475
## 15 context 76 0.4647181
## 16 research 75 0.4586034
## 17 such 72 0.4402593
## 18 see 69 0.4219151
## 19 meaning 68 0.4158004
## 20 word 68 0.4158004
## 21 semantic 67 0.4096857
## 22 also 66 0.4035710
## 23 structure 66 0.4035710
## 24 form 66 0.4035710
## 25 visual 66 0.4035710
## 26 when 64 0.3913416
## 27 feature 63 0.3852269
## 28 effect 63 0.3852269
## 29 term 61 0.3729974
## 30 particular 61 0.3729974
## 31 sentence 61 0.3729974
## 32 lexical 60 0.3668827
## 33 information 58 0.3546533
## 34 bottom 57 0.3485386
## 35 item 55 0.3363092
## 36 study 54 0.3301944
## 37 process 54 0.3301944
## 38 focus 53 0.3240797
## 39 up 53 0.3240797
## 40 example 52 0.3179650
## 41 onomasiological 52 0.3179650
## 42 cue 50 0.3057356
## 43 category 48 0.2935062
## 44 referent 48 0.2935062
## 45 perceptual 47 0.2873915
## 46 expectation 46 0.2812768
## 47 source 46 0.2812768
## 48 relation 46 0.2812768
## 49 frequency 45 0.2751620
## 50 way 45 0.2751620
## 51 question 44 0.2690473
## 52 base 44 0.2690473
## 53 down 43 0.2629326
## 54 level 43 0.2629326
## 55 type 43 0.2629326
## 56 give 42 0.2568179
## 57 make 42 0.2568179
## 58 case 41 0.2507032
## 59 top 41 0.2507032
## 60 indicate 41 0.2507032
## 61 situation 41 0.2507032
## 62 consider 40 0.2445885
## 63 define 40 0.2445885
## 64 dimension 40 0.2445885
## 65 speaker 39 0.2384738
## 66 role 39 0.2384738
## 67 property 38 0.2323591
## 68 follow 38 0.2323591
## 69 point 38 0.2323591
## 70 specific 38 0.2323591
## 71 first 37 0.2262443
## 72 however 37 0.2262443
## 73 event 37 0.2262443
## 74 name 37 0.2262443
## 75 provide 36 0.2201296
## 76 involve 36 0.2201296
## 77 surprisal 36 0.2201296
## 78 hand 36 0.2201296
## 79 attentional 36 0.2201296
## 80 become 36 0.2201296
## 81 memory 35 0.2140149
## 82 expression 35 0.2140149
## 83 choice 35 0.2140149
## 84 work 34 0.2079002
## 85 social 34 0.2079002
## 86 figure 34 0.2079002
## 87 target 33 0.2017855
## 88 knowledge 33 0.2017855
## 89 structural 33 0.2017855
## 90 definition 32 0.1956708
## 91 refer 32 0.1956708
## 92 thus 32 0.1956708
## 93 factor 32 0.1956708
## 94 take 32 0.1956708
## 95 same 32 0.1956708
## 96 active 32 0.1956708
## 97 individual 31 0.1895561
## 98 model 31 0.1895561
## 99 perception 31 0.1895561
## 100 passive 31 0.1895561
## 101 likely 30 0.1834414
## 102 major 30 0.1834414
## 103 object 30 0.1834414
## 104 find 29 0.1773266
## 105 instance 29 0.1773266
## 106 present 29 0.1773266
## 107 view 29 0.1773266
## 108 sociolinguistic 28 0.1712119
## 109 account 28 0.1712119
## 110 processing 28 0.1712119
## 111 only 28 0.1712119
## 112 syntactic 28 0.1712119
## 113 most 28 0.1712119
## 114 approach 28 0.1712119
## 115 line 28 0.1712119
## 116 section 27 0.1650972
## 117 mean 27 0.1650972
## 118 experience 27 0.1650972
## 119 time 27 0.1650972
## 120 finding 27 0.1650972
## 121 even 26 0.1589825
## 122 result 26 0.1589825
## 123 perspective 26 0.1589825
## 124 variation 26 0.1589825
## 125 express 26 0.1589825
## 126 mechanism 26 0.1589825
## 127 just 25 0.1528678
## 128 include 25 0.1528678
## 129 difference 25 0.1528678
## 130 general 25 0.1528678
## 131 stimulus 25 0.1528678
## 132 play 25 0.1528678
## 133 phenomena 24 0.1467531
## 134 describe 24 0.1467531
## 135 many 24 0.1467531
## 136 scene 24 0.1467531
## 137 represent 24 0.1467531
## 138 action 24 0.1467531
## 139 current 24 0.1467531
## 140 high 23 0.1406384
## 141 theory 23 0.1406384
## 142 relative 23 0.1406384
## 143 as 23 0.1406384
## 144 possible 22 0.1345237
## 145 field 22 0.1345237
## 146 one 22 0.1345237
## 147 then 22 0.1345237
## 148 distinction 22 0.1345237
## 149 influence 22 0.1345237
## 150 eye 22 0.1345237
## 151 related 21 0.1284090
## 152 direct 21 0.1284090
## 153 relevant 21 0.1284090
## 154 issue 21 0.1284090
## 155 function 21 0.1284090
## 156 participant 21 0.1284090
## 157 myachykov 21 0.1284090
## 158 input 21 0.1284090
## 159 subject 21 0.1284090
## 160 position 21 0.1284090
## 161 so 21 0.1284090
## 162 chapter 21 0.1284090
## 163 entrenchment 21 0.1284090
## 164 suggest 20 0.1222942
## 165 change 20 0.1222942
## 166 part 20 0.1222942
## 167 occur 20 0.1222942
## 168 investigate 20 0.1222942
## 169 range 20 0.1222942
## 170 rather 20 0.1222942
## 171 second 19 0.1161795
## 172 english 19 0.1161795
## 173 relationship 19 0.1161795
## 174 where 19 0.1161795
## 175 discourse 19 0.1161795
## 176 tomlin 19 0.1161795
## 177 selection 19 0.1161795
## 178 here 19 0.1161795
## 179 signal 19 0.1161795
## 180 movement 19 0.1161795
## 181 space 19 0.1161795
## 182 identify 18 0.1100648
## 183 psychological 18 0.1100648
## 184 external 18 0.1100648
## 185 task 18 0.1100648
## 186 alternative 18 0.1100648
## 187 mental 18 0.1100648
## 188 highly 18 0.1100648
## 189 human 18 0.1100648
## 190 various 18 0.1100648
## 191 tend 18 0.1100648
## 192 semasiological 18 0.1100648
## 193 review 17 0.1039501
## 194 discuss 17 0.1039501
## 195 rácz 17 0.1039501
## 196 aspect 17 0.1039501
## 197 central 17 0.1039501
## 198 seem 17 0.1039501
## 199 system 17 0.1039501
## 200 stand 17 0.1039501
# wordcloud2() rendering of the content-word frequency table, Dark2 palette.
wordcloud2(
  data = content_words_frequency,
  size = 2,
  minSize = 6,
  gridSize = 10,
  color = brewer.pal(8, "Dark2")
)