Load data

library(tidyverse)
# Read the two tab-separated keyword tables from the working directory.
# Judging by the column headers printed below, the first table uses the whole
# BNC2014 as reference corpus and the second only its academic prose section.
keywords <- readr::read_tsv(
  file = "weighted_keywords_clean.tsv"
)
keywords_academic <- readr::read_tsv(
  file = "weighted_keywords_academic_clean.tsv"
)

# Preview up to the first 200 rows of the whole-corpus table.
keywords %>%
  head(n = 200)
## # A tibble: 200 × 7
##    Term            Focus rel. freq. (S…¹ Refer…² Simpl…³ Log l…⁴ % dif…⁵ Log r…⁶
##    <chr>                           <dbl>   <dbl>   <dbl>   <dbl> <chr>     <dbl>
##  1 salience                       15914.    2.61   156.    8647. 610,30…   12.6 
##  2 linguistic                      4108.   10.8     38.0  13854. 38,111…    8.58
##  3 salient                         3210.    3.71    31.9   3863. 86,422…    9.76
##  4 attention                       4971.  119.      23.1 159395. 4,071.…    5.38
##  5 language                        4280.  110.      20.9 193479. 3,799.…    5.29
##  6 lexical                         2037.    5.32    20.3   4477  38,172…    8.58
##  7 cognitive                       2623.   35.1     20.2    653. 7,373.…    6.22
##  8 bottom-up                       1899.    1.9     19.6    732. 99,614…    9.96
##  9 onomasiological                 1795.    0       19.0    850. Infini…   47.4 
## 10 e.g                             1795.    0.5     18.9    879. 360,36…   11.8 
## # … with 190 more rows, and abbreviated variable names
## #   ¹​`Focus rel. freq. (Salience clean: whole corpus)`,
## #   ²​`Reference rel. freq. (BNC2014: whole corpus)`, ³​`Simple maths`,
## #   ⁴​`Log likelihood`, ⁵​`% difference`, ⁶​`Log ratio`
# Preview up to the first 200 rows of the academic-prose comparison table.
keywords_academic %>%
  head(n = 200)
## # A tibble: 200 × 7
##    Term            Focus rel. freq. (S…¹ Refer…² Simpl…³ Log l…⁴ % dif…⁵ Log r…⁶
##    <chr>                           <dbl>   <dbl>   <dbl>   <dbl> <chr>     <dbl>
##  1 salience                       15914.   13.3    141.    6294. 119,73…   10.2 
##  2 salient                         3210.   16.3     28.5    935. 19,570…    7.62
##  3 linguistic                      4108.   48.2     28.4   1030. 8,422.…    6.41
##  4 onomasiological                 1795.    0       19.0    679. Infini…   47.4 
##  5 e.g                             1795.    0.96    18.8    596. 186,29…   10.9 
##  6 bottom-up                       1899.    7.45    18.6    575. 25,381…    7.99
##  7 linguistics                     1795.    5.58    18.0    560. 32,095…    8.33
##  8 lexical                         2037.   27.1     16.8    497. 7,410.…    6.23
##  9 attention                       4971.  228.      15.5  38289. 2,080.…    4.45
## 10 top-down                        1553.   12.0     14.8   1041. 12,831…    7.01
## # … with 190 more rows, and abbreviated variable names
## #   ¹​`Focus rel. freq. (Salience clean: whole corpus)`,
## #   ²​`Reference rel. freq. (BNC2014: academic prose)`, ³​`Simple maths`,
## #   ⁴​`Log likelihood`, ⁵​`% difference`, ⁶​`Log ratio`

Produce a wordcloud

library(RColorBrewer)
library(wordcloud2)
# Draw one word cloud per reference corpus. Each table is reduced to the term
# and its `Simple maths` score, which are the two columns handed to
# wordcloud2() as word and weight.
#
# The Dark2 palette has only 8 colours while the tables hold far more terms.
# wordcloud2's documented vector usage of `color` supplies one colour per word,
# so the palette is recycled with rep_len() to exactly one entry per row
# instead of relying on how the widget treats a too-short colour vector.

# Whole BNC2014 Corpus:
keyword_list <- keywords %>%
  select(Term, `Simple maths`)
wordcloud2(
  data = keyword_list,
  color = rep_len(brewer.pal(8, "Dark2"), nrow(keyword_list)),
  gridSize = 10,
  minSize = 6,
  size = 2
)

# BNC2014 Academic prose:
keyword_list <- keywords_academic %>%
  select(Term, `Simple maths`)
wordcloud2(
  data = keyword_list,
  color = rep_len(brewer.pal(8, "Dark2"), nrow(keyword_list)),
  gridSize = 10,
  minSize = 6,
  size = 2
)