We’ll now turn to a different type of Twitter data – static data, either recent tweets or user-level information. This type of data can be retrieved with Twitter’s REST API. We will use the tweetscores package here – this is a package that I created to facilitate the collection and analysis of Twitter data.

Searching recent tweets

It is possible to download recent tweets, but only those less than 7 days old, and in some cases not all of them.

# Load stored OAuth credentials (presumably a token object named `my_oauth`
# saved earlier with save() -- verify the object name matches your file)
load("~/my_oauth")
# tweetscores: helpers for collecting/analyzing Twitter data via the REST API
library(tweetscores)
## Loading required package: R2WinBUGS
## Loading required package: coda
## Loading required package: boot
## ##
## ## tweetscores: tools for the analysis of Twitter data
## ## Pablo Barbera (LSE)
## ## www.tweetscores.com
## ##
# streamR: provides parseTweets() for reading tweet JSON into a data frame
library(streamR)
## Loading required package: RCurl
## Loading required package: bitops
## Loading required package: rjson
## Warning: package 'rjson' was built under R version 3.4.4
## Loading required package: ndjson
## Warning: package 'ndjson' was built under R version 3.4.4
# Search the REST API for recent tweets matching either query term and
# append the raw JSON to a file; n caps the total, until bounds the date.
searchTweets(q=c("kennedy", "supreme court"), 
  filename="../data/kennedy-tweets.json",
  n=1000, until="2018-07-01", 
  oauth=my_oauth)
## 100 tweets. Max id: 1013210573645938688
## 148 hits left
## 200 tweets. Max id: 1013210313162883072
## 147 hits left
## 300 tweets. Max id: 1013210021675577344
## 146 hits left
## 400 tweets. Max id: 1013209744461348864
## 145 hits left
## 500 tweets. Max id: 1013209510113107968
## 144 hits left
## 600 tweets. Max id: 1013209268152135680
## 143 hits left
## 700 tweets. Max id: 1013209045967212544
## 142 hits left
## 800 tweets. Max id: 1013208803217567744
## 141 hits left
## 900 tweets. Max id: 1013208558740140032
## 140 hits left
## 1000 tweets. Max id: 1013208361247100928
# Read the collected JSON back into a data frame (one row per tweet)
tweets <- parseTweets("../data/kennedy-tweets.json")
## 1000 tweets have been parsed.

What are the most popular hashtags?

# Pull every hashtag out of the tweet text (a "#" followed by one or more
# word or digit characters) and tabulate the most frequent ones.
library(stringr)
ht <- unlist(str_extract_all(tweets$text, "#(\\d|\\w)+"))
head(sort(table(ht), decreasing = TRUE))
## ht
##         #MN08 #Election2018    #Minnesota        #Trump    #MNprimary 
##            21            19            19            14            13 
##        #MNpol 
##            10

You can check the documentation about the options for string search in the stringr package documentation (e.g. `?str_extract_all` and `vignette("regular-expressions", package = "stringr")`).

Extracting users’ profile information

This is how you would extract information from user profiles:

# Screen names of the White House accounts to look up
wh <- c("realDonaldTrump", "POTUS", "VP", "FLOTUS")
# Fetch profile metadata for each screen name via the REST API
users <- getUsersBatch(screen_names=wh,
                       oauth=my_oauth)
## 1--4 users left
# Inspect the structure of the returned data frame (one row per account)
str(users)
## 'data.frame':    4 obs. of  9 variables:
##  $ id_str         : chr  "818910970567344128" "25073877" "822215679726100480" "818876014390603776"
##  $ screen_name    : chr  "VP" "realDonaldTrump" "POTUS" "FLOTUS"
##  $ name           : chr  "Vice President Mike Pence" "Donald J. Trump" "President Trump" "Melania Trump"
##  $ description    : chr  "Vice President Mike Pence. Husband, father, & honored to serve as the 48th Vice President of the United States."| __truncated__ "45th President of the United States of America\U0001f1fa\U0001f1f8" "45th President of the United States of America, @realDonaldTrump. Tweets archived: https://t.co/eVVzoBb3Zr" "This account is run by the Office of First Lady Melania Trump. Tweets may be archived. More at https://t.co/eVVzoBb3Zr"
##  $ followers_count: int  6384634 53219516 23538114 10679230
##  $ statuses_count : int  4380 38100 3349 327
##  $ friends_count  : int  11 47 39 6
##  $ created_at     : chr  "Tue Jan 10 20:02:44 +0000 2017" "Wed Mar 18 13:46:38 +0000 2009" "Thu Jan 19 22:54:28 +0000 2017" "Tue Jan 10 17:43:50 +0000 2017"
##  $ location       : chr  "Washington, D.C." "Washington, DC" "Washington, D.C." "Washington, D.C."

Which of these has the most followers?

# Locate the row with the largest follower count once, then reuse the index
top <- which.max(users$followers_count)
users[top, ]
##     id_str     screen_name            name
## 2 25073877 realDonaldTrump Donald J. Trump
##                                                          description
## 2 45th President of the United States of America\U0001f1fa\U0001f1f8
##   followers_count statuses_count friends_count
## 2        53219516          38100            47
##                       created_at       location
## 2 Wed Mar 18 13:46:38 +0000 2009 Washington, DC
# Just the screen name of that account
users$screen_name[top]
## [1] "realDonaldTrump"

Download up to 3,200 recent tweets from a Twitter account:

# Download the account's most recent tweets (REST API caps timelines at
# ~3,200 statuses) and append the raw JSON to a file
getTimeline(filename="../data/realDonaldTrump.json", screen_name="realDonaldTrump", n=1000, oauth=my_oauth)
## 200 tweets. Max id: 1008725438972211200
## 883 hits left
## 400 tweets. Max id: 1002510522032541701
## 882 hits left
## 600 tweets. Max id: 994182263960162304
## 881 hits left
## 800 tweets. Max id: 985504808646971392
## 880 hits left
## 1000 tweets. Max id: 973187513731944448

What are the most common hashtags?

# Read the downloaded timeline JSON into a data frame (overwrites the
# earlier `tweets` object from the keyword search)
tweets <- parseTweets("../data/realDonaldTrump.json")
## 1000 tweets have been parsed.
# Extract hashtags and tabulate the most frequent ones, as before
ht <- str_extract_all(tweets$text, "#(\\d|\\w)+")
ht <- unlist(ht)
head(sort(table(ht), decreasing = TRUE))
## ht
##          #MAGA    #RightToTry        #TaxDay      #G7Summit #DrainTheSwamp 
##             22              4              4              3              2 
##   #MemorialDay 
##              2

Building friend and follower networks

Download friends and followers:

# IDs of the accounts following @MethodologyLSE (rate-limited endpoint;
# the messages below report remaining API calls in the window)
followers <- getFollowers("MethodologyLSE", 
    oauth=my_oauth)
## 12 API calls left
## 1389 followers. Next cursor: 0
## 11 API calls left
# IDs of the accounts that @MethodologyLSE follows
friends <- getFriends("MethodologyLSE", 
    oauth=my_oauth)
## 8 API calls left
## 121 friends. Next cursor: 0
## 7 API calls left

What are the most common words that friends of the LSE Methodology Twitter account use to describe themselves on Twitter?

# extract profile descriptions for every friend account
users <- getUsersBatch(ids=friends, oauth=my_oauth)
## 1--121 users left
## 2--21 users left
# create table with frequency of word use
library(quanteda)
## Warning: package 'quanteda' was built under R version 3.4.4
## Package version: 1.3.0
## Parallel computing: 2 of 4 threads used.
## See https://quanteda.io for tutorials and examples.
## 
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
## 
##     View
# Build a corpus from the non-empty profile descriptions
tw <- corpus(users$description[users$description!=""])
# Document-feature matrix, dropping English/Spanish stopwords and
# Twitter/link boilerplate tokens. Named `desc_dfm` (not `dfm`) so the
# object does not shadow quanteda's dfm() function in the session.
desc_dfm <- dfm(tw, remove=c(stopwords("english"), stopwords("spanish"),
                             "t.co", "https", "rt", "rts", "http"),
               remove_punct=TRUE)
# 30 most frequent terms across all descriptions
topfeatures(desc_dfm, n = 30)
##        lse   research    science     london     school  economics 
##         48         39         30         27         25         25 
##  political     social department     centre     public     policy 
##         24         21         14         11         11         11 
##       news   teaching     events      lse's university         uk 
##         11         10         10         10          9          8 
##    twitter    account         us    society   official  programme 
##          8          8          8          8          8          7 
##  institute       data   politics   analysis   academic   european 
##          7          7          7          6          6          6
# create wordcloud (zero margins so the cloud fills the plotting region)
par(mar=c(0,0,0,0))
textplot_wordcloud(desc_dfm, rotation=0, min_size=1, max_size=5, max_words=100)