We’ll now turn to a different type of Twitter data – static data, either recent tweets or user-level information. This type of data can be retrieved with Twitter’s REST API. We will use the tweetscores
package here – this is a package that I created to facilitate the collection and analysis of Twitter data.
It is possible to download recent tweets, but only those less than 7 days old, and in some cases not all of them.
# Load a previously saved OAuth token object (`my_oauth`) from disk;
# it is required by every REST API call below. The path assumes the
# token file lives in the home directory -- adjust as needed.
load("~/my_oauth")
# tweetscores: helper functions for collecting and analysing Twitter
# data via the REST API (searchTweets, getUsersBatch, getTimeline, ...)
library(tweetscores)
## Loading required package: R2WinBUGS
## Loading required package: coda
## Loading required package: boot
## ##
## ## tweetscores: tools for the analysis of Twitter data
## ## Pablo Barbera (LSE)
## ## www.tweetscores.com
## ##
library(streamR)
## Loading required package: RCurl
## Loading required package: bitops
## Loading required package: rjson
## Warning: package 'rjson' was built under R version 3.4.4
## Loading required package: ndjson
## Warning: package 'ndjson' was built under R version 3.4.4
# Search the REST API for recent tweets matching the query terms and
# save the raw JSON to disk. NOTE(review): presumably the two terms in
# `q` are combined with OR -- confirm against the tweetscores docs.
# `n` caps the total tweets collected; `until` bounds the most recent
# date (the search API only reaches back ~7 days, per the text above).
searchTweets(q=c("brexit", "survey"),
filename="~/data/survey-tweets.json",
n=1000, until="2018-07-31",
oauth=my_oauth)
## 100 tweets. Max id: 1024082209102282752
## 168 hits left
## 200 tweets. Max id: 1024081776484839424
## 167 hits left
## 300 tweets. Max id: 1024081377489313792
## 166 hits left
## 400 tweets. Max id: 1024080981903458304
## 165 hits left
## 500 tweets. Max id: 1024080636372504576
## 164 hits left
## 600 tweets. Max id: 1024080257140379648
## 163 hits left
## 700 tweets. Max id: 1024079875890716672
## 162 hits left
## 800 tweets. Max id: 1024079568678936576
## 161 hits left
## 900 tweets. Max id: 1024079294723760128
## 160 hits left
## 1000 tweets. Max id: 1024079085377441792
tweets <- parseTweets("~/data/survey-tweets.json")
## 1000 tweets have been parsed.
What are the most popular hashtags?
library(stringr)
# Extract every hashtag from each tweet's text.
# NOTE(review): \\w already matches digits, so the \\d alternative is
# redundant -- "#\\w+" would be an equivalent, simpler pattern.
ht <- str_extract_all(tweets$text, "#(\\d|\\w)+")
# str_extract_all returns a list (one character vector per tweet);
# flatten it into a single vector of hashtags
ht <- unlist(ht)
# Six most frequent hashtags. Matching is case-sensitive, so #Brexit
# and #brexit are counted separately (visible in the output below).
head(sort(table(ht), decreasing = TRUE))
## ht
## #Brexit #brexit #PeoplesVote #BREXIT #LoveIsland
## 120 25 17 13 11
## #newsnight
## 8
You can check the stringr package documentation for the full set of options available for string search.
This is how you would extract information from user profiles:
# Screen names of the White House-related accounts to look up
wh <- c("realDonaldTrump", "POTUS", "VP", "FLOTUS")
# Fetch profile metadata (followers, description, created_at, ...) for
# each screen name; returns one data-frame row per user
users <- getUsersBatch(screen_names=wh,
oauth=my_oauth)
## 1--4 users left
str(users)
## 'data.frame': 4 obs. of 9 variables:
## $ id_str : chr "818876014390603776" "25073877" "822215679726100480" "818910970567344128"
## $ screen_name : chr "FLOTUS" "realDonaldTrump" "POTUS" "VP"
## $ name : chr "Melania Trump" "Donald J. Trump" "President Trump" "Vice President Mike Pence"
## $ description : chr "This account is run by the Office of First Lady Melania Trump. Tweets may be archived. More at https://t.co/eVVzoBb3Zr" "45th President of the United States of America\U0001f1fa\U0001f1f8" "45th President of the United States of America, @realDonaldTrump. Tweets archived: https://t.co/eVVzoBb3Zr" "Vice President Mike Pence. Husband, father, & honored to serve as the 48th Vice President of the United States."| __truncated__
## $ followers_count: int 10714631 53444080 23682056 6432401
## $ statuses_count : int 345 38408 3655 4642
## $ friends_count : int 6 47 39 11
## $ created_at : chr "Tue Jan 10 17:43:50 +0000 2017" "Wed Mar 18 13:46:38 +0000 2009" "Thu Jan 19 22:54:28 +0000 2017" "Tue Jan 10 20:02:44 +0000 2017"
## $ location : chr "Washington, D.C." "Washington, DC" "Washington, D.C." "Washington, D.C."
Which of these has the most followers?
users[which.max(users$followers_count),]
## id_str screen_name name
## 2 25073877 realDonaldTrump Donald J. Trump
## description
## 2 45th President of the United States of America\U0001f1fa\U0001f1f8
## followers_count statuses_count friends_count
## 2 53444080 38408 47
## created_at location
## 2 Wed Mar 18 13:46:38 +0000 2009 Washington, DC
users$screen_name[which.max(users$followers_count)]
## [1] "realDonaldTrump"
Download up to 3,200 recent tweets from a Twitter account:
getTimeline(filename="~/data/realDonaldTrump.json", screen_name="realDonaldTrump", n=1000, oauth=my_oauth)
## 200 tweets. Max id: 1018956970143858688
## 893 hits left
## 400 tweets. Max id: 1011577303023980544
## 892 hits left
## 600 tweets. Max id: 1006286790536323074
## 891 hits left
## 800 tweets. Max id: 999626347361206274
## 890 hits left
## 1000 tweets. Max id: 989834048796266498
What are the most common hashtags?
tweets <- parseTweets("~/data/realDonaldTrump.json")
## 1000 tweets have been parsed.
# Extract every hashtag from each tweet's text.
# NOTE(review): \\w already matches digits, so the \\d alternative is
# redundant -- "#\\w+" would be an equivalent, simpler pattern.
ht <- str_extract_all(tweets$text, "#(\\d|\\w)+")
# Flatten the per-tweet list into a single vector of hashtags
ht <- unlist(ht)
# Six most frequent hashtags (case-sensitive counts)
head(sort(table(ht), decreasing = TRUE))
## ht
## #MAGA #1 #HELSINKI2018 #RightToTry #G7Summit
## 15 5 4 4 3
## #SCOTUS
## 3
Download friends and followers:
# Retrieve the full list of follower IDs for an account. The API
# paginates at 5,000 IDs per call (see the cursor messages below);
# a cursor of 0 means the final page was reached.
followers <- getFollowers("ECPR",
oauth=my_oauth)
## 12 API calls left
## 5000 followers. Next cursor: 1556878267714917122
## 11 API calls left
## 10000 followers. Next cursor: 1464105674343453827
## 10 API calls left
## 13172 followers. Next cursor: 0
## 9 API calls left
# Retrieve the IDs of the accounts that @ECPR follows ("friends");
# paginated the same way as getFollowers()
friends <- getFriends("ECPR",
oauth=my_oauth)
## 12 API calls left
## 1072 friends. Next cursor: 0
## 11 API calls left
What are the most common words that friends of the ECPR account use to describe themselves on Twitter?
# Look up the full profile (including the description field we want)
# for every friend ID; the API processes 100 users per call, visible
# in the countdown messages below
users <- getUsersBatch(ids=friends, oauth=my_oauth)
## 1--1072 users left
## 2--972 users left
## 3--872 users left
## 4--772 users left
## 5--672 users left
## 6--572 users left
## 7--472 users left
## 8--372 users left
## 9--272 users left
## 10--172 users left
## 11--72 users left
# create table with frequency of word use
library(quanteda)
## Warning: package 'quanteda' was built under R version 3.4.4
## Package version: 1.3.0
## Parallel computing: 2 of 4 threads used.
## See https://quanteda.io for tutorials and examples.
##
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
##
## View
# Build a quanteda corpus from the non-empty profile descriptions
tw <- corpus(users$description[users$description!=""])
# Document-feature matrix: drop English and Spanish stopwords, common
# Twitter boilerplate tokens, and punctuation.
# NOTE(review): naming the object `dfm` shadows quanteda's dfm()
# function -- a different name (e.g. `desc_dfm`) would be safer.
dfm <- dfm(tw, remove=c(stopwords("english"), stopwords("spanish"),
"t.co", "https", "rt", "rts", "http"),
remove_punct=TRUE)
# The 30 most frequent features across all descriptions
topfeatures(dfm, n = 30)
## politics university political research international
## 267 267 256 176 137
## science professor social european policy
## 130 107 97 92 76
## relations studies twitter tweets official
## 69 67 65 64 63
## public department sciences lecturer news
## 57 55 54 51 51
## account school centre institute der
## 48 46 43 41 41
## uk scientist editor eu director
## 40 39 38 36 36
# Plot a word cloud of the most frequent description terms
par(mar=c(0,0,0,0))  # zero the plot margins so the cloud fills the device
# rotation=0 keeps all words horizontal; show at most the top 100 words
textplot_wordcloud(dfm, rotation=0, min_size=1, max_size=5, max_words=100)