It is possible to download recent tweets, but only those less than 7 days old, and in some cases not all of them. We will use the netdemR
package for this (and for the other functions that scrape Twitter’s REST API).
library(netdemR)
## Loading required package: ROAuth
## Loading required package: rjson
## Loading required package: jsonlite
##
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:rjson':
##
## fromJSON, toJSON
## Loading required package: httr
## Warning: package 'httr' was built under R version 3.4.1
## ##
## ## netdemR: tools for analysis of Twitter data
## ## Networked Democracy Lab at USC
## ## netdem.org
## ##
library(streamR)
## Loading required package: RCurl
## Loading required package: bitops
# Search recent tweets for the two senators' names and store the results in
# senator-tweets.json. The first term used to read "mcconell", a misspelling
# of "McConnell" that would miss tweets spelling the name correctly.
searchTweets(
  q = c("mcconnell", "mccain"),
  filename = "senator-tweets.json",
  n = 1000, until = "2017-10-20",
  oauth_folder = "../credentials"
)
## Warning in strptime(x, fmt, tz = "GMT"): unknown timezone 'default/America/
## Los_Angeles'
## 100 tweets. Max id: 921102245349023744
## 178 hits left
## 200 tweets. Max id: 920343471919108096
## 177 hits left
## 300 tweets. Max id: 920012411238678528
## 176 hits left
## 400 tweets. Max id: 919876079577387008
## 175 hits left
## 500 tweets. Max id: 919749125733867520
## 174 hits left
## 600 tweets. Max id: 919706924131680256
## 173 hits left
## 700 tweets. Max id: 919661077285175296
## 172 hits left
## 789 tweets. Max id: 919623270936260608
# Load the tweets stored in senator-tweets.json into `tweets`
tweets <- parseTweets("senator-tweets.json")
## 1473 tweets have been parsed.
What are the most popular hashtags?
library(stringr)
# Pull every hashtag out of the tweet text, flatten the per-tweet matches into
# one vector, and show the most frequent ones
ht <- unlist(str_extract_all(tweets$text, "#(\\d|\\w)+"))
head(sort(table(ht), decreasing = TRUE))
## ht
## #TheBeat #mo #mosen #Senate #Trump #FoxNews
## 22 6 6 6 6 4
You can check the documentation about the options for string search here.
This is how you would extract information from user profiles:
# Screen names of the accounts whose profiles we want to retrieve
wh <- c("realDonaldTrump", "POTUS", "VP", "FLOTUS")
users <- getUsersBatch(
  screen_names = wh,
  oauth_folder = "../credentials"
)
## 1--4 users left
str(users)
## 'data.frame': 4 obs. of 14 variables:
## $ id_str : chr "818876014390603776" "818910970567344128" "822215679726100480" "25073877"
## $ screen_name : chr "FLOTUS" "VP" "POTUS" "realDonaldTrump"
## $ name : chr "Melania Trump" "Vice President Pence" "President Trump" "Donald J. Trump"
## $ description : chr "This account is run by the Office of First Lady Melania Trump. Tweets may be archived. More at https://t.co/eVVzoBb3Zr" "Vice President Mike Pence. Husband, father, and honored to serve as the 48th Vice President of the United State"| __truncated__ "45th President of the United States of America, @realDonaldTrump. Tweets archived: https://t.co/eVVzoBb3Zr" "45th President of the United States of America\U0001f1fa\U0001f1f8"
## $ followers_count : int 8751270 4731807 20780202 41081854
## $ statuses_count : int 168 2657 1427 36181
## $ friends_count : int 5 9 42 45
## $ created_at : chr "Tue Jan 10 17:43:50 +0000 2017" "Tue Jan 10 20:02:44 +0000 2017" "Thu Jan 19 22:54:28 +0000 2017" "Wed Mar 18 13:46:38 +0000 2009"
## $ location : chr "Washington, D.C." "Washington, D.C." "Washington, D.C." "Washington, DC"
## $ lang : chr "en" "en" "en" "en"
## $ time_zone : chr "Eastern Time (US & Canada)" "Eastern Time (US & Canada)" "Eastern Time (US & Canada)" "Eastern Time (US & Canada)"
## $ status.id_str : chr "922641298074558466" "922960325686722561" "922931058902958081" "922951009277825024"
## $ status.created_at: chr "Tue Oct 24 01:50:01 +0000 2017" "Tue Oct 24 22:57:44 +0000 2017" "Tue Oct 24 21:01:26 +0000 2017" "Tue Oct 24 22:20:42 +0000 2017"
## $ status.text : chr "Thank you for all you do! @MichStatePolice @oaklandsheriff @WestBlmfldPD W Bloomfield Fire Dept & Waterford"| __truncated__ "Deeply humbling to commemorate the 34th anniversary of the Beirut Marine barracks bombing—the opening salvo in "| __truncated__ "RT @DeptofDefense: .@USArmy Capt. Gary M. Rose recognized with the #MedalofHonor more than 47 years after the #"| __truncated__ "So nice being with Republican Senators today. Multiple standing ovations! Most are great people who want big Ta"| __truncated__
Which of these has the most followers?
# Full row for the account with the largest followers_count
users[which.max(users$followers_count),]
## id_str screen_name name
## 4 25073877 realDonaldTrump Donald J. Trump
## description
## 4 45th President of the United States of America\U0001f1fa\U0001f1f8
## followers_count statuses_count friends_count
## 4 41081854 36181 45
## created_at location lang
## 4 Wed Mar 18 13:46:38 +0000 2009 Washington, DC en
## time_zone status.id_str
## 4 Eastern Time (US & Canada) 922951009277825024
## status.created_at
## 4 Tue Oct 24 22:20:42 +0000 2017
## status.text
## 4 So nice being with Republican Senators today. Multiple standing ovations! Most are great people who want big Tax Cuts and success for U.S.
# Screen name of the account with the largest followers_count
users$screen_name[which.max(users$followers_count)]
## [1] "realDonaldTrump"
Download up to 3,200 recent tweets from a Twitter account:
# Download up to 1,000 tweets from the realDonaldTrump timeline into
# realDonaldTrump.json
getTimeline(
  filename = "realDonaldTrump.json",
  screen_name = "realDonaldTrump",
  n = 1000,
  oauth_folder = "../credentials"
)
## 200 tweets. Max id: 915335645719121920
## 898 hits left
## 400 tweets. Max id: 909191177810915328
## 897 hits left
## 600 tweets. Max id: 899008521726861312
## 896 hits left
## 800 tweets. Max id: 890642377131515904
## 895 hits left
## 1000 tweets. Max id: 881373634585079808
What are the most common hashtags?
# Parse the downloaded timeline, then tabulate its most frequent hashtags
tweets <- parseTweets("realDonaldTrump.json")
## 1000 tweets have been parsed.
ht <- unlist(str_extract_all(tweets$text, "#(\\d|\\w)+"))
head(sort(table(ht), decreasing = TRUE))
## ht
## #MAGA #USA #HurricaneHarvey #UNGA
## 21 14 11 10
## #PuertoRico #FakeNews
## 8 7
Download friends and followers:
# Accounts that follow uscpoir
followers <- getFollowers("uscpoir", oauth_folder = "../credentials")
## ../credentials/oauth_token_24
## 15 API calls left
## 64 followers. Next cursor: 0
## 14 API calls left
# Accounts that uscpoir follows
friends <- getFriends("uscpoir", oauth_folder = "../credentials")
## ../credentials/oauth_token_24
## 15 API calls left
## 135 friends. Next cursor: 0
## 14 API calls left
What are the most common words that friends of the uscpoir Twitter account use to describe themselves on Twitter?
# look up the profiles (including descriptions) of the accounts in `friends`
users <- getUsersBatch(ids = friends, oauth_folder = "../credentials")
## 1--135 users left
## 2--35 users left
# create table with frequency of word use
library(quanteda)
## Warning: package 'quanteda' was built under R version 3.4.2
## quanteda version 0.99.9
## Using 3 of 4 threads for parallel computing
##
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
##
## View
# Build a corpus from the profile descriptions that are neither missing nor
# empty. The is.na() guard matters because `NA != ""` evaluates to NA, and
# indexing with NA would otherwise select NA entries.
has_text <- !is.na(users$description) & users$description != ""
tw <- corpus(users$description[has_text])
dfm <- dfm(tw,
  remove = c(
    stopwords("english"), stopwords("spanish"),
    "t.co", "https", "rt", "rts", "http"
  ),
  remove_punct = TRUE
)
# create wordcloud with zero margins, then restore the previous graphical
# parameters so later plots keep room for their axes and labels
old_par <- par(mar = c(0, 0, 0, 0))
textplot_wordcloud(dfm, rot.per = 0, scale = c(3, .50), max.words = 100)
par(old_par)
The REST API also offers a long list of other endpoints that could be of use at some point, depending on your research interests.
# Search for user accounts matching a free-text query
users <- searchUsers(
  q = "usc political science",
  count = 100,
  oauth_folder = "../credentials"
)
## ../credentials/oauth_token_28
# Downloading tweets when you know the ID
getStatuses(
  ids = c("474134260149157888", "266038556504494082"),
  filename = "old-tweets.json",
  oauth_folder = "../credentials"
)
## ../credentials/oauth_token_28
## 900 API calls left
## 899 API calls left
# Parse the tweets saved in old-tweets.json; the result is not assigned, so
# it is printed
parseTweets("old-tweets.json")
## 2 tweets have been parsed.
## text
## 1 Are you allowed to impeach a president for gross incompetence?
## 2 The electoral college is a disaster for a democracy.
## retweet_count favorited truncated id_str
## 1 0 FALSE FALSE 474134260149157888
## 2 0 FALSE FALSE 266038556504494082
## in_reply_to_screen_name
## 1 NA
## 2 NA
## source
## 1 <a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>
## 2 <a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>
## retweeted created_at in_reply_to_status_id_str
## 1 FALSE Wed Jun 04 10:23:11 +0000 2014 NA
## 2 FALSE Wed Nov 07 04:45:09 +0000 2012 NA
## in_reply_to_user_id_str lang listed_count verified location
## 1 NA en 78369 TRUE Washington, DC
## 2 NA en 78369 TRUE Washington, DC
## user_id_str
## 1 25073877
## 2 25073877
## description
## 1 45th President of the United States of America\U0001f1fa\U0001f1f8
## 2 45th President of the United States of America\U0001f1fa\U0001f1f8
## geo_enabled user_created_at statuses_count
## 1 TRUE Wed Mar 18 13:46:38 +0000 2009 36181
## 2 TRUE Wed Mar 18 13:46:38 +0000 2009 36181
## followers_count favourites_count protected user_url name
## 1 41081868 19 FALSE NA Donald J. Trump
## 2 41081868 19 FALSE NA Donald J. Trump
## time_zone user_lang utc_offset friends_count
## 1 Eastern Time (US & Canada) en -14400 45
## 2 Eastern Time (US & Canada) en -14400 45
## screen_name country_code country place_type full_name place_name
## 1 realDonaldTrump NA NA NA NA NA
## 2 realDonaldTrump NA NA NA NA NA
## place_id place_lat place_lon lat lon expanded_url url
## 1 NA NaN NaN NA NA NA NA
## 2 NA NaN NaN NA NA NA NA
# download user information from a list
MCs <- getList(
  list_name = "new-members-of-congress",
  screen_name = "cspan",
  oauth_folder = "../credentials"
)
## ../credentials/oauth_token_30
## 900 API calls left
## 20 users in list. Next cursor: 5427698142933319684
## 899 API calls left
## 40 users in list. Next cursor: 4611686021745187729
## 898 API calls left
## 60 users in list. Next cursor: 0
## 897 API calls left
# Show the first rows of the downloaded list members
head(MCs)
## id id_str name screen_name
## 1 8.272798e+17 827279765287559171 Rep. Mike Johnson RepMikeJohnson
## 2 8.235530e+17 823552974253342721 Anthony G. Brown RepAnthonyBrown
## 3 8.171385e+17 817138492614524928 Ted Budd RepTedBudd
## 4 8.170763e+17 817076257770835968 Adriano Espaillat RepEspaillat
## 5 8.170502e+17 817050219007328258 Rep. Blunt Rochester RepBRochester
## 6 8.168339e+17 816833925456789505 Nanette D. Barragán RepBarragan
## location
## 1 Washington, DC
## 2 Washington, DC
## 3 Davie County, North Carolina
## 4 https://www.facebook.com/Congr
## 5 Delaware, USA - Washington, DC
## 6 San Pedro, CA
## description
## 1 Proudly serving Louisiana's 4th Congressional District. Member on @HouseJudiciary & @NatResources.
## 2 Congressman proudly representing Maryland's Fourth District. Member of @HASCDemocrats & @NRDems. Father, husband & retired @USArmyReserve Colonel
## 3 Proudly serving the 13th district of North Carolina. #NC13
## 4 U. S. Representative proudly serving New York’s 13th Congressional District. Follow my work in Washington and #NY13
## 5 Official Twitter page for U.S. Representative Lisa Blunt Rochester (D-DE). Tweets from Rep. Blunt Rochester signed -LBR.
## 6 Official account. Honored to represent California's 44th Congressional District #CA44. Member of the @HispanicCaucus @USProgressives @Dodgers fan.
## url followers_count friends_count
## 1 https://t.co/qLAyhrFbRl 1918 368
## 2 https://t.co/2u5X332ICM 5047 144
## 3 https://t.co/VTsvWe0pia 2104 177
## 4 https://t.co/lcRqmQFAbz 6646 1035
## 5 https://t.co/Fe3XCG51wO 2930 289
## 6 https://t.co/Mt3nPi7hSH 5432 479
## created_at time_zone lang
## 1 Thu Feb 02 22:17:20 +0000 2017 Eastern Time (US & Canada) en
## 2 Mon Jan 23 15:28:24 +0000 2017 Eastern Time (US & Canada) en
## 3 Thu Jan 05 22:39:33 +0000 2017 Pacific Time (US & Canada) en
## 4 Thu Jan 05 18:32:15 +0000 2017 <NA> en
## 5 Thu Jan 05 16:48:47 +0000 2017 <NA> en
## 6 Thu Jan 05 02:29:18 +0000 2017 <NA> en
This is also useful if e.g. you’re interested in compiling lists of journalists, because media outlets offer these lists in their profiles.
# Download list of users who retweeted a tweet (unfortunately, only up to 100)
rts <- getRetweets(id = "892147656319004672", oauth_folder = "../credentials")
## ../credentials/oauth_token_26
## 75 API calls left
## 83 retweeters. Next cursor: 0
## 74 API calls left
# Profiles of the accounts in `rts`, i.e. the retweeters of
# https://twitter.com/realDonaldTrump/status/892147656319004672
users <- getUsersBatch(ids = rts, oauth_folder = "../credentials")
## 1--83 users left
# create table with frequency of word use
library(quanteda)
# Keep only descriptions that are neither missing nor empty. The is.na()
# guard matters because `NA != ""` evaluates to NA, and indexing with NA
# would otherwise select NA entries.
has_text <- !is.na(users$description) & users$description != ""
tw <- corpus(users$description[has_text])
dfm <- dfm(tw,
  remove = c(
    stopwords("english"), stopwords("spanish"),
    "t.co", "https", "rt", "rts", "http"
  ),
  remove_punct = TRUE
)
# create wordcloud with zero margins, then restore the previous graphical
# parameters; otherwise a later plot on the same device (such as a histogram)
# is drawn with no room for its axes and labels
old_par <- par(mar = c(0, 0, 0, 0))
textplot_wordcloud(dfm, rot.per = 0, scale = c(5, .50), max.words = 100)
par(old_par)
# format Twitter dates to facilitate analysis
tweets <- parseTweets("realDonaldTrump.json")
## 1000 tweets have been parsed.
tweets$date <- formatTwDate(tweets$created_at, format = "date")
# histogram of the tweet dates, binned by month
hist(tweets$date, breaks = "month")