I've used the following script before, but now it is not working. I don't see any tweets being printed in my terminal (as coded in line 38), nor are any tweets being stored in my CSV. I don't know what the issue is.
import tweepy
import csv
import time
# Twitter API credentials (placeholders).
access_token = "xxxxxxxxxx"
access_token_secret = "xxxxxxxxxx"
consumer_key = "xxxxxxxxxx"
consumer_secret = "xxxxxxxxxx"

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

print("Starting search...")

searchTerms = ["#Xfactor", "#Yfactor"]

# The search query has to be a single string.  The original code passed
# `q=[searchTerms]`, i.e. the list wrapped in a second list, which is not a
# usable query.  Joining the terms with the OR operator asks for tweets that
# contain either hashtag.
query = " OR ".join(searchTerms)

tweets = tweepy.Cursor(api.search, q=query,
                       since="2015-10-18",
                       until="2015-10-23",
                       include_entities=True).items(999999999)

# Open/create the file in append mode.  The `with` block closes the file when
# the loop finishes or an unhandled error propagates, so buffered rows are not
# lost.
with open('somesearch.csv', 'a') as csvFile:
    csvWriter = csv.writer(csvFile)
    while True:
        try:
            for tweet in tweets:
                print("%s %s" % (tweet.created_at, tweet.text.encode('utf-8')))
                csvWriter.writerow([
                    tweet.created_at,
                    tweet.id_str,
                    tweet.author.name.encode('utf-8'),
                    tweet.author.screen_name.encode('utf-8'),
                    tweet.user.location.encode('utf-8'),
                    tweet.coordinates,
                    tweet.text.encode('utf-8'),
                    tweet.retweet_count,
                    tweet.favorite_count,
                ])
            # The cursor is exhausted.  A `for` loop ends silently instead of
            # raising StopIteration, so without this explicit break the
            # `while True` would spin forever and "Done!" was never reached.
            break
        except tweepy.TweepError:
            # Wait 15 minutes (presumably for the rate-limit window to
            # reset), then resume iterating the same cursor.
            time.sleep(60 * 15)

print("Done!")
The problem lies in this line:
tweets= tweepy.Cursor(api.search,q=[searchTerms],
What you've done is created a list containing a list. Look at this code:
searchTerms = ["#Xfactor", "#Yfactor"]
q=[searchTerms]
print(searchTerms)
>>> ['#Xfactor', '#Yfactor']
print(q)
>>> [['#Xfactor', '#Yfactor']]
What you're searching for is not searchTerms, you're searching for a list of that.
So now the tweet you're interested in must contain the literal ['#Xfactor', '#Yfactor']. To fix that issue, change the q into:
tweets= tweepy.Cursor(api.search,q=searchTerms,
Related
I used the tweepy library (for twitter api-v1.1) to get some metadata (e.g., tweet text, #retweets, userid, etc.) for a list of tweet ids. Here is my code:
# Twitter API credentials (placeholder values).
consumer_key = 'xxxxxxxxxxxx'
consumer_key_secret = 'xxxxxxxxxxxx'
access_token = 'xxxxxxxxxxxxxxxxxx'
access_token_secret = 'xxxxxxxxxxxxxxxxxx'
# Build the OAuth handler from the consumer key pair, attach the access token
# pair, and create the module-level `api` client that createTrainingSet uses.
auth = tweepy.OAuthHandler(consumer_key, consumer_key_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
def createTrainingSet(corpusFile, tweetContent):
    """Fetch tweet metadata for every tweet id listed in a labelled corpus CSV.

    Args:
        corpusFile: Path to a comma-separated file whose rows hold, in order:
            the tweet id, the "unreliable", "conspiracy", "clickbait" and
            "political/biased" labels, and a date.
        tweetContent: Path of the intended output file.  It is not used by
            this function; the parameter is kept so existing callers keep
            working.

    Returns:
        A list of dicts, one per tweet that could be fetched.  Each dict holds
        the corpus columns plus the fetched text, retweet/favorite counts,
        creation time, and the author's id and account creation time.  Tweets
        that cannot be fetched are reported and skipped.
    """
    import csv
    import time

    corpus = []
    with open(corpusFile, 'r') as csvfile:
        lineReader = csv.reader(csvfile, delimiter=',')
        for row in lineReader:
            if not row:
                # Blank lines would otherwise raise IndexError on row[0].
                continue
            corpus.append({"tweet_id": row[0], "unreliable": row[1],
                           "conspiracy": row[2], "clickbait": row[3],
                           "political/biased": row[4], "date": row[5]})

    sleepTime = 2  # seconds to pause after each successful request
    trainingDataSet = []
    for tweet in corpus:
        try:
            tweetFetched = api.get_status(tweet["tweet_id"])
        except tweepy.TweepError as e:
            # Report WHY the fetch failed instead of hiding it behind a bare
            # `except:`; the generic message alone gave no way to diagnose
            # the problem.
            print("Inside the exception - no:2")
            print("Could not fetch tweet %s: %s" % (tweet["tweet_id"], e))
            continue

        print("Tweet fetched" + tweetFetched.text)
        print("followers_count: " + str(tweetFetched.user.followers_count))
        print("friends_count: " + str(tweetFetched.user.friends_count))
        tweet["text"] = tweetFetched.text
        tweet["retweet_count"] = tweetFetched.retweet_count
        tweet["favorite_count"] = tweetFetched.favorite_count
        tweet["created_at"] = tweetFetched.created_at
        tweet["user_id"] = tweetFetched.user.id_str
        tweet["user_created_at"] = tweetFetched.user.created_at
        trainingDataSet.append(tweet)
        time.sleep(sleepTime)

    # The original built this list but never returned it, so callers always
    # received None.
    return trainingDataSet
# Input corpus CSV; createTrainingSet reads columns 0-5 of each row.
corpusFile = "sample.csv"
# Intended target file.
# NOTE(review): createTrainingSet accepts this path but never writes to it.
tweetContent = "tweetContent.csv"
# Run the fetch over the corpus.
resultFile = createTrainingSet(corpusFile, tweetContent)
I don't know why this code doesn't work any more (the last time it worked was about a couple of months ago). However, when I run it now, it prints "Inside the exception - no:2". Why is that?
Here are the lines of code that helped me find the errors:
except tweepy.TweepError as e:
print ('the error code:', e.args[0][0]['code'])
print ('the error message:', e.args[0][0]['message'])
Also, thanks to Jeyekomon's answer in this post, I found that the e.message[0]['code'] is not working anymore:
The error code used to be accessed using e.message[0]['code'] which no longer works. The message attribute has been deprecated in Python 2.6 and removed in Python 3.0. Currently you get an error 'TweepError' object has no attribute 'message'
In addition, it seems there are some other helpful attributes (api_code, reason and response) in TweepError exception class that are not in the documentation.
enter image description here
I have tried many different things, and yes, my Python skill is limited, but why do I get 10 lists instead of one with my code? How can I change my loop to append to only one list, not 10? I get 10 results from my query, and I want to just add those ten items to a single list.
import os
import tweepy as tw
import pandas as pd
from collections import Counter
# Accumulates the screen names collected below.
# NOTE(review): the name `list` shadows the built-in type; a name such as
# `screen_names` would be safer.
list = []
# Twitter API credentials (left blank in this listing).
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""
# Build the OAuth handler and the API client; wait_on_rate_limit=True is
# passed to the client.
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth, wait_on_rate_limit=True)
# Search term and date window passed to the query below.
search_words = "test"
date_since = "2020-06-24"
date_to = "2020-06-26"
# Collect tweets: a cursor over api.search, limited to 10 items.
tweets = tw.Cursor(api.search,
q=search_words,
lang="en",
since=date_since,
until=date_to).items(10)
# Iterate over the results and store each author's screen name.
# NOTE(review): indentation was lost in this listing.  If print(list) sits
# inside the loop body, it prints the growing list once per tweet (10 times
# for 10 results); dedent it so it runs after the loop to print one list.
for tweet in tweets:
list.append(tweet.user.screen_name)
print(list)
Change this part:
for tweet in tweets:
list.append(tweet.user.screen_name)
To:
for tweet in tweets:
    # Keep append(): it adds each screen name as ONE element.  extend() with
    # a string would add every character of the name as a separate element.
    list.append(tweet.user.screen_name)
# Print once, after the loop has finished, instead of on every iteration;
# printing inside the loop is what produced 10 (growing) lists.
print(list)
I'm writing a Python code to extract tweets from a twitter account. I'm having a bit of trouble at the moment.
Below is my code (I've removed my consumer and access keys for this):
import csv
import tweepy
from tweepy import OAuthHandler
# Twitter API credentials (left blank in this listing).
consumer_key = ''
consumer_secret = ''
access_token = ''
access_secret = ''


def get_all_tweets(screen_name):
    """Download a user's timeline and save it to ``<screen_name>_tweets.csv``.

    Tweets are requested 200 at a time, paging backwards with ``max_id``
    until a request returns nothing.  The CSV gets a header row followed by
    one row per tweet: id, creation time and UTF-8 encoded text.

    Args:
        screen_name: Screen name of the account to download.
    """
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    api = tweepy.API(auth)

    alltweets = []
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)
    alltweets.extend(new_tweets)

    # `oldest` is computed only after a non-empty batch, so an account with
    # no tweets no longer raises IndexError on `alltweets[-1]`.
    while new_tweets:
        # Ask only for tweets strictly older than the oldest one seen so far.
        oldest = alltweets[-1].id - 1
        print("getting tweets before %s" % (oldest))
        new_tweets = api.user_timeline(screen_name=screen_name, count=200,
                                       max_id=oldest)
        alltweets.extend(new_tweets)
        print("...%s tweets downloaded so far" % (len(alltweets)))

    outtweets = [[tweet.id_str, tweet.created_at, tweet.text.encode("utf-8")]
                 for tweet in alltweets]

    # NOTE(review): binary mode plus pre-encoded text is the Python 2 csv
    # convention.  On Python 3 the csv module needs a text-mode file
    # (open(..., 'w', newline='')) and un-encoded text -- confirm the target
    # interpreter.  Opening also fails if the CSV is open in another program
    # or the folder is not writable.
    with open('%s_tweets.csv' % screen_name, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(outtweets)


if __name__ == '__main__':
    get_all_tweets("hello")
When I run it, I get this error:
Does anybody know where I'm going wrong?
Just close the opened file where you are going to write.
Here it is hello_tweets.csv
Check if you have permission to open the file and permission to read/write in the folder.
I wouldn't recommend it, but if you NEED to run the code and can't find the issue, try doing it as admin.
I am trying to scrape last 1-10 tweets from approx 500 user names on twitter.
Code works perfectly when grabbing 1 user, but falls over when introducing a range of users.
First code is single user - Will grab last 7 tweets from Gavinfree and write to CSV
import tweepy
import csv
#Twitter API credentials
consumer_key = "secretcode"
consumer_secret = "secretcode"
access_key = "secretcode"
access_secret = "secretcode"


def get_all_tweets(GavinFree, count=7):
    """Save a user's most recent tweets to ``<screen name>_tweets.csv``.

    Args:
        GavinFree: Screen name of the account to download (the parameter
            name is kept unchanged for existing callers).
        count: How many of the most recent tweets to request.  Defaults to
            7, the value that used to be hard-coded.
    """
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # A single request is all this script needs.  The original follow-up loop
    # asked for older tweets with a hard-coded max_id=10 instead of the
    # `oldest` id it had just computed, so it could only return tweets whose
    # id is 10 or lower -- presumably none -- and ended after one wasted
    # request.  It also raised IndexError for accounts with no tweets.
    alltweets = list(api.user_timeline(screen_name=GavinFree, count=count))
    print("...%s tweets downloaded so far" % (len(alltweets)))

    outtweets = [[tweet.created_at, tweet.text.encode("utf-8")]
                 for tweet in alltweets]

    # NOTE(review): 'wb' with pre-encoded text is the Python 2 csv convention;
    # Python 3 needs text mode with newline='' -- confirm the interpreter.
    with open('%s_tweets.csv' % GavinFree, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(["created_at", "text"])
        writer.writerows(outtweets)


if __name__ == '__main__':
    #pass in the username of the account you want to download
    get_all_tweets("GavinFree")
Second code is range of users - Will grab 7 tweets from each user and write to CSV and apart from the range - is completely identical.
import tweepy
import csv
#Twitter API credentials
consumer_key = "secretcode"
consumer_secret = "secretcode"
access_key = "secretcode"
access_secret = "secretcode"

handles_list = ["gavinFree", "bdunkelman", "burnie", "ashleyj"]


def get_all_tweets(handles_list, count=10):
    """Save the most recent tweets of each account to its own CSV file.

    For every handle, one ``<handle>_tweets.csv`` file is written with a
    header row followed by id, creation time and UTF-8 encoded text.

    Args:
        handles_list: A list of screen names, or a single screen name given
            as a string.
        count: How many of the most recent tweets to request per account.
            Defaults to 10, the value that used to be hard-coded.
    """
    # Accept a single screen name as well as a list of them.
    if isinstance(handles_list, str):
        handles_list = [handles_list]

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    downloaded = 0
    for handle in handles_list:
        # The timeline must be requested for ONE screen name at a time; the
        # original passed the whole list as `screen_name`.
        try:
            tweets = api.user_timeline(screen_name=handle, count=count)
        except tweepy.TweepError as e:
            # A missing or protected account should not abort the whole run.
            print("skipping %s: %s" % (handle, e))
            continue

        downloaded += len(tweets)
        print("...%s tweets downloaded so far" % (downloaded))

        outtweets = [[tweet.id_str, tweet.created_at,
                      tweet.text.encode("utf-8")] for tweet in tweets]

        # NOTE(review): 'wb' with pre-encoded text is the Python 2 csv
        # convention; Python 3 needs text mode with newline='' -- confirm the
        # interpreter.
        with open('%s_tweets.csv' % handle, 'wb') as f:
            writer = csv.writer(f)
            writer.writerow(["id", "created_at", "text"])
            writer.writerows(outtweets)


if __name__ == '__main__':
    # Pass the list object itself.  The original passed the string literal
    # "handles_list", which made the API look up a user with that name.
    get_all_tweets(handles_list)
The error I receive is tweepy.error.TweepError: [{u'message': u'Sorry, that page does not exist.', u'code': 34}]
I have checked out the user names and have tried both with # and without.
I'm just wondering what the issue could be, as code 34 indicates a 404 error on the twitter api page, yet the error is only being introduced when the range is added.
Any insights would be greatly appreciated.
You're passing handles_list as a string literal, and the function doesn't seem modified to handle a list.
Try this:
if __name__ == '__main__':
for handle in handles_list:
get_all_tweets(handle)
My code gives continuous data, but I want to filter the data to the last five minutes. Additionally, I want to report it every minute. What do I need to do for that?
try:
import json
except ImportError:
import simplejson as json
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream
# Twitter API credentials (placeholder values).
ACCESS_TOKEN = 'secret'
ACCESS_SECRET = 'secret'
CONSUMER_KEY = 'secret'
CONSUMER_SECRET = 'secret'

oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
twitter_stream = TwitterStream(auth=oauth)

# Open a filtered stream: English-language statuses tracking the word "car".
iterator = twitter_stream.statuses.filter(track="car", language="en")

for tweet in iterator:
    # Only messages that carry a 'text' field are processed.
    if 'text' not in tweet:
        continue
    try:
        name = tweet['user']['name']
        statuses_count = tweet['user']['statuses_count']
        # Build the hashtag list for THIS tweet.  The original appended to a
        # `hashtags` list that was never created; the resulting NameError was
        # swallowed by a bare `except:`, so hashtags were never printed.
        hashtags = [hashtag['text']
                    for hashtag in tweet['entities']['hashtags']]
    except KeyError:
        # Skip messages missing one of the expected fields.
        continue
    print(name)
    print(statuses_count)
    print(hashtags)
Thanks in advance.