I've written the following code in Python for scraping followers:
import tweepy
import time
import csv
import sys
import random
# Twitter API credentials (left blank here; fill in before running).
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# retry_errors lists the HTTP status codes tweepy is asked to retry.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True,
                 retry_errors=set([401, 404, 500, 502, 503, 504]))

account = 'setavakfi'
log_file = '{}_followers.csv'.format(account)
max_retries = 10
retry = 0  # consecutive failures since the last successfully fetched page
errorCount = 0
followers_count = 0  # running total of follower accounts retrieved

# Page through the followers 200 at a time, starting from a known cursor.
tweepy_cursor = tweepy.Cursor(api.followers, screen_name=account, count=200,
                              cursor=1574812962976647290).pages()

while True:
    try:
        page = tweepy_cursor.next()
    except tweepy.TweepError as e:
        print("Error code: {} with message: {}".format(e.api_code, e.reason))
        errorCount += 1
        retry += 1
        # Give up after max_retries failures in a row. The counter is only
        # reset after a successful fetch, so this limit can actually be hit.
        if retry == max_retries:
            break
        wait = 60 + retry * 5
        print('Retrying in {} seconds'.format(wait))
        time.sleep(wait)
        continue
    except StopIteration:
        # No more pages.
        break

    retry = 0
    cursor = tweepy_cursor.next_cursor
    followers_count += len(page)
    print('Retrieved {} followers accounts'.format(followers_count))
    print('Current cursor: {}'.format(cursor))

    # Append this page to the CSV log ('ab' + encoded fields: Python 2 csv).
    with open(log_file, 'ab') as fd:
        writer = csv.writer(fd)
        for follower in page:
            writer.writerow([str("#"+follower.screen_name),
                             unicode(follower.name).encode('utf-8'),
                             str(follower.lang),
                             unicode(follower.location).encode('utf-8')])

    print("Resting...")
    time.sleep(random.randint(60, 70))

print('Done with {} errors'.format(errorCount))
The problem is that with the given cursor (1574812962976647290) and account (#setavakfi) I only get error 503 (code 130). The cursor is stuck exactly at the page at 8,000 followers. The whole account has more than 60,000 followers. I've tried this code on different accounts with more than 60,000 followers and it works. I have tried changing ISP, IP address and Twitter dev account. Nothing changes.
Can you see what could be wrong with this code? Is it a problem with this single account? Is there a way to automatically jump over the problematic cursor to see whether other cursors have the same problem?
Thanks in advance.
M.
I think the error is caused by a server overload on Twitter's side for this account... I don't think it's an error in your code.
Related
I used the tweepy library (for twitter api-v1.1) to get some metadata (e.g., tweet text, #retweets, userid, etc.) for a list of tweet ids. Here is my code:
# Twitter API credentials (placeholder values).
consumer_key = 'xxxxxxxxxxxx'
consumer_key_secret = 'xxxxxxxxxxxx'
access_token = 'xxxxxxxxxxxxxxxxxx'
access_token_secret = 'xxxxxxxxxxxxxxxxxx'
# Build the OAuth handler and the module-level tweepy client used below.
auth = tweepy.OAuthHandler(consumer_key, consumer_key_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
def createTrainingSet(corpusFile, tweetContent):
    """Fetch tweet metadata for every tweet id listed in a labelled corpus CSV.

    Parameters:
        corpusFile: path to a CSV whose columns are, in order: tweet_id,
            unreliable, conspiracy, clickbait, political/biased, date.
        tweetContent: path of the target file; accepted for interface
            compatibility but not written to by this function.

    Returns:
        A list of dicts, one per tweet that was fetched successfully. Each
        dict holds the corpus columns plus text, retweet_count,
        favorite_count, created_at, user_id and user_created_at.

    Uses the module-level ``api`` tweepy client.
    """
    import csv
    import time

    sleepTime = 2  # seconds to pause after each successful API call

    corpus = []
    with open(corpusFile, 'r') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            corpus.append({"tweet_id": row[0], "unreliable": row[1],
                           "conspiracy": row[2], "clickbait": row[3],
                           "political/biased": row[4], "date": row[5]})

    trainingDataSet = []
    for tweet in corpus:
        try:
            tweetFetched = api.get_status(tweet["tweet_id"])
        except tweepy.TweepError as e:
            # Report the real API error instead of hiding it, then skip
            # this tweet and carry on with the rest of the corpus.
            print("Inside the exception - no:2")
            print(e)
            continue

        print("Tweet fetched" + tweetFetched.text)
        print("followers_count: " + str(tweetFetched.user.followers_count))
        print("friends_count: " + str(tweetFetched.user.friends_count))
        tweet["text"] = tweetFetched.text
        tweet["retweet_count"] = tweetFetched.retweet_count
        tweet["favorite_count"] = tweetFetched.favorite_count
        tweet["created_at"] = tweetFetched.created_at
        tweet["user_id"] = tweetFetched.user.id_str
        tweet["user_created_at"] = tweetFetched.user.created_at
        trainingDataSet.append(tweet)
        time.sleep(sleepTime)

    return trainingDataSet
# Input corpus: CSV of labelled tweet ids, read by createTrainingSet
corpusFile = "sample.csv"
# Target file name, passed to createTrainingSet as tweetContent
tweetContent = "tweetContent.csv"
# Run the fetch; resultFile holds whatever createTrainingSet returns
resultFile = createTrainingSet(corpusFile, tweetContent)
I don't know why this code doesn't work any more (the last time it worked was about a couple of months ago). However, when I run it now, it prints "Inside the exception - no:2". Why is that?
Here are the two lines of code that helped me find the errors:
except tweepy.TweepError as e:
print ('the error code:', e.args[0][0]['code'])
print ('the error message:', e.args[0][0]['message'])
Also, thanks to Jeyekomon's answer in this post, I found that the e.message[0]['code'] is not working anymore:
The error code used to be accessed using e.message[0]['code'] which no longer works. The message attribute has been deprecated in Python 2.6 and removed in Python 3.0. Currently you get an error 'TweepError' object has no attribute 'message'
In addition, it seems there are some other helpful attributes (api_code, reason and response) in TweepError exception class that are not in the documentation.
I'm trying to make a twitter bot using tweepy and python but I can't figure out how to reply to a tweet.
import tweepy
from Keys import keys
import time
# Credentials are read from the local Keys module.
CONSUMER_KEY = keys['consumer_key']
CONSUMER_SECRET = keys['consumer_secret']
ACCESS_TOKEN = keys['access_token']
ACCESS_TOKEN_SECRET = keys['access_token_secret']

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
auth.secure = True
api = tweepy.API(auth)

message = " Test message"

# Take the first matching tweet, post a status mentioning its author and
# follow the author if not already followed.
for tweet in tweepy.Cursor(api.search, q='search_item', lang = 'en').items(1):
    try:
        print("Found tweet by:#" + tweet.user.screen_name)
        # NOTE(review): this posts a new status mentioning the user; no
        # in_reply_to_status_id is passed, so it is not threaded as a reply.
        api.update_status('#' + tweet.user.screen_name + message)
        print('responded to #' + tweet.user.screen_name)
        if tweet.user.following == False:
            tweet.user.follow()
            print("following #" + tweet.user.screen_name)
    except tweepy.RateLimitError:
        # Must come before TweepError: RateLimitError is a subclass of it,
        # so in the opposite order this handler could never run.
        time.sleep(15*60)
    except tweepy.TweepError as e:
        print(e.reason)
        time.sleep(3)
        continue
I've got it to post a tweet with #username and then say the message but can't get it to reply.
Well then, it was something simple. I had to specify who the tweet was directed towards using the # notation.
api.update_status('My status update #whoIReplyTo',tweetId)
I figured it out eventually.
The code for replying to a tweet is:
api.update_status(status = "your message here", in_reply_to_status_id = tweet.id_str)
I've used the following script before but now it is not working. I don't see any tweets being printed in my terminal (as coded in line 38) nor are any tweets being stored in my csv. I don't know what the issue is.
import tweepy
import csv
import time
# Twitter API credentials (placeholder values).
access_token = "xxxxxxxxxx"
access_token_secret = "xxxxxxxxxx"
consumer_key = "xxxxxxxxxx"
consumer_secret = "xxxxxxxxxx"

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
print("Starting search...")

searchTerms = ["#Xfactor", "#Yfactor"]
# NOTE(review): q=[searchTerms] wraps the list of terms in a second list,
# so the query sent is not the terms themselves.
tweets = tweepy.Cursor(api.search, q=[searchTerms],
                       since="2015-10-18",
                       until="2015-10-23",
                       include_entities=True).items(999999999)
#csvWriter.writerow([tweet.created_at, tweet.id_str, tweet.screen_name, tweet.user_id, tweet.coordinates, tweet.place, tweet.text.encode('utf-8'), tweet.retweet_count, tweet.favorite_count])
#tweet.in_reply_to_user_id_str, tweet.in_reply_to_screen_name, tweet.in_reply_to_status_id_str, tweet.retweeted, tweet.truncated, tweet.source

# Open/create the CSV in append mode; the with-block guarantees the file is
# flushed and closed even if the loop is interrupted.
with open('somesearch.csv', 'a') as csvFile:
    csvWriter = csv.writer(csvFile)
    while True:
        try:
            for tweet in tweets:
                print('{} {}'.format(tweet.created_at, tweet.text.encode('utf-8')))
                csvWriter.writerow([tweet.created_at, tweet.id_str,
                                    tweet.author.name.encode('utf-8'),
                                    tweet.author.screen_name.encode('utf-8'),
                                    tweet.user.location.encode('utf-8'),
                                    tweet.coordinates,
                                    tweet.text.encode('utf-8'),
                                    tweet.retweet_count,
                                    tweet.favorite_count])
        except tweepy.TweepError:
            # Wait 15 minutes, then resume iterating.
            time.sleep(60 * 15)
        else:
            # The for-loop ended normally: the iterator is exhausted. Without
            # this break the while-loop would spin forever on an empty
            # iterator and "Done!" would never be printed.
            break

print("Done!")
The problem lies in this line:
tweets= tweepy.Cursor(api.search,q=[searchTerms],
What you've done is created a list containing a list. Look at this code:
searchTerms = ["#Xfactor", "#Yfactor"]
q=[searchTerms]
print(searchTerms)
>>> ['#Xfactor', '#Yfactor']
print(q)
>>> [['#Xfactor', '#Yfactor']]
What you're searching for is not searchTerms, you're searching for a list of that.
So now the tweet you're interested in must contain the literal ['#Xfactor', '#Yfactor']. To fix that issue change the q into:
tweets= tweepy.Cursor(api.search,q=searchTerms,
My code gives continuous data, but I want to filter the data to the last five minutes. Additionally, I want to report it every minute. What do I need to do for that?
try:
import json
except ImportError:
import simplejson as json
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream
# Twitter API credentials (placeholder values).
ACCESS_TOKEN = 'secret'
ACCESS_SECRET = 'secret'
CONSUMER_KEY = 'secret'
CONSUMER_SECRET = 'secret'

oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
twitter_stream = TwitterStream(auth=oauth)
# Stream of English-language statuses tracking the keyword "car".
iterator = twitter_stream.statuses.filter(track="car", language="en")

for tweet in iterator:
    try:
        if 'text' in tweet:
            print(tweet['user']['name'])
            print(tweet['user']['statuses_count'])
            # Collect this tweet's hashtags. The list is built here because
            # no `hashtags` list existed before: the original append raised
            # NameError, which the bare except silently swallowed.
            hashtags = [hashtag['text']
                        for hashtag in tweet['entities']['hashtags']]
            print(hashtags)
    except (KeyError, TypeError):
        # Skip stream messages that lack the expected fields. A bare except
        # here would also trap KeyboardInterrupt and hide real bugs.
        continue
Thanks in advance.
So I'm following the tutorial for a certain Reddit-to-Twitter bot that's coded in Python using PRAW, and I am stuck hitting errors.
Running this code in the command console gives me the error on line 74
import praw
import json
import requests
import tweepy
import time
# Twitter API credentials (placeholder values) read by tweeter().
access_token = 'secret'
# NOTE(review): this placeholder starts with a space; a real secret must not.
access_token_secret = ' secret'
consumer_key = 'secret'
consumer_secret = 'secret'
def strip_title(title, max_length=94):
    """Shorten *title* so it leaves room for a link and hashtags.

    Titles shorter than *max_length* characters are returned unchanged.
    Longer ones are cut to their first ``max_length - 1`` characters and
    suffixed with "...". The default of 94 keeps the original behaviour.
    """
    if len(title) < max_length:
        return title
    return title[:max_length - 1] + "..."
def tweet_creator(subreddit_info):
    """Collect the 20 hottest posts of a subreddit and shorten their links.

    Parameters:
        subreddit_info: object exposing ``get_hot(limit=...)`` that yields
            submissions with ``title``, ``url`` and ``id`` attributes.

    Returns:
        A ``(mini_post_dict, post_ids)`` tuple: an ordered mapping of
        stripped title -> result of ``shorten(url)``, and the list of
        submission ids in the same order.
    """
    from collections import OrderedDict

    # Insertion-ordered mappings keep titles aligned with post_ids; callers
    # zip the two together, and plain dicts have arbitrary order on Python 2.
    # NOTE(review): two posts with the same stripped title still collapse
    # into one key while both ids are kept.
    post_dict = OrderedDict()
    post_ids = []
    print("[bot] Getting posts from Reddit")
    for submission in subreddit_info.get_hot(limit=20):
        post_dict[strip_title(submission.title)] = submission.url
        post_ids.append(submission.id)

    print("[bot] Generating short link using goo.gl")
    mini_post_dict = OrderedDict()
    for post_title, post_link in post_dict.items():
        mini_post_dict[post_title] = shorten(post_link)
    return mini_post_dict, post_ids
def setup_connection_reddit(subreddit):
    """Create a PRAW client and return the handle for *subreddit*.

    Parameters:
        subreddit: name of the subreddit to monitor; it is also embedded in
            the user-agent string passed to ``praw.Reddit``.

    Returns:
        Whatever ``Reddit.get_subreddit(subreddit)`` returns.
    """
    print("[bot] setting up connection with Reddit")
    r = praw.Reddit('yasoob_python reddit twitter bot '
                    'monitoring %s' % (subreddit))
    # Return the handle directly rather than rebinding the parameter.
    return r.get_subreddit(subreddit)
def shorten(url):
    """Submit *url* to the goo.gl URL-shortener API and return its response.

    Parameters:
        url: the long URL to shorten.

    Returns:
        The API's JSON response parsed with ``json.loads``.

    NOTE(review): the whole parsed response is returned, not only the short
    URL string, so callers that concatenate the result with text must first
    extract the URL field.
    """
    headers = {'content-type': 'application/json'}
    payload = {"longUrl": url}
    # Separate name for the API endpoint so the `url` argument is not
    # overwritten.
    endpoint = "https://www.googleapis.com/urlshortener/v1/url"
    r = requests.post(endpoint, data=json.dumps(payload), headers=headers)
    link = json.loads(r.text)
    return link
def duplicate_check(id):
    """Report whether *id* is already recorded in posted_posts.txt.

    Parameters:
        id: the post id to look for (compared as ``str(id)``).

    Returns:
        1 if a line of the file equals the id exactly, otherwise 0. A
        missing file counts as "nothing posted yet" and yields 0.

    Raises:
        IOError: if the file exists but cannot be read.
    """
    import errno

    wanted = str(id)
    try:
        with open('posted_posts.txt', 'r') as posted:
            for line in posted:
                # Exact match per line: a substring test would report id
                # "abc" as posted when only "xabcd" is on file.
                if line.strip() == wanted:
                    return 1
    except IOError as err:
        # On the very first run the log file does not exist yet.
        if err.errno != errno.ENOENT:
            raise
    return 0
def add_id_to_file(id):
    """Append *id* on its own line to posted_posts.txt.

    The file is created if it does not exist yet.
    """
    # `posted` rather than `file`, to avoid shadowing the Python 2 builtin.
    with open('posted_posts.txt', 'a') as posted:
        posted.write(str(id) + "\n")
def main():
    """Fetch hot posts from r/showerthoughts and tweet the new ones."""
    subreddit = setup_connection_reddit('showerthoughts')
    post_dict, post_ids = tweet_creator(subreddit)
    tweeter(post_dict, post_ids)
def tweeter(post_dict, post_ids):
    """Tweet every post that has not been tweeted before.

    Parameters:
        post_dict: mapping of post title -> link text to append to the tweet.
        post_ids: post ids, paired positionally with the keys of post_dict.

    Each new post is tweeted and its id is recorded via ``add_id_to_file``.
    The function then sleeps 30 seconds before handling the next post.
    """
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    for post, post_id in zip(post_dict, post_ids):
        if duplicate_check(post_id) != 0:
            print("[bot] Already posted")
            continue
        # Build the status text once so the printed and posted text match.
        status = post + " " + post_dict[post] + " #Python #reddit #bot"
        print("[bot] Posting this link on twitter")
        print(status)
        api.update_status(status)
        add_id_to_file(post_id)
        time.sleep(30)
# Run the bot only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Error :
print post+" "+post_dict[post]+"#python #reddit #bot"
TypeError: coercing to Unicode: need string or buffer, dict found
My understanding of the code and the error is that it needs a string to be sent but is somehow getting the entire key-dictionary set. I thought that by passing the [post] key into post_dict it would be able to get the specific post for the bot to utilize, but instead, it's fetching the dictionary!
There are two lines, 74 and 75, that both call post_dict[post], and neither is using the dictionary's value when called with the post key.
Try printing post and post_dict before you do that concatenation in the tweeter function's for loop. That should show you what those structures look like and make the solution evident.