I'm experimenting with a Python script (taken from here) that traces the retweet path of a given tweetID.
I'm aware of the very restrictive rate limits on the Twitter API, but I'm hitting the following error every time I execute the script:
Caught TweepError: [{u'message': u'Rate limit exceeded', u'code': 88}]
The script I'm using is as follows:
#!/usr/bin/python -u
#
# Usage: ./trace.py <tweetId>
#
import sys
import tweepy
import Queue
import time
import json
import redis
CONSUMER_KEY = 'x'
CONSUMER_SECRET = 'x'
ACCESS_KEY = 'x'
ACCESS_SECRET = 'x'
REDIS_FOLLOWERS_KEY = "followers:%s"
# Retweeter who have not yet been connected to the social graph
unconnected = {}
# Retweeters connected to the social graph...become seeds for deeper search
connected = Queue.Queue()
# Social graph
links = []
nodes = []
#----------------------------------------
def addUserToSocialGraph (parent, child):
    # parent: tweepy.models.User
    # child: tweepy.models.User
    #----------------------------------------
    # Appends `child` to the global node list and, when a parent is given,
    # records a parent->child edge in the global link list.
    global links
    if not child:
        return
    node = {'id':child.id,
            'screen_name':child.screen_name,
            'followers_count':child.followers_count,
            'profile_image_url':child.profile_image_url}
    nodes.append (node)
    # TODO: Find child and parent indices in nodes in order to create the links
    if parent:
        print (nodes)
        print ("Adding to socialgraph: %s ==> %s" % (parent.screen_name, child.screen_name))
        links.append ({'source':getNodeIndex (parent),
                       'target':getNodeIndex (child)})
#----------------------------------------
def getNodeIndex (user):
    # node: tweepy.models.User
    #----------------------------------------
    # Returns the position of `user` in the global node list, or -1 when absent.
    global nodes
    for position, node in enumerate(nodes):
        if node["id"] == user.id:
            return position
    return -1
#----------------------------------------
def isFollower (parent, child):
    # parent: tweepy.models.User
    # child: tweepy.models.User
    #----------------------------------------
    # Returns truthy when `child` follows `parent`. Follower sets are cached
    # in a Redis hash per parent; the hash is (re)filled from Twitter when it
    # is missing or looks stale.
    global red
    # Fetch data from Twitter if we dont have it
    key = REDIS_FOLLOWERS_KEY % parent.screen_name
    if ( not red.exists (key) ):
        print ("No follower data for user %s" % parent.screen_name)
        crawlFollowers (parent)
    # Re-crawl when the cached count trails the profile's follower count by
    # more than a ~10% margin (a previous crawl may have been cut short).
    cache_count = red.hlen (key)
    if ( parent.followers_count > (cache_count*1.1) ):
        # print ("Incomplete follower data for user %s. Have %d followers but should have %d (exceeds 10% margin for error)."
        # % (parent.screen_name, cache_count, parent.followers_count))
        crawlFollowers (parent)
    return red.hexists (key, child.screen_name)
#----------------------------------------
def crawlFollowers (user):
    # user: tweepy.models.User
    #----------------------------------------
    # Pages through all followers of `user` and caches
    # screen_name -> followers_count in the Redis hash "followers:<user>".
    # Python 2 / legacy-tweepy code: note `except ... as (err)` and
    # `iterator.next()`, both invalid on Python 3.
    # On a rate-limit error it sleeps until the window resets, then retries
    # the SAME follower (that is why `follower` is kept across iterations).
    print ("Retrieving followers for %s (%d)" % (user.screen_name, user.followers_count))
    count = 0
    follower_cursors = tweepy.Cursor (api.followers, id = user.id, count = 15)
    followers_iter = follower_cursors.items()
    follower = None
    while True:
        try:
            # We may have to retry a failed follower lookup
            if ( follower is None ):
                follower = followers_iter.next()
            # Add link to Redis
            red.hset ("followers:%s" % user.screen_name, follower.screen_name, follower.followers_count)
            follower = None
            count += 1
        except StopIteration:
            break
        except tweepy.error.TweepError as (err):
            print ("Caught TweepError: %s" % (err))
            if (err.reason == "Not authorized" ):
                print ("Not authorized to see users followers. Skipping.")
                break
            # NOTE(review): 'remaining_hits' / 'reset_time_in_seconds' are
            # Twitter API v1 rate_limit_status keys — confirm they exist in
            # the tweepy version in use; modern responses are structured
            # differently, which would make this branch KeyError.
            limit = api.rate_limit_status()
            if (limit['remaining_hits'] == 0):
                seconds_until_reset = int (limit['reset_time_in_seconds'] - time.time())
                print ("API request limit reached. Sleeping for %s seconds" % seconds_until_reset)
                time.sleep (seconds_until_reset + 5)
            else:
                print ("Sleeping a few seconds and then retrying")
                time.sleep (5)
    print ("Added %d followers of user %s" % (count, user.screen_name))
#----------------------------------------
# Main
#----------------------------------------
# Usage: ./trace.py <tweetId>
# Breadth-first search: connect each retweeter to the follower graph of
# already-connected users, then dump the graph as JSON.
if len(sys.argv) < 2:
    sys.exit("Usage: trace.py <tweetId>")
tweetId = sys.argv[1]
# Connect to Redis
red = redis.Redis(unix_socket_path="/tmp/redis.sock")
# Connect to Twitter
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)
print (api.rate_limit_status())
# Get original Tweet details
status = api.get_status (tweetId)
connected.put(status.user)
addUserToSocialGraph (None, status.user)
retweets = api.retweets (status.id)
print ("Tweet %s, originally posted by %s, was retweeted by..." % (status.id, status.user.screen_name))
for retweet in retweets:
    print (retweet.user.screen_name)
    unconnected[retweet.user.screen_name] = retweet.user
# Pivot
while not (connected.empty() or len(unconnected)==0):
    # Get next user
    pivot = connected.get()
    # Check followers of this user against unconnected retweeters
    print ("Looking through followers of %s" % pivot.screen_name)
    # NOTE: dict.items() returns a list copy on Python 2, so deleting from
    # `unconnected` inside this loop is safe here (it would not be on Py3).
    for (screen_name, retweeter) in unconnected.items():
        if (isFollower(pivot, retweeter)):
            print ("%s <=== %s" % (pivot.screen_name, retweeter.screen_name))
            connected.put (retweeter)
            addUserToSocialGraph (pivot, retweeter)
            del unconnected[retweeter.screen_name]
        else:
            print ("%s <=X= %s" % (pivot.screen_name, retweeter.screen_name))
# Add unconnected nodes to social graph
for (screen_name, user) in unconnected.items():
    addUserToSocialGraph (None, user)
# Encode data as JSON
filename = "%s.json" % status.id
print ("\n\nWriting JSON to %s" % filename)
tweet = {'id':status.id,
         'retweet_count':status.retweet_count,
         'text':status.text,
         'author':status.user.id}
# BUG FIX: the original ended with `f.close` (no parentheses), which never
# actually closed the file; the with-block guarantees flush + close.
with open (filename, 'w') as f:
    f.write (json.dumps({'tweet':tweet, 'nodes':nodes, 'links':links}, indent=2))
sys.exit()
I suspect I'm making a mistake in the crawlFollowers function.
Is there a way to somehow stagger the crawler to stay within the rate limit or conform to the rate limit?
Try running with the wait_on_rate_limit flag set to True in Tweepy API:
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
Related
I have modified this code python-paged-ldap-snippet.py from https://gist.github.com/mattfahrner/c228ead9c516fc322d3a
My problem is that when I change my SEARCHFILTER from '(&(objectCategory=person)(objectClass=user))' to '(&(objectCategory=person)(objectClass=user)(memberOf=CN=Users0,OU=Groups,DC=ad,DC=company,DC=com))'
it runs just fine.
If it is on SEARCHFILTER='(&(objectCategory=person)(objectClass=user))', I notice that the code is not entering the writeToFile function.
The objective of the code is to dump all the user information and parse the info into a file.
I tried running LDAPSEARCH against '(&(objectCategory=person)(objectClass=user))' and I manage to get the output .
Not sure what is wrong. Suggestions are greatly appreciated.
Thank you.
#!/usr/bin/python
# Python 2 script: page through an LDAP subtree using RFC 2696 simple paged
# results and accumulate every entry's attributes into `data`, writing the
# collected set out once the server returns an empty paging cookie.
import sys
import ldap
import os
LDAPSERVER='ldap://xxx.xxx.xxx.xxx:389'
BASEDN='dc=ad,dc=company,dc=com'
LDAPUSER = "CN=LDAPuser,OU=XXX,OU=Users,DC=ad,DC=company,DC=com"
LDAPPASSWORD = 'LDAPpassword'
# NOTE(review): Active Directory caps a page at 1000 entries by default, so
# a 20000 page size may stall paging entirely (see the answer below).
PAGESIZE = 20000
ATTRLIST = ['sAMAccountName','uid']
SEARCHFILTER='(&(objectCategory=person)(objectClass=user))'
#SEARCHFILTER='(&(objectCategory=person)(objectClass=user)(memberOf=CN=Users0,OU=Groups,DC=ad,DC=company,DC=com))'
data = []
ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_ALLOW)
# Do not chase referrals — required when talking to Active Directory.
ldap.set_option(ldap.OPT_REFERRALS, 0)
l = ldap.initialize(LDAPSERVER)
l.protocol_version = 3 # Paged results only apply to LDAP v3
try:
    l.simple_bind_s(LDAPUSER, LDAPPASSWORD)
    print ' Login Done, Searching data'
except ldap.LDAPError as e:
    exit('LDAP bind failed: %s' % e)
lc = ldap.controls.SimplePagedResultsControl(True,size=PAGESIZE,cookie='')
def writeToFile(data):
    # Placeholder: only announces the write; the CSV export is not implemented.
    print ' Writing data to file'
    #code to print all output into CVS file
while True:
    try:
        msgid = l.search_ext(BASEDN, ldap.SCOPE_SUBTREE, SEARCHFILTER, ATTRLIST, serverctrls=[lc])
    except ldap.LDAPError as e:
        sys.exit('LDAP search failed: %s' % e)
    try:
        rtype, rdata, rmsgid, serverctrls = l.result3(msgid)
    except ldap.LDAPError as e:
        sys.exit('Could not pull LDAP results: %s' % e)
    for dn, attrs in rdata:
        data.append(attrs)
    # Find the paged-results response control so we can read the cookie.
    pctrls = [
        c for c in serverctrls if c.controlType == ldap.controls.SimplePagedResultsControl.controlType ]
    if not pctrls:
        print >> sys.stderr, 'Warning: Server ignores RFC 2696 control.'
        break
    # Empty cookie means the server has delivered the final page.
    cookie = pctrls[0].cookie
    if not cookie:
        writeToFile(data)
        print 'Task Complete'
        break
    # NOTE(review): with python-ldap >= 2.4 the paging state is resumed via
    # `lc.cookie = cookie` / `lc.size = PAGESIZE`; the controlValue tuple
    # below is the pre-2.4 API — confirm the installed library version.
    lc.controlValue = (PAGESIZE, cookie)
# NOTE(review): stray duplicate of the PAGESIZE constant above — appears to
# be a copy/paste artifact and has no effect after the loop exits.
PAGESIZE = 20000
Lower your page size to a value <= 1000, since that's the max AD will give you at a time anyway. It's possible that it's waiting for 20000 records before requesting the next page and never getting it.
I am trying to create a Twitter user graph and for that I have written the following code :
import operator
import sys
import time
from urllib.error import URLError
from http.client import BadStatusLine
import json
import twitter
from functools import partial
from sys import maxsize as maxint
import itertools
import networkx
import matplotlib.pyplot as plt
G = networkx.Graph()
# Code and function taken from the twitter cookbook
def oauth_login():
    """Build an authenticated twitter.Twitter handle from hard-coded app keys."""
    CONSUMER_KEY = 'xxxx'
    CONSUMER_SECRET = 'xxZD6r'
    OAUTH_TOKEN = 'xxNRYl'
    OAUTH_TOKEN_SECRET = 'xxHYJl'
    credentials = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                                      CONSUMER_KEY, CONSUMER_SECRET)
    return twitter.Twitter(auth=credentials)
# Code and function taken from the twitter cookbook
def make_twitter_request(twitter_api_func, max_errors=10, *args, **kw):
    """Invoke twitter_api_func(*args, **kw) with retry handling.

    Retries transient network failures (URLError / BadStatusLine) with
    exponential backoff up to max_errors consecutive failures, sleeps through
    429 rate-limit windows, and returns None for 401/404 responses (the
    caller must handle that case).
    """
    # A nested helper function that handles common HTTPErrors. Return an updated
    # value for wait_period if the problem is a 500 level error. Block until the
    # rate limit is reset if it's a rate limiting issue (429 error). Returns None
    # for 401 and 404 errors, which requires special handling by the caller.
    def handle_twitter_http_error(e, wait_period=2, sleep_when_rate_limited=True):
        if wait_period > 3600: # Seconds
            print('Too many retries. Quitting.', file=sys.stderr)
            raise e
        if e.e.code == 401:
            print('Encountered 401 Error (Not Authorized)', file=sys.stderr)
            return None
        elif e.e.code == 404:
            print('Encountered 404 Error (Not Found)', file=sys.stderr)
            return None
        elif e.e.code == 429:
            print('Encountered 429 Error (Rate Limit Exceeded)', file=sys.stderr)
            if sleep_when_rate_limited:
                print("Retrying in 15 minutes...ZzZ...", file=sys.stderr)
                sys.stderr.flush()
                time.sleep(60 * 15 + 5)
                print('...ZzZ...Awake now and trying again.', file=sys.stderr)
                # Reset the backoff after sleeping through the window.
                return 2
            else:
                raise e # Caller must handle the rate limiting issue
        elif e.e.code in (500, 502, 503, 504):
            print('Encountered {0} Error. Retrying in {1} seconds'.format(e.e.code, wait_period), file=sys.stderr)
            time.sleep(wait_period)
            wait_period *= 1.5
            return wait_period
        else:
            raise e
    wait_period = 2
    error_count = 0
    while True:
        try:
            return twitter_api_func(*args, **kw)
        except twitter.api.TwitterHTTPError as e:
            # An HTTP error the helper can deal with resets the
            # consecutive-network-error counter.
            error_count = 0
            wait_period = handle_twitter_http_error(e, wait_period)
            if wait_period is None:
                return
        except URLError as e:
            error_count += 1
            time.sleep(wait_period)
            wait_period *= 1.5
            print("URLError encountered. Continuing.", file=sys.stderr)
            if error_count > max_errors:
                print("Too many consecutive errors...bailing out.", file=sys.stderr)
                raise
        except BadStatusLine as e:
            error_count += 1
            time.sleep(wait_period)
            wait_period *= 1.5
            print("BadStatusLine encountered. Continuing.", file=sys.stderr)
            if error_count > max_errors:
                print("Too many consecutive errors...bailing out.", file=sys.stderr)
                raise
# Code and function taken from the twitter cookbook
def get_friends_followers_ids(twitter_api, screen_name=None, user_id=None,
                              friends_limit=maxint, followers_limit=maxint):
    """Collect cursored friend-id and follower-id lists (5000 ids per page)
    for one user, truncated to the given limits; returns (friends, followers)."""
    # Must have either screen_name or user_id (logical xor)
    assert (screen_name is not None) != (user_id is not None), "Must have screen_name or user_id, but not both"
    # See https://developer.twitter.com/en/docs/twitter-api/v1/accounts-and-users/follow-search-get-
    #users/api-reference/get-friends-ids for details
    # on API parameters
    get_friends_ids = partial(make_twitter_request, twitter_api.friends.ids, count=5000)
    get_followers_ids = partial(make_twitter_request, twitter_api.followers.ids, count=5000)
    friends_ids, followers_ids = [], []
    # Run the same cursored paging loop once for friends, once for followers.
    for twitter_api_func, limit, ids, label in [
        [get_friends_ids, friends_limit, friends_ids, "friends"],
        [get_followers_ids, followers_limit, followers_ids, "followers"]
    ]:
        if limit == 0: continue
        cursor = -1
        # Cursor 0 marks the final page; None response means the request
        # ultimately failed (401/404) inside make_twitter_request.
        while cursor != 0:
            # Use make_twitter_request via the partially bound callable...
            if screen_name:
                response = twitter_api_func(screen_name=screen_name, cursor=cursor)
            else: # user_id
                response = twitter_api_func(user_id=user_id, cursor=cursor)
            if response is not None:
                ids += response['ids']
                cursor = response['next_cursor']
            print('Fetched {0} total {1} ids for {2}'.format(len(ids), label, (user_id or screen_name)),
                  file=sys.stderr)
            # XXX: You may want to store data during each iteration to provide an
            # an additional layer of protection from exceptional circumstances
            if len(ids) >= limit or response is None:
                break
    # Do something useful with the IDs, like store them to disk...
    return friends_ids[:friends_limit], followers_ids[:followers_limit]
# Code and function taken from the twitter cookbook
def get_user_profile(twitter_api, screen_names=None, user_ids=None):
    """Look up user profiles in batches of 100 and return a dict keyed by
    screen_name (or by numeric id when user_ids was given)."""
    # Must have either screen_name or user_id (logical xor)
    assert (screen_names is not None) != (user_ids is not None)
    items_to_info = {}
    remaining = screen_names or user_ids
    while remaining:
        batch = ','.join(str(item) for item in remaining[:100])
        remaining = remaining[100:]
        if screen_names:
            response = make_twitter_request(twitter_api.users.lookup, screen_name=batch)
        else: # user_ids
            response = make_twitter_request(twitter_api.users.lookup, user_id=batch)
        for user_info in response:
            key = user_info['screen_name'] if screen_names else user_info['id']
            items_to_info[key] = user_info
    return items_to_info
# Function to find reciprocal friends and sort them such that we get the top 5 friends
def reciprocal_friends(twitter_api, screen_name=None, user_id=None):
    """Return ids of up to 5 reciprocal friends (mutual follows) of the given
    user, ranked by the friends' own follower counts (descending).

    BUG FIX: the original built a {followers_count: friend_id} dict, so two
    reciprocal friends with the same follower count collided — one id could
    be returned twice and another silently dropped. Sorting the id list
    directly by follower count avoids the collision.
    """
    friends_list_ids, followers_list_ids = get_friends_followers_ids(
        twitter_api, screen_name=screen_name, user_id=user_id,
        friends_limit=5000, followers_limit=5000)
    friends_reciprocal = list(set(friends_list_ids) & set(followers_list_ids))
    # One profile lookup per id, as in the original. (Could be batched into a
    # single get_user_profile(user_ids=friends_reciprocal) call to save API
    # requests — left as-is to keep per-id failure behaviour unchanged.)
    user_profiles = {}
    for uid in friends_reciprocal:
        user_profiles[uid] = get_user_profile(twitter_api, user_ids=[uid])[uid]
    # Fewer than 6 reciprocal friends: return them all, unranked.
    if len(friends_reciprocal) < 6:
        return friends_reciprocal
    ranked = sorted(friends_reciprocal,
                    key=lambda uid: user_profiles[uid]['followers_count'],
                    reverse=True)
    return ranked[:5]
# This function finds reciprocal friends again and again till we achieve at least 100 nodes
def crawler(twitter_api, screen_name=None, user_id=None):
    """Breadth-first crawl over reciprocal friendships, adding edges to the
    module-level graph G until it holds more than 100 nodes."""
    rec_friends = reciprocal_friends(twitter_api, screen_name=screen_name, user_id=user_id)
    edges = [(screen_name, x) for x in rec_friends]
    G.add_edges_from(edges)
    # NOTE(review): `nodes` and `nxt_qu` start as aliases of the same list;
    # the tuple rebinding below gives nxt_qu a fresh list on the first pass,
    # so the aliasing is harmless after that.
    nodes = nxt_qu = rec_friends
    if len(nodes) == 0:
        print("No reciprocal friends")
        return rec_friends
    # NOTE(review): if the frontier empties before G reaches 101 nodes this
    # loops forever on an empty queue — worth a guard, left as-is here.
    while G.number_of_nodes() < 101:
        print("Queue Items : ", nxt_qu)
        (queue, nxt_qu) = (nxt_qu, [])
        for q in queue:
            if G.number_of_nodes() >= 101:
                break
            print("ID Entered:", q)
            res = reciprocal_friends(twitter_api, screen_name=None, user_id=q)
            edges = [(q, z) for z in res]
            G.add_edges_from(edges)
            nxt_qu += res
            nodes += res
    print(nodes)
# Build the graph first: authenticate, then crawl starting from POTUS.
# BUG FIX: in the original listing these two calls came AFTER the plotting
# and metric lines, so G was still empty when networkx.diameter(G) ran
# (which raises on an empty/disconnected graph). Run the crawl first.
twitter_api = oauth_login()
crawler(twitter_api, screen_name="POTUS")
# To Plot the graph
networkx.draw(G)
plt.savefig("graphresult.png")
plt.show()
# Printing the Output
print("No. of Edges: ", G.number_of_edges())
print("No. of Nodes: ", G.number_of_nodes())
print("Diameter : ", networkx.diameter(G))
print("Average Distance: ", networkx.average_shortest_path_length(G))
# To write the output into a file
# BUG FIX: the original opened output.txt and never closed it; the
# with-block guarantees the writes are flushed and the handle released.
with open("output.txt", "w") as f:
    f.write("No. of Nodes: " + str(G.number_of_nodes()))
    f.write("\nNo. of Edges: " + str(G.number_of_edges()))
    f.write("\nDiameter: " + str(networkx.diameter(G)))
    f.write("\nAverage Distance: " + str(networkx.average_shortest_path_length(G)))
However I am getting this error often and this is making my program run very slow
ID Entered: 60784269
Fetched 5000 total friends ids for 60784269
Fetched 5000 total followers ids for 60784269
Encountered 429 Error (Rate Limit Exceeded)
Retrying in 15 minutes...ZzZ...
Is there a way to get around this ? Make the code run faster ?
I have read a few documents, but I still don't have a clear picture. Any help is appreciated.
There is no way to go around the rate limits restrictions with the Public API.
However, there is now an API v2, which also allows you to look up users and does not count against the same rate limits.
https://developer.twitter.com/en/docs/twitter-api/users/lookup/introduction
Notice that this solution would be temporary as Twitter will at some point remove access to API v1.
You can request twitter to have access to premium/enterprise level of the API but you will have to pay for that.
You can see rate limits documentation here :
API v1
API v2
I'm using the code shown below in order to retrieve papers from arXiv. I want to retrieve papers that have words "machine" and "learning" in the title. The number of papers is large, therefore I want to implement a slicing by year (published).
How can I request records of 2020 and 2019 in search_query? Please notice that I'm not interested in post-filtering.
# Fetch up to 5000 arXiv entries whose titles match "machine learning",
# 1000 per request, collecting date/title/author/summary per paper.
import urllib.request
import time
import feedparser
# NOTE(review): urllib.parse is never imported explicitly below; this works
# only because urllib.request imports it internally — add `import
# urllib.parse` to be safe.
# Base api query url
base_url = 'http://export.arxiv.org/api/query?';
# Search parameters
search_query = urllib.parse.quote("ti:machine learning")
start = 0
total_results = 5000
results_per_iteration = 1000
wait_time = 3   # seconds between requests, per arXiv API etiquette
papers = []
print('Searching arXiv for %s' % search_query)
for i in range(start,total_results,results_per_iteration):
    print("Results %i - %i" % (i,i+results_per_iteration))
    query = 'search_query=%s&start=%i&max_results=%i' % (search_query,
                                                         i,
                                                         results_per_iteration)
    # perform a GET request using the base_url and query
    response = urllib.request.urlopen(base_url+query).read()
    # parse the response using feedparser
    feed = feedparser.parse(response)
    # Run through each entry, and print out information
    for entry in feed.entries:
        #print('arxiv-id: %s' % entry.id.split('/abs/')[-1])
        #print('Title: %s' % entry.title)
        #feedparser v4.1 only grabs the first author
        #print('First Author: %s' % entry.author)
        paper = {}
        paper["date"] = entry.published
        paper["title"] = entry.title
        paper["first_author"] = entry.author
        paper["summary"] = entry.summary
        papers.append(paper)
    # Sleep a bit before calling the API again
    print('Bulk: %i' % 1)
    time.sleep(wait_time)
According to the arXiv documentation, there is no published or date field available.
What you can do is to sort the results by date (by adding &sortBy=submittedDate&sortOrder=descending to your query parameters) and stop making requests when you reach 2018.
Basically your code should be modified like this:
# Fetch "machine learning" titles newest-first and keep paging until a paper
# published in 2018 appears, i.e. everything from 2019/2020 is collected.
import urllib.request
import time
import feedparser
# Base api query url
base_url = 'http://export.arxiv.org/api/query?'
# Search parameters
search_query = urllib.parse.quote("ti:machine learning")
i = 0
results_per_iteration = 1000
wait_time = 3   # seconds between requests, per arXiv API etiquette
papers = []
year = ""
print('Searching arXiv for %s' % search_query)
while (year != "2018"): #stop requesting when papers date reach 2018
    print("Results %i - %i" % (i,i+results_per_iteration))
    query = 'search_query=%s&start=%i&max_results=%i&sortBy=submittedDate&sortOrder=descending' % (search_query,
                                                                                                   i,
                                                                                                   results_per_iteration)
    # perform a GET request using the base_url and query
    response = urllib.request.urlopen(base_url+query).read()
    # parse the response using feedparser
    feed = feedparser.parse(response)
    # BUG FIX: if the API returns an empty page (end of results, or a
    # transient empty response) `year` never changes and the original
    # looped — and hammered the API — forever. Bail out instead.
    if not feed.entries:
        print('No entries returned; stopping.')
        break
    # Run through each entry, recording the publication year as we go so the
    # outer while-condition can stop us once 2018 papers start appearing.
    for entry in feed.entries:
        paper = {}
        paper["date"] = entry.published
        year = paper["date"][0:4]
        paper["title"] = entry.title
        paper["first_author"] = entry.author
        paper["summary"] = entry.summary
        papers.append(paper)
    # Sleep a bit before calling the API again
    print('Bulk: %i' % 1)
    i += results_per_iteration
    time.sleep(wait_time)
for the "post-filtering" approach, once enough results are collected, I'd do something like this:
papers2019 = [item for item in papers if item["date"][0:4] == "2019"]
I am using Tweepy to capture streaming tweets based off of the hashtag #WorldCup, as seen by the code below. It works as expected.
class StdOutListener(StreamListener):
    ''' Handles data received from the stream. '''
    def on_status(self, status):
        # Prints the text of the tweet
        print('Tweet text: ' + status.text)
        # There are many options in the status object,
        # hashtags can be very easily accessed.
        # BUG FIX: hashtags live under status.entities — the original read
        # status.entries, which does not exist on a Status object — and
        # Python's boolean literal is True, not the bare name `true`
        # (which raised NameError).
        for hashtag in status.entities['hashtags']:
            print(hashtag['text'])
        return True
    def on_error(self, status_code):
        print('Got an error with status code: ' + str(status_code))
        return True # To continue listening
    def on_timeout(self):
        print('Timeout...')
        return True # To continue listening
if __name__ == '__main__':
    # Wire the listener to the streaming API, filtering on one user id plus
    # the #WorldCup hashtag.
    # NOTE(review): tweepy, Stream and the four credential names must be
    # imported/defined elsewhere; they are not shown in this snippet.
    listener = StdOutListener()
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)
    stream.filter(follow=[38744894], track=['#WorldCup'])
Because this is a hot hashtag right now, searches don't take too long to catch the maximum amount of tweets that Tweepy lets you get in one transaction. However, if I was going to search on #StackOverflow, it might be much slower, and therefore, I'd like a way to kill the stream. I could do this on several parameters, such as stopping after 100 tweets, stopping after 3 minutes, after a text output file has reached 150 lines, etc. I do know that the socket timeout time isn't used to achieve this.
I have taken a look at this similar question:
Tweepy Streaming - Stop collecting tweets at x amount
However, it appears to not use the streaming API. The data that it collects is also very messy, whereas this text output is clean.
Can anyone suggest a way to stop Tweepy (when using the stream in this method), based on some user input parameter, besides a keyboard interrupt?
Thanks
I solved this, so I'm going to be one of those internet heroes that answers their own question.
This is achieved by using static Python variables for the counter and for the stop value (e.g. stop after you grab 20 tweets). This is currently a geolocation search, but you could easily swap it for a hashtag search by using the getTweetsByHashtag() method.
#!/usr/bin/env python
from tweepy import (Stream, OAuthHandler)
from tweepy.streaming import StreamListener
class Listener(StreamListener):
    """Stream listener that stops itself after Listener.stop_at tweets."""
    tweet_counter = 0 # Static (class-level) counter shared by all instances
    def login(self):
        """Return an OAuthHandler built from the application credentials."""
        # BUG FIX: the original left these assignments with no right-hand
        # side (a SyntaxError). Fill in your own credentials here.
        CONSUMER_KEY = ''
        CONSUMER_SECRET = ''
        ACCESS_TOKEN = ''
        ACCESS_TOKEN_SECRET = ''
        auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        return auth
    def on_status(self, status):
        # Count the tweet, echo it, and stop the stream (return False) once
        # the class-level limit is reached.
        Listener.tweet_counter += 1
        print(str(Listener.tweet_counter) + '. Screen name = "%s" Tweet = "%s"'
              %(status.author.screen_name, status.text.replace('\n', ' ')))
        if Listener.tweet_counter < Listener.stop_at:
            return True
        else:
            print('Max num reached = ' + str(Listener.tweet_counter))
            return False
    def getTweetsByGPS(self, stop_at_number, latitude_start, longitude_start, latitude_finish, longitude_finish):
        """Stream up to stop_at_number tweets from the given bounding box."""
        try:
            Listener.stop_at = stop_at_number # Create static variable
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60) # Socket timeout value
            streaming_api.filter(follow=None, locations=[latitude_start, longitude_start, latitude_finish, longitude_finish])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')
    def getTweetsByHashtag(self, stop_at_number, hashtag):
        """Stream up to stop_at_number tweets matching the given hashtag."""
        try:
            # BUG FIX: the original assigned Listener.stopAt here while
            # on_status checks Listener.stop_at, so the hashtag variant
            # failed with AttributeError when the counter was tested.
            Listener.stop_at = stop_at_number
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60)
            streaming_api.filter(track=[hashtag])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')
# Example run: capture 20 tweets from the Atlanta bounding box.
listener = Listener()
listener.getTweetsByGPS(20, -84.395198, 33.746876, -84.385585, 33.841601) # Atlanta area.
The above solution was helpful for getting tweets by hashtag, even though there is a small error in the definition of the getTweetsByHashtag function: you had used Listener.stopAt instead of Listener.stop_at = stop_at_number.
I have tweaked the code a little bit, so you can easily kill the code for a specified number of seconds.
defined new functions init to help tweak the seconds and "on_data" which contains more information that on_status function.
Enjoy:
import time

from tweepy import (Stream, OAuthHandler)
from tweepy.streaming import StreamListener
class Listener(StreamListener):
    """Stream listener that stops after a tweet-count limit (Listener.stop_at)
    or a wall-clock time limit in seconds, whichever comes first."""
    tweet_counter = 0 # Static (class-level) counter shared by all instances
    def login(self):
        """Return an OAuthHandler built from the application credentials."""
        # BUG FIX: the original left these assignments with no right-hand
        # side (a SyntaxError). Fill in your own credentials here.
        CONSUMER_KEY = ''
        CONSUMER_SECRET = ''
        ACCESS_TOKEN = ''
        ACCESS_TOKEN_SECRET = ''
        auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        return auth
    def __init__(self, time_limit=8):
        # time_limit: maximum number of seconds to keep streaming.
        # (Requires `import time` at module level — missing in the original.)
        self.start_time = time.time()
        self.limit = time_limit
        super(Listener, self).__init__()
    def on_data(self, data):
        # Count the message and keep streaming while both the time limit and
        # the tweet-count limit still hold; returning False stops the stream.
        Listener.tweet_counter += 1
        if (time.time() - self.start_time) < self.limit and Listener.tweet_counter < Listener.stop_at:
            print(str(Listener.tweet_counter)+data)
            return True
        else:
            print("Either Max number reached or time limit up at:"+ str(Listener.tweet_counter)+" outputs")
            # BUG FIX: the original called self.saveFile.close() here, but no
            # saveFile attribute is ever created, so shutdown raised
            # AttributeError instead of ending the stream cleanly.
            return False
    def getTweetsByGPS(self, stop_at_number, latitude_start, longitude_start, latitude_finish, longitude_finish):
        """Stream up to stop_at_number tweets from the given bounding box."""
        try:
            Listener.stop_at = stop_at_number # Create static variable
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60) # Socket timeout value
            streaming_api.filter(follow=None, locations=[latitude_start, longitude_start, latitude_finish, longitude_finish])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')
    def getTweetsByHashtag(self, stop_at_number, hashtag):
        """Stream up to stop_at_number tweets matching the given hashtag."""
        try:
            Listener.stop_at = stop_at_number
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60)
            streaming_api.filter(track=[hashtag])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')
# Example run: up to 1000 tweets matching "hi", also bounded by the
# listener's default 8-second time limit.
listener = Listener()
#listener.getTweetsByGPS(20, -84.395198, 33.746876, -84.385585, 33.841601) # Atlanta area.
listener.getTweetsByHashtag(1000,"hi")
You can change the 1000 value to the max tweets you want and the "hi" to the keyword you need find.. Under the init function, change the 8 time_limit to the value you want in seconds. So you use it depending on what you want.
You can either set limited time and adjust the count to a very high value, or set the count of tweets needed and give a higher time value, so it can get to the count. Your choice!
Chukwu Gozie unu (God bless!)
since Twitter changed their APi the script I have to control a prototype vending unit doesn't work anymore, and the developer who wrote the script has moved on to greener pastures.
The script scans Twitter once every 15 seconds searching for the most recent tweet that contains a specified hashtag (currently set to #sunshine) and it also filters out any retweets.
When it has identified a new tweet, it sends a signal to an Arduino which triggers a solenoid to dispense a free product sample (currently sunscreen)
this line of code appears to be the problem/obsolete:
j =json.loads(urllib.urlopen('http://search.twitter.com/search.json?q='+searchTerm+'&result_type=recent&rpp=1&filter:retweets').read())
I've registered a Developer account with Twitter, so I have the consumer secret and token codes etc. but I still don't know how to modify the old code with these OAuth codes to get it working again. I have reproduced the code in full below. Can anyone please help me and show me the way to get this script working again.
import twitter
import json
import urllib
from pprint import pprint
import time
from arduino import Arduino
##################SETUP AS REQUIRED###########################
##############################################################
#Change to suit the sample, currently at 0.2 of a second #
vendtime = 0.2 #
#
#Delay Time Between each Search (never below 15 seconds) #
delayTime = 15 #
#This is the search term for the URL. (%23 = #) #
searchTerm = '%23sunshine' #
#
A = Arduino('COM3') #This will need to be COM3 #
A.output([12]) #Output on pin 12 #
A.output([13]) #to keep serial in use #
##############################################################
# NOTE(review): this loop polls the retired, unauthenticated v1.0
# search.twitter.com endpoint, which no longer exists — this is the broken
# "before" version; see the OAuth rework below.
#to collect the first tweet without vending
countTweet = 0
#To test Twitter for consistancy
tweet= 0
noTweet= 0
#the infinite loop
while True:
    #j contains the JSON we load from the URL
    j =json.loads(urllib.urlopen('http://search.twitter.com/search.json?q='+searchTerm+'&result_type=recent&rpp=1&filter:retweets').read())
    #Debug JSON from twitter (for faults on the Twitter end or possible GET limit id below 15 seconds per request)
    #pprint(j) #needed for debugging only
    #find the text and the tweet id
    if 'results' in j and j['results']:
        text = j['results'][0]['text']
        id = j['results'][0]['id']
        #how many times the Json is complete
        tweet+= 1
    else:
        #How many times the Json is incomplete (sometimes twitter malfunctions. About 0.1 in 100 are broken)
        noTweet += 1
    #print the text and id to the screen
    # NOTE(review): if the very first response has no results, `text`/`id`
    # are still unbound here and these pprints raise NameError.
    pprint(text) #needed for debugging only
    pprint(id) #needed for debugging only
    #to get the existing tweet from before we power on, if the first ID has been stored already (count == 1)
    if countTweet != 0: #if countTweet is not equal to 0 then it's not the first tweet
        #pprint ("new loop") #needed for debugging only
        #if lastID is not equal to ID
        if lastID != id:
            #Tell Arduino to Vend
            #pin 12 HIGH
            A.setHigh(12)
            #Sleep for the time specified in vendtime
            time.sleep(vendtime)
            #pin 12 LOW
            A.setLow(12)
            #Display the tweet that triggered the vend
            #pprint(text) #needed for debugging only
            #pprint(id) #needed for debugging only
            #Make lastID equal to ID so that next time we can compare it
            lastID = id
            #pprint ('lastID updated') #needed for debugging only
        #if no new tweets, print
        else: #needed for debugging only
            pprint ('no new tweets') #needed for debugging only
    #If it's the first loop, confirm by printing to the screen
    else:
        pprint("First loop complete")
        pprint(text)
        pprint(id)
        lastID = id
        pprint(lastID)
    countTweet += 1 #Add 1 to countTweet
    pprint ('Number of Tweets')
    pprint (countTweet)
    pprint('Working JSON')
    pprint(tweet)
    pprint('Broken JSON')
    pprint(noTweet)
    pprint('waiting')
    A.setHigh(13)
    time.sleep(delayTime)
    A.setLow(13)
The code you posted didn't even use the twitter library. The code below has been reworked and actually uses the twitter library but you still need to put the twitter keys into the code.
from twitter import *
import time
from arduino import Arduino
##################SETUP AS REQUIRED###########################
##############################################################
#Change to suit the sample, currently at 0.2 of a second #
vendtime = 0.2 #
#
#Delay Time Between each Search (never below 15 seconds) #
delayTime = 15 #
#This is the search term #
searchTerm = "#sunshine" #
#
A = Arduino("COM3") #This will need to be COM3 #
A.output([12]) #Output on pin 12 #
A.output([13]) #to keep serial in use #
##############################################################
# Twitter keys — fill in from your developer-account application.
OAUTH_TOKEN = "" # Access token
OAUTH_SECRET = "" # Access token secret
CONSUMER_KEY = "" # Consumer key
CONSUMER_SECRET = "" # Consumer secret
#to collect the first tweet without vending
first_tweet = True
#To test Twitter for consistancy
tweet= 0
notweet= 0
# Start Twitter session (OAuth-authenticated v1.1 API via the `twitter`
# package, replacing the retired unauthenticated search endpoint).
t = Twitter\
(
    auth = OAuth(OAUTH_TOKEN, OAUTH_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
)
#the infinite loop: poll the search API every delayTime seconds and vend
#once per newly-seen tweet id.
while True:
    # Print stats
    print("Number of Tweets: %d" % (tweet + notweet))
    print("Working JSON: %d" % tweet)
    print("Broken JSON: %d" % notweet)
    # Perform search
    search_results = t.search.tweets(q = searchTerm, _timeout = 60)
    #find the text and the tweet id
    tweet_failed = True
    if search_results:
        # `in` instead of dict.has_key(): identical behaviour on Python 2,
        # and keeps the script portable to Python 3 where has_key is gone.
        if "statuses" in search_results:
            statuses = search_results["statuses"]
            if statuses:
                # Select first (most recent) result
                status = statuses[0]
                # Require both "id" and "text" keys before trusting it.
                if not bool(set(["id", "text"]) - set(status.keys())):
                    tweet_failed = False
                    tweet_text = status["text"]
                    tweet_id = status["id"]
                    #how many times the Json is complete
                    tweet+= 1
    if tweet_failed:
        #How many times the Json is incomplete (sometimes twitter malfunctions. About 0.1 in 100 are broken)
        notweet += 1
        continue
    else:
        if first_tweet:
            first_tweet = False
            # BUG FIX: the original never stored last_id on this first pass,
            # so `last_id != tweet_id` below raised NameError on the second
            # iteration. Remember the pre-existing tweet without vending.
            last_id = tweet_id
            print("First loop complete")
        else:
            #if last_id is not equal to tweet_id
            if last_id != tweet_id:
                #Tell Arduino to Vend
                #pin 12 HIGH
                A.setHigh(12)
                #Sleep for the time specified in vendtime
                time.sleep(vendtime)
                #pin 12 LOW
                A.setLow(12)
                #Make last_id equal to ID so that next time we can compare it
                last_id = tweet_id
    #Display the tweet that triggered the vend
    print("Tweet: %s" % tweet_text)
    print("Id: %d" % tweet_id)
    print("waiting")
    A.setHigh(13)
    time.sleep(delayTime)
    A.setLow(13)