Since Twitter changed their API, the script I have to control a prototype vending unit doesn't work anymore, and the developer who wrote it has moved on to greener pastures.
The script scans Twitter once every 15 seconds for the most recent tweet containing a specified hashtag (currently set to #sunshine), filtering out any retweets.
When it has identified a new tweet, it sends a signal to an Arduino, which triggers a solenoid to dispense a free product sample (currently sunscreen).
This line of code appears to be the problem (the endpoint is obsolete):
j =json.loads(urllib.urlopen('http://search.twitter.com/search.json?q='+searchTerm+'&result_type=recent&rpp=1&filter:retweets').read())
I've registered a developer account with Twitter, so I have the consumer secret, token codes, etc., but I still don't know how to modify the old code to use these OAuth credentials. I have reproduced the code in full below. Can anyone please show me how to get this script working again?
import twitter
import json
import urllib
from pprint import pprint
import time
from arduino import Arduino
##################SETUP AS REQUIRED###########################
##############################################################
#Change to suit the sample, currently at 0.2 of a second #
vendtime = 0.2 #
#
#Delay Time Between each Search (never below 15 seconds) #
delayTime = 15 #
#This is the search term for the URL. (%23 = #) #
searchTerm = '%23sunshine' #
#
A = Arduino('COM3') #This will need to be COM3 #
A.output([12]) #Output on pin 12 #
A.output([13]) #to keep serial in use #
##############################################################
#to collect the first tweet without vending
countTweet = 0
#To test Twitter for consistency
tweet= 0
noTweet= 0
#the infinite loop
while True:
    #j contains the JSON we load from the URL
    j = json.loads(urllib.urlopen('http://search.twitter.com/search.json?q='+searchTerm+'&result_type=recent&rpp=1&filter:retweets').read())
    #Debug JSON from twitter (for faults on the Twitter end, or a possible GET limit if below 15 seconds per request)
    #pprint(j) #needed for debugging only
    #find the text and the tweet id
    if 'results' in j and j['results']:
        text = j['results'][0]['text']
        id = j['results'][0]['id']
        #how many times the JSON is complete
        tweet += 1
    else:
        #how many times the JSON is incomplete (sometimes Twitter malfunctions; about 0.1 in 100 are broken)
        noTweet += 1
    #print the text and id to the screen
    pprint(text) #needed for debugging only
    pprint(id) #needed for debugging only
    #to get the existing tweet from before we power on, if the first ID has been stored already (count == 1)
    if countTweet != 0: #if countTweet is not equal to 0 then it's not the first tweet
        #pprint("new loop") #needed for debugging only
        #if lastID is not equal to id
        if lastID != id:
            #Tell Arduino to vend
            #pin 12 HIGH
            A.setHigh(12)
            #Sleep for the time specified in vendtime
            time.sleep(vendtime)
            #pin 12 LOW
            A.setLow(12)
            #Display the tweet that triggered the vend
            #pprint(text) #needed for debugging only
            #pprint(id) #needed for debugging only
            #Make lastID equal to id so that next time we can compare them
            lastID = id
            #pprint('lastID updated') #needed for debugging only
        #if no new tweets, print
        else: #needed for debugging only
            pprint('no new tweets') #needed for debugging only
    #If it's the first loop, confirm by printing to the screen
    else:
        pprint("First loop complete")
        pprint(text)
        pprint(id)
        lastID = id
        pprint(lastID)
    countTweet += 1 #Add 1 to countTweet
    pprint('Number of Tweets')
    pprint(countTweet)
    pprint('Working JSON')
    pprint(tweet)
    pprint('Broken JSON')
    pprint(noTweet)
    pprint('waiting')
    A.setHigh(13)
    time.sleep(delayTime)
    A.setLow(13)
The code you posted didn't even use the twitter library it imports. The code below has been reworked to actually use the twitter library, but you still need to put your Twitter keys into it.
from twitter import *
import time
from arduino import Arduino
##################SETUP AS REQUIRED###########################
##############################################################
#Change to suit the sample, currently at 0.2 of a second #
vendtime = 0.2 #
#
#Delay Time Between each Search (never below 15 seconds) #
delayTime = 15 #
#This is the search term #
searchTerm = "#sunshine" #
#
A = Arduino("COM3") #This will need to be COM3 #
A.output([12]) #Output on pin 12 #
A.output([13]) #to keep serial in use #
##############################################################
# Twitter keys
OAUTH_TOKEN = "" # Access token
OAUTH_SECRET = "" # Access token secret
CONSUMER_KEY = "" # Consumer key
CONSUMER_SECRET = "" # Consumer secret
#to collect the first tweet without vending
first_tweet = True
#To test Twitter for consistency
tweet = 0
notweet = 0
# Start Twitter session
t = Twitter(auth=OAuth(OAUTH_TOKEN, OAUTH_SECRET, CONSUMER_KEY, CONSUMER_SECRET))
#the infinite loop
while True:
    # Print stats
    print("Number of Tweets: %d" % (tweet + notweet))
    print("Working JSON: %d" % tweet)
    print("Broken JSON: %d" % notweet)
    # Perform search
    search_results = t.search.tweets(q=searchTerm, _timeout=60)
    #find the text and the tweet id
    tweet_failed = True
    if search_results:
        if "statuses" in search_results:
            statuses = search_results["statuses"]
            if statuses:
                # Select first result
                status = statuses[0]
                if not bool(set(["id", "text"]) - set(status.keys())):
                    tweet_failed = False
                    tweet_text = status["text"]
                    tweet_id = status["id"]
                    #how many times the JSON is complete
                    tweet += 1
    if tweet_failed:
        #How many times the JSON is incomplete (sometimes Twitter malfunctions; about 0.1 in 100 are broken)
        notweet += 1
        continue
    if first_tweet:
        first_tweet = False
        print("First loop complete")
    else:
        #if last_id is not equal to tweet_id
        if last_id != tweet_id:
            #Tell Arduino to vend
            #pin 12 HIGH
            A.setHigh(12)
            #Sleep for the time specified in vendtime
            time.sleep(vendtime)
            #pin 12 LOW
            A.setLow(12)
    #Make last_id equal to tweet_id so that next time we can compare them
    last_id = tweet_id
    #Display the tweet that triggered the vend
    print("Tweet: %s" % tweet_text)
    print("Id: %d" % tweet_id)
    print("waiting")
    A.setHigh(13)
    time.sleep(delayTime)
    A.setLow(13)
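One detail worth keeping: the original URL tacked filter:retweets onto the query, but in Twitter's search syntax the operator that excludes retweets is -filter:retweets (the bare form matches only retweets), and the reworked search above doesn't filter retweets at all. A one-line tweak, assuming the standard search operators apply to this endpoint:

searchTerm = "#sunshine -filter:retweets"  # the leading minus excludes retweets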
Related
I'm trying to get the latest 100 posts from my Giphy user. It works for accounts like "giphy" and "spongebob", but not for "jack0_o".
import requests
def get_user_gifs(username):
    api_key = "API_KEY"
    limit = 25  # The number of GIFs to retrieve per request (max 25)
    offset = 0
    # Set a flag to indicate when all GIFs have been retrieved
    done = False
    # Keep making requests until all GIFs have been retrieved
    while not done:
        # Make the request to the Giphy API
        endpoint = f"https://api.giphy.com/v1/gifs/search?api_key={api_key}&q={username}&limit={limit}&offset={offset}&sort=recent"
        response = requests.get(endpoint)
        data = response.json()
        # Extract the GIF URLs from the data and print them one per line
        for gif in data["data"]:
            print(gif["url"])
        # Update the starting index for the next batch of GIFs
        offset += limit
        # Check if there are more GIFs to retrieve
        if len(data["data"]) < limit or offset >= 100:
            done = True

get_user_gifs("spongebob")  # WORKS
get_user_gifs("jack0_o")  # does not work
I already tried adding ratings with "pg", "r", and "g".
I am trying to extract tweet locations from a specific area with Python using tweepy and write them into a CSV file.
I am not very experienced with Python, but I managed to put together the following script, which kind of works:
import json
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener

#Enter Twitter API key information
consumer_key = 'cons_key'
consumer_secret = 'cons_secret'
access_token = 'acc_token'
access_secret = 'acc-secret'

file = open(r"C:\Python27\Output2.csv", "w")
file.write("X,Y\n")
data_list = []
count = 0

class listener(StreamListener):
    def on_data(self, data):
        global count
        #How many tweets you want to find; could change to time based
        if count <= 100:
            json_data = json.loads(data)
            coords = json_data["coordinates"]
            if coords is not None:
                print coords["coordinates"]
                lon = coords["coordinates"][0]
                lat = coords["coordinates"][1]
                data_list.append(json_data)
                file.write(str(lon) + ",")
                file.write(str(lat) + "\n")
                count += 1
            return True
        else:
            file.close()
            return False

    def on_error(self, status):
        print status

auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
twitterStream = Stream(auth, listener())
#What you want to search for here
twitterStream.filter(locations=[11.01,47.85,12.09,48.43])
The problem is that it extracts the coordinates very slowly (about 10 entries per 30 minutes). Is there a way to make this faster?
How can I add the timestamp for each tweet?
Is there a way to make sure I retrieve all possible tweets for the specific region (I guess the maximum is all tweets from the past week)?
Thanks very much in advance!
Twitter’s standard streaming API provides a 1% sample of all the Tweets posted. In addition, very few Tweets have location data added to them. So, I’m not surprised that you’re only getting a small number of Tweets in a 30 minute timespan for one specific bounding box. The only way to improve the volume would be to pay for the enterprise PowerTrack API.
Tweets all contain a created_at value which is the time stamp you’ll want to record.
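For the timestamp question, here is a minimal sketch of the same on_data handler extended to write created_at alongside the coordinates; the field names follow the standard Tweet JSON, and the three-column CSV layout is my own assumption:

    def on_data(self, data):
        global count
        if count <= 100:
            json_data = json.loads(data)
            coords = json_data["coordinates"]
            if coords is not None:
                lon = coords["coordinates"][0]
                lat = coords["coordinates"][1]
                created = json_data["created_at"]  # e.g. "Sun Jun 10 12:34:56 +0000 2018"
                file.write(str(lon) + "," + str(lat) + "," + created + "\n")
                count += 1
            return True
        else:
            file.close()
            return False

Remember to change the header line to file.write("X,Y,created_at\n") to match.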
I'm trying to collect every public tweet for a hashtag, but my code does not go further than 299 tweets.
I'm also trying to take tweets from a specific time range, like only tweets between May 2015 and July 2016. Is there a way to do that in the main process, or should I write a little extra code for it?
Here is my code:
# if this is the first time, creates a new array which
# will store the max id of the tweets for each keyword
if not os.path.isfile("max_ids.npy"):
    max_ids = np.empty(len(keywords))
    # every value is initialized as -1 in order to start from the beginning the first time the program runs
    max_ids.fill(-1)
else:
    max_ids = np.load("max_ids.npy")  # loads the previous max ids

# if any new keywords have been added, extend the max_ids array so it corresponds to every keyword
if len(keywords) > len(max_ids):
    new_indexes = np.empty(len(keywords) - len(max_ids))
    new_indexes.fill(-1)
    max_ids = np.append(arr=max_ids, values=new_indexes)

count = 0
for i in range(len(keywords)):
    since_date = "2015-01-01"
    sinceId = None
    tweetCount = 0
    maxTweets = 5000000000000000000000  # maximum tweets to find per keyword
    tweetsPerQry = 100
    searchQuery = "#{0}".format(keywords[i])
    while tweetCount < maxTweets:
        if max_ids[i] < 0:
            if (not sinceId):
                new_tweets = api.search(q=searchQuery, count=tweetsPerQry)
            else:
                new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                        since_id=sinceId)
        else:
            if (not sinceId):
                new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                        max_id=str(int(max_ids[i]) - 1))
            else:
                new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                        max_id=str(int(max_ids[i]) - 1),
                                        since_id=sinceId)
        if not new_tweets:
            print("Keyword: {0} No more tweets found".format(searchQuery))
            break
        for tweet in new_tweets:
            count += 1
            print(count)
            file_write.write(
                .
                .
                .
            )
            item = {
                .
                .
                .
                .
                .
            }
            # instead of using mongo's id for _id, use the tweet's id
            raw_data = tweet._json
            raw_data["_id"] = tweet.id
            raw_data.pop("id", None)
            try:
                db["Tweets"].insert_one(item)
            except pymongo.errors.DuplicateKeyError as e:
                print("Already exists in 'Tweets' collection.")
            try:
                db["RawTweets"].insert_one(raw_data)
            except pymongo.errors.DuplicateKeyError as e:
                print("Already exists in 'RawTweets' collection.")
        tweetCount += len(new_tweets)
        print("Downloaded {0} tweets".format(tweetCount))
        max_ids[i] = new_tweets[-1].id
        np.save(arr=max_ids, file="max_ids.npy")  # save so we can continue mining where we left off next time the program runs
Have a look at this: https://tweepy.readthedocs.io/en/v3.5.0/cursor_tutorial.html
And try this:
import tweepy
auth = tweepy.OAuthHandler(CONSUMER_TOKEN, CONSUMER_SECRET)
api = tweepy.API(auth)
for tweet in tweepy.Cursor(api.search, q='#python', rpp=100).items():
    # Do something
    pass
In your case you have a max number of tweets to get, so as per the linked tutorial you could do:
import tweepy
MAX_TWEETS = 5000000000000000000000
auth = tweepy.OAuthHandler(CONSUMER_TOKEN, CONSUMER_SECRET)
api = tweepy.API(auth)
for tweet in tweepy.Cursor(api.search, q='#python', rpp=100).items(MAX_TWEETS):
    # Do something
    pass
If you only want tweets after a given ID, you can also pass the since_id argument, as in the sketch below.
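For example, a sketch assuming the same auth setup as above; extra keyword arguments such as since_id are forwarded by the Cursor to api.search, so only tweets newer than that ID come back:

import tweepy

MAX_TWEETS = 1000
LAST_SEEN_ID = 123456789  # hypothetical: the newest tweet ID you already have

auth = tweepy.OAuthHandler(CONSUMER_TOKEN, CONSUMER_SECRET)
api = tweepy.API(auth)

for tweet in tweepy.Cursor(api.search, q='#python', rpp=100, since_id=LAST_SEEN_ID).items(MAX_TWEETS):
    # Do something with each tweet newer than LAST_SEEN_ID
    pass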
Sorry, I can't answer in comment, too long. :)
Sure :) Check this example:
I did an advanced search for the #data keyword from May 2015 to July 2016 and got this URL: https://twitter.com/search?l=&q=%23data%20since%3A2015-05-01%20until%3A2016-07-31&src=typd
import requests

session = requests.session()
keyword = 'data'
date1 = '2015-05-01'
date2 = '2016-07-31'
# stream=True is the requests keyword (the original snippet's string also needed real concatenation)
session.get('https://twitter.com/search?l=&q=%23' + keyword + '%20since%3A' + date1 + '%20until%3A' + date2 + '&src=typd', stream=True)
Now we have all the requested tweets. You will probably run into problems with 'pagination', though.
The pagination URL looks like this:
https://twitter.com/i/search/timeline?vertical=news&q=%23data%20since%3A2015-05-01%20until%3A2016-07-31&src=typd&include_available_features=1&include_entities=1&max_position=TWEET-759522481271078912-759538448860581892-BD1UO2FFu9QAAAAAAAAETAAAAAcAAAASAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&reset_error_state=false
You could probably put in a random tweet id, or parse the first page, or request some data from Twitter first. It can be done.
Use Chrome's networking tab to find all the requested information :)
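A hedged sketch of what following that pagination could look like with requests; at the time, the /i/search/timeline endpoint returned JSON whose items_html held the rendered tweets and whose min_position was the cursor to feed back as max_position, but both key names here are assumptions from inspecting the page, not a documented API:

import requests

session = requests.session()
url = ('https://twitter.com/i/search/timeline'
       '?vertical=news&q=%23data%20since%3A2015-05-01%20until%3A2016-07-31'
       '&src=typd&include_available_features=1&include_entities=1'
       '&reset_error_state=false')
resp = session.get(url).json()
html_fragment = resp.get('items_html', '')  # HTML fragment containing the tweets to parse
next_cursor = resp.get('min_position')      # pass back as &max_position=... for the next page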
This code worked for me.
import tweepy
import pandas as pd
import os
#Twitter Access
auth = tweepy.OAuthHandler( 'xxx','xxx')
auth.set_access_token('xxx-xxx','xxx')
api = tweepy.API(auth,wait_on_rate_limit = True)
df = pd.DataFrame(columns=['text', 'source', 'url'])
msgs = []
msg =[]
for tweet in tweepy.Cursor(api.search, q='#bmw', rpp=100).items(10):
    msg = [tweet.text, tweet.source, tweet.source_url]
    msg = tuple(msg)
    msgs.append(msg)

df = pd.DataFrame(msgs)
Check the Twitter API documentation; it probably allows only around 300 tweets to be parsed this way.
I would recommend forgetting the API and doing it with requests with streaming; the API is essentially an implementation of requests with limitations added.
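To make "requests with streaming" concrete, here is a minimal sketch of the general pattern (the URL is a stand-in, and Twitter's real streaming endpoints additionally require OAuth-signed requests):

import requests

# stream=True defers downloading the body so it can be consumed incrementally
response = requests.get('https://example.com/streaming/endpoint', stream=True)
for line in response.iter_lines():
    if line:  # filter out keep-alive blank lines
        print(line)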
I am using Tweepy to capture streaming tweets based off of the hashtag #WorldCup, as seen by the code below. It works as expected.
class StdOutListener(StreamListener):
    ''' Handles data received from the stream. '''

    def on_status(self, status):
        # Prints the text of the tweet
        print('Tweet text: ' + status.text)
        # There are many options in the status object;
        # hashtags can be very easily accessed.
        for hashtag in status.entities['hashtags']:
            print(hashtag['text'])
        return True

    def on_error(self, status_code):
        print('Got an error with status code: ' + str(status_code))
        return True  # To continue listening

    def on_timeout(self):
        print('Timeout...')
        return True  # To continue listening

if __name__ == '__main__':
    listener = StdOutListener()
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)
    stream.filter(follow=[38744894], track=['#WorldCup'])
Because this is a hot hashtag right now, searches don't take long to reach the maximum number of tweets that Tweepy lets you fetch in one transaction. However, if I were searching on #StackOverflow, it would be much slower, and therefore I'd like a way to kill the stream. I could do this on several parameters, such as stopping after 100 tweets, stopping after 3 minutes, or stopping after a text output file has reached 150 lines. I do know that the socket timeout isn't the way to achieve this.
I have taken a look at this similar question:
Tweepy Streaming - Stop collecting tweets at x amount
However, it appears to not use the streaming API. The data that it collects is also very messy, whereas this text output is clean.
Can anyone suggest a way to stop Tweepy (when using the stream in this method), based on some user input parameter, besides a keyboard interrupt?
Thanks
I solved this, so I'm going to be one of those internet heroes that answers their own question.
This is achieved by using static Python variables for the counter and for the stop value (e.g. stop after you grab 20 tweets). This is currently a geolocation search, but you could easily swap it for a hashtag search by using the getTweetsByHashtag() method.
#!/usr/bin/env python
from tweepy import (Stream, OAuthHandler)
from tweepy.streaming import StreamListener

class Listener(StreamListener):
    tweet_counter = 0  # Static variable

    def login(self):
        CONSUMER_KEY = ''         # fill in your consumer key
        CONSUMER_SECRET = ''      # fill in your consumer secret
        ACCESS_TOKEN = ''         # fill in your access token
        ACCESS_TOKEN_SECRET = ''  # fill in your access token secret
        auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        return auth

    def on_status(self, status):
        Listener.tweet_counter += 1
        print(str(Listener.tweet_counter) + '. Screen name = "%s" Tweet = "%s"'
              % (status.author.screen_name, status.text.replace('\n', ' ')))
        if Listener.tweet_counter < Listener.stop_at:
            return True
        else:
            print('Max num reached = ' + str(Listener.tweet_counter))
            return False

    def getTweetsByGPS(self, stop_at_number, latitude_start, longitude_start, latitude_finish, longitude_finish):
        try:
            Listener.stop_at = stop_at_number  # Create static variable
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60)  # Socket timeout value
            streaming_api.filter(follow=None, locations=[latitude_start, longitude_start, latitude_finish, longitude_finish])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')

    def getTweetsByHashtag(self, stop_at_number, hashtag):
        try:
            Listener.stopAt = stop_at_number
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60)
            streaming_api.filter(track=[hashtag])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')

listener = Listener()
listener.getTweetsByGPS(20, -84.395198, 33.746876, -84.385585, 33.841601)  # Atlanta area.
The above solution was helpful in getting tweets by hashtag, even though there is a small error in the definition of the getTweetsByHashtag function: you had used Listener.stopAt instead of Listener.stop_at = stop_at_number.
I have tweaked the code a little so you can easily kill it after a specified number of seconds.
I defined a new __init__ function to help tweak the seconds, and an on_data handler, which carries more information than the on_status function.
Enjoy:
import time

from tweepy import (Stream, OAuthHandler)
from tweepy.streaming import StreamListener

class Listener(StreamListener):
    tweet_counter = 0  # Static variable

    def login(self):
        CONSUMER_KEY = ''         # fill in your consumer key
        CONSUMER_SECRET = ''      # fill in your consumer secret
        ACCESS_TOKEN = ''         # fill in your access token
        ACCESS_TOKEN_SECRET = ''  # fill in your access token secret
        auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        return auth

    def __init__(self, time_limit=8):
        self.start_time = time.time()
        self.limit = time_limit
        super(Listener, self).__init__()

    def on_data(self, data):
        Listener.tweet_counter += 1
        if (time.time() - self.start_time) < self.limit and Listener.tweet_counter < Listener.stop_at:
            print(str(Listener.tweet_counter) + data)
            return True
        else:
            print("Either max number reached or time limit up at: " + str(Listener.tweet_counter) + " outputs")
            return False

    #def on_status(self, status):
        #Listener.tweet_counter += 1
        #print(str(Listener.tweet_counter) + '. Screen name = "%s" Tweet = "%s"'
              #% (status.author.screen_name, status.text.replace('\n', ' ')))
        #if Listener.tweet_counter < Listener.stop_at and (time.time() - self.start_time) < self.limit:
            #return True
        #else:
            #print('Max num reached or time elapsed = ' + str(Listener.tweet_counter))
            #return False

    def getTweetsByGPS(self, stop_at_number, latitude_start, longitude_start, latitude_finish, longitude_finish):
        try:
            Listener.stop_at = stop_at_number  # Create static variable
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60)  # Socket timeout value
            streaming_api.filter(follow=None, locations=[latitude_start, longitude_start, latitude_finish, longitude_finish])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')

    def getTweetsByHashtag(self, stop_at_number, hashtag):
        try:
            Listener.stop_at = stop_at_number
            auth = self.login()
            # Note: the Listener() created here uses the default time_limit,
            # so adjust the default in __init__ to change the duration.
            streaming_api = Stream(auth, Listener(), timeout=60)
            streaming_api.filter(track=[hashtag])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')

listener = Listener()
#listener.getTweetsByGPS(20, -84.395198, 33.746876, -84.385585, 33.841601)  # Atlanta area.
listener.getTweetsByHashtag(1000, "hi")
You can change the 1000 value to the maximum number of tweets you want, and "hi" to the keyword you need to find. Under the __init__ function, change the time_limit default of 8 to the number of seconds you want. Use it depending on what you need: either set a limited time and raise the count to a very high value, or set the exact count of tweets needed and give a generous time value so the run can reach that count. Your choice!
Chukwu Gozie unu (God bless!)
I am getting a TwythonRateLimitError and want to be sure that I don't screw up my account. I am new to working with the Twitter API. How can I check that I am not going over my query limit? I read that it is 150 queries per hour... What happens if I go over? Am I at risk of this in my code, or does the limit apply only to particular commands?
I am not building an app; I am just trying to get a specific sample from Twitter (a random set of users with similar follower bases, 7,500 to 10,000 followers). My code so far is below. I will be saving the successful hits to a file, but I am waiting to be sure that is necessary.
from twython import Twython, TwythonError, TwythonRateLimitError
from random import randint

APP_KEY = 'redacted'
APP_SECRET = 'redacted'
ACCESS_TOKEN = 'redacted'

twitter = Twython(APP_KEY, APP_SECRET, oauth_version=2)
ACCESS_TOKEN = twitter.obtain_access_token()
twitter = Twython(APP_KEY, access_token=ACCESS_TOKEN)

print "hello twitterQuery\n"

count = 0
step = 0
isError = 0
try:
    #new account I made today to set an upper bound on userID
    maxID = twitter.show_user(screen_name="query_test")['id']
except TwythonRateLimitError:
    isError = 1

ids = [0,0,0,0,0,0,0,0,0,0]
if isError == 0 and step <= 150:
    while count < 10:
        step = step + 1
        randomID = randint(1, maxID)
        isMissing = 0
        print str(step) + " " + str(randomID)
        try:
            randomUserData = twitter.show_user(user_id=randomID)
        except TwythonError:
            isMissing = 1
        if isMissing == 0:
            followers = randomUserData['followers_count']
            if followers >= 7500 and followers <= 10000:
                print "ID: " + str(randomID) + ", followers: " + str(followers)
                ids[count] = randomID
                count = count + 1

print "\ndone"
for id in ids:
    print id
To see your current rate limit status, pass in your app token and send a GET request to
https://api.twitter.com/1.1/application/rate_limit_status.json
and query the response. See Twitter's rate-limiting documentation for further context.
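A minimal sketch using Twython itself, reusing the app-auth twitter instance from the question; get_application_rate_limit_status() is Twython's wrapper for that endpoint, and users/show is the call this script hammers, so that's the resource family to inspect (the output values shown are illustrative):

# Ask only for the 'users' resource family, which contains users/show
status = twitter.get_application_rate_limit_status(resources='users')
show_user_limit = status['resources']['users']['/users/show/:id']
print show_user_limit  # e.g. {'limit': 180, 'remaining': 179, 'reset': 1403602426}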