I am trying to set up tweepy in python but it doesn't execute the 'on_status' part. Please help me on how to solve this. I am new to this
import tweepy
class MyStreamListener(tweepy.Stream):
def on_status(self, status):
# process tweets on arrival
print(status.text)
def on_error(self, status_code):
# handles streaming errors
print(f"Encountered streaming error ({status_code})")
def main():
consumer_key = "----"
consumer_secret = "---"
access_token = "---"
access_token_secret = "---"
stream = MyStreamListener(consumer_key,consumer_secret,access_token, access_token_secret)
# filter tweets by keywords
keywords = ['coronavirus', 'covid19', 'covid vaccine']
stream.filter(track=keywords, stall_warnings=True)
if __name__ == "__main__":
main()
Related
I'm setting a streaming listener, and then filter the tweets by a specific keyword and the location bounding box, and I want to save the filtering result into a json file.
But I found that all the result from streaming listener is in the json file, not just the filtered results.
I think it probably because the 'save json file' code is in the class Mystreamlistener, and the filtering code is behind it.
But I don't know how to revise my code. Here is my code:
This is my first code:
try:
import json
except ImportError:
import simplejson as json
import tweepy, sys
from time import sleep
import csv
consumer_key = 'XX'
consumer_secret = 'XX'
access_token = 'XX'
access_token_secret = 'XX'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
box = [-178.2,6.6,-49.0,83.3]
import tweepy
class MyStreamListener(tweepy.StreamListener):
def on_status(self, status):
print(status.text.encode('utf-8'))
with open('government.json', 'a') as f:
tweet=str(status.user)
nPos=tweet.index("_json=")
tweet=tweet[nPos+6:]
ePos=tweet.index("id=")
tweet=tweet[:ePos-2]
f.write(tweet+'\n')
def on_error(self, status_code):
if status_code == 420:
#returning False in on_data disconnects the stream
return False
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(api.auth, listener=myStreamListener)
myStream.filter(track=['trump'], locations=(box))
I already tried the answer from Ajeet Khan in question:
How to save a tweepy Twitter stream to a file?
But I don't know how to call the class, the second code is what I tried according to Ajeet Khan's answer.
try:
import json
except ImportError:
import simplejson as json
import tweepy, sys
from time import sleep
import csv
consumer_key = 'XX'
consumer_secret = 'XX'
access_token = 'XX'
access_token_secret = 'XX'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
box = [-178.2,6.6,-49.0,83.3]
import tweepy
class MyStreamListener(tweepy.StreamListener):
def on_status(self, status):
print(status.text.encode('utf-8'))
def on_error(self, status_code):
if status_code == 420:
#returning False in on_data disconnects the stream
return False
class StdOutListener(tweepy.StreamListener):
def on_data(self, status):
#print data
with open('fetched_tweets.txt','a') as tf:
tf.write(status)
return True
def on_error(self, status):
print(status)
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(api.auth, listener=myStreamListener)
myStream.filter(track=['trump'], locations=(box))
StdOutListener()
I am currently writing a python program that utilizes Tweepy & the Twitter API, and extracts URI links from tweets on twitter.
This is currently my code. How do I modify it so that it only outputs the URIs from tweets(if there is one included)?
#Import the necessary methods from tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
#Variables that contains the user credentials to access Twitter API
access_token = "-"
access_token_secret = ""
consumer_key = ""
consumer_secret = ""
#This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
def on_data(self, data):
print data
return True
def on_error(self, status):
print status
if __name__ == '__main__':
#This handles Twitter authetification and the connection to Twitter Streaming API
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)
#This line filter Twitter Streams to capture data by the keyword: '#NFL'
twitterator = stream.filter(track=[ '#NFL' ])
for tweet in twitterator:
print "(%s) #%s %s" % (tweet["created_at"], tweet["user"]["screen_name"], tweet["text"])
for url in tweet["entities"]["urls"]:
print " - found URL: %s" % url["expanded_url"]
I've modified your code to only print URLs if present:
#Import the necessary methods from tweepy library
import json
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
#Variables that contains the user credentials to access Twitter API
access_token = "-"
access_token_secret = ""
consumer_key = ""
consumer_secret = ""
#This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
def on_data(self, data):
tweet = json.loads(data)
for url in tweet["entities"]["urls"]:
print " - found URL: %s" % url["expanded_url"]
return True
def on_error(self, status):
print status
if __name__ == '__main__':
#This handles Twitter authetification and the connection to Twitter Streaming API
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)
#This line filter Twitter Streams to capture data by the keyword: '#NFL'
stream.filter(track=[ '#NFL' ])
I'm running the following code to get tweets related to the below keywords.
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
access_token = "K"
access_token_secret = ""
consumer_key = ""
consumer_secret = ""
class StdOutListener(StreamListener):
def on_data(self, data):
print data
return True
def on_error(self, status):
print status.text
if __name__ == '__main__':
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)
stream.filter(track=["flyspicejet","jetairways","airindiain","goairlinesindia","airvistara","spicejet","airindia","goairlines","vistara"])
However I ran this for a few hours but got no results!How can it be possible that not even a single tweet with these keywords(popular airlines in India) was published?
What am I doing wrong?
Thanks a lot in advance.
You're not handing the error cases properly. Try the following code:
Python 3 version:
import time
import argparse
import os
import sys
import json
#Import the necessary methods from tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from http.client import IncompleteRead
access_token = "acc token here"
access_token_secret = "acc token secret"
consumer_key = "consumer key"
consumer_secret = "consumer secret here"
class StdOutListener(StreamListener):
'''A basic listener that just writes received tweets to file.'''
def __init__(self, outputfile):
self.file = outputfile
def on_data(self, data):
with open(self.file, 'a') as ofile:
#simply write everything to file as raw json
ofile.write(data)
return True
def on_error(self, status):
print(status)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
requiredArgs = parser.add_argument_group('must need arguments')
requiredArgs.add_argument('-o', '--output', help='Output txt file to write tweets', required=True)
args = parser.parse_args()
filepath = os.getcwd() + os.path.sep + args.output
if os.path.exists(filepath):
sys.exit("output file already exists; Give new filename!")
else:
#create an empty file
open(args.output,'a').close()
l = StdOutListener(args.output)
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
keyword_list = ['Computer Science', 'Cancer', 'Germany', 'Paris']
while True:
try:
stream = Stream(auth, l)
stream.filter(track=keyword_list, stall_warnings=True)
except IncompleteRead as e:
# Oh well, sleep sometime & reconnect and keep trying again
time.sleep(15)
continue
except KeyboardInterrupt:
stream.disconnect()
break
So I have the following python code which receives notification of received direct messages via Tweepy:
#!/usr/bin/env python
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import simplejson as json
consumer_key = "secret"
consumer_secret = "secret"
access_token = "secret"
access_token_secret = "secret"
class StdOutListener(StreamListener):
def __init__(self):
print "init"
def on_connect(self):
print "Connected"
def on_disconnect(self, status):
print ("Disconnected", status)
def on_direct_message(self, status):
print ("on_direct_message", status)
def on_data(self, status):
# print ("on_data", status)
decoded = json.loads(status)
## grab the direct message
directMessage = decoded['direct_message']
message = directMessage.get('text', None)
message.strip()
print "message:*", message, "*"
return True
def on_error(self, status):
print ("on_error", status)
if __name__ == '__main__':
## Connect to Twitter
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
## Init the StreamListener
print ("Init Stream")
l = StdOutListener()
stream = Stream(auth, l)
stream.userstream()
When it runs and I send the direct message "test message" I get the following printed to the console:
message:* text message *
ie the message was received and parsed but padded with space at either end which message.strip() didn't even correct. If I uncomment the print directMessage line and view the json sent by Twitter there is no space.
I cannot work out if there is a problem with my JSON editing or usage of tweepy or something else.
I've also tried using the json package as well as simplejson.
I'm trying to access the Twitter stream which I had working previously while improperly using Tweepy. Now that I understand how Tweepy is intended to be used I wrote the following Stream.py module. When I run it, I get error code 401 which tells me my auth has been rejected. But I had it working earlier with the same consumer token and secret. Any ideas?
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import TweepError
from tweepy import error
#Removed. I have real keys and tokens
consumer_key = "***"
consumer_secret = "***"
access_token="***"
access_token_secret="***"
class CustomListener(StreamListener):
""" A listener handles tweets are the received from the stream.
This is a basic listener that just prints received tweets to stdout."""
def on_status(self, status):
# Do things with the post received. Post is the status object.
print status.text
return True
def on_error(self, status_code):
# If error thrown during streaming.
# Check here for meaning:
# https://dev.twitter.com/docs/error-codes-responses
print "ERROR: ",; print status_code
return True
def on_timeout(self):
# If no post received for too long
return True
def on_limit(self, track):
# If too many posts match our filter criteria and only a subset is
# sent to us
return True
def filter(self, track_list):
while True:
try:
self.stream.filter(track=track_list)
except error.TweepError as e:
raise TweepError(e)
def go(self):
listener = CustomListener()
auth = OAuthHandler(consumer_key, consumer_secret)
self.stream = Stream(auth,listener,timeout=3600)
listener.filter(['LOL'])
if __name__ == '__main__':
go(CustomListener)
For anyone who happens to have the same issue, I should have added this line after auth was initialized:
auth.set_access_token(access_token, access_token_secret)