I tried to get any tweet that contain images. But when I get tweet data in line if media in data.entities:, I get error AttributeError: str object has no attribute entities.
I tried adding to the line
twitterStream = Stream (auth, listener (), include_entities = 1)
but it does not work either
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import pprint
consumer_key = "xxxxxxxxxxxxx"
consumer_secret = "xxxxxxxxxxxxxxxxx"
access_token = "xxxxxxxxxxxxxxxxxx"
access_secret = "xxxxxxxxxxxx"
class listener(StreamListener):
def on_data(self, data):
if 'media' in data.entities:
print(data)
#for image in data.extended_entities['media']:
#print(image['media_url'])
#return(True)
def on_error(self, status):
print ("error")
print (status)
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
twitterStream = Stream(auth, listener())
tweets=twitterStream.filter(track=["#picture"])
Tweepy passes the raw text data to tweepy.StreamListener's on_data() method, which is used for handling the raw data from API (so you need to parse JSON string and construst tweepy.Status object).
If you handle normal status objects, you'd better use on_status() method but on_data(). This method takes Tweepy's normal Status object as an argument, so you can use this status object as usual.
So following code
class MyStreamListener(tweepy.StreamListener):
def on_status(self, status):
print('#on_status')
print(type(status))
print(status.text)
def on_error(self, error_code):
print('#on_error')
print(error_code)
if error_code == 420:
return False
stream = tweepy.Stream(api.auth, MyStreamListener())
stream.filter(track=["#picture"])
will print like this:
#on_status
<class 'tweepy.models.Status'>
test1! #picture
#on_status
<class 'tweepy.models.Status'>
This is test picture tweet2! #picture
See also: Streaming With Tweepy — tweepy 3.6.0 documentation
Related
I'm setting a streaming listener, and then filter the tweets by a specific keyword and the location bounding box, and I want to save the filtering result into a json file.
But I found that all the result from streaming listener is in the json file, not just the filtered results.
I think it probably because the 'save json file' code is in the class Mystreamlistener, and the filtering code is behind it.
But I don't know how to revise my code. Here is my code:
This is my first code:
try:
import json
except ImportError:
import simplejson as json
import tweepy, sys
from time import sleep
import csv
consumer_key = 'XX'
consumer_secret = 'XX'
access_token = 'XX'
access_token_secret = 'XX'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
box = [-178.2,6.6,-49.0,83.3]
import tweepy
class MyStreamListener(tweepy.StreamListener):
def on_status(self, status):
print(status.text.encode('utf-8'))
with open('government.json', 'a') as f:
tweet=str(status.user)
nPos=tweet.index("_json=")
tweet=tweet[nPos+6:]
ePos=tweet.index("id=")
tweet=tweet[:ePos-2]
f.write(tweet+'\n')
def on_error(self, status_code):
if status_code == 420:
#returning False in on_data disconnects the stream
return False
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(api.auth, listener=myStreamListener)
myStream.filter(track=['trump'], locations=(box))
I already tried the answer from Ajeet Khan in question:
How to save a tweepy Twitter stream to a file?
But I don't know how to call the class, the second code is what I tried according to Ajeet Khan's answer.
try:
import json
except ImportError:
import simplejson as json
import tweepy, sys
from time import sleep
import csv
consumer_key = 'XX'
consumer_secret = 'XX'
access_token = 'XX'
access_token_secret = 'XX'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
box = [-178.2,6.6,-49.0,83.3]
import tweepy
class MyStreamListener(tweepy.StreamListener):
def on_status(self, status):
print(status.text.encode('utf-8'))
def on_error(self, status_code):
if status_code == 420:
#returning False in on_data disconnects the stream
return False
class StdOutListener(tweepy.StreamListener):
def on_data(self, status):
#print data
with open('fetched_tweets.txt','a') as tf:
tf.write(status)
return True
def on_error(self, status):
print(status)
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(api.auth, listener=myStreamListener)
myStream.filter(track=['trump'], locations=(box))
StdOutListener()
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import pw
import json
access_token = pw.access_token
access_token_secret = pw.access_token_secret
consumer_key = pw.consumer_key
consumer_secret = pw.consumer_secret
class StdOutListener(StreamListener):
def on_data(self, data):
dicto = json.loads(data)
print(dicto['user'])
return True
def on_error(self, status):
print(status)
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, StdOutListener())
stream.filter(track="music")
So I just followed a simple tutorial to process tweets. I'm just trying to parse the tweets, however when I use print(dicto['user']) it prints some and then throws a key error. The weird thing is that it's always after the 48th one. If I simply print the whole dictionary then it happily prints away. Right after the 48th piece of information is says traceback (most recent call last).
I'm a little confused what's going on here.
So I have the following python code which receives notification of received direct messages via Tweepy:
#!/usr/bin/env python
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import simplejson as json
consumer_key = "secret"
consumer_secret = "secret"
access_token = "secret"
access_token_secret = "secret"
class StdOutListener(StreamListener):
def __init__(self):
print "init"
def on_connect(self):
print "Connected"
def on_disconnect(self, status):
print ("Disconnected", status)
def on_direct_message(self, status):
print ("on_direct_message", status)
def on_data(self, status):
# print ("on_data", status)
decoded = json.loads(status)
## grab the direct message
directMessage = decoded['direct_message']
message = directMessage.get('text', None)
message.strip()
print "message:*", message, "*"
return True
def on_error(self, status):
print ("on_error", status)
if __name__ == '__main__':
## Connect to Twitter
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
## Init the StreamListener
print ("Init Stream")
l = StdOutListener()
stream = Stream(auth, l)
stream.userstream()
When it runs and I send the direct message "test message" I get the following printed to the console:
message:* text message *
ie the message was received and parsed but padded with space at either end which message.strip() didn't even correct. If I uncomment the print directMessage line and view the json sent by Twitter there is no space.
I cannot work out if there is a problem with my JSON editing or usage of tweepy or something else.
I've also tried using the json package as well as simplejson.
I have a class MyStreamListener that I'm trying to call from a different file, but I get the type error 'MyStreamListener' not callable. From what I've read when referencing user made classes, it could be because I'm trying to access a reserved keyword in python, but I've already tried changing the name of the class. Is there anything else I'm doing wrong?
functionality.py
from authenticate import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
from twitter_stream import MyStreamListener
def oauth_authenticate():
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)
return api
def streaming():
api = oauth_authenticate()
streamListener = MyStreamListener()
stream = tweepy.Stream(auth=api.auth, listener=streamListener())
if __name__ == '__main__':
print "wanting to stream"
streaming()
print "EXITING"
twitter_stream.py
import tweepy
class MyStreamListener(tweepy.StreamListener):
def on_status(self, status):
print(status.text)
In the line:
stream = tweepy.Stream(auth = api.auth, listener = streamListener())
you are trying to call streamListener since you've got the parens there. Instead, just pass the object itself, i.e.:
stream = tweepy.Stream(auth=api.auth, listener=streamListener)
I'm trying to access the Twitter stream which I had working previously while improperly using Tweepy. Now that I understand how Tweepy is intended to be used I wrote the following Stream.py module. When I run it, I get error code 401 which tells me my auth has been rejected. But I had it working earlier with the same consumer token and secret. Any ideas?
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import TweepError
from tweepy import error
#Removed. I have real keys and tokens
consumer_key = "***"
consumer_secret = "***"
access_token="***"
access_token_secret="***"
class CustomListener(StreamListener):
""" A listener handles tweets are the received from the stream.
This is a basic listener that just prints received tweets to stdout."""
def on_status(self, status):
# Do things with the post received. Post is the status object.
print status.text
return True
def on_error(self, status_code):
# If error thrown during streaming.
# Check here for meaning:
# https://dev.twitter.com/docs/error-codes-responses
print "ERROR: ",; print status_code
return True
def on_timeout(self):
# If no post received for too long
return True
def on_limit(self, track):
# If too many posts match our filter criteria and only a subset is
# sent to us
return True
def filter(self, track_list):
while True:
try:
self.stream.filter(track=track_list)
except error.TweepError as e:
raise TweepError(e)
def go(self):
listener = CustomListener()
auth = OAuthHandler(consumer_key, consumer_secret)
self.stream = Stream(auth,listener,timeout=3600)
listener.filter(['LOL'])
if __name__ == '__main__':
go(CustomListener)
For anyone who happens to have the same issue, I should have added this line after auth was initialized:
auth.set_access_token(access_token, access_token_secret)