Tweepy streaming in MongoDB with full_text - python

I need help developing Python code that uses the Tweepy library and MongoDB. I cannot stream Tweets with their full text, and I don't know how to change this code to achieve that.
Please look at this code:
# Terms handed to the stream filter as its `track` argument.
keywords = ['trump']
# Language codes handed to the stream filter as its `languages` argument.
language = ['en']
# Shared sentiment scorer; on_data calls its polarity_scores() on each tweet.
analyzer = SentimentIntensityAnalyzer()
# Stream listener that decodes each raw payload, scores its sentiment and
# prints "username: text".
class StdOutListener(StreamListener):
# Tries to pick the untruncated text of a status. The resulting `tweet` is a
# local that is neither returned nor stored.
# NOTE(review): on_data is overridden below; in Tweepy 3.x the base on_data is
# what dispatches to on_status, so this method is presumably never called -
# confirm against the installed Tweepy version.
def on_status(self, status):
# NOTE(review): hasattr() takes the attribute name as a string; passing the
# status object raises TypeError. Presumably meant
# hasattr(status, "retweeted_status") - confirm.
if hasattr(self, status):
try:
# NOTE(review): "retweted_status" differs from "retweeted_status" used in the
# fallback two lines below - looks like a typo.
tweet = status.retweted_status.extended_tweet["full_text"]
except:
tweet = status.retweeted_status.text
else:
try:
tweet = status.extended_tweet["full_text"]
except AttributeError:
tweet = status.text
# Decodes one raw JSON payload, extracts user/tweet fields, computes the
# compound sentiment score and prints "username: text". Returns True.
def on_data(self, data):
t = json.loads(data)
tweet_id = t['id_str']
username = t['user']['screen_name']
followers = t['user']['followers_count']
# The sentiment below is scored on this value (the 'text' field), not on the
# 'full_text' value read on the next line.
tweet = unidecode(t['text'])
# NOTE(review): reads a top-level 'full_text' key; a payload without that key
# raises KeyError here. Presumably the untruncated text lives under
# 'extended_tweet' in streamed payloads - confirm.
text = t['full_text']
hashtags = t['entities']['hashtags']
dt = t['created_at']
language = t['lang']
blob = analyzer.polarity_scores(tweet)
sentiment = blob['compound']
# The format string matches a literal "+0000" offset; the result is a naive datetime.
created = datetime.datetime.strptime(dt, '%a %b %d %H:%M:%S +0000 %Y')
# Rebinds `tweet` to the assembled record. Nothing in this method stores or
# returns it afterwards (no database call is visible here).
tweet = {'id':tweet_id, 'username':username, 'followers':followers, 'text':text, 'hashtags':hashtags, 'language':language, 'created':created, 'sentiment':sentiment}
print (username + ':' + ' ' + text)
return True
# Called with the status code of a failed request.
def on_error(self, status_code):
if status_code == 420:
# NOTE(review): no `status` name is defined in this method (the parameter is
# status_code), so this line raises NameError unless a global `status` exists.
print (status)
return False
if __name__ == '__main__':
    # Authenticate with the application and user credentials.
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    # Wire the listener to a stream and start filtering by keyword/language.
    l = StdOutListener()
    stream = Stream(auth, l, tweet_mode="extended")
    tweets = stream.filter(languages=language, track=keywords)
I tried a naive approach: for example, I replaced text with full_text, but that doesn't work. I hope you can help me.
Thanks for your time!

if hasattr(self, status): will raise a TypeError, as hasattr requires a string input for attribute names.
You should refer to Tweepy's documentation on extended Tweets, which has an example very similar to what I think you're trying to do.

Related

How to get Tweets of a Keyword

I'm trying to get tweets for a certain keyword, 'comfama', but I can't seem to get any results. Is something wrong with my code? I tried 'donald trump' and that keyword returns results, but with 'comfama' nothing happens.
import tweepy
import pandas
import json # The API returns JSON formatted text
# Keywords handed to the stream filter.
TRACKING_KEYWORDS = ['comfama']
# File the listener writes captured tweets to.
OUTPUT_FILE = "comfama_tweets.txt"
# Capture limit checked by the listener after each written tweet.
TWEETS_TO_CAPTURE = 10
# Twitter API credentials (placeholder values).
access_token = "xxx"
access_token_secret = "xxx"
consumer_key = "xxx"
consumer_secret = "xxx"
# Pass OAuth details to tweepy's OAuth handler
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
class MyStreamListener(tweepy.StreamListener):
    """
    Twitter listener, collects streaming tweets and output to a file

    Every received status is written to ``OUTPUT_FILE`` as one JSON document
    per line. Once ``TWEETS_TO_CAPTURE`` tweets have been written the file is
    closed and the stream is asked to stop.
    """

    def __init__(self, api=None):
        """Open the output file and reset the tweet counter.

        :param api: optional API object, forwarded to the base listener.
        """
        super(MyStreamListener, self).__init__(api)
        self.num_tweets = 0
        self.file = open(OUTPUT_FILE, "w")

    def on_status(self, status):
        """Persist one status.

        :param status: status object whose ``_json`` dict is serialized.
        :return: True to keep streaming, False once the limit is reached.
        """
        self.file.write(json.dumps(status._json) + '\n')
        self.num_tweets += 1

        # The counter was already incremented for the tweet just written, so a
        # strict comparison stops after exactly TWEETS_TO_CAPTURE tweets.
        if self.num_tweets < TWEETS_TO_CAPTURE:
            if self.num_tweets % 100 == 0:  # just to see some progress...
                print('Numer of tweets captured so far: {}'.format(self.num_tweets))
            return True

        # Limit reached: flush and release the file before stopping the stream.
        self.file.close()
        return False

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
# Initialize Stream listener
l = MyStreamListener()
# Create you Stream object with authentication
stream = tweepy.Stream(auth, l)
# Filter Twitter Streams to capture data by the keywords.
# TRACKING_KEYWORDS is already a list of strings, so it is passed as-is;
# wrapping it in another list would hand the filter a nested list.
stream.filter(track=TRACKING_KEYWORDS)

How many keywords is too many to put in a Tweepy filter while streaming live data

I have code similar to the code below and was wondering how many keywords I could put in the filter without degrading performance. I realize the answer depends on several factors affecting the computer's performance, such as processor speed, connection speed and the like on the sending computer, but how many will Twitter accept? Also, is there a rule of thumb to determine how many the sending computer can handle? I would like around 3000. Is that too many?
import sys
import tweepy
# Twitter API credentials (left empty in this listing).
consumer_key = ''
consumer_secret = ''
access_key = ''
access_secret = ''
# Build the OAuth handler and an API object from the credentials above.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
# Stream listener that prints tokens of each status together with the
# author's screen name and id, plus the tweet text.
# NOTE: uses Python 2 print-statement syntax throughout.
class CustomStreamListener(tweepy.StreamListener):
def on_status(self, status):
# Tokenizes the string form of the whole status object (str(status)), not
# just its text.
x = str(status)
words = x.split()
for word in words:
# The next three values do not depend on `word`.
screen_name = status.user.screen_name
user_id = status.user.id
tweet = status.text
print word, " | ", screen_name," | ", user_id
# NOTE(review): indentation was lost in this listing, so it is unclear
# whether the tweet text is printed once per word or once per status.
print tweet
# Reports the error status code on stderr and keeps the stream running.
def on_error(self, status_code):
print >> sys.stderr, 'Encountered error with status code:', status_code
return True # Don't kill the stream
# Reports a timeout on stderr and keeps the stream running.
def on_timeout(self):
print >> sys.stderr, 'Timeout...'
return True # Don't kill the stream
# Attach a fresh listener to an authenticated stream ...
sapi = tweepy.streaming.Stream(auth, CustomStreamListener())
# ... and start receiving statuses that match the tracked terms.
sapi.filter(track=['filter1', 'filter2'])

How to get media_url from tweets using the Tweepy API

I am using this code:
import tweepy
from tweepy.api import API
import urllib
import os
# Running index the listener uses to name downloaded files ("pic<i>.jpg").
i = 1
# Twitter API credentials (placeholder values).
consumer_key="xx"
consumer_secret="xx"
access_token="xx"
access_token_secret="xx"
# Build the OAuth handler and an API object from the credentials above.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.secure = True
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
class MyStreamListener(tweepy.StreamListener):
    """Download the media attached to streamed tweets.

    Every media entry of a status is saved as ``pic<i>.jpg`` (``i`` is the
    module-level counter) inside ``download_dir``. The stream is stopped
    after ``self.m`` statuses have been processed.
    """

    # Target directory for downloads; override on a subclass or an instance
    # to save somewhere else.
    download_dir = "C:/Users/Charbo/Documents/Python/"

    def __init__(self, api=None):
        """Initialise the base listener and the status counters.

        :param api: optional API object, forwarded to the base listener
            (which stores it as ``self.api``).
        """
        super(MyStreamListener, self).__init__(api)
        self.n = 0   # statuses processed so far
        self.m = 10  # stop once this many statuses were processed

    def on_status(self, status):
        """Save every media item of ``status``.

        :param status: status object exposing ``entities`` and ``user``.
        :return: True to keep streaming, False once ``self.m`` is reached.
        """
        global i
        if 'media' in status.entities:
            for image in status.entities['media']:
                picName = "pic%s.jpg" % i
                i += 1
                filename = os.path.join(self.download_dir, picName)
                # NOTE: urllib.urlretrieve is the Python 2 location of this
                # function (urllib.request.urlretrieve on Python 3).
                urllib.urlretrieve(image['media_url'], filename)
                # use to test
                print(status.user.screen_name)
        else:
            print("no media_url")

        self.n += 1
        if self.n < self.m:
            return True
        print('tweets = ' + str(self.n))
        return False

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
myStreamListener = MyStreamListener()
# Hand the instance created above to the stream instead of constructing a
# second listener and leaving this one unused.
myStream = tweepy.Stream(auth, myStreamListener, timeout=30)
myStream.filter(track=['#feelthebern'])
I am trying to access the media_url under 'photo' in my dictionary, but I am getting the following error: 'dict' object has no attribute 'media'. I would appreciate help navigating the JSON.
Thanks in advance!
You should try two things :
Add entities to your request
>
tweepy.Cursor(api.search, q="#hashtag", count=5, include_entities=True)
Check that media is present:
>
if 'media' in tweet.entities:
for image in tweet.entities['media']:
(do smthing with image['media_url'])
Hope this will help
This reply might be a little late, but I'm sure other people will find it useful someday. I actually didn't want to retweet any tweet with a video in it. So I built this function.... and it works perfectly.
def on_status(self, status):
    """Retweet plain-text tweets and photo tweets; skip everything else.

    Replies, the account's own tweets, already-retweeted tweets and quote
    tweets are ignored. Tweets whose first media entry is not a photo are
    reported on stdout and left alone.
    """
    # Nothing to do for replies or for tweets authored by this account.
    if status.in_reply_to_status_id is not None or status.user.id == self.me.id:
        return

    # Already retweeted, or a quote tweet: leave it alone.
    if status.retweeted or status.is_quote_status:
        return

    has_media = 'media' in status.entities
    if has_media:
        first_media = status.entities['media'][0]
        if first_media['type'] != 'photo':
            # Anything that is not a photo is a video or GIF: do nothing.
            print("Sorry, this might be a video. Cound't retweet because it is neither a photo nor a text")
            print(status.text)
            return

    # Text-only tweets and photo tweets share the same retweet routine.
    try:
        print("It is a photo" if has_media else status.text)
        status.retweet()
        time.sleep(40)  # Sleep for 40 seconds to avoid limits
    except Exception as e:
        print("Error on_data %s" % str(e))
        print("Error from retweeting")

save twitter user information into a file using StreamListener

I want to save Twitter user information such as name, statuses and tweets to a file (JSON, TXT, CSV or any other format; JSON or plain text is preferred). I tried this code and some similar variants, but none of them work. Please have a look at the code below and suggest what changes I should make.
import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import os
import json
# Twitter API credentials (masked in this listing).
ckey = '**********'
consumer_secret = '**********'
access_token_key = '**********'
access_token_secret = '**********'
start_time = time.time() #grabs the system time
# NOTE(review): the filter call at the end of this listing passes a literal
# ['twitter'] and does not reference keyword_list.
keyword_list = ['twitter'] #track list
#Listener Class Override
# Stream listener meant to append the user name of each received tweet to a
# file until a time limit has elapsed.
# NOTE: uses Python 2 syntax (print statement, "except X, e").
class listener(StreamListener):
# start_time: reference timestamp the limit is measured from.
# time_limit: number of seconds after start_time during which data is handled.
def __init__(self, start_time, time_limit=60):
self.time = start_time
self.limit = time_limit
def on_data(self, data):
while (time.time() - self.time) < self.limit:
try:
# NOTE(review): this subscripts the json.loads function instead of calling
# it with `data`; this is the source of the reported
# "'function' object has no attribute '__getitem__'" failure.
all_data = json.loads["text"]
username = all_data["user"]["name"]
# NOTE(review): `all_date` is not defined anywhere in this listing - looks
# like a typo for `all_data`.
tweets = all_date["user"]["statuses"]
saveFile = open('raw_tweets29.json', 'a')
saveFile.write(username)
saveFile.write('\n')
saveFile.close()
return True
# Catches every exception type (BaseException), prints it and waits 5 seconds.
except BaseException, e:
print 'failed ondata,', str(e)
time.sleep(5)
pass
exit()
def on_error(self, status):
# NOTE(review): `statuses` is not defined here; the parameter is `status`.
print statuses
auth = OAuthHandler(ckey, consumer_secret) #OAuth object
auth.set_access_token(access_token_key, access_token_secret)
# The listener is created with a 20-second limit measured from start_time.
twitterStream = Stream(auth, listener(start_time, time_limit=20))
twitterStream.filter(track=['twitter'])
When I run the code above, it gives me this error:
failed ondata, 'function' object has no attribute '__getitem__'
I would greatly appreciate any help you can give me in working this problem
I was making a mistake, and I have now figured it out: there is no need for the temporary 'text' variable; what I need to do is load the actual data.
One more thing is required: encoding.
Thanks, everyone, for your time.
import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import os,sys
import json
# Twitter API credentials (masked in this listing).
ckey = '***'
consumer_secret = '***'
access_token_key = '***'
access_token_secret = '***'
# Reference timestamp handed to the listener; its time limit counts from here.
start_time = time.time()
class listener(StreamListener):
    """Append the text of every streamed tweet to a file for a limited time.

    :param start_time: reference timestamp (a ``time.time()`` value).
    :param time_limit: seconds after ``start_time`` during which tweets are
        saved; afterwards the stream is asked to stop.
    """

    def __init__(self, start_time, time_limit=300):
        super(listener, self).__init__()
        self.time = start_time
        self.limit = time_limit

    def on_data(self, data):
        """Save the text of one raw payload.

        :param data: raw JSON string delivered by the stream.
        :return: True to keep streaming, False once the time limit elapsed.
        """
        # Local import keeps this block self-contained; io.open behaves the
        # same on Python 2 and 3.
        import io

        if (time.time() - self.time) >= self.limit:
            # Time budget used up: returning False asks the stream to stop.
            return False

        try:
            tweet = json.loads(data)
            text = tweet['text']
            # Encode once at the file boundary; the context manager closes
            # the handle even when the write fails.
            with io.open('user_tweets29.json', 'a', encoding='utf8') as saveFile:
                saveFile.write(text + u'\n')
        except Exception as e:
            # A payload that cannot be handled is reported and skipped; it
            # is not retried, since retrying the same data cannot succeed.
            print('failed ondata, ' + str(e))
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
# Authenticate with the credentials defined above.
auth = OAuthHandler(ckey, consumer_secret)
auth.set_access_token(access_token_key, access_token_secret)
# The listener is created with a 60-second limit measured from start_time.
twitterStream = Stream(auth, listener(start_time, time_limit=60))
twitterStream.filter(track=['twitter'])

Tweepy odd streaming error - python

I am attempting to make a script that searches the user timeline and then favorites tweets. For some reason, it isn't working.
I wrote this code:
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import *
import tweepy, json
# Stream listener that tries to favorite every tweet it receives.
# NOTE: uses Python 2 print-statement syntax.
class StdOutListener(StreamListener):
def on_data(self, data):
data = json.loads(data)
try:
# NOTE(review): `id` here is the builtin function used as a dictionary key,
# not the string 'id' - presumably data['id'] was intended.
api.create_favorite(data[id])
# The bare except silently discards any failure of the lookup or the API call.
except:
pass
# NOTE(review): the same data[id] lookup is repeated outside the try block,
# so a failing lookup is not suppressed here.
print 'Favoriting tweet id ' + data[id] + ' in twitter timeline...'
return True
def on_error(self, status):
print status
l = StdOutListener()
# Authenticate (placeholder credentials) and create the API client that the
# listener uses through the module-level name `api`.
auth = tweepy.OAuthHandler('x', 'x')
auth.set_access_token('x-x', 'x')
api = tweepy.API(auth)
stream = Stream(auth, l)
# Follower ids of the given account, used as the `follow` filter below.
userz = api.followers_ids(screen_name='smileytechguy')
keywords = ['ebook', 'bot']
# NOTE(review): the traceback reported for this call ("'long' object has no
# attribute 'encode'") indicates the ids in userz are integers where strings
# are expected.
stream.filter(track=keywords, follow=userz)
But I am getting this Error message
Traceback (most recent call last):
File "FavTL.py", line 27, in <module>
stream.filter(track=keywords, follow=userz)
File "build\bdist.win-amd64\egg\tweepy\streaming.py", line 310, in filter
AttributeError: 'long' object has no attribute 'encode'
Any idea how I can fix it?
This code should work. Don't forget to enable write access for your API keys.
# Twitter API credentials (placeholder values).
consumer_key = '..'
consumer_secret = '..'
access_token = '..'
access_secret = '..'
# Authenticate and create the API client the listener calls as `api`.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)
class StdOutListener(StreamListener):
    """Favorite every tweet delivered by the stream, pausing between calls."""

    def on_data(self, data):
        """Favorite the tweet contained in one raw payload.

        :param data: raw JSON string delivered by the stream.
        :return: True, so the stream keeps running.
        """
        # Twitter returns data in JSON format - we need to decode it first
        decoded = json.loads(data)
        tweet_id = decoded.get('id')
        if tweet_id is None:
            # A payload without an 'id' has nothing to favorite; skip it
            # instead of failing with KeyError.
            return True

        api.create_favorite(tweet_id)
        print('Favoriting tweet id ' + str(tweet_id) + ' in twitter timeline...')
        # Wait between favorites before handling the next payload.
        time.sleep(65)
        return True

    def on_error(self, status):
        """Print a message for the error status reported by the stream."""
        if status == 420:
            print("Twitter is limiting this account.")
        else:
            print("Error Status " + str(status))
l = StdOutListener()
api = tweepy.API(auth)
stream = Stream(auth, l)
# Follower ids of the given account, used as the `follow` filter below.
userz = api.followers_ids('smileytechguy')
keywords = ['ebook', 'bot']
# `follow` needs a list of user-id strings. Converting each id separately
# keeps it a list; str(userz) would pass the text of the whole list
# ("[1, 2, ...]") as a single string.
stream.filter(track=keywords, follow=list(map(str, userz)))

Categories