How to get media_url from tweets using the Tweepy API

How to get media_url from tweets using the Tweepy API - python

I am using this code:
import tweepy
from tweepy.api import API
import urllib
import os
# Running index used to build the downloaded picture names (pic1.jpg, ...).
i = 1

# Twitter application credentials (placeholders).
consumer_key = "xx"
consumer_secret = "xx"
access_token = "xx"
access_token_secret = "xx"

# OAuth handler and API handle built from the credentials above.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.secure = True
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that downloads the pictures attached to incoming tweets.

    Every media entry of a status is saved as ``pic<N>.jpg`` in
    ``download_dir``; ``N`` comes from the module-level counter ``i``.
    The stream is stopped once ``self.m`` statuses have been received.
    """

    # Directory the downloaded pictures are written to.
    download_dir = "C:/Users/Charbo/Documents/Python/"

    def __init__(self, api=None):
        # Delegate to the base initialiser instead of re-implementing it, so
        # the listener keeps whatever state the library sets up for itself.
        super(MyStreamListener, self).__init__(api)
        self.n = 0   # statuses received so far
        self.m = 10  # stop the stream after this many statuses

    @staticmethod
    def _download(url, destination):
        """Save *url* to *destination* on both Python 2 and Python 3."""
        try:
            from urllib.request import urlretrieve  # Python 3
        except ImportError:
            from urllib import urlretrieve  # Python 2
        urlretrieve(url, destination)

    def on_status(self, status):
        """Download the media of *status*.

        Returns True to keep the stream open, False once ``self.m``
        statuses have been handled.
        """
        global i  # module-level picture counter

        # ``status.entities`` is a plain dict, so it must be indexed with
        # ['media']; attribute access (``.media``) fails on it.
        if 'media' in status.entities:
            for image in status.entities['media']:
                picName = "pic%s.jpg" % i
                i += 1
                filename = os.path.join(self.download_dir, picName)
                self._download(image['media_url'], filename)
                # use to test
                print(status.user.screen_name)
        else:
            print("no media_url")

        self.n += 1
        if self.n < self.m:
            return True
        print('tweets = ' + str(self.n))
        return False

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
# Create the listener once and hand that same instance to the stream; the
# original built a second, separate MyStreamListener() for the Stream call.
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth, myStreamListener, timeout=30)
myStream.filter(track=['#feelthebern'])
I am trying to access the media_url under 'photo' in the status entities dictionary, but I am getting the following error: 'dict' object has no attribute 'media'. I would appreciate help navigating the JSON.
Thanks in advance!

You should try two things :
Add entities to your request
>
tweepy.Cursor(api.search, q="#hashtag", count=5, include_entities=True)
Check that media is not null:
>
if 'media' in tweet.entities:
for image in tweet.entities['media']:
(do smthing with image['media_url'])
Hope this will help

This reply might be a little late, but I'm sure other people will find it useful someday. I actually didn't want to retweet any tweet with a video in it. So I built this function.... and it works perfectly.
def on_status(self, status):
    """Retweet text-only tweets and photo tweets; skip everything else.

    Replies, the authenticated user's own tweets, tweets already
    retweeted and quote tweets are ignored. Always returns None.
    """
    # Ignore the tweet if it is a reply or if I am its author.
    if status.in_reply_to_status_id is not None or \
            status.user.id == self.me.id:
        return
    # Only retweet tweets not retweeted yet, and never quote tweets.
    if status.retweeted or status.is_quote_status:
        return

    # ``entities`` lists native video and GIF attachments with type "photo"
    # (their thumbnail); the real media type is only in ``extended_entities``,
    # so prefer that when the status carries it.
    extended = getattr(status, 'extended_entities', None) or {}
    media_details = extended.get('media') or status.entities.get('media') or []

    if media_details and media_details[0]['type'] != 'photo':
        # Anything else is a video or GIF. Do nothing.
        print("Sorry, this might be a video. Cound't retweet because it is neither a photo nor a text")
        print(status.text)
        return

    try:
        if media_details:
            print("It is a photo")
        else:
            print(status.text)
        status.retweet()
        time.sleep(40)  # Sleep for 40 seconds to avoid limits
    except Exception as e:
        # Best effort: one failed retweet must not stop the stream.
        print("Error on_data %s" % str(e))
        print("Error from retweeting")

Related

Tweepy streaming in MongoDB with full_text

I need help developing Python code that uses the Tweepy library and MongoDB. I cannot stream Tweets with their full text, and I don't know how to change this code to achieve that.
Please look at this code:
# Terms passed as the ``track`` argument of the stream filter.
keywords = ['trump']
# Language codes passed as the ``languages`` argument of the stream filter.
language = ['en']
# Sentiment scorer; the listener reads the 'compound' value of polarity_scores().
analyzer = SentimentIntensityAnalyzer()
class StdOutListener(StreamListener):
    """Print every streamed tweet with its untruncated text.

    For each tweet a record (id, user, followers, full text, hashtags,
    language, creation time, sentiment) is assembled and the text printed.
    """

    @staticmethod
    def _full_text(payload):
        """Return the untruncated text of a raw tweet *payload* (a dict).

        Stream payloads keep the complete text of long tweets under
        ``extended_tweet['full_text']``; the top-level ``text`` may be
        truncated. For a retweet the text of the original tweet is used.
        """
        source = payload.get('retweeted_status') or payload
        extended = source.get('extended_tweet')
        if extended:
            return extended['full_text']
        return source['text']

    def on_status(self, status):
        """Return the untruncated text of a parsed *status* object.

        NOTE(review): ``on_data`` is overridden below, so this hook is
        presumably not reached while that override is in place — confirm.
        """
        # hasattr() needs the attribute *name* as a string; the original
        # ``hasattr(self, status)`` raised TypeError, and the misspelt
        # ``retweted_status`` was hidden by a bare ``except``.
        source = getattr(status, 'retweeted_status', status)
        try:
            tweet = source.extended_tweet["full_text"]
        except AttributeError:
            tweet = source.text
        return tweet

    def on_data(self, data):
        """Handle one raw stream message; return True to keep streaming."""
        t = json.loads(data)
        # Messages without an id or user are not tweets; skip them instead
        # of failing with KeyError.
        if 'id_str' not in t or 'user' not in t:
            return True

        tweet_id = t['id_str']
        username = t['user']['screen_name']
        followers = t['user']['followers_count']
        # Stream payloads have no top-level 'full_text' key.
        text = self._full_text(t)
        hashtags = t['entities']['hashtags']
        dt = t['created_at']
        lang = t['lang']
        # Score the full text so the sentiment matches what is stored.
        sentiment = analyzer.polarity_scores(unidecode(text))['compound']
        created = datetime.datetime.strptime(dt, '%a %b %d %H:%M:%S +0000 %Y')
        # Record describing the tweet; nothing in this snippet stores it yet.
        tweet = {'id': tweet_id, 'username': username, 'followers': followers, 'text': text, 'hashtags': hashtags, 'language': lang, 'created': created, 'sentiment': sentiment}
        print(username + ':' + ' ' + text)
        return True

    def on_error(self, status_code):
        """Disconnect (return False) when the stream reports status 420."""
        if status_code == 420:
            # The original printed the undefined name ``status`` here.
            print(status_code)
            return False
if __name__ == '__main__':
    # Authenticate and start streaming the configured keywords/languages.
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    # ``tweet_mode`` is a REST API option and has no effect on Stream; long
    # tweets arrive with an ``extended_tweet`` field instead.
    stream = Stream(auth, listener)
    # filter() delivers tweets through the listener callbacks, so its
    # return value is not kept.
    stream.filter(track=keywords, languages=language)
I've tried a naive approach: for example, I replaced text with full_text, but it doesn't work. I hope you can help me.
Thanks for your time!

if hasattr(self, status): will raise a TypeError, because hasattr requires the attribute name as a string, e.g. hasattr(status, "retweeted_status").
You should refer to Tweepy's documentation on extended Tweets, which has an example very similar to what I think you're trying to do.

How big can the list argument of twitter api.statuses.lookup() be?

I am using the Twitter API to retrieve tweets by ID. I have 13 IDs; however, tweets for only 8 of them are displayed (there is no error, though). The code is as follows:
from tweepy import OAuthHandler
import tweepy
import TwitterCredentials
# Tweet IDs looked up when the caller does not pass any.
# NOTE(review): the third ID is one digit shorter than the others — it looks
# like a dropped leading digit; confirm against the intended ID.
_DEFAULT_TWEET_IDS = (
    591672103788621824, 591673483832270848, 91675312032776192,
    591677980394393600, 591678618935267328, 591679831399477248,
    591681597054652416, 591681654047023104, 591681941017100288,
    591693321111744513, 591712699421052928, 591712700138291201,
    591714830446301184,
)

# Largest number of IDs sent in a single statuses_lookup request.
_LOOKUP_BATCH_SIZE = 100


def status_lookup(api, filename, id_list=None):
    """Look up tweets by ID, print them and append them to *filename*.

    Args:
        api: object exposing ``statuses_lookup(ids)`` (a tweepy API handle).
        filename: path of the UTF-8 text file the tweets are appended to.
        id_list: tweet IDs to fetch; defaults to the built-in sample IDs.
            Lists of any length are accepted and sent in batches.

    Returns:
        Always True, including when writing the file failed (the error is
        printed), as in the original implementation.

    Fewer tweets than IDs may come back: tweets that are no longer
    available are simply missing from the lookup result.
    """
    ids = list(_DEFAULT_TWEET_IDS if id_list is None else id_list)

    tweets = []
    for start in range(0, len(ids), _LOOKUP_BATCH_SIZE):
        tweets.extend(api.statuses_lookup(ids[start:start + _LOOKUP_BATCH_SIZE]))
    print(len(tweets))

    try:
        for tweet in tweets:
            print("New Tweet : ", tweet.text)
        # Open the file once for all tweets.
        with open(filename, 'a', encoding="utf-8") as tf:
            for tweet in tweets:
                tf.write("New Tweet:" + ":" + tweet.text + "\n")
    except Exception as e:
        # Exception rather than BaseException, so Ctrl-C still interrupts.
        print("Error on_data %s" % str(e))
    return True
if __name__ == '__main__':
    # Authenticate with the credentials module, then run the lookup and
    # append the results to the output file.
    auth = OAuthHandler(
        TwitterCredentials.CONSUMER_KEY,
        TwitterCredentials.CONSUMER_SECRET,
    )
    auth.set_access_token(
        TwitterCredentials.ACCESS_TOKEN,
        TwitterCredentials.ACCESS_TOKEN_SECRET,
    )
    api = tweepy.API(auth)
    fetched_tweets_filename = "Rtweets.txt"
    status_lookup(api, fetched_tweets_filename)
I am clueless! Please help!

The list of IDs passed to that method allows for up to 100 Tweet IDs.
I just checked this and it looks like for the five IDs not returned, the user accounts have been suspended and the Tweets are no longer available.

How many keywords is too many to put in a Tweepy filter while streaming live data

I have code similar to the code below and was wondering how many keywords I could put in the filter without degrading performance. I realize the answer depends on several factors affecting the computer's performance, such as the processor speed and connection speed of the sending computer, but how many will Twitter accept? Also, is there a rule of thumb for how many the sending computer can handle? I would like around 3000. Is that too many?
import sys
import tweepy
# Twitter application credentials (left empty in this snippet).
consumer_key = ''
consumer_secret = ''
access_key = ''
access_secret = ''
# OAuth handler and API handle built from the credentials above.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
class CustomStreamListener(tweepy.StreamListener):
    """Print each word of an incoming status with its author.

    Written so that it runs unchanged on Python 2 and Python 3; the output
    is the same as that of the original Python 2 print statements.
    """

    def on_status(self, status):
        # These do not change per word, so read them once.
        screen_name = status.user.screen_name
        user_id = status.user.id
        tweet = status.text
        # NOTE(review): str(status) is the string form of the whole status
        # object, not only its text — confirm status.text was not intended.
        for word in str(status).split():
            print("%s  |  %s  |  %s" % (word, screen_name, user_id))
            print(tweet)

    def on_error(self, status_code):
        sys.stderr.write('Encountered error with status code: %s\n' % status_code)
        return True  # Don't kill the stream

    def on_timeout(self):
        sys.stderr.write('Timeout...\n')
        return True  # Don't kill the stream
# Open the stream with a fresh listener and track the given keywords.
# NOTE(review): Twitter's standard filter endpoint documents a cap on the
# number of track keywords (400 at the time of writing) — confirm against
# the current API documentation before passing thousands of terms.
sapi = tweepy.streaming.Stream(auth, CustomStreamListener())
sapi.filter(track=['filter1', 'filter2'])

How to accept twitter stream using tweepy in streamparse spout and pass the tweets to bolt?

Recently, I started working on storm and being more comfortable with python, I decided to use streamparse for working with storm. I am planning to accept a twitter stream in spout and perform some computations in bolt. But I cannot figure out how I would code that in spout. I have gone through various streamparse tutorials but they all show spout emitting tuples from static list and do not have stream like twitter streaming api provides.
This is my code for storm:
class WordSpout(Spout):
    """Spout that emits a fixed set of words in an endless round-robin."""

    def initialize(self, stormconf, context):
        # Endless iterator over the sample vocabulary.
        vocabulary = ['dog', 'cat', 'zebra', 'elephant']
        self.words = itertools.cycle(vocabulary)

    def next_tuple(self):
        # Emit the next word as a one-field tuple.
        self.emit([next(self.words)])
This is my code for tweepy:
class listener(StreamListener):
    """Print the text of every incoming status.

    Uses print() calls with a single argument, which behave the same on
    Python 2 and Python 3 (the original mixed in Python 2-only statements).
    """

    def on_status(self, status):
        print(status.text)
        print("--------------------------------")
        return True

    def on_error(self, status):
        print("error")

    def on_connect(self):
        print("CONNECTED")
# Authenticate with the application and user credentials.
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
# Open the stream with a new listener and track tweets matching "california".
twitterStream = Stream(auth, listener())
twitterStream.filter(track=["california"])
How should I integrate both these codes?

To do this, I set up a Kafka queue: the tweepy listener writes status.text into the queue using pykafka, and the spout then constantly reads data from the queue to perform the analytics. My code looks a bit like this:
listener.py:
class MyStreamListener(tweepy.StreamListener):
    """Publish every word of each incoming tweet to the 'tweets' Kafka topic."""

    # Kafka topic handle; created on first use and then reused.
    _topic = None

    def _get_topic(self):
        """Connect to Kafka once and reuse the topic for later statuses."""
        if self._topic is None:
            client = KafkaClient(hosts='127.0.0.1:9092')
            self._topic = client.topics[str('tweets')]
        return self._topic

    def on_status(self, status):
        """Split the status text on spaces and produce one message per word."""
        with self._get_topic().get_producer(delivery_reports=False) as producer:
            for word in status.text.split(" "):
                try:
                    # The producer expects bytes; UTF-8 keeps non-ASCII words
                    # instead of dropping them.
                    producer.produce(word.encode('utf-8'))
                except Exception as exc:
                    # Best effort: report the failure and continue with the
                    # next word instead of silently swallowing it.
                    print("Could not publish %r: %s" % (word, exc))

    def on_error(self, status_code):
        """Report the error (except rate limiting) and stop the stream."""
        if status_code != 420:  # 420: exceed rate limit
            print("Failing with status code " + str(status_code))
        return False
# Authenticate and build the API handle.
auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)
# Start streaming with one listener instance, tracking the keyword 'is'.
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener)
myStream.filter(track=['is'])
Spout File:
from streamparse.spout import Spout
from pykafka import KafkaClient
class TweetSpout(Spout):
    """Spout that emits the messages found on the 'tweets' Kafka topic."""

    words = []

    def initialize(self, stormconf, context):
        """Connect to Kafka and open one consumer for the spout's lifetime."""
        client = KafkaClient(hosts='127.0.0.1:9092')
        self.topic = client.topics[str('tweets')]
        # Creating the consumer here, not in next_tuple, avoids opening a
        # new consumer on every call.
        self.consumer = self.topic.get_simple_consumer()

    def next_tuple(self):
        """Emit at most one pending message, then return.

        The original looped over the consumer and never returned; this
        fetches a single message without blocking so the caller can invoke
        next_tuple repeatedly. Nothing is emitted when no message is waiting
        (the original's argument-less ``self.emit()`` was not a valid call).
        """
        message = self.consumer.consume(block=False)
        if message is not None:
            self.emit([message.value])

save twitter user information into a file using StreamListener

I want to save Twitter user info such as name, statuses and tweet text to a file (JSON, TXT, CSV or any other format; JSON or plain text is preferred). I tried this code and some similar variants, but none of them work. Please have a look at the code below and suggest what changes I should make.
import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import os
import json
# Twitter application credentials (masked).
ckey = '**********'
consumer_secret = '**********'
access_token_key = '**********'
access_token_secret = '**********'
start_time = time.time() # reference time the listener measures its time limit from
keyword_list = ['twitter'] # keywords to track
#Listener Class Override
class listener(StreamListener):
    """Append the author name of each incoming tweet to a file.

    Collection runs for ``time_limit`` seconds counted from ``start_time``;
    after that ``on_data`` returns False so the stream disconnects.
    """

    def __init__(self, start_time, time_limit=60):
        super(listener, self).__init__()
        self.time = start_time
        self.limit = time_limit

    def on_data(self, data):
        """Store the user name found in the raw JSON message *data*.

        Returns True to keep streaming, False once the time limit is over.
        """
        # Time budget used up: ask the stream to stop instead of calling
        # exit() from inside the callback.
        if (time.time() - self.time) >= self.limit:
            return False
        try:
            # json.loads is a function and must be called with the payload;
            # subscripting it (json.loads["text"]) raised
            # "'function' object has no attribute '__getitem__'".
            all_data = json.loads(data)
            username = all_data["user"]["name"]
            # Binary append plus explicit UTF-8 works on Python 2 and 3 and
            # keeps non-ASCII names intact.
            with open('raw_tweets29.json', 'ab') as saveFile:
                saveFile.write(username.encode('utf8') + b'\n')
        except Exception as e:
            # Retrying the same payload cannot succeed, so report the
            # failure and move on to the next message.
            print('failed ondata, %s' % e)
        return True

    def on_error(self, status):
        # The original printed the undefined name ``statuses``.
        print(status)
auth = OAuthHandler(ckey, consumer_secret)  # OAuth object
auth.set_access_token(access_token_key, access_token_secret)
twitterStream = Stream(auth, listener(start_time, time_limit=20))
# Track the configured keyword_list instead of repeating the literal list.
twitterStream.filter(track=keyword_list)
When I run the code above, it gives me this error:
failed ondata, 'function' object has no attribute '__getitem__'
I would greatly appreciate any help you can give me in working this problem

I was making a mistake. I have now figured out that there is no need for the temporary 'text' variable; what I need to do is load the actual data with json.loads(data).
One more thing that is required is encoding the text before writing it.
Thanks, everyone, for your time.
import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import os,sys
import json
# Twitter application credentials (masked).
ckey = '***'
consumer_secret = '***'
access_token_key = '***'
access_token_secret = '***'
# Reference time the listener measures its time limit from.
start_time = time.time()
class listener(StreamListener):
    """Append the text of each incoming tweet to a file.

    Collection runs for ``time_limit`` seconds counted from ``start_time``;
    after that ``on_data`` returns False so the stream disconnects.
    """

    def __init__(self, start_time, time_limit=300):
        super(listener, self).__init__()
        self.time = start_time
        self.limit = time_limit

    def on_data(self, data):
        """Store the text found in the raw JSON message *data*.

        Returns True to keep streaming, False once the time limit is over.
        """
        # Time budget used up: ask the stream to stop instead of calling
        # exit() from inside the callback.
        if (time.time() - self.time) >= self.limit:
            return False
        try:
            tweet = json.loads(data)
            text = tweet['text']
            # Binary append plus explicit UTF-8 works on Python 2 and 3.
            with open('user_tweets29.json', 'ab') as saveFile:
                saveFile.write(text.encode('utf8') + b'\n')
        except Exception as e:
            # Retrying the same payload cannot succeed, so report the
            # failure and move on to the next message.
            print('failed ondata, %s' % e)
        return True

    def on_error(self, status):
        # The original printed the undefined name ``statuses``.
        print(status)
# Authenticate with the application and user credentials.
auth = OAuthHandler(ckey, consumer_secret)
auth.set_access_token(access_token_key, access_token_secret)
# Stream tweets matching 'twitter' through a listener limited to 60 seconds.
twitterStream = Stream(auth, listener(start_time, time_limit=60))
twitterStream.filter(track=['twitter'])

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to get media_url from tweets using the Tweepy API - python

You should try two things: add entities to your request > tweepy.Cursor(api.search, q="#hashtag", count=5, include_entities=True), and check that media is not null: > if 'media' in tweet.entities: for image in tweet.entities['media']: (do something with image['media_url']). Hope this helps.

Related

Tweepy streaming in MongoDB with full_text

How big can the list argument of twitter api.statuses.lookup() be?

How many keywords is too many to put in a Tweepy filter while streaming live data

How to accept twitter stream using tweepy in streamparse spout and pass the tweets to bolt?

save twitter user information into a file using StreamListener

Categories

Resources