How to get Tweets of a Keyword - python

I'm trying to get tweets from a certain keyword 'comfama'. but I can't seem to get any results. Is something wrong with my code? I'm tried with 'donald trump' and this keyword shows results but with 'comfama' nothing happens.
import tweepy
import pandas
import json # The API returns JSON formatted text
TRACKING_KEYWORDS = ['comfama']
OUTPUT_FILE = "comfama_tweets.txt"
TWEETS_TO_CAPTURE = 10
access_token = "xxx"
access_token_secret = "xxx"
consumer_key = "xxx"
consumer_secret = "xxx"
# Pass OAuth details to tweepy's OAuth handler
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
class MyStreamListener(tweepy.StreamListener):
"""
Twitter listener, collects streaming tweets and output to a file
"""
def __init__(self, api=None):
super(MyStreamListener, self).__init__()
self.num_tweets = 0
self.file = open(OUTPUT_FILE, "w")
def on_status(self, status):
tweet = status._json
self.file.write( json.dumps(tweet) + '\n' )
self.num_tweets += 1
# Stops streaming when it reaches the limit
if self.num_tweets <= TWEETS_TO_CAPTURE:
if self.num_tweets % 100 == 0: # just to see some progress...
print('Numer of tweets captured so far: {}'.format(self.num_tweets))
return True
else:
return False
self.file.close()
def on_error(self, status):
print(status)
# Initialize Stream listener
l = MyStreamListener()
# Create you Stream object with authentication
stream = tweepy.Stream(auth, l)
# Filter Twitter Streams to capture data by the keywords:
stream.filter(track=[TRACKING_KEYWORDS])

Related

Twitter stream not delivering to S3 in json format

I'm trying to stream twitter data into AWS S3, however the stream data is not in json format:
S3 bucket does not use any compression. Firehose does not use any data transform. I'm getting the tweets, but they have no type. I need to the load the data into a pandas dataframe.
class TwitterStreamer():
"""
Class for streaming and processing fetched tweets
"""
def stream_tweets(self, fetched_tweets_filename, hash_tag_list):
#This handles twitter authentication and connection to streaming API
listener = TwitterListener(fetched_tweets_filename)
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(token_key, token_secret)
stream = Stream(auth, listener)
#filter stream
stream.filter(track=hash_tag_list, languages=['en'])
#Create class to print tweet - inherits from StreamListener
class TwitterListener(StreamListener):
"""
This is a basic listener class that just prints received tweets
"""
def __init__(self, fetched_tweets_filename):
self.fetched_tweets_filename = fetched_tweets_filename
def on_data(self, data):
tweet = json.loads(data)
if 'text' in tweet.keys():
#print (tweet['text'])
message_lst = [tweet['text'].replace('\n',' ').replace('\r',' '),
str(tweet['created_at']),'\n']
message = '\t'.join(message_lst)
print(message)
response = client.put_record(DeliveryStreamName=DeliveryStreamName,
Record={'Data': message})
print('Status: ' + json.dumps(response['ResponseMetadata']['HTTPStatusCode']))
try:
#print(data)
with open(self.fetched_tweets_filename, 'a') as tf:
tf.write(message)
return True
except BaseException as e:
print('error:')
return True
def on_error(self, status):
print(status)
if __name__ == "__main__":
#session = boto3.Session()
#kinesis_client = session.client('firehose')
client = boto3.client('firehose',
region_name='us-east-1',
aws_access_key_id='',
aws_secret_access_key=''
)
#partition_key = str(uuid.uuid4())
hash_tag_list=['omicron']
fetched_tweets_filename = 'tweets.json'
DeliveryStreamName = 'twitter-delivery-stream'
twitter_streamer = TwitterStreamer()
twitter_streamer.stream_tweets(fetched_tweets_filename, hash_tag_list)

Tweepy Streaming Direct Messages

I've been using Tweepy with Python 2.7 to stream tweets and everything has been working fine, except the on_direct_message() method isn't being called when I send the account a direct message. I've updated my permissions and even tried using the on_data() method, but it can't seem to detect direct messages being sent to the account:
import tweepy
CONSUMER_KEY = ''
CONSUMER_SECRET = ''
ACCESS_KEY = ''
ACCESS_SECRET = ''
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth, wait_on_rate_limit=True)
followed_accounts = ['account', 'account']
followed_ids = []
for account in followed_accounts:
followed_ids.append(str(api.get_user(screen_name=account).id))
class StdOutListener(tweepy.StreamListener):
def on_direct_message(self, status):
author = status.author.screen_name
api.send_direct_message(screen_name=author, text='response')
return True
def on_status(self, status):
author = status.author.screen_name
statusID = status.id
print status.text + "\n"
api.update_status('response')
api.send_direct_message(screen_name='my username', text='Just sent a Tweet')
return True
def on_data(self, status):
print 'Entered on_data()'
print status
return True
def on_error(self, status_code):
print "Error Code: " + str(status_code)
if status_code == 420:
return False
else:
return True
def on_timeout(self):
print('Timeout...')
return True
if __name__ == '__main__':
listener = StdOutListener()
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
stream = tweepy.Stream(auth, listener)
stream.filter(follow=followed_ids)
Sending the account a direct message gives no errors, and the account receives the message properly on Twitter.

How to accept twitter stream using tweepy in streamparse spout and pass the tweets to bolt?

Recently, I started working on storm and being more comfortable with python, I decided to use streamparse for working with storm. I am planning to accept a twitter stream in spout and perform some computations in bolt. But I cannot figure out how I would code that in spout. I have gone through various streamparse tutorials but they all show spout emitting tuples from static list and do not have stream like twitter streaming api provides.
This is my code for storm:
class WordSpout(Spout):
def initialize(self, stormconf, context):
self.words = itertools.cycle(['dog', 'cat','zebra', 'elephant'])
def next_tuple(self):
word = next(self.words)
self.emit([word])
This is my code for tweepy:
class listener(StreamListener):
def on_status(self,status):
print(status.text)
print "--------------------------------"
return(True)
def on_error(self, status):
print "error"
def on_connect(self):
print "CONNECTED"
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
twitterStream.filter(track=["california"])
How should I integrate both these codes?
To do this, I setup a kafka queue, by which the tweepy listener wrote the status.text into the queue using pykafka. The spout then constantly read data from the queue to perform the analytics. My code looks a bit like this:
listener.py:
class MyStreamListener(tweepy.StreamListener):
def on_status(self, status):
# print(status.text)
client = KafkaClient(hosts='127.0.0.1:9092')
topic = client.topics[str('tweets')]
with topic.get_producer(delivery_reports=False) as producer:
# print status.text
sentence = status.text
for word in sentence.split(" "):
if word is None:
continue
try:
word = str(word)
producer.produce(word)
except:
continue
def on_error(self, status_code):
if status_code == 420: # exceed rate limit
return False
else:
print("Failing with status code " + str(status_code))
return False
auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener)
myStream.filter(track=['is'])
Spout File:
from streamparse.spout import Spout
from pykafka import KafkaClient
class TweetSpout(Spout):
words = []
def initialize(self, stormconf, context):
client = KafkaClient(hosts='127.0.0.1:9092')
self.topic = client.topics[str('tweets')]
def next_tuple(self):
consumer = self.topic.get_simple_consumer()
for message in consumer:
if message is not None:
self.emit([message.value])
else:
self.emit()

stream tweets from different locations

How can I stream tweets from a country using a box-boundaries rectangle?
I have code to stream by location but I want to do a loop that says: for each tweet see if this tweet is in rectangle 1 or rectangle2 or in rectangle(n) else do not take this tweet.
The code I have is:
import sys
import tweepy
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret=''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
class CustomStreamListener(tweepy.StreamListener):
def on_data(self, data):
print (data)
return True
def on_error(self, status):
print (status)
sapi = tweepy.streaming.Stream(auth, CustomStreamListener())
sapi.filter(locations=[1.9,34.7,7.94,36.63])
In this code I filter for one rectangle but I want a code like this:
For each tweet t_i:
For each rectangle r_j:
If tweet_is_in_rectangle(r_j) == False:
exclude t_i

Tweepy odd streaming error - python

I am attempting to make a script that searches in the user timeline, then favorites tweets. For some reason, it isnt working.
I wrote this code:
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import *
import tweepy, json
class StdOutListener(StreamListener):
def on_data(self, data):
data = json.loads(data)
try:
api.create_favorite(data[id])
except:
pass
print 'Favoriting tweet id ' + data[id] + ' in twitter timeline...'
return True
def on_error(self, status):
print status
l = StdOutListener()
auth = tweepy.OAuthHandler('x', 'x')
auth.set_access_token('x-x', 'x')
api = tweepy.API(auth)
stream = Stream(auth, l)
userz = api.followers_ids(screen_name='smileytechguy')
keywords = ['ebook', 'bot']
stream.filter(track=keywords, follow=userz)
But I am getting this Error message
Traceback (most recent call last):
File "FavTL.py", line 27, in <module>
stream.filter(track=keywords, follow=userz)
File "build\bdist.win-amd64\egg\tweepy\streaming.py", line 310, in filter
AttributeError: 'long' object has no attribute 'encode'
any idea on how can I fix it.
This code should work. Don't forget to enable writing through your API-keys
consumer_key = '..'
consumer_secret = '..'
access_token = '..'
access_secret = '..'
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)
class StdOutListener(StreamListener):
def on_data(self, data):
# Twitter returns data in JSON format - we need to decode it first
decoded = json.loads(data)
tweet_id = decoded['id']
api.create_favorite(tweet_id)
print 'Favoriting tweet id ' + str(tweet_id) + ' in twitter timeline...'
time.sleep(65)
return True
def on_error(self, status):
if(status == 420):
print "Twitter is limiting this account."
else:
print "Error Status "+ str(status)
l = StdOutListener()
api = tweepy.API(auth)
stream = Stream(auth, l)
userz = api.followers_ids('smileytechguy')
keywords = ['ebook', 'bot']
stream.filter(track=keywords, follow=str(userz))

Categories