Filtering Tweets By Location - python

I'm trying to modify this script to only save the JSON of tweets that have a location attached, and I'm running into an issue where checking that the value isn't null doesn't seem to work. `has_key` doesn't help, because every tweet has the key — for most of them the value is just null. `is not None` isn't working the way I tried it, because Python's None and JSON's null look different to my check, and comparing the value as the text "null" also didn't work. Does anyone have a clever idea on how to solve this?
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import pymongo
import tweepy
import json
# Twitter API credentials (redacted — fill in before running).
access_key = '' #redacted for privacy and such
access_secret = ''
consumer_key = ''
consumer_secret = ''
# OAuth 1.0a sign-in; `api` is the authenticated REST client.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
#This is a basic listener that will print incoming data to stdout
class StdOutListener(StreamListener):
    """Echo each raw JSON payload to stdout (Python 2 print syntax)."""

    def on_data(self, data):
        # Print the raw payload; returning True keeps the stream open.
        print data
        return True

    def on_error(self, status):
        # Print the HTTP error status code; stream continues by default.
        print status
#Customizes the stream and saves text, coordinates and lang to MongoDB,
#keeping only tweets that actually carry a geo location.
class CustomStreamListener(tweepy.StreamListener):
    def __init__(self, api):
        self.api = api
        # BUG FIX: super() must be given the subclass, not the parent class;
        # super(tweepy.StreamListener, ...) skipped StreamListener.__init__.
        super(CustomStreamListener, self).__init__()
        # 'crime' database on a local MongoDB instance.
        self.db = pymongo.MongoClient('localhost', 27017).crime

    def on_data(self, data):
        jd = json.loads(data)
        # Every tweet carries a 'coordinates' key, but for most tweets the
        # value is JSON null, which json.loads turns into Python None.  So
        # test the VALUE, not the key: .get() returns None both when the key
        # is absent and when it maps to null.  Also require 'text' so that
        # non-tweet payloads (delete/limit notices) are skipped.
        if jd.get('coordinates') is not None and 'text' in jd:
            self.db.tweets.insert({'text': jd['text'],
                                   'coordinates': jd['coordinates'],
                                   'lang': jd['lang']})

    def on_error(self, status_code):
        return True # Don't kill the stream

    def on_timeout(self):
        return True # Don't kill the stream
# Start a filtered stream tracking tweets that mention 'guns'; each raw
# payload is delivered to CustomStreamListener.on_data.
l = tweepy.streaming.Stream(auth, CustomStreamListener(api))
l.filter(track=['guns'])

Checking the length won't work either: when the value is JSON null, json.loads gives you Python None, and len(None) raises a TypeError. Test the value rather than the key — dict.get() returns None both when the key is missing and when it maps to null:

if jd.get('coordinates') is not None:
    self.db.tweets.insert( { 'text' : jd['text'], 'coordinates' : jd['coordinates'], 'lang' : jd['lang'] } )

Related

twitter app closes after opening(This App has violated Twitter Rules and policies)

I have had a Twitter app for many months.
Yesterday, after I added a "reply" feature to my app, Twitter limited it,
so I decided to delete the app and create a new one. But after I replaced the API keys and secrets in my code, it still doesn't work and closes right after I open it in the terminal.
How can I solve this issue? I tried sending the issue to Twitter support but haven't received any helpful email.
I also didn't receive any email about this issue beforehand.
import tweepy
import time
# Twitter credentials (redacted): OAuth 1.0a user keys plus OAuth 2.0
# bearer/client values (the latter are unused below).
consumer_key = "xxxxxxxxx"
consumer_secret = "xxxxxxxxx"
access_token = "xxxxxxxxxx"
access_token_secret = "xxxxxxxxxxx"
bearer_token = "xxxxxxxxxxx"
client_secret= "xxxxxxxxxxxxx"
client_id = "xxxxxxxxxxxxxxxx"
# OAuth 1.0a sign-in; `api` performs the likes/retweets/replies.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
class MyStreamListener(tweepy.StreamListener):
    """Likes matching tweets, retweets those without hashtags, and replies
    to tweets mentioning the Khosravi border crossing — unless the tweet
    contains any word from the module-level filter_words blacklist."""

    def on_status(self, tweet):
        print(tweet.text)
        try:
            text = tweet.text
            # Guard clause: skip tweets containing any blacklisted word
            # (replaces the old `if res == True: pass / else:` pattern).
            if any(word in text for word in filter_words):
                return
            api.create_favorite(tweet.id)
            # Retweet only tweets that carry no hashtag.
            if "#" not in text:
                api.retweet(tweet.id)
            # Reply logic was duplicated in both branches of the old code;
            # it applies in either case, so it is hoisted out here.
            if "مرز خسروی" in text:
                username = "#" + tweet.user.screen_name
                api.update_status(status = f"{username}مقدمتان گرامی باد،زیارتتان قبول",in_reply_to_status_id = tweet.id)
        except tweepy.TweepError as e:
            print(e.reason)
# Wire the listener into a stream using the already-authenticated handler.
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth = api.auth, listener=myStreamListener)
# Track list: Kermanshah-related place names, foods, people and landmarks.
keywords = ["کرمانشاه","kermanshah","فرامان","کرماشان","کرمانشاهی","برناج","هرسین","کنگاور","پاوه",
"روانسر","جوانرود","نوسود","نودشه","هجیج","ژاپه","هوره","سرپل ذهاب","قصرشیرین","ثلاث باباجانی",
"تنگه مرصاد","کرند غرب","تاریکه بازار","مرز خسروی","دوکل","پاساژ ارگ","طاق بستان","بیستون","کوزران",
"نان برنجی","نان خرمایی","خورشت خلال","نوبهار پلازا","بازی دراز","بزره دماغ","پارک کوهستان",
"بستنی نوبهار","آبشوران","دالاهو","تاق وه سان","سراب قنبر","کرناچی","قره سو","چقامیرزا",
"کوچه لکا","دنده کباب","ترخینه","نواله","کلانه","پراو","فرخشاد","مرز پرویزخان","چیامیرزا","چیاکو",
"تنگه کنشت","سنقر","لشگر ۸۱","ماهیدشت","نیوی","جشن انار","جوانشیر",
"پاتاق","میدان تاجگذاری","چهارراه شیرخورشید","شاباد","گرگگه","قسقوان","شهرام ناظری","حسین صفامنش",
"پوران درخشنده","رحیم معینی کرمانشاهی","کزاری","میر جلال الدین کزاری","باختران","پرتو کرمانشاهی",
"یداله بهزاد","آیت الله نجومی","تکیه معاون الملک","تکیه بیگلربیگی","هورامان","مسجد جامع شافعی",
"خانه خدیوی","کلیسای پنطی کاستی","پل چهر","تاریکه بازار","مسجد عمادالدوله","مسجد حاج شهباز خان",
"مسجد شهباز خان","آرامگاه ویس","طاق گرا","فرهاد تراش","شکارگاه خسروپرویز","معبد آناهیتا",
"مجسمه هرکول","سان رستم","مادها","دو اشکفت","گاکیه","گودین تپه","آش عباسعلی","سورانه","پاغازه",
"ونوشک","خورشت کنگر","قزانچی","سراب نیلوفر","سراب یاوری","کوه شاهو","رودخانه سیروان","مدلل","لیلیوم",
"کسگم","سراب صحنه","چهارراه نوبهار","گوران","قلخانی","عید یاران"]
# Blacklist: tweets containing any of these words are ignored by on_status.
filter_words = ["بازنشستگان","#تجمع_اعتراضی","احمق","بازداشت","بپاخیز","اعتراض"
,"تجمع","گوه","سگ","عوضی","بیشعور","الدنگ","الاع","جنایتکار","گاو","خر","پوفیوز","زندان","کیر","سگ",
"سیاسی","اعدام","مرگ","رژیم","آخوند","کوس","کون","جنده","مادر","حروم زاده","حرام زاده","سرنگونی",
"قیام","رجوی","شاهزاده","شورشی","کولبر","حقوق","بیشرف","ریت","سرکوبگر","شکنجه","معتاد","ایدز",
"دیکتاتور","ناموس","غارتگر","#اعتراضات_سراسری","سکس","خامنه ای","خمینی","تظاهرات","فراخوان",
"سلیمانی","جوانان","شعار","هموطن","آبادان#","خیزش","خیابان","قیام_نهایی#","بپاخاسته","مسعود رجوی",
"اسلام","آخوند","آبادان","بپاخواسته","قیام#","کومله","دموکرات","صیغه","پژاک","خاله","قتل","دستگیر",
"کور","بی سواد","حزب","دادگستری","دادستان","قوه قضاییه","تجمعات","فاشیست","کسشر","کصشر","دوزاری",
"فحش","سپاه","اظلاعات","بازجویی","مامور","اوین","جرم","سنگسار"]
# Stream only Persian-language tweets matching the track list.
myStream.filter(track=keywords,languages=["fa"])

Why is Python unable to output the tweet text of the 2nd account?

My environment is below:
Python Python 3.6.5
sqlite3 3.28.0
import tweepy
import sqlite3
# Twitter API credentials (redacted).
consumer_key = \
"XXXXXXXX"
consumer_secret = "XXXXXXXX"
access_token = "XXXXXXXX"
access_token_secret = "XXXXXXXX"
# OAuth 1.0a authentication.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
class Database:
    """Thin wrapper owning the SQLite connection and a single cursor."""

    def __init__(self):
        # NOTE(review): self.c is one cursor shared by reads AND writes;
        # re-executing it while a previous SELECT is still being iterated
        # discards that SELECT's remaining rows.
        self.dbpath = 'db.sqlite3'
        self.conn = sqlite3.connect(self.dbpath)
        self.c = self.conn.cursor()

# Module-level singleton used by the helper functions below.
db = Database()
def output_users_from_db():
    """Return all (name, comment) rows from twitter_users as a list.

    BUG FIX: this used to return the live shared cursor (db.c).  Any later
    write through the same cursor — e.g. update_comment() — re-executed it
    and silently ended the caller's iteration after the first row, which is
    why print() only ever worked for the first account.  Materializing the
    rows with fetchall() makes iteration independent of later cursor use;
    the result is still iterable, so existing callers are unaffected.
    """
    return db.c.execute('select name, comment from twitter_users').fetchall()
def update_comment(name, comment='null'):
    """Overwrite the comment column for the user *name* and commit.

    NOTE(review): the default 'null' is the four-character STRING "null",
    not SQL NULL — confirm that is intended.
    """
    # Parameterized query: safe against SQL injection.
    db.c.execute("""update twitter_users set comment = ? where name = ?""", (comment, name))
    db.conn.commit()
if __name__ == "__main__":
    api = tweepy.API(auth)
    users_info_from_db = output_users_from_db()
    # Print each stored user's name and their most recent tweet.
    for i, user_info_on_db in enumerate(users_info_from_db):
        print(user_info_on_db[0])
        time_line = api.user_timeline(screen_name=user_info_on_db[0])
        for i, info in enumerate(time_line):  # NOTE: shadows the outer i
            # Both print calls can output all characters fetched from Twitter.
            print(user_info_on_db[0]) # user account
            print(info.text) # tweet
            break  # only the newest tweet per user
Code above works. But if I write code below,
time_line = api.user_timeline(screen_name=user_info_on_db[0])
for i, info in enumerate(time_line):
# Below print functions can out put all of characters from twitter
print(user_info_on_db[0]) # user account
print(info.text) # tweet
update_comment(user_info_on_db[0], comment=info.text)
break
print() only works once; it never prints the 2nd account's tweet. Why is it that, when the code includes update_comment(user_info_on_db[0], comment=info.text), print() cannot output the tweet of the 2nd account?

Retrieve Arabic data from Twitter

I want to retrieve Arabic data from Twitter, Using Python3.5 and Tweepy.
I found a program that works very well with English or French, but for Arabic the tweets come out as escaped Unicode sequences instead of readable text.
for exp:
\ u04f \ u04e \ u043e \ u0430 \ U0430 \ u044f
This is the programme :
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
# Twitter API credentials (redacted).
ConsumerKey = 'Your_Consumer_Key'
ConsumerSecret = 'Your_Consumer_Secret'
AccessToken = 'Your_Access_Token'
AccessTokenSecret = 'Your_Access_Token_Secret'
# Output file in text mode with explicit UTF-8 so Arabic text writes cleanly.
tweets = open('Tweets.txt',mode='w',encoding="utf8",newline=None)
class listener(StreamListener):
    """Append every raw streaming payload to the open Tweets.txt file."""

    def on_data(self, data):
        # Persist first, then echo; True keeps the stream connected.
        tweets.write(data)
        print(data)
        return True

    def on_error(self, status):
        # Report the HTTP error code (implicitly returns None).
        print(status)
# Sign in and stream tweets matching any of the Arabic terms or "2016".
auth = OAuthHandler (ConsumerKey , ConsumerSecret)
auth.set_access_token(AccessToken , AccessTokenSecret)
twitterStream = Stream(auth , listener())
twitterStream.filter(track=['أحوال','الطقس','2016'])
# NOTE(review): filter() blocks indefinitely, so this close() only runs
# after the stream terminates.
tweets.close()
I used some functions , but I get errors :
data.decode() I get an error AttributeError: 'str' object has no attribute 'decode'
u(data) I get an error NameError: name 'u' is not defined
track=[unicode('2016','utf-8'),unicode('الطقس','utf-8'),unicode('أحوال','utf-8')] I get an error NameError: name 'unicode' is not defined
That code works very well
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
import sys
# Twitter API credentials (redacted).
ConsumerKey = 'Your_Consumer_Key'
ConsumerSecret = 'Your_Consumer_Secret'
AccessToken = 'Your_Access_Token'
AccessTokenSecret = 'Your_Access_Token_Secret'
# Translation table mapping every astral (non-BMP, >= U+10000) code point
# to U+FFFD so print() cannot fail on terminals that reject such characters.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
#tweets = open('Tweets.txt',mode='w',encoding="utf8",newline=None)
class listener(StreamListener):
    """Print the text of each tweet with non-BMP characters replaced."""

    def on_data(self, data):
        try:
            # Parse the raw JSON payload and pull out just the tweet text.
            tweet = json.loads(data)['text']
            # Strip astral-plane characters (emoji, etc.) via the
            # module-level non_bmp_map before printing.
            print(tweet.translate(non_bmp_map))
        except KeyError:
            # Deliberate: payloads without 'text' (deletes, limit notices)
            # are simply skipped.
            pass

    def on_error(self, status):
        print(status)
# Sign in and stream tweets tracking a single high-volume Arabic keyword.
auth = OAuthHandler (ConsumerKey , ConsumerSecret)
auth.set_access_token(AccessToken , AccessTokenSecret)
twitterStream = Stream(auth , listener())
twitterStream.filter(track=['الله'])
#tweets.close()

Elasticsearch: Cannot add documents to an index

Using tweepy and elasticsearch Python modules I can create the index, but the documents do not get created/added. I've taken the code from an example that worked for the author (isn't that always the case).
#!/usr/bin/env python
#
import tweepy
import sys
import json
from textwrap import TextWrapper
from datetime import datetime
from elasticsearch import Elasticsearch
# Twitter API credentials (dummy values).
consumer_key = "abcd"
consumer_secret = "1234"
access_token = "qwerty-5678"
access_secret = "huffalump"
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
# Elasticsearch client with default connection (localhost:9200).
es = Elasticsearch()
#es.indices.create(index="twitter", ignore=400)
class StreamListener(tweepy.StreamListener):
status_wrapper = TextWrapper(width=60, initial_indent=' ', subsequent_indent=' ')
def on_status(self, status):
try:
print 'n%s %s' % (status.author.screen_name, status.created_at)
json_data = status._json
print json_data['text']
es.create(index="twitter", doc_type="twitter_twp", body=json_data)
except Exception, e:
print e
pass
# 'timeout' is the socket timeout in seconds.
streamer = tweepy.Stream(auth=auth, listener=StreamListener(), timeout=3000000000 )
#Fill with your own Keywords below
terms = ['cyber']
# Positional arguments: follow=None, track=terms.
streamer.filter(None,terms)
#streamer.userstream(None)
I monitor my Elasticsearch index at http://192.168.1.7:9200/_cat/indices?v and the data never changes:
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
yellow open twitter WDYgTflkRZ-5dTRHx2zuEw 3 2 0 0 390b 390b
I've tried everything - even reading the docs. Why aren't my docs going in my index!?

Stopping Tweepy stream after a duration parameter (# lines, seconds, #Tweets, etc)

I am using Tweepy to capture streaming tweets based off of the hashtag #WorldCup, as seen by the code below. It works as expected.
class StdOutListener(StreamListener):
    ''' Handles data received from the stream. '''

    def on_status(self, status):
        # Prints the text of the tweet
        print('Tweet text: ' + status.text)
        # There are many options in the status object;
        # hashtags can be very easily accessed.
        # BUG FIX: the attribute is 'entities', not 'entries'.
        for hashtag in status.entities['hashtags']:
            print(hashtag['text'])
        # BUG FIX: Python's boolean literal is True, not 'true' (NameError).
        return True

    def on_error(self, status_code):
        print('Got an error with status code: ' + str(status_code))
        return True # To continue listening

    def on_timeout(self):
        print('Timeout...')
        return True # To continue listening
if __name__ == '__main__':
    listener = StdOutListener()
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)
    # Follow one account AND track the #WorldCup hashtag simultaneously.
    stream.filter(follow=[38744894], track=['#WorldCup'])
Because this is a hot hashtag right now, searches don't take too long to catch the maximum amount of tweets that Tweepy lets you get in one transaction. However, if I was going to search on #StackOverflow, it might be much slower, and therefore, I'd like a way to kill the stream. I could do this on several parameters, such as stopping after 100 tweets, stopping after 3 minutes, after a text output file has reached 150 lines, etc. I do know that the socket timeout time isn't used to achieve this.
I have taken a look at this similar question:
Tweepy Streaming - Stop collecting tweets at x amount
However, it appears to not use the streaming API. The data that it collects is also very messy, whereas this text output is clean.
Can anyone suggest a way to stop Tweepy (when using the stream in this method), based on some user input parameter, besides a keyboard interrupt?
Thanks
I solved this, so I'm going to be one of those internet heroes that answers their own question.
This is achieved by using static Python variables for the counter and for the stop value (e.g. stop after you grab 20 tweets). This is currently a geolocation search, but you could easily swap it for a hashtag search by using the getTweetsByHashtag() method.
#!/usr/bin/env python
from tweepy import (Stream, OAuthHandler)
from tweepy.streaming import StreamListener
class Listener(StreamListener):
    """Stream listener that stops itself after Listener.stop_at tweets."""

    tweet_counter = 0 # Static (class-level) variable shared by all instances

    def login(self):
        # FIX: bare `NAME =` lines were syntax errors; use empty-string
        # placeholders — fill in your own credentials.
        CONSUMER_KEY = ''
        CONSUMER_SECRET = ''
        ACCESS_TOKEN = ''
        ACCESS_TOKEN_SECRET = ''
        auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        return auth

    def on_status(self, status):
        Listener.tweet_counter += 1
        print(str(Listener.tweet_counter) + '. Screen name = "%s" Tweet = "%s"'
              %(status.author.screen_name, status.text.replace('\n', ' ')))
        if Listener.tweet_counter < Listener.stop_at:
            return True
        else:
            print('Max num reached = ' + str(Listener.tweet_counter))
            return False  # returning False disconnects the stream

    def getTweetsByGPS(self, stop_at_number, latitude_start, longitude_start, latitude_finish, longitude_finish):
        try:
            Listener.stop_at = stop_at_number # Create static variable
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60) # Socket timeout value
            streaming_api.filter(follow=None, locations=[latitude_start, longitude_start, latitude_finish, longitude_finish])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')

    def getTweetsByHashtag(self, stop_at_number, hashtag):
        try:
            # BUG FIX: was 'Listener.stopAt', which left Listener.stop_at
            # unset and made on_status raise AttributeError.
            Listener.stop_at = stop_at_number
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60)
            streaming_api.filter(track=[hashtag])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')
listener = Listener()
# Stop after 20 tweets from the Atlanta-area bounding box.
listener.getTweetsByGPS(20, -84.395198, 33.746876, -84.385585, 33.841601) # Atlanta area.
The above solution was helpful for getting tweets by hashtag, although there is a small error in the getTweetsByHashtag function: it used Listener.stopAt instead of Listener.stop_at = stop_at_number.
I have tweaked the code a little bit, so you can easily kill the code for a specified number of seconds.
defined new functions init to help tweak the seconds and "on_data" which contains more information that on_status function.
Enjoy:
from tweepy import (Stream, OAuthHandler)
from tweepy.streaming import StreamListener
class Listener(StreamListener):
    """Stream listener that stops after a tweet-count OR a time limit,
    whichever is hit first."""

    tweet_counter = 0 # Static variable shared across instances

    def login(self):
        # FIX: bare `NAME =` lines were syntax errors; use empty-string
        # placeholders — fill in your own credentials.
        CONSUMER_KEY = ''
        CONSUMER_SECRET = ''
        ACCESS_TOKEN = ''
        ACCESS_TOKEN_SECRET = ''
        auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        return auth

    def __init__(self, time_limit=8):
        # BUG FIX: 'time' was never imported anywhere; imported locally
        # here (ideally move `import time` to the top of the module).
        import time
        self.start_time = time.time()
        self.limit = time_limit
        super(Listener, self).__init__()

    def on_data(self, data):
        import time  # see note in __init__
        Listener.tweet_counter += 1
        # Keep streaming only while BOTH limits hold: elapsed seconds below
        # self.limit AND tweets seen below Listener.stop_at.
        if (time.time() - self.start_time) < self.limit and Listener.tweet_counter < Listener.stop_at:
            print(str(Listener.tweet_counter)+data)
            return True
        else:
            print("Either Max number reached or time limit up at:"+ str(Listener.tweet_counter)+" outputs")
            # BUG FIX: removed self.saveFile.close() — no saveFile attribute
            # is ever created, so it raised AttributeError on shutdown.
            return False  # returning False disconnects the stream

    def getTweetsByGPS(self, stop_at_number, latitude_start, longitude_start, latitude_finish, longitude_finish):
        try:
            Listener.stop_at = stop_at_number # Create static variable
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60) # Socket timeout value
            streaming_api.filter(follow=None, locations=[latitude_start, longitude_start, latitude_finish, longitude_finish])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')

    def getTweetsByHashtag(self, stop_at_number, hashtag):
        try:
            Listener.stop_at = stop_at_number
            auth = self.login()
            streaming_api = Stream(auth, Listener(), timeout=60)
            streaming_api.filter(track=[hashtag])
        except KeyboardInterrupt:
            print('Got keyboard interrupt')
listener = Listener()
#listener.getTweetsByGPS(20, -84.395198, 33.746876, -84.385585, 33.841601) # Atlanta area.
# 1000 = max tweets to collect; "hi" = tracked keyword; the time limit in
# seconds is set via Listener.__init__'s time_limit parameter.
listener.getTweetsByHashtag(1000,"hi")
You can change the 1000 value to the max tweets you want and the "hi" to the keyword you need find.. Under the init function, change the 8 time_limit to the value you want in seconds. So you use it depending on what you want.
You can either set limited time and adjust the count to a very high value, or set the count of tweets needed and give a higher time value, so it can get to the count. Your choice!
Chukwu Gozie unu (God bless!)

Categories