Retrieve Arabic data from Twitter - python

I want to retrieve Arabic data from Twitter, Using Python3.5 and Tweepy.
I find a program that works very well with the english or french language But For the Arabic language the tweets are decode.
for exp:
\ u04f \ u04e \ u043e \ u0430 \ U0430 \ u044f
This is the programme :
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
ConsumerKey = 'Your_Consumer_Key'
ConsumerSecret = 'Your_Consumer_Secret'
AccessToken = 'Your_Access_Token'
AccessTokenSecret = 'Your_Access_Token_Secret'
tweets = open('Tweets.txt',mode='w',encoding="utf8",newline=None)
class listener(StreamListener) :
def on_data (self , data) :
tweets.write(data)
print (data)
return True
def on_error (self , status) :
print (status)
auth = OAuthHandler (ConsumerKey , ConsumerSecret)
auth.set_access_token(AccessToken , AccessTokenSecret)
twitterStream = Stream(auth , listener())
twitterStream.filter(track=['أحوال','الطقس','2016'])
tweets.close()
I used some functions , but I get errors :
data.decode() I get an error AttributeError: 'str' object has no attribute 'decode'
u(data) I get an error NameError: name 'u' is not defined
track=[unicode('2016','utf-8'),unicode('الطقس','utf-8'),unicode('أحوال','utf-8')] I get an error NameError: name 'unicode' is not defined

That code works very well
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
import sys
ConsumerKey = 'Your_Consumer_Key'
ConsumerSecret = 'Your_Consumer_Secret'
AccessToken = 'Your_Access_Token'
AccessTokenSecret = 'Your_Access_Token_Secret'
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
#tweets = open('Tweets.txt',mode='w',encoding="utf8",newline=None)
class listener(StreamListener) :
def on_data (self , data):
try:
tweet = json.loads(data)['text']
print(tweet.translate(non_bmp_map))
except KeyError:
pass
def on_error(self , status):
print(status)
auth = OAuthHandler (ConsumerKey , ConsumerSecret)
auth.set_access_token(AccessToken , AccessTokenSecret)
twitterStream = Stream(auth , listener())
twitterStream.filter(track=['الله'])
#tweets.close()

Related

Tweepy AttributeError: 'API' object has no attribute 'search'

I'm trying to get tweets using tweepy but I am running into this error:
AttributeError: 'API' object has no attribute 'search'
Code:
import tweepy
# Authentication
consumerKey = "Type your consumer key here"
consumerSecret = "Type your consumer secret here"
accessToken = "Type your accedd token here"
accessTokenSecret = "Type your access token secret here"
auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessToken, accessTokenSecret)
api = tweepy.API(auth)
#Sentiment Analysis
def percentage(part,whole):
return 100 * float(part)/float(whole)
keyword = input("Please enter keyword or hashtag to search: ")
noOfTweet = int(input ("Please enter how many tweets to analyze: "))
tweets = tweepy.Cursor(api.search, q=keyword).items(noOfTweet)
How can I fix this?
use api.search_tweets instead of api.search
tweepy v.4.0.0 has changed it..
Use:
tweets = tweepy.Cursor(api.search(q=keyword)).items(noOfTweet)

How to specify more than one coordinates for geocode parameter of API.search in Tweepy

I want to search tweets based on more than one coordinates. So, I tried this but it doesn't return any results:
total = 0
for status in tweepy.Cursor(api.search, q='cricket', lang="en",
geocode="24.8607,67.0011,25mi OR 40.7128,74.0060,20mi"
).items(10):
total+=1
print(total)
It's been a while since #Tayyap Mazhar shared the post, but just in case, the code below will work as expected. Just remember, do not put comma while declaring geoc variable!
import tweepy
import pandas as pd
CONSUMER_KEY = "?"
CONSUMER_SECRET = "?"
OAUTH_TOKEN = "?"
OAUTH_TOKEN_SECRET = "?"
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
auth.secure = True
api = tweepy.API(auth, wait_on_rate_limit=True,
wait_on_rate_limit_notify=True)
if (not api):
print ("Can’t Authenticate")
sys.exit(-1)
tweet_lst=[]
geoc=[('41.0,28.9499962,1km'),('41.1062629083,29.0264182277,1km'),('41.072833042,29.022833242,1km'),('41.05,28.91,1km')]
for geocode in geoc:
for tweet in tweepy.Cursor(api.search,geocode=geocode).items(1000):
tweetDate = tweet.created_at.date()
if(tweet.coordinates !=None):
tweet_lst.append([tweetDate,tweet.id,tweet.
coordinates['coordinates'][0],
tweet.coordinates['coordinates'][1],
tweet.user.screen_name,
tweet.user.name, tweet.text,
tweet.user._json['geo_enabled']])
tweet_df = pd.DataFrame(tweet_lst, columns=['tweet_dt', 'id', 'long','lat','username', 'name', 'tweet','geo'])```

Elasticsearch: Cannot add documents to an index

Using tweepy and elasticsearch Python modules I can create the index, but the documents do not get created/added. I've taken the code from an example that worked for the author (isn't that always the case).
#!/usr/bin/env python
#
import tweepy
import sys
import json
from textwrap import TextWrapper
from datetime import datetime
from elasticsearch import Elasticsearch
consumer_key = "abcd"
consumer_secret = "1234"
access_token = "qwerty-5678"
access_secret = "huffalump"
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
es = Elasticsearch()
#es.indices.create(index="twitter", ignore=400)
class StreamListener(tweepy.StreamListener):
status_wrapper = TextWrapper(width=60, initial_indent=' ', subsequent_indent=' ')
def on_status(self, status):
try:
print 'n%s %s' % (status.author.screen_name, status.created_at)
json_data = status._json
print json_data['text']
es.create(index="twitter", doc_type="twitter_twp", body=json_data)
except Exception, e:
print e
pass
streamer = tweepy.Stream(auth=auth, listener=StreamListener(), timeout=3000000000 )
#Fill with your own Keywords below
terms = ['cyber']
streamer.filter(None,terms)
#streamer.userstream(None)
I monitor my Elasticsearch index at http://192.168.1.7:9200/_cat/indices?v and the data never changes:
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
yellow open twitter WDYgTflkRZ-5dTRHx2zuEw 3 2 0 0 390b 390b
I've tried everything - even reading the docs. Why aren't my docs going in my index!?

Filtering Tweets By Location

I'm trying to modify this script to only save the JSONs of tweets that have a location attached to them and am running into an issue with Python where checking that something isn't null doesn't seem to work. Has Key isn't working correctly, because they all have the key, most of them are just 'null'. Is not None isn't working because Python thinks null and None are different and checking it as text to not be "null" also didn't work. Does anyone have a clever idea on how to solve this?
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import pymongo
import tweepy
import json
#Variables that contains the user credentials to access Twitter API
access_key = '' #redacted for privacy and such
access_secret = ''
consumer_key = ''
consumer_secret = ''
#Runs auth to Twitter API
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
#This is a basic listener that will print incoming data to stdout
class StdOutListener(StreamListener):
def on_data(self, data):
print data
return True
def on_error(self, status):
print status
#Customizes the stream and saves text and lang to databases
class CustomStreamListener(tweepy.StreamListener):
def __init__(self, api):
self.api = api
super(tweepy.StreamListener, self).__init__()
self.db = pymongo.MongoClient('localhost', 27017).crime
def on_data(self, data):
jd = json.loads(data)
if jd.has_key('coordinates') :
self.db.tweets.insert( { 'text' : jd['text'], 'coordinates' : jd['coordinates'], 'lang' : jd['lang'] } )
def on_error(self, status_code):
return True # Don't kill the stream
def on_timeout(self):
return True # Don't kill the stream
#Calls on StreamListerner and provides specifications of tracking
l = tweepy.streaming.Stream(auth, CustomStreamListener(api))
l.filter(track=['guns'])
You could try something like checking the length of the string:
if len( jd['coordinates'] ) > 1:
self.db.tweets.insert( { 'text' : jd['text'], 'coordinates' : jd['coordinates'], 'lang' : jd['lang'] } )

Twitter streaming by locations

I use tweepy to stream tweets filtered by location coordinates. Currently I've been streaming tweets from two different neighborhoods in a city and found out that the tweets are the same despite difference in the coordinates (they are non-overlapping areas).
Is there a resolution of the coordinates up to which the twitter API can filter the tweets?
Here's a sample code for a streamer:
(Code with sample locations)
import handler
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
from tweepy import API
ckey = 'blahblah..'
csecret = 'blah blah...'
atoken = 'blah blah...'
asecret = 'blah blah...'
Loc1 = [-70.981963,41.591322,-70.89941601,41.75938768]
Loc2 = [-70.919537579,41.608616525,-70.905971579,41.617116525]
Loc3 = [-70.92909611,41.621725545,-70.92084611,41.632153545]
class listener(StreamListener):
def on_status(self, status ):
try:
x = handler.editor('file.csv', status) #used to save some data to file
print x.encode("utf-8")
return True
except BaseException, e:
print 'Exception:, ',str(e)
def on_error(self, status):
print status
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth,listener())
twitterStream.filter(locations = Loc1)
I run several instances of this code with different authorization details and locations. In this case Loc2 and Loc3 are neighborhoods in Loc1.
I think the resolution is in increments of 0.1 degree. https://dev.twitter.com/docs/api/1.1/post/statuses/filter
You will have to take all tweets of the region and filter by yourself for those that have the coordenades.

Categories