Elasticsearch: Cannot add documents to an index

Elasticsearch: Cannot add documents to an index - python

Using tweepy and elasticsearch Python modules I can create the index, but the documents do not get created/added. I've taken the code from an example that worked for the author (isn't that always the case).
#!/usr/bin/env python
#
import tweepy
import sys
import json
from textwrap import TextWrapper
from datetime import datetime
from elasticsearch import Elasticsearch
consumer_key = "abcd"
consumer_secret = "1234"
access_token = "qwerty-5678"
access_secret = "huffalump"
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
es = Elasticsearch()
#es.indices.create(index="twitter", ignore=400)
class StreamListener(tweepy.StreamListener):
status_wrapper = TextWrapper(width=60, initial_indent=' ', subsequent_indent=' ')
def on_status(self, status):
try:
print 'n%s %s' % (status.author.screen_name, status.created_at)
json_data = status._json
print json_data['text']
es.create(index="twitter", doc_type="twitter_twp", body=json_data)
except Exception, e:
print e
pass
streamer = tweepy.Stream(auth=auth, listener=StreamListener(), timeout=3000000000 )
#Fill with your own Keywords below
terms = ['cyber']
streamer.filter(None,terms)
#streamer.userstream(None)
I monitor my Elasticsearch index at http://192.168.1.7:9200/_cat/indices?v and the data never changes:
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
yellow open twitter WDYgTflkRZ-5dTRHx2zuEw 3 2 0 0 390b 390b
I've tried everything - even reading the docs. Why aren't my docs going in my index!?

Related

There is a problem using "since" in Tweety function to extract Covaxin related hashtag tweets from the starting time of Covid'19

It is saying unexpected parameter :since when running the below code
import tweepy
# Enter your own credentials obtained
# from your developer account
consumer_key = "wwww"
consumer_secret = "xxxx"
access_key = "yyyy"
access_secret = "zzzz"
# The above keys are mentioned correctly in the programming code
# Twitter authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
# Creating an API object
api = tweepy.API(auth)
hashtag_tweets = tweepy.Cursor(api.search_tweets, q="#Covaxin", tweet_mode='extended').items(5)
date_tweets = tweepy.Cursor(api.search_tweets, q="#Covaxin", since="2020-03-31", until="2022-01-09",tweet_mode='extended').items(5)
list = []
for tweet in date_tweets:
text = tweet._json["full_text"]
#print(text)
refined_tweet = {'text' : text,
'favorite_count' : tweet.favorite_count,
'retweet_count' : tweet.retweet_count,
'created_at' : tweet.created_at}
list.append(refined_tweet)
#print(list)
import pandas as pd
df = pd.DataFrame(list)
print(df)
df.to_csv('refined_tweets.csv')
It is saying unexpected parameter :since when running the code
I was trying to get the output for all tweets satisfying date query for the particular hashtag Covaxin from the starting of Covid days till now.

How to specify more than one coordinates for geocode parameter of API.search in Tweepy

I want to search tweets based on more than one coordinates. So, I tried this but it doesn't return any results:
total = 0
for status in tweepy.Cursor(api.search, q='cricket', lang="en",
geocode="24.8607,67.0011,25mi OR 40.7128,74.0060,20mi"
).items(10):
total+=1
print(total)

It's been a while since #Tayyap Mazhar shared the post, but just in case, the code below will work as expected. Just remember, do not put comma while declaring geoc variable!
import tweepy
import pandas as pd
CONSUMER_KEY = "?"
CONSUMER_SECRET = "?"
OAUTH_TOKEN = "?"
OAUTH_TOKEN_SECRET = "?"
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
auth.secure = True
api = tweepy.API(auth, wait_on_rate_limit=True,
wait_on_rate_limit_notify=True)
if (not api):
print ("Can’t Authenticate")
sys.exit(-1)
tweet_lst=[]
geoc=[('41.0,28.9499962,1km'),('41.1062629083,29.0264182277,1km'),('41.072833042,29.022833242,1km'),('41.05,28.91,1km')]
for geocode in geoc:
for tweet in tweepy.Cursor(api.search,geocode=geocode).items(1000):
tweetDate = tweet.created_at.date()
if(tweet.coordinates !=None):
tweet_lst.append([tweetDate,tweet.id,tweet.
coordinates['coordinates'][0],
tweet.coordinates['coordinates'][1],
tweet.user.screen_name,
tweet.user.name, tweet.text,
tweet.user._json['geo_enabled']])
tweet_df = pd.DataFrame(tweet_lst, columns=['tweet_dt', 'id', 'long','lat','username', 'name', 'tweet','geo'])```

why it is unable to output characters the tweet of 2nd account with Python?

My environment is below:
Python Python 3.6.5
sqlite3 3.28.0
import tweepy
import sqlite3
# 認証キーの設定
consumer_key = \
"XXXXXXXX"
consumer_secret = "XXXXXXXX"
access_token = "XXXXXXXX"
access_token_secret = "XXXXXXXX"
# OAuth認証
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
class Database:
def __init__(self):
self.dbpath = 'db.sqlite3'
self.conn = sqlite3.connect(self.dbpath)
self.c = self.conn.cursor()
db = Database()
def output_users_from_db():
return db.c.execute('select name, comment from twitter_users')
def update_comment(name, comment='null'):
db.c.execute("""update twitter_users set comment = ? where name = ?""", (comment, name))
db.conn.commit()
if __name__ == "__main__":
api = tweepy.API(auth)
users_info_from_db = output_users_from_db()
for i, user_info_on_db in enumerate(users_info_from_db):
print(user_info_on_db[0])
time_line = api.user_timeline(screen_name=user_info_on_db[0])
for i, info in enumerate(time_line):
# Below print functions can out put all of characters from twitter
print(user_info_on_db[0]) # user account
print(info.text) # tweet
break
Code above works. But if I write code below,
time_line = api.user_timeline(screen_name=user_info_on_db[0])
for i, info in enumerate(time_line):
# Below print functions can out put all of characters from twitter
print(user_info_on_db[0]) # user account
print(info.text) # tweet
update_comment(user_info_on_db[0], comment=info.text)
break
print() only works once, it cannot print 2nd account's tweet. How come when the code include update_comment(user_info_on_db[0], comment=info.text), print() cannot output tweet of 2nd account?

How to print tweet from from specific profile in python using twitter api

I want to print the tweets from a profile but I can't. I guess that I'm not using the right commands or something. I'm new in coding so I don't uderstand to much about api's.
I can get info about the profile so the conection is right.
from tweepy import OAuthHandler
from tweepy import API
from tweepy import Cursor
from datetime import datetime, date, time, timedelta
from collections import Counter
import sys
import tweepy
#I don't put the secret token and all of that
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
auth_api = API(auth)
account_list = ["jufut390"]
if len(account_list) > 0:
for target in account_list:
print("Getting data for " + target)
item = auth_api.get_user(target)
print("screen_name: " + item.screen_name)
#Get info about tweets
end_date = datetime.utcnow() - timedelta(days=5)
for status in Cursor(auth_api.user_timeline, id=target, tweet_mode = "extended").items():
#print tweets
if status.created_at < end_date:
break

In this line :
for status in Cursor(auth_api.user_timeline, id=target, tweet_mode = "extended").items():
The id parameter has no effect. It should be user_id and a valid user ID (numeric) See : https://developer.twitter.com/en/docs/tweets/timelines/api-reference/get-statuses-user_timeline.html
In you case, you can use the screen_name.
Secondly, you say you want to print the tweets, so write a print. Try this :
#Get info about tweets
end_date = datetime.utcnow() - timedelta(days=5)
for status in Cursor(auth_api.user_timeline, screen_name=item.screen_name, tweet_mode = "extended").items():
print(status.full_text)
if status.created_at < end_date:
break

Filtering Tweets By Location

I'm trying to modify this script to only save the JSONs of tweets that have a location attached to them and am running into an issue with Python where checking that something isn't null doesn't seem to work. Has Key isn't working correctly, because they all have the key, most of them are just 'null'. Is not None isn't working because Python thinks null and None are different and checking it as text to not be "null" also didn't work. Does anyone have a clever idea on how to solve this?
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import pymongo
import tweepy
import json
#Variables that contains the user credentials to access Twitter API
access_key = '' #redacted for privacy and such
access_secret = ''
consumer_key = ''
consumer_secret = ''
#Runs auth to Twitter API
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
#This is a basic listener that will print incoming data to stdout
class StdOutListener(StreamListener):
def on_data(self, data):
print data
return True
def on_error(self, status):
print status
#Customizes the stream and saves text and lang to databases
class CustomStreamListener(tweepy.StreamListener):
def __init__(self, api):
self.api = api
super(tweepy.StreamListener, self).__init__()
self.db = pymongo.MongoClient('localhost', 27017).crime
def on_data(self, data):
jd = json.loads(data)
if jd.has_key('coordinates') :
self.db.tweets.insert( { 'text' : jd['text'], 'coordinates' : jd['coordinates'], 'lang' : jd['lang'] } )
def on_error(self, status_code):
return True # Don't kill the stream
def on_timeout(self):
return True # Don't kill the stream
#Calls on StreamListerner and provides specifications of tracking
l = tweepy.streaming.Stream(auth, CustomStreamListener(api))
l.filter(track=['guns'])

You could try something like checking the length of the string:
if len( jd['coordinates'] ) > 1:
self.db.tweets.insert( { 'text' : jd['text'], 'coordinates' : jd['coordinates'], 'lang' : jd['lang'] } )

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Elasticsearch: Cannot add documents to an index - python

Related

There is a problem using "since" in Tweety function to extract Covaxin related hashtag tweets from the starting time of Covid'19

How to specify more than one coordinates for geocode parameter of API.search in Tweepy

why it is unable to output characters the tweet of 2nd account with Python?

How to print tweet from from specific profile in python using twitter api

Filtering Tweets By Location

Categories

Resources