I am trying to use the following Python code to insert parsed out tweets into a MySQL database:
#-*- coding: utf-8 -*-
__author__ = 'sagars'
import pymysql
import tweepy
import time
import json
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
# Stream listener that inserts each received tweet into the `tweets` table.
# `c` is not defined in this snippet; presumably it is a database cursor.
class listener(StreamListener):
# Receives the raw JSON text of one stream message.
def on_data(self, data):
all_data = json.loads(data)
tweet = all_data["text"]
username = all_data["user"]["screen_name"]
# NOTE: there is no comma between the SQL string and the parameter tuple on
# the following line, so Python tries to call the string literal. This is
# the "TypeError: 'str' object is not callable" shown in the traceback.
c.execute("INSERT INTO tweets (tweet_time, username, tweet) VALUES (%s,%s,%s)"
(time.time(), username, tweet))
print (username, tweet)
return True
# Prints the status value passed in when an error is reported.
def on_error(self, status):
print (status)
# Build the OAuth handler, attach the access token and start a filtered stream.
# ckey, csecret, atoken and asecret are not defined in this snippet.
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
twitterStream.filter(track = ["LeBron James"])
But I am running into the following error:
Traceback (most recent call last):
File "C:/Users/sagars/PycharmProjects/YouTube NLP Lessons/Twitter Stream to DB.py", line 45, in <module>
twitterStream.filter(track = ["LeBron James"])
File "C:\Python34\lib\site-packages\tweepy\streaming.py", line 428, in filter
self._start(async)
File "C:\Python34\lib\site-packages\tweepy\streaming.py", line 346, in _start
self._run()
File "C:\Python34\lib\site-packages\tweepy\streaming.py", line 286, in _run
raise exception
File "C:\Python34\lib\site-packages\tweepy\streaming.py", line 255, in _run
self._read_loop(resp)
File "C:\Python34\lib\site-packages\tweepy\streaming.py", line 309, in _read_loop
self._data(next_status_obj)
File "C:\Python34\lib\site-packages\tweepy\streaming.py", line 289, in _data
if self.listener.on_data(data) is False:
File "C:/Users/sagars/PycharmProjects/YouTube NLP Lessons/Twitter Stream to DB.py", line 35, in on_data
(time.time(), username, tweet))
TypeError: 'str' object is not callable
How can the code be adjusted to avoid the error? Should the tweets be parsed out of the JSON object in a different way?
You forgot a comma before `(time.time(), username, tweet)`.
To clarify, it should be:
# The comma after the SQL string makes the tuple the second argument of execute().
c.execute("INSERT INTO tweets (tweet_time, username, tweet) VALUES (%s,%s,%s)" ,
(time.time(), username, tweet))
Related
I am trying to catch the exception that is raised when the connection is reset by the peer during real-time streaming of tweets, but it seems the try/except block is not catching the error and continuing past it. Please advise whether the block is wrongly placed in the code or whether there is something else wrong with the code.
I have created a script that streams tweets in real time to an Excel file. Many times the streaming has been disconnected by an ECONNRESET error, which means the connection was reset by the peer:
Exception in thread Thread-1:
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 801, in __bootstrap_inner
self.run()
File "/usr/lib/python2.7/threading.py", line 754, in run
self.__target(*self.__args, **self.__kwargs)
File "/usr/local/lib/python2.7/dist-packages/tweepy/streaming.py", line 297, in _run
six.reraise(*exc_info)
File "/usr/local/lib/python2.7/dist-packages/tweepy/streaming.py", line 266, in _run
self._read_loop(resp)
File "/usr/local/lib/python2.7/dist-packages/tweepy/streaming.py", line 316, in _read_loop
line = buf.read_line().strip()
File "/usr/local/lib/python2.7/dist-packages/tweepy/streaming.py", line 181, in read_line
self._buffer += self._stream.read(self._chunk_size)
File "/usr/local/lib/python2.7/dist-packages/urllib3/response.py", line 430, in read
raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
File "/usr/lib/python2.7/contextlib.py", line 35, in __exit__
self.gen.throw(type, value, traceback)
File "/usr/local/lib/python2.7/dist-packages/urllib3/response.py", line 349, in _error_catcher
raise ProtocolError('Connection broken: %r' % e, e)
ProtocolError: ('Connection broken: error("(104, 'ECONNRESET')",)', error("(104, 'ECONNRESET')",))
It's a protocol error, and I tried to catch it by importing the exception classes from the urllib3 library, but the try/except block is not able to suppress it and continue with the streaming.
import pandas as pd
import csv
from bs4 import BeautifulSoup
import re
import tweepy
import ast
from datetime import datetime
import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
from unidecode import unidecode
from urllib3.exceptions import ProtocolError
from urllib3.exceptions import IncompleteRead
import requests
# Twitter API credentials (placeholders).
consumer_key = 'xxxxxxxxx'
consumer_secret = 'xxxxxxxxx'
access_token = 'xxxxxxxxx'
access_token_secret = 'xxxxxxxxx'

# Start every run with an empty output file. Opening in 'w' mode already
# creates or truncates the file, and leaving the with-block closes it, so no
# explicit truncate() or close() call is needed.
with open('TEST_FEB.csv', 'w'):
    pass
class listener(StreamListener):
    """Stream listener that appends each received tweet to TEST_FEB.csv.

    Every row holds the tweet's ``created_at`` value, its text (passed through
    ``unidecode`` and stripped of markup with BeautifulSoup) and a link to the
    tweet.
    """

    def on_data(self, data):
        """Write one streamed message to the CSV file.

        Args:
            data: Raw JSON string for a single stream message.

        Returns:
            True, so the stream keeps running (tweepy stops only when
            ``on_data`` returns False).
        """
        data1 = json.loads(data)
        # Messages without tweet fields cannot be written as a row; skip them
        # instead of letting a KeyError end the stream.
        if "text" not in data1 or "created_at" not in data1:
            return True

        # Named created_at rather than time so the imported time module is
        # not shadowed.
        created_at = data1["created_at"]
        # For a retweet the original tweet is nested under "retweeted_status";
        # a key lookup is needed here because hasattr() does not see dict keys.
        source = data1.get("retweeted_status") or data1
        # "truncated" is a JSON boolean, so test its truth value rather than
        # comparing it with the string "true".
        if source.get("truncated") and "extended_tweet" in source:
            tweet = unidecode(source["extended_tweet"]["full_text"])
        else:
            tweet = unidecode(source["text"])
        tweet1 = BeautifulSoup(tweet, "lxml").get_text()
        url = "https://twitter.com/{}/status/{}".format(
            data1["user"]["screen_name"], data1["id_str"])
        # The with-block closes the file even if writing the row fails.
        with open('TEST_FEB.csv', 'a') as out_file:
            csv.writer(out_file).writerow([created_at, tweet1, url])
        return True

    def on_limit(self, track):
        """Return True for every limit notification passed in."""
        return True
# Authenticate, then keep (re)starting the stream; connection errors are meant
# to be printed and ignored.
auth = OAuthHandler(consumer_key,consumer_secret)
auth.set_access_token(access_token,access_token_secret)
while True:
try:
twitterStream = Stream(auth, listener(),
wait_on_rate_limit=True, retry_count=10, stall_warnings=True)
# NOTE(review): the traceback above is reported from "Thread-1", which
# suggests that with async = True the stream runs in a separate thread and
# filter() returns without waiting. If so, the except clauses below cannot
# see the error, and this loop starts a new stream on every pass. Confirm
# against the tweepy streaming documentation.
twitterStream.filter(track=["abcd"], async = True)
except ProtocolError as error:
print (str(error))
continue
except IncompleteRead as IR:
print (str(IR))
continue
The expected result is that whenever the connection is reset from the peer and the said error is raised, the code should suppress it and continue with the streaming. The code in the current form is not working that way.
This is my code to insert tweet data in MYSQL
import pymysql
import tweepy
import time
import json
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import pymysql.cursors
# Twitter API credentials (left empty in this snippet).
ckey= ''
csecret= ''
atoken=''
asecret=''
# Open the MySQL connection and the cursor used by the listener below.
conn = pymysql.connect(host='localhost', port=3306, user='root', passwd='admin1234', db='mysql')
cur = conn.cursor()
# Stream listener that inserts the text of each received tweet into the
# `tweet` table through the module-level cursor `cur`.
class listener(StreamListener):
# Receives the raw JSON text of one stream message.
def on_data(self, data):
all_data = json.loads(data)
tweet = all_data["text"]
# `a` is assigned here but not used anywhere else in this snippet.
a=0
#username = all_data["user"]["screen_name"]
# NOTE: there is no comma between the SQL string and (tweet), so Python tries
# to call the string literal. This is the "TypeError: 'str' object is not
# callable" shown in the traceback.
cur.execute("INSERT INTO tweet (textt) VALUES (%s)" (tweet))
print (tweet)
return True
# Prints the status value passed in when an error is reported.
def on_error(self, status):
print (status)
# Authenticate, run the filtered stream, then close the cursor and connection.
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
twitterStream.filter(track = ["puasa"])
cur.close()
conn.close()
but i get error TypeError: 'str' object is not callable
traceback error
Traceback (most recent call last):
File "collect-sql.py", line 40, in <module>
twitterStream.filter(track = ["puasa"])
File "/Users/amzar/anaconda3/lib/python3.6/site-packages/tweepy/streaming.py", line 450, in filter
self._start(async)
File "/Users/amzar/anaconda3/lib/python3.6/site-packages/tweepy/streaming.py", line 364, in _start
self._run()
File "/Users/amzar/anaconda3/lib/python3.6/site-packages/tweepy/streaming.py", line 297, in _run
six.reraise(*exc_info)
File "/Users/amzar/anaconda3/lib/python3.6/site-packages/six.py", line 693, in reraise
raise value
File "/Users/amzar/anaconda3/lib/python3.6/site-packages/tweepy/streaming.py", line 266, in _run
self._read_loop(resp)
File "/Users/amzar/anaconda3/lib/python3.6/site-packages/tweepy/streaming.py", line 327, in _read_loop
self._data(next_status_obj)
File "/Users/amzar/anaconda3/lib/python3.6/site-packages/tweepy/streaming.py", line 300, in _data
if self.listener.on_data(data) is False:
File "collect-sql.py", line 30, in on_data
cur.execute("INSERT INTO tweet (textt) VALUES (%s)" (tweet))
TypeError: 'str' object is not callable
You need 2 extra commas:
# The query string and the one-element parameter tuple are separate arguments.
cur.execute("INSERT INTO tweet (textt) VALUES (%s)", (tweet,))
The first separates the query string from the arguments, the second turns the value in brackets into the first element in a 1 element tuple (it actually would work if you just used a single string instead of a tuple, assuming you only have one argument, but this isn't officially supported from the look of things).
But this error that you mentioned in the comments:
UnicodeEncodeError: 'latin-1' codec can't encode character '\u201c' in position 97: ordinal not in range(256)
means you are trying to encode Unicode text containing a character outside the Latin-1 range (here U+201C, a left double quotation mark) as latin-1.
If the field is already internally defined (in your mysql database) as unicode, you may need to specify the character set to use when connecting e.g.:
# Same connection call as in the question, with use_unicode and charset added.
conn = pymysql.connect(host='localhost', port=3306, user='root', passwd='admin1234', db='mysql', use_unicode=True, charset="utf8")
If the field in MySQL is not already something like utf-8, then I recommend you alter or otherwise redefine the database to use a Unicode character set for this column.
https://dev.mysql.com/doc/refman/8.0/en/charset-mysql.html
I'm using the code below to stream tweets and analyse them for making decisions. While running it I got an error. The error occurs for Twitter users who have more than 50 friends.
import re
import tweepy
import sys
import time
# Twitter API credentials (left empty in this snippet).
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
# Translation table that maps every code point from 0x10000 up to
# sys.maxunicode to 0xfffd; it is applied to the tweet text before printing.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
# Iterator over the friends of the given account.
users = tweepy.Cursor(api.friends, screen_name='#myuser').items()
# Walk through the friends one at a time until the iterator is exhausted.
while True:
try:
user = next(users)
except tweepy.TweepError:
# Wait 15 minutes and try once more.
# NOTE(review): presumably this is meant to handle rate limiting; the retry
# below is not itself inside a try block.
time.sleep(60*15)
user = next(users)
except StopIteration:
break
# Print up to five statuses from this friend's timeline. The traceback below
# (status code 401) is raised from this loop.
for status in tweepy.Cursor(api.user_timeline,screen_name=user.screen_name,result_type='recent').items(5):
text=status._json['text'].translate(non_bmp_map)
print (user.screen_name + ' >>>>>> '+text)
while executing this script I have got an error as below.
Traceback (most recent call last):
File "D:sensitive2demo.py", line 31, in <module>
for status in tweepy.Cursor(api.user_timeline,screen_name=user.screen_name,result_type='recent').items(5):
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tweepy-3.6.0-py3.6.egg\tweepy\cursor.py", line 49, in __next__
return self.next()
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tweepy-3.6.0-py3.6.egg\tweepy\cursor.py", line 197, in next
self.current_page = self.page_iterator.next()
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tweepy-3.6.0-py3.6.egg\tweepy\cursor.py", line 108, in next
data = self.method(max_id=self.max_id, parser=RawParser(), *self.args, **self.kargs)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tweepy-3.6.0-py3.6.egg\tweepy\binder.py", line 250, in _call
return method.execute()
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tweepy-3.6.0-py3.6.egg\tweepy\binder.py", line 234, in execute
raise TweepError(error_msg, resp, api_code=api_error_code)
tweepy.error.TweepError: Twitter error response: status code = 401
I have googled a lot, but nothing worked. Can somebody help me solve the problem?
401 is an http status code for 'Unauthorized'. I would suggest verifying your credentials.
I am trying to parse out the tweet and username sections of the JSON object returned from Twitter using the following code:
# Stream listener that inserts each received tweet into the `tweets` table and
# echoes it to the console. `c` is not defined in this snippet; presumably it
# is a database cursor.
class listener(StreamListener):
# Receives the raw JSON text of one stream message.
def on_data(self, data):
all_data = json.loads(data)
tweet = all_data["text"]
username = all_data["user"]["screen_name"]
c.execute("INSERT INTO tweets (tweet_time, username, tweet) VALUES (%s,%s,%s)" ,
(time.time(), username, tweet))
# The UnicodeEncodeError in the traceback is raised by this print call, from
# the cp1252 codec.
print (username, tweet)
return True
# Prints the status value passed in when an error is reported.
def on_error(self, status):
print (status)
# Build the OAuth handler, attach the access token and start a filtered stream.
# ckey, csecret, atoken and asecret are not defined in this snippet.
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
twitterStream.filter(track = ["LeBron James"])
But I get the following error. How can the code be adjusted to decode or encode the response properly?
Traceback (most recent call last):
File "C:/Users/sagars/PycharmProjects/YouTube NLP Lessons/Twitter Stream to DB.py", line 45, in <module>
twitterStream.filter(track = ["LeBron James"])
File "C:\Python34\lib\site-packages\tweepy\streaming.py", line 428, in filter
self._start(async)
File "C:\Python34\lib\site-packages\tweepy\streaming.py", line 346, in _start
self._run()
File "C:\Python34\lib\site-packages\tweepy\streaming.py", line 286, in _run
raise exception
File "C:\Python34\lib\site-packages\tweepy\streaming.py", line 255, in _run
self._read_loop(resp)
File "C:\Python34\lib\site-packages\tweepy\streaming.py", line 309, in _read_loop
self._data(next_status_obj)
File "C:\Python34\lib\site-packages\tweepy\streaming.py", line 289, in _data
if self.listener.on_data(data) is False:
File "C:/Users/sagars/PycharmProjects/YouTube NLP Lessons/Twitter Stream to DB.py", line 36, in on_data
print (username, tweet)
File "C:\Python34\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 0-8: character maps to <undefined>
Unfortunately the problem with that is the information you get from twitter is not utf-8 encoded, which is causing you to get the charmap error. To fix that, you'll need to encode it.
# str.encode() returns a bytes object, so both values are bytes after this.
tweet = all_data["text"].encode('utf-8')
username = all_data["user"]["screen_name"].encode('utf-8')
This will cause you to lose some of the emoji and special characters that show up in the tweet; they will be converted to escape sequences such as \x899. If you really need that information (I discard it myself) for sentiment analysis, then you'll need to install a package with a pre-compiled list to convert them accordingly.
I launched this code from the terminal with the command `python py/twi.py`, and nothing happens:
import oauth, tweepy
from time import sleep
message = "hello"
def init():
    """Create the module-level ``api`` client from the hard-coded credentials."""
    global api
    # Confidential values, masked in this snippet.
    app_key, app_secret = "***", "***"
    token, token_secret = "***", "***"
    handler = tweepy.OAuthHandler(
        app_key, app_secret, "https://twitter.com/Problem196"
    )
    handler.set_access_token(token, token_secret)
    api = tweepy.API(handler)
# Create the global api client, then post the message as a status update.
init()
api.update_status(message)
But it is supposed to post the tweet "hello" on the page https://twitter.com/problem196 .
I have updated tweepy, so that is fine. Why is it not posting? I have no idea. Please help.
UPD: After I put code print api.last_response.msg terminal showed me some errors:
artem#artem-VirtualBox:~$ python py/twi.py
Traceback (most recent call last):
File "py/twi.py", line 20, in <module>
api.update_status(message)
File "/usr/local/lib/python2.7/dist-packages/tweepy-2.1-py2.7.egg/tweepy/binder.py", line 197, in _call
return method.execute()
File "/usr/local/lib/python2.7/dist-packages/tweepy-2.1-py2.7.egg/tweepy/binder.py", line 154, in execute
raise TweepError('Failed to send request: %s' % e)
tweepy.error.TweepError: Failed to send request: [Errno -2] Name or service not known
artem#artem-VirtualBox:~$ python py/twi.py
Traceback (most recent call last):
File "py/twi.py", line 20, in <module>
api.update_status(message)
File "/usr/local/lib/python2.7/dist-packages/tweepy-2.1-py2.7.egg/tweepy/binder.py", line 197, in _call
return method.execute()
File "/usr/local/lib/python2.7/dist-packages/tweepy-2.1-py2.7.egg/tweepy/binder.py", line 173, in execute
raise TweepError(error_msg, resp)
tweepy.error.TweepError: [{'message': 'Status is a duplicate', 'code': 187}]