I am using Twython to capture a stream of tweets from a group of users. It worked quite well for an hour or so (capturing just a few tweets) and then crashed with an HTTP IncompleteRead error. I have seen this discussed in a few posts, but never resolved.
Is there any way to capture this error so it does not crash the program?
File "C:\ProgramData\Anaconda3\lib\site-packages\urllib3\response.py", line
331, in _error_catcher
yield
File "C:\ProgramData\Anaconda3\lib\site-packages\urllib3\response.py",
line 640, in read_chunked
chunk = self._handle_chunk(amt)
File "C:\ProgramData\Anaconda3\lib\site-packages\urllib3\response.py",
line 586, in _handle_chunk
value = self._fp._safe_read(amt)
File "C:\ProgramData\Anaconda3\lib\http\client.py", line 612, in
_safe_read
raise IncompleteRead(b''.join(s), amt)
http.client.IncompleteRead: IncompleteRead(0 bytes read, 1 more expected)
My code is simple and I see no other options to trap these errors.
from twython import TwythonStreamer
CONSUMER_KEY = '...'
CONSUMER_SECRET = '...'
# Access:
ACCESS_TOKEN = '...'
ACCESS_SECRET = '....'
class MyStreamer(TwythonStreamer):
    """Streamer that prints tweet texts, skipping retweets and '#'-led tweets."""

    def on_success(self, data):
        """Print the message text unless it starts with 'RT' or '#'.

        Messages that carry no 'text' key are ignored.
        """
        if 'text' in data:
            # A tuple argument checks both prefixes in a single call.
            if not data['text'].startswith(('RT', '#')):
                print(data['text'])

    def on_error(self, status_code, data):
        """Print the reported status code and disconnect the stream."""
        print(status_code)
        self.disconnect()
stream = MyStreamer(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN,
ACCESS_SECRET)
# follow is watching for tweets from a user or list of users
users = [25073877, 19905457,1058764970010308611,251918778]
stream.statuses.filter(follow=users, language = 'en')
Related
I am trying to catch the exception that is raised when the connection is reset by the peer during real-time streaming of tweets, but it seems the try/except block is not catching the error and continuing past it. Please advise whether the block is wrongly placed in the code or whether there is something else wrong with the code.
I have created a script that streams tweets in real time to an Excel file. Many times the streaming has been disconnected by an ECONNRESET error, i.e. connection reset by peer:
Exception in thread Thread-1:
Traceback (most recent call last):
File “/usr/lib/python2.7/threading.py”, line 801, in __bootstrap_inner
self.run()
File “/usr/lib/python2.7/threading.py”, line 754, in run
self.__target(*self.__args, **self.__kwargs)
File “/usr/local/lib/python2.7/dist-packages/tweepy/streaming.py”, line 297, in _run
six.reraise(*exc_info)
File “/usr/local/lib/python2.7/dist-packages/tweepy/streaming.py”, line 266, in _run
self._read_loop(resp)
File “/usr/local/lib/python2.7/dist-packages/tweepy/streaming.py”, line 316, in _read_loop
line = buf.read_line().strip()
File “/usr/local/lib/python2.7/dist-packages/tweepy/streaming.py”, line 181, in read_line
self._buffer += self._stream.read(self._chunk_size)
File “/usr/local/lib/python2.7/dist-packages/urllib3/response.py”, line 430, in read
raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
File “/usr/lib/python2.7/contextlib.py”, line 35, in exit
self.gen.throw(type, value, traceback)
File “/usr/local/lib/python2.7/dist-packages/urllib3/response.py”, line 349, in _error_catcher
raise ProtocolError(‘Connection broken: %r’ % e, e)
ProtocolError: (‘Connection broken: error("(104, ‘ECONNRESET’)",)’, error("(104, ‘ECONNRESET’)",))
It's a protocol error, and I tried to catch it by importing the exception classes from the urllib3 library, but the try/except block is not able to suppress it and continue with the streaming.
import pandas as pd
import csv
from bs4 import BeautifulSoup
import re
import tweepy
import ast
from datetime import datetime
import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
from unidecode import unidecode
from urllib3.exceptions import ProtocolError
from urllib3.exceptions import IncompleteRead
import requests
consumer_key= 'xxxxxxxxx'
consumer_secret= 'xxxxxxxxx'
access_token= 'xxxxxxxxx'
access_token_secret= 'xxxxxxxxx'
# Start every run with an empty TEST_FEB.csv. Opening in 'w' mode already
# truncates the file, and leaving the with-block closes it, so no explicit
# truncate()/close() calls are needed.
with open('TEST_FEB.csv', 'w'):
    pass
# Stream listener whose on_data() appends one CSV row (created_at, cleaned
# tweet text, tweet URL) to TEST_FEB.csv for each message it receives.
# NOTE(review): the indentation of this snippet was lost, so the nesting of
# the if/else branches below cannot be confirmed from the text alone.
class listener(StreamListener):
def on_data(self,data):
data1 = json.loads(data)
# NOTE(review): this local name shadows the imported `time` module inside
# on_data().
time = data1["created_at"]
# NOTE(review): hasattr() tests for an attribute, not a dict key, and the
# name ends in a stray ':' -- this condition looks like it can never be true;
# `"retweeted_status" in data1` was presumably intended. Confirm.
if hasattr(data1,"retweeted_status:"):
tweet = unidecode(data1["tweet"]["text"])
# NOTE(review): compares against the string "true"; json.loads() decodes a
# JSON boolean to Python True/False, so confirm which form the payload uses.
if data1["truncated"] == "true":
tweet = unidecode(data1["extended_tweet"]["full_text"])
else:
tweet = unidecode(data1["text"])
# Run the text through BeautifulSoup and keep only its text content.
tweet1 = BeautifulSoup(tweet, "lxml").get_text()
url = "https://twitter.com/{}/status/{}".format(data1["user"]
["screen_name"], data1["id_str"])
# The file is reopened in append mode and closed again for every message.
file = open('TEST_FEB.csv', 'a')
csv_writer = csv.writer(file)
csv_writer.writerow([time, tweet1, url])
file.close()
# Always returns True for limit notices.
# NOTE(review): presumably this tells tweepy to keep the stream open --
# confirm against the tweepy documentation.
def on_limit(self, track):
return True
# Authenticate, then create and start the stream inside an endless loop,
# printing the error and looping again on ProtocolError / IncompleteRead.
auth = OAuthHandler(consumer_key,consumer_secret)
auth.set_access_token(access_token,access_token_secret)
while True:
try:
twitterStream = Stream(auth, listener(),
wait_on_rate_limit=True, retry_count=10, stall_warnings=True)
# NOTE(review): the traceback above is raised in "Thread-1" from tweepy's
# streaming _run, i.e. not in the thread executing this try block, so the
# except clauses below presumably never see it. With async = True, filter()
# also appears to return at once, which would let this loop open new streams
# repeatedly -- confirm against the tweepy documentation.
# NOTE(review): `async` is a reserved keyword from Python 3.7 on; this call
# only parses on older interpreters (the traceback shows Python 2.7).
twitterStream.filter(track=["abcd"], async = True)
except ProtocolError as error:
print (str(error))
continue
except IncompleteRead as IR:
print (str(IR))
continue
The expected result is that whenever the connection is reset from the peer and the said error is raised, the code should suppress it and continue with the streaming. The code in the current form is not working that way.
I'm using the code below to stream tweets and analyse them for making decisions. While running it I got an error; the error occurs for Twitter users who have a friend list of more than 50.
import re
import tweepy
import sys
import time
# Script: for every account returned by api.friends for the given screen
# name, print up to five statuses from that account's timeline.
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
# Translation table for str.translate(): maps every code point above U+FFFF
# to U+FFFD (the replacement character).
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
users = tweepy.Cursor(api.friends, screen_name='#myuser').items()
while True:
try:
user = next(users)
except tweepy.TweepError:
# Wait 15 minutes, then try once more.
# NOTE(review): presumably meant to sit out a rate-limit window. The retry
# below is not itself guarded, so a second TweepError or a StopIteration
# raised here propagates out of the loop.
time.sleep(60*15)
user = next(users)
except StopIteration:
break
# NOTE(review): this loop appears to sit outside the try above, so a
# TweepError raised by user_timeline (the 401 in the traceback below) is not
# caught anywhere.
for status in tweepy.Cursor(api.user_timeline,screen_name=user.screen_name,result_type='recent').items(5):
text=status._json['text'].translate(non_bmp_map)
print (user.screen_name + ' >>>>>> '+text)
While executing this script I got the error below.
Traceback (most recent call last):
File "D:sensitive2demo.py", line 31, in <module>
for status in tweepy.Cursor(api.user_timeline,screen_name=user.screen_name,result_type='recent').items(5):
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tweepy-3.6.0-py3.6.egg\tweepy\cursor.py", line 49, in __next__
return self.next()
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tweepy-3.6.0-py3.6.egg\tweepy\cursor.py", line 197, in next
self.current_page = self.page_iterator.next()
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tweepy-3.6.0-py3.6.egg\tweepy\cursor.py", line 108, in next
data = self.method(max_id=self.max_id, parser=RawParser(), *self.args, **self.kargs)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tweepy-3.6.0-py3.6.egg\tweepy\binder.py", line 250, in _call
return method.execute()
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tweepy-3.6.0-py3.6.egg\tweepy\binder.py", line 234, in execute
raise TweepError(error_msg, resp, api_code=api_error_code)
tweepy.error.TweepError: Twitter error response: status code = 401
I have googled a lot, but nothing worked. Can somebody help me solve the problem?
401 is an http status code for 'Unauthorized'. I would suggest verifying your credentials.
I have an appliance that uses REST APIs to set/show information, and uses OAuth 2.0.
I have following Python code. The authentication works fine, but the POST commands are causing a crash.
import json
from sanction import Client
# Script: obtain an OAuth 2.0 token with the client-credentials grant, show
# the appliance's DNS entries, add one, and show them again.
# The URL for access token requests
token_request_url = "https://192.168.42.5/oauth/token"
# Client name and secret
client_id = "test"
client_secret = "Paswordgoeshere"
# The base URL of the resource server
resource_base = "https://192.168.42.5/"
# Create the OAuth2 client object
client = Client(token_endpoint = token_request_url,
resource_endpoint = resource_base,
client_id = client_id,
client_secret = client_secret)
# Request the access token into the client object
client.request_token(grant_type = 'client_credentials')
# Example GET request to show DNS servers
print "DNS Info"
print client.request('/rest/dns')
print ""
print ""
# Example POST request to add a DNS server
data = {"ip": "8.8.8.8"}
print json.dumps(data)
#data_json = json.dumps(data)
# NOTE(review): per the traceback below, the ValueError is raised inside
# sanction's request() while it JSON-decodes the reply. Presumably the
# appliance answers this POST with an empty or non-JSON body -- confirm.
client.request('/rest/dns', data=json.dumps(data), headers={'content-type': 'application/json'})
# The script crashes on the call above, before reaching the next line.
print "New DNS Info"
print client.request('/rest/dns')
This works: the client.request line does add the DNS entry I need, but the same line also crashes with the following output:
python example.py
DNS Info
[]
{"ip": "8.8.8.8"}
Traceback (most recent call last):
File "example.py", line 36, in <module>
client.request('/rest/dns', data=json.dumps(data), headers={'content-type': 'application/json'})
File "/usr/local/lib/python2.7/dist-packages/sanction/__init__.py", line 170, in request
'utf-8'))
File "/usr/lib/python2.7/json/__init__.py", line 338, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 366, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python2.7/json/decoder.py", line 384, in raw_decode
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
I'm using Google App Engine to try to make a simple app that requires logging in with Twitter. My problem is that when I redirect the user to some type of "confirmation" page (/profile in my case), I'm getting a BadKeyError when I look up the user's authentication data that I saved into my db.Model.
import logging
import webapp2
from google.appengine.ext import db
from google.appengine.ext.webapp import template
import urlparse
import oauth2 as oauth
consumer_key='MY_CONSUMER_KEY'
consumer_secret='SECRET_KEY'
request_token_url='https://api.twitter.com/oauth/request_token'
access_token_url='https://api.twitter.com/oauth/access_token'
authorize_url='https://api.twitter.com/oauth/authorize'
consumer=oauth.Consumer(consumer_key,consumer_secret)
client = oauth.Client(consumer)
screenname = ''
class Profile(db.Model):
    """Datastore entity holding one Twitter user's identity and OAuth tokens."""

    # All four fields are stored as plain datastore strings.
    twitter_id = db.StringProperty()
    access_token = db.StringProperty()
    access_token_secret = db.StringProperty()
    twitter_user_name = db.StringProperty()
class MainHandler(webapp2.RequestHandler):
    """Request handler that serves the login page."""

    def get(self):
        """Render login.html with an empty template context."""
        self.response.out.write(template.render("login.html", {}))
class SignInWithTwitter(webapp2.RequestHandler):
    """Request handler that starts the Twitter OAuth sign-in flow."""

    def get(self):
        """Fetch a request token and redirect the user to the authorize URL."""
        # Only the response body is used; the response object is not inspected.
        _resp, content = client.request(request_token_url, "GET")
        self.request_token = dict(urlparse.parse_qsl(content))
        self.redirect(
            authorize_url + '?oauth_token=' + self.request_token['oauth_token'])
# Request handler for the profile page: looks up the Profile for the
# module-level `screenname` and writes a greeting.
class ProfilePage(webapp2.RequestHandler):
def get(self):
logging.info("tst" + screenname)
# NOTE(review): this is the call that raises BadKeyError in the traceback.
# Profile.get() is handed the bare screen name, whereas AuthorizeTwitter
# creates and looks up the entity with key_name=screenname via
# Profile.get_by_key_name().
self.profile = Profile.get(screenname)
# NOTE(review): Profile declares `twitter_user_name`, not `screenname`, and
# the closing tag is written as "<h1>" rather than "</h1>" -- confirm intent.
self.response.out.write("<h1> Hello " + self.profile.screenname +"<h1>")
# Request handler for the OAuth callback: exchanges the verifier for an
# access token, stores the result in a Profile keyed by the screen name, and
# redirects to /profile.
# NOTE(review): the indentation of this snippet was lost; which statements
# belong to the `if profile is None:` body cannot be confirmed from the text.
class AuthorizeTwitter(webapp2.RequestHandler):
def get(self):
oauth_verifier = self.request.get("oauth_verifier")
token = oauth.Token(self.request.get('oauth_token'), self.request.get('oauth_token_secret'))
token.set_verifier(oauth_verifier)
# Local client bound to the request token; shadows the module-level `client`.
client = oauth.Client(consumer, token)
resp, content = client.request(access_token_url, "POST")
# The response body is parsed as a query string into a dict.
access_token = dict(urlparse.parse_qsl(content))
oauth_token = access_token['oauth_token']
oauth_token_secret = access_token['oauth_token_secret']
userid = access_token['user_id']
# NOTE(review): the screen name is published through a module-level global
# that ProfilePage reads; the value is shared by every request served by
# this module instance.
global screenname
screenname = access_token['screen_name']
logging.info(screenname)
# Look up the entity by key name, creating it when none exists yet.
profile = Profile.get_by_key_name(screenname)
if profile is None:
profile = Profile(key_name = screenname)
profile.twitter_id = userid
profile.access_token = oauth_token
profile.access_token_secret = oauth_token_secret
profile.twitter_user_name = screenname
profile.save()
self.redirect("/profile")
application = webapp2.WSGIApplication([
('/', MainHandler),
('/signin', SignInWithTwitter),
('/services/twitter/authorized', AuthorizeTwitter),
('/profile', ProfilePage),
], debug=True)
login.html is simply a button that redirects you to /signin
The error I'm getting is this:
BadKeyError: Invalid string key TwitterUserNameHere===. Details: Incorrect padding
Also, is there a way I can get the profile of the currently logged-in user once I redirect my user to /profile? Or will I need to keep accessing my database using my global screenname variable?
EDIT
Traceback (most recent call last):
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.2/webapp2.py", line 1535, in __call__
rv = self.handle_exception(request, response, e)
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.2/webapp2.py", line 1529, in __call__
rv = self.router.dispatch(request, response)
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.2/webapp2.py", line 1278, in default_dispatcher
return route.handler_adapter(request, response)
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.2/webapp2.py", line 1102, in __call__
return handler.dispatch()
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.2/webapp2.py", line 572, in dispatch
return self.handle_exception(e, self.app.debug)
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.2/webapp2.py", line 570, in dispatch
return method(*args, **kwargs)
File "/base/data/home/apps/s~howaggieru/1.371718316886323623/main.py", line 56, in get
self.profile = Profile.get(screenname)
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/ext/db/__init__.py", line 1238, in get
results = get(keys, **kwargs)
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/ext/db/__init__.py", line 1533, in get
return get_async(keys, **kwargs).get_result()
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/ext/db/__init__.py", line 1492, in get_async
keys, multiple = datastore.NormalizeAndTypeCheckKeys(keys)
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/api/datastore.py", line 178, in NormalizeAndTypeCheckKeys
keys = [_GetCompleteKeyOrError(key) for key in keys]
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/api/datastore.py", line 2782, in _GetCompleteKeyOrError
key = Key(arg)
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/api/datastore_types.py", line 371, in __init__
'Invalid string key %s. Details: %s' % (encoded, e))
BadKeyError: Invalid string key TwitterUsername===. Details: Incorrect padding
The traceback shows that your error is happening in ProfilePage. Now, in AuthorizeTwitter you are correctly doing profile = Profile.get_by_key_name(screenname) - but in ProfilePage you do profile = Profile.get(screenname). That assumes that "screenname" is the entire string key, which it isn't, hence the error. Use get_by_key_name there too.
Saving the screenname as a module-level global will lead you into all sorts of problems once you have more than one user at a time. Instead you should pass it around as a URL parameter: you're redirecting from the authorize handler to the profile one, so you should pass the twitter handle as a URL parameter to the profile function. The webapp2 docs show how to do that.
I have the same piece of code for dealing with the Twitter User Stream running on two different machines. Both machines run Ubuntu Lucid with Python 2.6.5, but on the machine at my home I receive HTTP Error 401: Unauthorized, while at the university it works perfectly. On both machines it works perfectly when I use curl with the same parameters, i.e., consumer key, consumer secret, access token, and access key.
See the code below; it was created by Josh Sharp.
from oauth.oauth import OAuthRequest, OAuthSignatureMethod_HMAC_SHA1
from hashlib import md5
import json, time
import random, math, re, urllib, urllib2
STREAM_URL = "https://userstream.twitter.com/2/user.json"
class Token(object):
    """Simple holder for an OAuth key/secret pair."""

    def __init__(self, key, secret):
        """Store *key* and *secret* as attributes of the same names."""
        self.key = key
        self.secret = secret

    def _generate_nonce(self):
        """Return a 32-character hexadecimal nonce.

        The nonce is the MD5 digest of the current time concatenated with
        40 random decimal digits.
        """
        random_digits = ''.join(str(random.randint(0, 9)) for _ in range(40))
        # hashlib requires bytes on Python 3; encoding an ASCII-only str is
        # also valid on Python 2, so this form works on both.
        seed = (str(time.time()) + random_digits).encode('ascii')
        return md5(seed).hexdigest()
# Script body: build an OAuth 1.0 HMAC-SHA1 signed request for STREAM_URL,
# open it with urllib2, and print the stream one line at a time.
CONSUMER_KEY = 'consumer_key'
CONSUMER_SECRET = 'consumer_secret'
ACCESS_TOKEN = 'token'
ACCESS_TOKEN_SECRET = 'token_secret'
access_token = Token(ACCESS_TOKEN,ACCESS_TOKEN_SECRET)
consumer = Token(CONSUMER_KEY,CONSUMER_SECRET)
# NOTE: oauth_timestamp is taken from the local system clock via time.time().
parameters = {
'oauth_consumer_key': CONSUMER_KEY,
'oauth_token': access_token.key,
'oauth_signature_method': 'HMAC-SHA1',
'oauth_timestamp': str(int(time.time())),
'oauth_nonce': access_token._generate_nonce(),
'oauth_version': '1.0',
}
oauth_request = OAuthRequest.from_token_and_callback(access_token,
http_url=STREAM_URL,
parameters=parameters)
signature_method = OAuthSignatureMethod_HMAC_SHA1()
signature = signature_method.build_signature(oauth_request, consumer, access_token)
parameters['oauth_signature'] = signature
# The signed parameters are sent URL-encoded in the query string.
data = urllib.urlencode(parameters)
req = urllib2.urlopen("%s?%s" % (STREAM_URL,data))
buffer = ''
# We're using urllib2 to avoid external dependencies
# even though pyCurl actually handles the callbacks
# much more gracefully than this clumsy method.
# We read a byte at a time until we find a newline
# which indicates the end of a chunk.
while True:
chunk = req.read(1)
# An empty read means the stream ended: print what is left and stop.
if not chunk:
print buffer
break
# NOTE(review): unicode() is applied to one byte at a time; with Python 2's
# default ASCII codec this presumably raises UnicodeDecodeError on any byte
# >= 0x80 (e.g. part of a multi-byte UTF-8 character) -- confirm.
chunk = unicode(chunk)
buffer += chunk
# Split at the first newline: print the completed line and keep the rest.
tweets = buffer.split("\n",1)
if len(tweets) > 1:
print tweets[0]
buffer = tweets[1]
The error when I try to run it at home is:
File "py_stream.py", line 48, in <module>
req = urllib2.urlopen("%s?%s" % (STREAM_URL,data))
File "/usr/lib/python2.6/urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.6/urllib2.py", line 397, in open
response = meth(req, response)
File "/usr/lib/python2.6/urllib2.py", line 510, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.6/urllib2.py", line 435, in error
return self._call_chain(*args)
File "/usr/lib/python2.6/urllib2.py", line 369, in _call_chain
result = func(*args)
File "/usr/lib/python2.6/urllib2.py", line 518, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 401: Unauthorized
Since it works using curl on both machines, I suppose nothing is wrong with the SSL certificate. But at the same time it makes me wonder what fails when I use it at home.
After many weeks of trying to find the problem, I discovered that my machine's clock was not well synced with that of the Twitter Stream servers. The request is signed with an oauth_timestamp taken from the local clock, so Twitter rejected it with 401: Unauthorized.
If you are using Ubuntu, you can solve this problem using ntpdate as follows:
sudo ntpdate ntp.ubuntu.com