Using requests.adapters.HTTPAdapter for testing - python

I'm trying to test some requests code by providing a mocked-out TestAdapter to my session object. Basically, my code looks like this:
URL = 'http://blahblahblah'

class TestAdapter(requests.adapters.HTTPAdapter):
    def __init__(self, response):
        self._response = response
        super(TestAdapter, self).__init__()

    def send(self, request, *args, **kwargs):
        return self.build_response(request, self._response)

resp = urllib3.HTTPResponse(
    body=json.dumps({'results': results}),
    status=200,
    headers={'content-type': 'application/json'})
adapter = TestAdapter(resp)
session = requests.Session()
session.mount(URL, adapter)
response = session.post(URL)
response.json()
However, this usage raises an error from the depths of urllib3:
Error
Traceback (most recent call last):
File "tests/test_web_cache_client.py", line 40, in test_valid_reponse
response.json()
File "/home/wilner/.virtualenvs/hub/local/lib/python2.7/site-packages/requests/models.py", line 778, in json
if not self.encoding and len(self.content) > 3:
File "/home/wilner/.virtualenvs/hub/local/lib/python2.7/site-packages/requests/models.py", line 724, in content
self._content = bytes().join(self.iter_content(CONTENT_CHUNK_SIZE)) or bytes()
File "/home/wilner/.virtualenvs/hub/local/lib/python2.7/site-packages/requests/models.py", line 653, in generate
for chunk in self.raw.stream(chunk_size, decode_content=True):
File "/home/wilner/.virtualenvs/hub/local/lib/python2.7/site-packages/urllib3/response.py", line 255, in stream
while not is_fp_closed(self._fp):
File "/home/wilner/.virtualenvs/hub/local/lib/python2.7/site-packages/urllib3/util/response.py", line 22, in is_fp_closed
raise ValueError("Unable to determine whether fp is closed.")
ValueError: Unable to determine whether fp is closed.
I could definitely find another way of testing this stuff, but this seems like it should work. What am I doing wrong?

Betamax is a library that does something very similar to what you want. The way I do it in betamax can be found here (and is reproduced below for posterity):
def add_urllib3_response(serialized, response):
    if 'base64_string' in serialized['body']:
        body = io.BytesIO(
            base64.b64decode(serialized['body']['base64_string'].encode())
        )
    else:
        body = body_io(**serialized['body'])

    h = HTTPResponse(
        body,
        status=response.status_code,
        headers=response.headers,
        preload_content=False,
        original_response=MockHTTPResponse(response.headers)
    )

    response.raw = h
The important thing here is that body (the first parameter to HTTPResponse) is an io.BytesIO object in every case. Make sure you're passing bytes, i.e. json.dumps({'results': results}).encode('utf-8') needs to be passed to io.BytesIO.
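Put together with the question's code, a minimal sketch of the fix (assuming the results payload from the question) wraps the JSON bytes in io.BytesIO and passes preload_content=False so urllib3 can stream from it:

import io
import json

import requests
import urllib3

class TestAdapter(requests.adapters.HTTPAdapter):
    def __init__(self, response):
        self._response = response
        super(TestAdapter, self).__init__()

    def send(self, request, *args, **kwargs):
        return self.build_response(request, self._response)

results = []  # placeholder payload for this sketch
body = io.BytesIO(json.dumps({'results': results}).encode('utf-8'))
resp = urllib3.HTTPResponse(
    body=body,
    status=200,
    headers={'content-type': 'application/json'},
    preload_content=False)

session = requests.Session()
session.mount('http://blahblahblah', TestAdapter(resp))
response = session.post('http://blahblahblah')
print(response.json())  # {'results': []}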

I've worked around this by setting requests.Response._content = requests.adapters.HTTPAdapter._body to avoid the erroneous fp check. I think this is a bug in the library.
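If all the test needs is the parsed body, a rough sketch of that kind of workaround (hypothetical helper, not the author's exact code) is to build the requests.Response yourself and set _content directly, so urllib3 and its fp check are never involved:

import json

import requests

def make_fake_response(payload, status=200):
    # Build a requests.Response without going through an adapter or urllib3.
    response = requests.Response()
    response.status_code = status
    response.headers['content-type'] = 'application/json'
    # response.content and response.json() read from _content,
    # so the is_fp_closed check never runs.
    response._content = json.dumps(payload).encode('utf-8')
    return response

fake = make_fake_response({'results': []})
print(fake.json())  # {'results': []}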

Related

Python telegram bot's `get_chat_members_count` & avoiding flood limits or how to use wrappers and decorators

I'm checking a list of around 3000 telegram chats to get and retrieve the number of chat members in each chat using the get_chat_members_count method.
At some point I'm hitting a flood limit and getting temporarily banned by the Telegram Bot API.
Traceback (most recent call last):
File "C:\Users\alexa\Desktop\ico_icobench_2.py", line 194, in <module>
ico_tel_memb = bot.get_chat_members_count('#' + ico_tel_trim, timeout=60)
File "C:\Python36\lib\site-packages\telegram\bot.py", line 60, in decorator
result = func(self, *args, **kwargs)
File "C:\Python36\lib\site-packages\telegram\bot.py", line 2006, in get_chat_members_count
result = self._request.post(url, data, timeout=timeout)
File "C:\Python36\lib\site-packages\telegram\utils\request.py", line 278, in post
**urlopen_kwargs)
File "C:\Python36\lib\site-packages\telegram\utils\request.py", line 208, in _request_wrapper
message = self._parse(resp.data)
File "C:\Python36\lib\site-packages\telegram\utils\request.py", line 168, in _parse
raise RetryAfter(retry_after)
telegram.error.RetryAfter: Flood control exceeded. Retry in 85988 seconds
The python-telegram-bot wiki gives a detailed explanation and example on how to avoid flood limits here.
However, I'm struggling to implement their solution and I hope someone here has more knowledge of this than myself.
I have literally copied and pasted their example and can't get it to work, no doubt because I'm new to Python. I'm guessing I'm missing some definitions, but I'm not sure which. Here is the code, and after that the first error I'm receiving. Obviously TOKEN needs to be replaced with your token.
import telegram.bot
from telegram.ext import messagequeue as mq

class MQBot(telegram.bot.Bot):
    '''A subclass of Bot which delegates send method handling to MQ'''
    def __init__(self, *args, is_queued_def=True, mqueue=None, **kwargs):
        super(MQBot, self).__init__(*args, **kwargs)
        # below 2 attributes should be provided for decorator usage
        self._is_messages_queued_default = is_queued_def
        self._msg_queue = mqueue or mq.MessageQueue()

    def __del__(self):
        try:
            self._msg_queue.stop()
        except:
            pass
        super(MQBot, self).__del__()

    @mq.queuedmessage
    def send_message(self, *args, **kwargs):
        '''Wrapped method would accept new `queued` and `isgroup`
        OPTIONAL arguments'''
        return super(MQBot, self).send_message(*args, **kwargs)

if __name__ == '__main__':
    from telegram.ext import MessageHandler, Filters
    import os

    token = os.environ.get('TOKEN')
    # for test purposes limit global throughput to 3 messages per 3 seconds
    q = mq.MessageQueue(all_burst_limit=3, all_time_limit_ms=3000)
    testbot = MQBot(token, mqueue=q)
    upd = telegram.ext.updater.Updater(bot=testbot)

    def reply(bot, update):
        # tries to echo 10 msgs at once
        chatid = update.message.chat_id
        msgt = update.message.text
        print(msgt, chatid)
        for ix in range(10):
            bot.send_message(chat_id=chatid, text='%s) %s' % (ix + 1, msgt))

    hdl = MessageHandler(Filters.text, reply)
    upd.dispatcher.add_handler(hdl)
    upd.start_polling()
The first error I get is:
Traceback (most recent call last):
File "C:\Users\alexa\Desktop\z test.py", line 34, in <module>
testbot = MQBot(token, mqueue=q)
File "C:\Users\alexa\Desktop\z test.py", line 9, in __init__
super(MQBot, self).__init__(*args, **kwargs)
File "C:\Python36\lib\site-packages\telegram\bot.py", line 108, in __init__
self.token = self._validate_token(token)
File "C:\Python36\lib\site-packages\telegram\bot.py", line 129, in _validate_token
if any(x.isspace() for x in token):
TypeError: 'NoneType' object is not iterable
The second issue I have is how to use wrappers and decorators with get_chat_members_count.
The code I have added to the example is:
@mq.queuedmessage
def get_chat_members_count(self, *args, **kwargs):
    return super(MQBot, self).get_chat_members_count(*args, **kwargs)
But nothing happens and I don't get my count of chat members. I'm also not saying which chat I need to count, so it's not surprising I'm getting nothing back, but where am I supposed to put the Telegram chat id?
You are getting this error because MQBot receives an empty token. For some reason, it does not raise a descriptive exception but instead crashes unexpectedly.
So why is token empty? It seems that you are using os.environ.get incorrectly. os.environ is a dictionary, and its get method lets you access the dict's contents safely. According to the docs:
get(key[, default])
Return the value for key if key is in the dictionary, else default. If default is not given, it defaults to None, so that this method never raises a KeyError.
According to your question, in this part token = os.environ.get('TOKEN') you pass the token itself as the key. Instead, you should have passed the name of the environment variable which contains your token.
You can fix this either by rewriting that part to hard-code the token, e.g. token = '<your actual token>', or by setting the environment variable correctly and reading it via os.environ.get under the correct name.
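A minimal sketch of the second option (the variable name TOKEN is just this example's choice; MQBot and q are as defined in the question):

import os

# Set the environment variable before running the script, e.g.:
#   export TOKEN="123456:ABC-your-real-bot-token"   (Linux/macOS)
#   set TOKEN=123456:ABC-your-real-bot-token        (Windows cmd)
token = os.environ.get('TOKEN')
if token is None:
    raise RuntimeError('TOKEN environment variable is not set')

testbot = MQBot(token, mqueue=q)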

normally my api returns json, but sometimes returns a full response object?

All,
I have a script in place which fetches JSON off of a web server. It's as simple as the following:
url = "foo.com/json"
response = requests.get(url).content
data = json.loads(response)
But what I noticed is that sometimes, instead of returning the JSON object, it will return what looks like a response dump. See here: https://pastebin.com/fUy5YMuY
What confuses me is how to continue on.
Right now I took the above Python and wrapped it:
try:
    url = "foo.com/json"
    response = requests.get(url).content
    data = json.loads(response)
except Exception as ex:
    with open("test.txt", "w") as t:
        t.write(response)
    print("Error", sys.exc_info())
Is there a way to catch this? Right now I get a ValueError... and then reparse it? I was thinking to do something like:
except Exception as ex:
    response = reparse(response)
but I'm still confused as to why it will sometimes return the JSON and other times the header info + content.
def reparse(response):
    """
    Catch the ValueError and attempt to reparse it for the json content
    """
Can I feed something like the pastebin dump into some sort of requests.Response class or similar?
Edit: Here is the full stack trace I am getting.
File "scrape_people_by_fcc_docket.py", line 82, in main
json_data = get_page(limit, page*limit)
File "scrape_people_by_fcc_docket.py", line 13, in get_page
data = json.loads(response)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.py", line 338, in loads
return _default_decoder.decode(s)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 369, in decode
raise ValueError(errmsg("Extra data", s, end, len(s)))
ValueError: Extra data: line 2 column 1 - line 16 column 367717 (char 3 - 368222)
None
In the above code, the response variable is defined by:
response = requests.get(url).content
which is odd, because most of the time response will be a completely parsable JSON object.
Ideally, I have been trying to find a way, when the content isn't JSON, to somehow parse it for the actual content and then continue on.
Instead of using .text or .content, you can use the response method .json(), which so far seems to resolve my issues. I am doing continual testing and watching for errors and will update this as needed, but it seems that the json() method returns the data I need without headers, and already calls json.loads or similar to parse the information.
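A minimal sketch of that approach, with a fallback that saves the raw body whenever it isn't valid JSON (the foo.com URL is just the question's placeholder):

import requests

url = "http://foo.com/json"  # placeholder from the question
resp = requests.get(url)
resp.raise_for_status()  # surface HTTP errors instead of trying to parse an error page

try:
    data = resp.json()  # parses the body as JSON for you
except ValueError:
    # The body was not JSON; keep it around for inspection.
    with open("test.txt", "w") as t:
        t.write(resp.text)
    raise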

urllib.request.urlopen not accepting query string with spaces

I am taking a Udacity course on Python where we are supposed to check for profane words in a document. I am using the website http://www.wdylike.appspot.com/?q= (text_to_be_checked_for_profanity). The text to be checked can be passed as a query string in the above URL, and the website returns true or false after checking for profane words. Below is my code.
import urllib.request

# Read the content from a document
def read_content():
    quotes = open("movie_quotes.txt")
    content = quotes.read()
    quotes.close()
    check_profanity(content)

def check_profanity(text_to_read):
    connection = urllib.request.urlopen("http://www.wdylike.appspot.com/?q=" + text_to_read)
    result = connection.read()
    print(result)
    connection.close

read_content()
It gives me the following error
Traceback (most recent call last):
File "/Users/Vrushita/Desktop/Rishit/profanity_check.py", line 21, in <module>
read_content()
File "/Users/Vrushita/Desktop/Rishit/profanity_check.py", line 11, in read_content
check_profanity(content)
File "/Users/Vrushita/Desktop/Rishit/profanity_check.py", line 16, in check_profanity
connection = urllib.request.urlopen("http://www.wdylike.appspot.com/?q="+text_to_read)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 163, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 472, in open
response = meth(req, response)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 582, in http_response
'http', request, response, code, msg, hdrs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 510, in error
return self._call_chain(*args)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 444, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 590, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 400: Bad Request
The document that I am trying to read the content from contains the string "Hello world". However, if I change the string to "Hello+world", the same code works and returns the desired result. Can someone explain why this is happening and what a workaround for this is?
urllib accepts it, the server doesn't. And well it should not, because a space is not a valid URL character.
Escape your query string properly with urllib.parse.quote_plus(); it'll ensure your string is valid for use in query parameters. Or better still, use the urllib.parse.urlencode() function to encode all key-value pairs:
from urllib.parse import urlencode
params = urlencode({'q': text_to_read})
connection = urllib.request.urlopen("http://www.wdylike.appspot.com/?" + params)
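Dropped into the question's check_profanity, a minimal sketch would look like this (the function name is the question's own):

import urllib.request
from urllib.parse import urlencode

def check_profanity(text_to_read):
    params = urlencode({'q': text_to_read})  # percent-encodes spaces and other special characters
    connection = urllib.request.urlopen("http://www.wdylike.appspot.com/?" + params)
    result = connection.read()
    connection.close()
    print(result)  # b'true' if profanity was found, b'false' otherwise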
The below response is for Python 3.*
A 400 Bad Request occurs when there is a space within your input text.
To avoid this, use parse, so import it:
from urllib import request, parse
If you are sending any text along with the URL, then quote the text:
url = "http://www.wdylike.appspot.com/?q="
url = url + parse.quote(input_to_check)
Check the explanation here - https://discussions.udacity.com/t/problem-in-profanity-with-python-3-solved/227328
The Udacity profanity checker program -
from urllib import request, parse
def read_file():
    fhand = open(r"E:\Python_Programming\Udacity\movie_quotes.txt")
    file_content = fhand.read()
    # print(file_content)
    fhand.close()
    profanity_check(file_content)

def profanity_check(input_to_check):
    url = "http://www.wdylike.appspot.com/?q="
    url = url + parse.quote(input_to_check)
    req = request.urlopen(url)
    answer = req.read()
    # print(answer)
    req.close()
    if b"true" in answer:
        print("Profanity Alert!!!")
    else:
        print("Nothing to worry")

read_file()
I think this code is closer to what the lesson was aiming at, illustrating the difference between native functions, classes, and functions inside classes:
from urllib import request, parse

def read_text():
    quotes = open('C:/Users/Alejandro/Desktop/movie_quotes.txt', 'r+')
    contents_of_file = quotes.read()
    print(contents_of_file)
    check_profanity(contents_of_file)
    quotes.close()

def check_profanity(text_to_check):
    connection = request.urlopen('http://www.wdylike.appspot.com/?q=' + parse.quote(text_to_check))
    output = connection.read()
    # print(output)
    connection.close()
    if b"true" in output:
        print("Profanity Alert!!!")
    elif b"false" in output:
        print("This document has no curse words!")
    else:
        print("Could not scan the document properly")

read_text()
I'm working on the same project, also using Python 3 like most here.
While looking for the solution in Python 3, I found this HowTo, and I decided to give it a try.
It seems that on some websites, including Google, connections made from code (for example, via the urllib module) sometimes do not work properly. Apparently this has to do with the User-Agent, which the website receives when the connection is built.
I did some further research and came up with the following solution:
First I imported URLopener from urllib.request and created a class called ForceOpen as a subclass of URLopener.
Now I could create a "regular" User-Agent by setting the variable version inside the ForceOpen class. Then I just created an instance of it and used its open method in place of urlopen to open the URL.
(It works fine, but I'd still appreciate comments, suggestions or any feedback, also because I'm not absolutely sure whether this is a good alternative. Many thanks.)
from urllib.request import URLopener

class ForceOpen(URLopener):  # create a subclass of URLopener
    version = "Mozilla/5.0 (cmp; Konqueror ...)(Kubuntu)"

force_open = ForceOpen()  # create an instance of it

def read_text():
    quotes = open(
        "/.../profanity_editor/data/quotes.txt"
    )
    contents_of_file = quotes.read()
    print(contents_of_file)
    quotes.close()
    check_profanity(contents_of_file)

def check_profanity(text_to_check):
    # now use the open method to open the URL
    connection = force_open.open(
        "http://www.wdylike.appspot.com/?q=" + text_to_check
    )
    output = connection.read()
    connection.close()
    if b"true" in output:
        print("Attention! Curse word(s) have been detected.")
    elif b"false" in output:
        print("No curse word(s) found.")
    else:
        print("Error! Unable to scan document.")

read_text()
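For what it's worth, URLopener is deprecated in Python 3; a rough sketch of the same idea using urllib.request.Request, which lets you set the User-Agent header and still quote the text, might look like this:

from urllib import parse, request

def check_profanity(text_to_check):
    url = "http://www.wdylike.appspot.com/?q=" + parse.quote(text_to_check)
    # Send a browser-like User-Agent along with the request.
    req = request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
    with request.urlopen(req) as connection:
        output = connection.read()
    if b"true" in output:
        print("Attention! Curse word(s) have been detected.")
    elif b"false" in output:
        print("No curse word(s) found.")
    else:
        print("Error! Unable to scan document.")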

problems getting data from FlightRadar24 with urllib2

I'm trying to get data from FlightRadar24 using the script below, based on this answer to handle cookies. When I currently type that URL into a browser, I get a nice long JSON dictionary including a list of lat/long/alt updates. But when I try the code below, I get the error message listed below.
What do I need to do to successfully read the json into python?
NOTE: that link may stop working in a week or two - they don't make the data available forever.
import urllib2
import cookielib
jar = cookielib.FileCookieJar("cookies")
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
url = "http://lhr.data.fr24.com/_external/planedata_json.1.3.php?f=72c5ef5"
response = opener.open(url)
print response.headers
print "Got page"
print "Currently have %d cookies" % len(jar)
print jar
Traceback (most recent call last):
File "[mypath]/test v00.py", line 8, in
response = opener.open(link)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 410, in open
response = meth(req, response)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 523, in http_response
'http', request, response, code, msg, hdrs)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 448, in error
return self._call_chain(*args)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 531, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
HTTPError: HTTP Error 403: Forbidden
I am not sure what you need cookies for, but the issue is that the web server is blocking access for the User-Agent sent by urllib in the request header (which is something like 'Python-urllib/2.7').
You should add a valid browser User-Agent to the header to get the correct data. Example:
import urllib2
url = "http://lhr.data.fr24.com/_external/planedata_json.1.3.php?f=72c5ef5"
req = urllib2.Request(url, headers={"Connection":"keep-alive", "User-Agent":"Mozilla/5.0"})
response = urllib2.urlopen(req)
jsondata = response.read()
The first answer by @AnandSKumar is the accepted answer, but here are a few more lines that are helpful, since jsondata = response.read() returns a string.
NOTE: that link may stop working in a week or two - they don't make the data available forever.
import urllib2
import json
import numpy as np
import matplotlib.pyplot as plt
# FROM this question: https://stackoverflow.com/a/32163003
# and THIS ANSWER: https://stackoverflow.com/a/32163003/3904031
# and a little from here: https://stackoverflow.com/a/6826511
url = "http://lhr.data.fr24.com/_external/planedata_json.1.3.php?f=72c5ef5"
req = urllib2.Request(url, headers={"Connection":"keep-alive", "User-Agent":"Mozilla/5.0"})
response = urllib2.urlopen(req)
the_dict = json.loads(response.read())
trail = the_dict['trail']
trailarray = np.array(trail)
s0, s1 = len(trailarray)/3, 3
lat, lon, alt = trailarray[:s0*s1].reshape(s0,s1).T
alt *= 10. # they drop the last zero
# plot raw data of the trail. Note there are gaps - no time information here
plt.figure()
plt.subplot(2,2,1)
plt.plot(lat)
plt.hold
plt.plot(lon)
plt.title('raw lat lon')
plt.subplot(2,2,3)
plt.plot(alt)
plt.title('raw alt')
plt.subplot(1,2,2)
plt.plot(lon, lat)
plt.title('raw lat vs lon')
plt.text(-40, 46, "this segment is")
plt.text(-40, 45.5, "transatlantic")
plt.text(-40, 45, "gap in data")
plt.savefig('raw lat lon alt')
plt.show()
To convert the time and date info to human form:
import datetime

def humanize(seconds_since_epoch):
    """ from https://stackoverflow.com/a/15953715/3904031 """
    return datetime.datetime.fromtimestamp(seconds_since_epoch).strftime('%Y-%m-%d %H:%M:%S')

humanize(the_dict['arrival'])
returns
'2015-08-20 17:43:50'

python argument taking 3 arguments? Where?

I'm working with the Google Safe Browsing API, and the following code:
def getlist(self, type):
    dlurl = "safebrowsing.clients.google.com/safebrowsing/downloads?client=api&apikey=" + api_key + "&appver=1.0&pver=2.2"
    phish = "googpub-phish-shavar"
    mal = "goog-malware-shavar"
    self.type = type
    if self.type == "phish":
        req = urllib.urlopen(dlurl, phish)
        data = req.read()
        print(data)
Produces the following trace back:
File "./test.py", line 39, in getlist
req = urllib.urlopen(dlurl, phish )
File "/usr/lib/python2.6/urllib.py", line 88, in urlopen
return opener.open(url, data)
File "/usr/lib/python2.6/urllib.py", line 209, in open
return getattr(self, name)(url, data)
TypeError: open_file() takes exactly 2 arguments (3 given)
What am I doing wrong here? I can't spot where 3 arguments are being passed.
BTW, I'm calling this with
x = class()
x.getlist("phish")
Basically, you didn't supply a scheme in the URL, so Python assumed it was a file URL and tried to open it as a file, which doesn't work (and throws a confusing error message in the process of failing).
Try:
dlurl = "http://safebrowsing.clients.google.com/safebrowsing/downloads?client=api&apikey=" + api_key + "&appver=1.0&pver=2.2"
The function urllib.urlopen opens a network object denoted by a URL for reading. If the URL does not have a scheme identifier, it opens a file.
The appropriate opener is called at line 88 of urllib.py, which dispatches to open_file at line 209.
If you look at the function:
def open_file(self, url):
    """Use local file or FTP depending on form of URL."""
Answer: you should be providing a scheme like http://...
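As a rough illustration of where the three arguments come from (based on the Python 2 urllib source referenced in the traceback): open() splits the scheme off the URL, falls back to the file opener when there is no scheme, and then calls open_file(url, data) because phish was passed as POST data; counting self, that is three arguments to a method that accepts only two.

import urllib

# No scheme -> urllib falls back to the 'file' opener
print(urllib.splittype("safebrowsing.clients.google.com/safebrowsing/downloads"))
# (None, 'safebrowsing.clients.google.com/safebrowsing/downloads')

# With a scheme -> dispatched to open_http, which does accept POST data
print(urllib.splittype("http://safebrowsing.clients.google.com/safebrowsing/downloads"))
# ('http', '//safebrowsing.clients.google.com/safebrowsing/downloads')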
