Python requests: URL base in Session - python

When using a Session, it seems you need to provide the full URL each time, e.g.
session = requests.Session()
session.get('http://myserver/getstuff')
session.get('http://myserver/getstuff2')
This gets a little tedious. Is there a way to do something like:
session = requests.Session(url_base='http://myserver')
session.get('/getstuff')
session.get('/getstuff2')

This feature has been asked on the forums a few times 1, 2, 3. The preferred approach as documented here, is subclassing, as follows:
from requests import Session
from urllib.parse import urljoin
class LiveServerSession(Session):
def __init__(self, base_url=None):
super().__init__()
self.base_url = base_url
def request(self, method, url, *args, **kwargs):
joined_url = urljoin(self.base_url, url)
return super().request(method, joined_url, *args, **kwargs)
You would use this simply as follows:
baseUrl = 'http://api.twitter.com'
with LiveServerSession(baseUrl) as s:
resp = s.get('/1/statuses/home_timeline.json')

requests_toolbelt.sessions.BaseUrlSession
https://github.com/requests/toolbelt/blob/f5c86c51e0a01fbc8b3b4e1c286fd5c7cb3aacfa/requests_toolbelt/sessions.py#L6
NOTE: This uses urljoin from standard lib. Beware of urljoin's behavior.
In [14]: from urlparse import urljoin
In [15]: urljoin('https://localhost/api', '/resource')
Out[15]: 'https://localhost/resource'
In [16]: urljoin('https://localhost/api', 'resource')
Out[16]: 'https://localhost/resource'
In [17]: urljoin('https://localhost/api/', '/resource')
Out[17]: 'https://localhost/resource'
In [18]: urljoin('https://localhost/api/', 'resource')
Out[18]: 'https://localhost/api/resource'
OR
import requests
from functools import partial
def PrefixUrlSession(prefix=None):
if prefix is None:
prefix = ""
else:
prefix = prefix.rstrip('/') + '/'
def new_request(prefix, f, method, url, *args, **kwargs):
return f(method, prefix + url, *args, **kwargs)
s = requests.Session()
s.request = partial(new_request, prefix, s.request)
return s

You could just subclass request.Session and overload its __init__ and request methods like this:
# my_requests.py
import requests
class SessionWithUrlBase(requests.Session):
# In Python 3 you could place `url_base` after `*args`, but not in Python 2.
def __init__(self, url_base=None, *args, **kwargs):
super(SessionWithUrlBase, self).__init__(*args, **kwargs)
self.url_base = url_base
def request(self, method, url, **kwargs):
# Next line of code is here for example purposes only.
# You really shouldn't just use string concatenation here,
# take a look at urllib.parse.urljoin instead.
modified_url = self.url_base + url
return super(SessionWithUrlBase, self).request(method, modified_url, **kwargs)
And then you could use your subclass instead of requests.Session in your code:
from my_requests import SessionWithUrlBase
session = SessionWithUrlBase(url_base='https://stackoverflow.com/')
session.get('documentation') # https://stackoverflow.com/documentation
Also you could monkey-patch requests.Session to avoid modifying existing codebase (this implementation should be 100% compatible), but be sure to do actual patching before any code calls requests.Session():
# monkey_patch.py
import requests
class SessionWithUrlBase(requests.Session):
...
requests.Session = SessionWithUrlBase
And then:
# main.py
import requests
import monkey_patch
session = requests.Session()
repr(session) # <monkey_patch.SessionWithUrlBase object at ...>

I don't see a built-in way to do this, but you can use wrapper functions to add the functionality you want:
from functools import wraps
import inspect
import requests
from requests.compat import urljoin
def _base_url(func, base):
'''Decorator for adding a base URL to func's url parameter'''
#wraps(func)
def wrapper(*args, **kwargs):
argname = 'url'
argspec = inspect.getargspec(func)
if argname in kwargs:
kwargs[argname] = urljoin(base, kwargs[argname])
else:
# Find and replace url parameter in positional args. The argspec
# includes self while args doesn't, so indexes have to be shifted
# over one
for i, name in enumerate(argspec[0]):
if name == argname:
args = list(args)
args[i-1] = urljoin(base, args[i-1])
break
return func(*args, **kwargs)
return wrapper
def inject_base_url(func):
'''Decorator for adding a base URL to all methods that take a url param'''
#wraps(func)
def wrapper(*args, **kwargs):
argname = 'base_url'
if argname in kwargs:
obj = args[0]
# Add base_url decorator to all methods that have a url parameter
for name, method in inspect.getmembers(obj, inspect.ismethod):
argspec = inspect.getargspec(method.__func__)
if 'url' in argspec[0]:
setattr(obj, name, _base_url(method, kwargs[argname]))
del kwargs[argname]
return func(*args, **kwargs)
return wrapper
# Wrap requests.Session.__init__ so it takes a base_url parameter
setattr(
requests.Session,
'__init__',
inject_base_url(getattr(requests.Session, '__init__'))
)
Now you can specify a base URL when you construct a new requests.Session object:
s = requests.Session(base_url='http://stackoverflow.com')
s.get('questions') # http://stackoverflow.com/questions
s.post('documentation') # http://stackoverflow.com/documentation
# With no base_url, you get the default behavior
s = requests.Session()
s.get('http://google.com')

keep it simple and use builtin methods for joining (no '/' suffix hassle):
import urllib.parse
session = requests.Session()
session.my_base_url_join = lambda path: urllib.parse.urljoin(str_BASE_URL, path)
# use: session.get(session.my_base_url_join(path='/message'))

This is possible with the requests-toolbelt package, which enables setting a base_url at the session level:
from requests_toolbelt import sessions
s = sessions.BaseUrlSession(
base_url='https://example.com/resource/')
r = s.get('sub-resource/', params={'foo': 'bar'})
>>> print(r.request.url)
https://example.com/resource/sub-resource/?foo=bar
see documentation for BaseUrlSession.

Based on #qrtLs answer, here is a 3-4 line version that does what you want (assuming you don't need to create multiple sessions and only need the get method defined).
import requests
import urllib.parse
session = requests.Session()
session.base_url = "https://google.com/"
session.get = lambda *args, **kwargs: requests.Session.get(session, urllib.parse.urljoin(s.base_url, args[0]), *args[1:], **kwargs)
r = session.get("/search?q=asdf", verify=False)

Related

Count number of retries for each request

I use package requests together with urllib3.util.retry.Retry() to send tens of thousands of queries. I seek to count the number of queries and the number of necessary attempts until I successfully retrieve the desired data. My goal is to construct a measure for the reliability of the API.
To fix ideas, let's assume that the Response object of requests contains this data:
from requests import Session
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
def create_session():
session = Session()
retries = Retry(
total = 15,
backoff_factor = 0.5,
status_forcelist = [401, 408, 429, 500, 502, 504],
allowed_methods = frozenset(["GET"])
)
session.mount('http://', HTTPAdapter(max_retries=retries))
session.mount('https://', HTTPAdapter(max_retries=retries))
return session
urls = ['https://httpbin.org/status/500']
count_queries = len(urls)
count_attempts = 0
with create_session() as s:
for url in urls:
response = s.get(url)
count_attempts += response.total_retries
Since there is no such variable, I am looking for alternatives to count the total number of retries.
While I am unable to identify an approach to this problem, I made the following observations during my search which is potentially helpful:
urllib3 stores the retry-history in the Retry object. The urllib3.HTTPResponse stores the last Retry object (docs). The urllib3.HTTPResponse (to be precise, its undecoded body) is stored in requests.Response.raw, however only when stream=True (docs). In my understanding, I can't access this data.
One user provides a solution to a similar question that subclasses the Retry class. Essentially, a callback function is called which prints a string to a logger. This could be adapted to increment a counter instead of printing to logs. However, if possible, I prefer to track the retries specific to a particular get, as shown above, as opposed to all gets using the same session.
A very similar question was asked here, however no (working) solution was provided.
I'm using Python 3.9, urllib3 1.26.8, requests 2.26.0.
This is a rather verbose solution along the lines of this answer. It counts requests and retries on the session level (which, however, was not my preferred approach).
import requests
from urllib3.util.retry import Retry
class RequestTracker:
""" track queries and retries """
def __init__(self):
self._retries = 0
self._queries = 0
def register_retry(self):
self._retries += 1
def register_query(self):
self._queries += 1
#property
def retries(self):
return self._retries
#property
def queries(self):
return self._queries
class RetryTracker(Retry):
""" subclass Retry to track count of retries """
def __init__(self, *args, **kwargs):
self._request_tracker = kwargs.pop('request_tracker', None)
super(RetryTracker, self).__init__(*args, **kwargs)
def new(self, **kw):
""" pass additional information when creating new Retry instance """
kw['request_tracker'] = self._request_tracker
return super(RetryTracker, self).new(**kw)
def increment(self, method, url, *args, **kwargs):
""" register retry attempt when new Retry object with incremented counter is returned """
if self._request_tracker:
self._request_tracker.register_retry()
return super(RetryTracker, self).increment(method, url, *args, **kwargs)
class RetrySession(requests.Session):
""" subclass Session to track count of queries """
def __init__(self, retry):
super().__init__()
self._requests_count = retry
def prepare_request(self, request):
""" increment query counter """
# increment requests counter
self._requests_count.register_query()
return super().prepare_request(request)
class RequestManager:
""" manage requests """
def __init__(self, request_tracker=None):
# session settings
self.__session = None
self.__request_tracker = request_tracker
# retry logic specification
args = dict(
total = 11,
backoff_factor = 1,
status_forcelist = [401,408, 429, 500, 502, 504],
allowed_methods = frozenset(["GET"])
)
if self.__request_tracker is not None:
args['request_tracker'] = self.__request_tracker
self.__retries = RetryTracker(**args)
else:
self.__retries = Retry(**args)
#property
def session(self):
if self.__session is None:
# create new session
if self.__request_tracker is not None:
self.__session = RetrySession(self.__request_tracker)
else:
self.__session = requests.Session()
# mount https adapter with retry logic
https = requests.adapters.HTTPAdapter(max_retries=self.__retries)
self.__session.mount('https://', https)
return self.__session
#session.setter
def session(self, value):
raise AttributeError('Setting session attribute is prohibited.')
request_tracker = RequestTracker()
request_manager = RequestManager(request_tracker=request_tracker)
session = request_manager.session
urls = ['https://httpbin.org/status/500']
with session as s:
for url in urls:
response = s.get(url)
print(request_tracker.queries)
print(request_tracker.retries)

Getting type error in python

I am using a class based service in python and I get error whenever I want to use it. Unable to figure out the reason.
#!/usr/bin/python
# -*- coding: utf-8 -*-
from xml.dom import minidom
from pysimplesoap.client import SoapClient
from pysimplesoap.helpers import sort_dict
MEDIA_ROOT = '/User/sunand/documents/resumes/'
parser = ResumeParser()
names = parser.get_names(MEDIA_ROOT)
print names
class ParserClient(SoapClient):
""" Extends the soap client to encode the response with utf-8 encoding.
"""
def wsdl_call(
self,
method,
*args,
**kwargs
):
""" Override wsdl_call method to make sure unmarshall is not called.
"""
operation = self.get_operation(method)
# get i/o type declarations:
inp = operation['input']
header = operation.get('header')
if 'action' in operation:
self.action = operation['action']
# construct header and parameters
if header:
self.__call_headers = sort_dict(header, self.__headers)
(method, params) = self.wsdl_call_get_params(method, inp,
*args, **kwargs)
response = self.call(method, *params)
return response
def send(self, method, xml):
""" Overrides the send method to get the actual xml content.
"""
content = super(ParserClient, self).send(method, xml)
self.result = content
return content
class ResumeParser(object):
""" Connects to the Resume Parser's XML api to get parsed data.
"""
def __init__(self, simple=True, timeout=60):
""" Initializes the ResumeParser class.
"""
self.wsdl = \
'http://jobsite.onlineresumeparser.com/rPlusParseResume.asmx?WSDL'
self.secret = 'my-secret-key' # Enter key here
self.encoding = 'base64'
self.simple = simple
self.client = ParserClient(wsdl=self.wsdl, timeout=timeout)
self.names = []
def get_file_content(self, file_path):
""" Return the encoded content for the given file.
"""
file_obj = open(os.path.abspath(file_path), 'r')
content = file_obj.read().encode(self.encoding)
file_obj.close()
return content
def get_names(self, path):
"""
Given a path to a folder that contains resume files this method
will parse the resumes and will return the names of the candidates
as a list.
"""
opt = os.path
resumes = [opt.join(path, r) for r in os.listdir(path)
if opt.isfile(opt.join(path, r))]
# Parse information for each resume.
for resume in resumes:
try:
xml_data = self.get_xml(resume)
name = self.get_name_from_xml(xml_data)
if name:
self.names.append(name)
except Exception, err:
# print name
print 'Error parsing resume: %s' % str(err)
return list(set(self.names))
def get_name_from_xml(self, data):
""" Returns the full name from the xml data given.
"""
xmldata = minidom.parseString(data)
name = xmldata.getElementsByTagName('CANDIDATE_FULL_NAME')
name = name[0].childNodes[0].data.title()
return name
def get_xml(self, filepath):
""" Fetches and returns the xml for the given file from the api.
"""
filename = os.path.basename(filepath)
extension = os.path.splitext(filepath)[1]
base64 = self.get_file_content(filepath)
filedata = {
'B64FileZippedContent': base64,
'FileName': filename,
'InputType': extension,
'UserID': 1,
'secretKey': self.secret,
}
get = \
(self.client.GetSimpleXML if self.simple else self.client.getHRXML)
get(**filedata)
return self.process_raw_xml()
def process_raw_xml(self, data=None):
""" Processes and returns the clean XML.
"""
raw = (data if data else self.client.result)
parsed = minidom.parseString(raw)
result = parsed.getElementsByTagName('GetSimpleXMLResult')[0]
text_node = result.childNodes[0]
data = text_node.data.encode('UTF-8')
return data
Upon running the code I am getting an error
TypeError: wsdl_call_get_params() got an unexpected keyword argument 'secretKey'
What am I doing wrong?
It looks like you are incorrectly overriding wsdl_call.
Firstly, we can see that SoapClient (which you extend in ParserClient), has a __getattr__ function that fetches pseudo-attributes of the SoapClient.
def __getattr__(self, attr):
"Return a pseudo-method that can be called"
if not self.services: # not using WSDL?
return lambda self=self, *args, **kwargs: self.call(attr,*args,**kwargs)
else: # using WSDL:
return lambda *args, **kwargs: self.wsdl_call(attr,*args,**kwargs)
You can see that this function is using wsdl_call to help it map functions to unknown attributes.
The specific pseudo-method that is causing the problem is in your code (or appears to be):
filedata = {
'B64FileZippedContent': base64,
'FileName': filename,
'InputType': extension,
'UserID': 1,
'secretKey': self.secret, # <-- the secretKey key word argument
}
get = \
(self.client.GetSimpleXML if self.simple else self.client.getHRXML)
get(**filedata)
# here client is an instance of your `ParserClient` (and `SoapClient`).
This above bit took me a while to track down. With a full stack trace I would have found it much quicker. Please always post stack traces (when there is one) in future when asking for help.
How to solve this
Provide a concrete implementation of GetSimpleXML and getHRXML. This will solve the immediate problem, but not the larger problem.
Rewrite wsdl_call
The rewritten section of code should check the value of the method argument and either do what you want, or delegate to the SoapClient implementation.
eg.
def wsdl_call(self, method, *args, **kwargs):
if method == "some_method":
return self._my_wsdl_call(method, *args, **kwargs)
else:
return super(ParserClient, self).wsdl_call(method, *args, **kwargs)
def _my_wsdl_call(self, method, *args, **kwargs):
...

How can I mock requests and the response?

I am trying to use Pythons mock package to mock Pythons requests module. What are the basic calls to get me working in below scenario?
In my views.py, I have a function that makes variety of requests.get() calls with different response each time
def myview(request):
res1 = requests.get('aurl')
res2 = request.get('burl')
res3 = request.get('curl')
In my test class I want to do something like this but cannot figure out exact method calls
Step 1:
# Mock the requests module
# when mockedRequests.get('aurl') is called then return 'a response'
# when mockedRequests.get('burl') is called then return 'b response'
# when mockedRequests.get('curl') is called then return 'c response'
Step 2:
Call my view
Step 3:
verify response contains 'a response', 'b response' , 'c response'
How can I complete Step 1 (mocking the requests module)?
This is how you can do it (you can run this file as-is):
import requests
import unittest
from unittest import mock
# This is the class we want to test
class MyGreatClass:
def fetch_json(self, url):
response = requests.get(url)
return response.json()
# This method will be used by the mock to replace requests.get
def mocked_requests_get(*args, **kwargs):
class MockResponse:
def __init__(self, json_data, status_code):
self.json_data = json_data
self.status_code = status_code
def json(self):
return self.json_data
if args[0] == 'http://someurl.com/test.json':
return MockResponse({"key1": "value1"}, 200)
elif args[0] == 'http://someotherurl.com/anothertest.json':
return MockResponse({"key2": "value2"}, 200)
return MockResponse(None, 404)
# Our test case class
class MyGreatClassTestCase(unittest.TestCase):
# We patch 'requests.get' with our own method. The mock object is passed in to our test case method.
#mock.patch('requests.get', side_effect=mocked_requests_get)
def test_fetch(self, mock_get):
# Assert requests.get calls
mgc = MyGreatClass()
json_data = mgc.fetch_json('http://someurl.com/test.json')
self.assertEqual(json_data, {"key1": "value1"})
json_data = mgc.fetch_json('http://someotherurl.com/anothertest.json')
self.assertEqual(json_data, {"key2": "value2"})
json_data = mgc.fetch_json('http://nonexistenturl.com/cantfindme.json')
self.assertIsNone(json_data)
# We can even assert that our mocked method was called with the right parameters
self.assertIn(mock.call('http://someurl.com/test.json'), mock_get.call_args_list)
self.assertIn(mock.call('http://someotherurl.com/anothertest.json'), mock_get.call_args_list)
self.assertEqual(len(mock_get.call_args_list), 3)
if __name__ == '__main__':
unittest.main()
Important Note: If your MyGreatClass class lives in a different package, say my.great.package, you have to mock my.great.package.requests.get instead of just 'request.get'. In that case your test case would look like this:
import unittest
from unittest import mock
from my.great.package import MyGreatClass
# This method will be used by the mock to replace requests.get
def mocked_requests_get(*args, **kwargs):
# Same as above
class MyGreatClassTestCase(unittest.TestCase):
# Now we must patch 'my.great.package.requests.get'
#mock.patch('my.great.package.requests.get', side_effect=mocked_requests_get)
def test_fetch(self, mock_get):
# Same as above
if __name__ == '__main__':
unittest.main()
Enjoy!
Try using the responses library. Here is an example from their documentation:
import responses
import requests
#responses.activate
def test_simple():
responses.add(responses.GET, 'http://twitter.com/api/1/foobar',
json={'error': 'not found'}, status=404)
resp = requests.get('http://twitter.com/api/1/foobar')
assert resp.json() == {"error": "not found"}
assert len(responses.calls) == 1
assert responses.calls[0].request.url == 'http://twitter.com/api/1/foobar'
assert responses.calls[0].response.text == '{"error": "not found"}'
It provides quite a nice convenience over setting up all the mocking yourself.
There's also HTTPretty:
It's not specific to requests library, more powerful in some ways though I found it doesn't lend itself so well to inspecting the requests that it intercepted, which responses does quite easily
There's also httmock.
A new library gaining popularity recently over the venerable requests is httpx, which adds first-class support for async. A mocking library for httpx is: https://github.com/lundberg/respx
Here is what worked for me:
import mock
#mock.patch('requests.get', mock.Mock(side_effect = lambda k:{'aurl': 'a response', 'burl' : 'b response'}.get(k, 'unhandled request %s'%k)))
I used requests-mock for writing tests for separate module:
# module.py
import requests
class A():
def get_response(self, url):
response = requests.get(url)
return response.text
And the tests:
# tests.py
import requests_mock
import unittest
from module import A
class TestAPI(unittest.TestCase):
#requests_mock.mock()
def test_get_response(self, m):
a = A()
m.get('http://aurl.com', text='a response')
self.assertEqual(a.get_response('http://aurl.com'), 'a response')
m.get('http://burl.com', text='b response')
self.assertEqual(a.get_response('http://burl.com'), 'b response')
m.get('http://curl.com', text='c response')
self.assertEqual(a.get_response('http://curl.com'), 'c response')
if __name__ == '__main__':
unittest.main()
this is how you mock requests.post, change it to your http method
#patch.object(requests, 'post')
def your_test_method(self, mockpost):
mockresponse = Mock()
mockpost.return_value = mockresponse
mockresponse.text = 'mock return'
#call your target method now
Here is a solution with requests Response class. It is cleaner IMHO.
import json
from unittest.mock import patch
from requests.models import Response
def mocked_requests_get(*args, **kwargs):
response_content = None
request_url = kwargs.get('url', None)
if request_url == 'aurl':
response_content = json.dumps('a response')
elif request_url == 'burl':
response_content = json.dumps('b response')
elif request_url == 'curl':
response_content = json.dumps('c response')
response = Response()
response.status_code = 200
response._content = str.encode(response_content)
return response
#mock.patch('requests.get', side_effect=mocked_requests_get)
def test_fetch(self, mock_get):
response = requests.get(url='aurl')
assert ...
If you want to mock a fake response, another way to do it is to simply instantiate an instance of the base HttpResponse class, like so:
from django.http.response import HttpResponseBase
self.fake_response = HttpResponseBase()
I started out with Johannes Farhenkrug's answer here and it worked great for me. I needed to mock the requests library because my goal is to isolate my application and not test any 3rd party resources.
Then I read up some more about python's Mock library and I realized that I can replace the MockResponse class, which you might call a 'Test Double' or a 'Fake', with a python Mock class.
The advantage of doing so is access to things like assert_called_with, call_args and so on. No extra libraries are needed. Additional benefits such as 'readability' or 'its more pythonic' are subjective, so they may or may not play a role for you.
Here is my version, updated with using python's Mock instead of a test double:
import json
import requests
from unittest import mock
# defube stubs
AUTH_TOKEN = '{"prop": "value"}'
LIST_OF_WIDGETS = '{"widgets": ["widget1", "widget2"]}'
PURCHASED_WIDGETS = '{"widgets": ["purchased_widget"]}'
# exception class when an unknown URL is mocked
class MockNotSupported(Exception):
pass
# factory method that cranks out the Mocks
def mock_requests_factory(response_stub: str, status_code: int = 200):
return mock.Mock(**{
'json.return_value': json.loads(response_stub),
'text.return_value': response_stub,
'status_code': status_code,
'ok': status_code == 200
})
# side effect mock function
def mock_requests_post(*args, **kwargs):
if args[0].endswith('/api/v1/get_auth_token'):
return mock_requests_factory(AUTH_TOKEN)
elif args[0].endswith('/api/v1/get_widgets'):
return mock_requests_factory(LIST_OF_WIDGETS)
elif args[0].endswith('/api/v1/purchased_widgets'):
return mock_requests_factory(PURCHASED_WIDGETS)
raise MockNotSupported
# patch requests.post and run tests
with mock.patch('requests.post') as requests_post_mock:
requests_post_mock.side_effect = mock_requests_post
response = requests.post('https://myserver/api/v1/get_widgets')
assert response.ok is True
assert response.status_code == 200
assert 'widgets' in response.json()
# now I can also do this
requests_post_mock.assert_called_with('https://myserver/api/v1/get_widgets')
Repl.it links:
https://repl.it/#abkonsta/Using-unittestMock-for-requestspost#main.py
https://repl.it/#abkonsta/Using-test-double-for-requestspost#main.py
This worked for me, although I haven't done much complicated testing yet.
import json
from requests import Response
class MockResponse(Response):
def __init__(self,
url='http://example.com',
headers={'Content-Type':'text/html; charset=UTF-8'},
status_code=200,
reason = 'Success',
_content = 'Some html goes here',
json_ = None,
encoding='UTF-8'
):
self.url = url
self.headers = headers
if json_ and headers['Content-Type'] == 'application/json':
self._content = json.dumps(json_).encode(encoding)
else:
self._content = _content.encode(encoding)
self.status_code = status_code
self.reason = reason
self.encoding = encoding
Then you can create responses :
mock_response = MockResponse(
headers={'Content-Type' :'application/json'},
status_code=401,
json_={'success': False},
reason='Unauthorized'
)
mock_response.raise_for_status()
gives
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: http://example.com
One possible way to work around requests is using the library betamax, it records all requests and after that if you make a request in the same url with the same parameters the betamax will use the recorded request, I have been using it to test web crawler and it save me a lot time.
import os
import requests
from betamax import Betamax
from betamax_serializers import pretty_json
WORKERS_DIR = os.path.dirname(os.path.abspath(__file__))
CASSETTES_DIR = os.path.join(WORKERS_DIR, u'resources', u'cassettes')
MATCH_REQUESTS_ON = [u'method', u'uri', u'path', u'query']
Betamax.register_serializer(pretty_json.PrettyJSONSerializer)
with Betamax.configure() as config:
config.cassette_library_dir = CASSETTES_DIR
config.default_cassette_options[u'serialize_with'] = u'prettyjson'
config.default_cassette_options[u'match_requests_on'] = MATCH_REQUESTS_ON
config.default_cassette_options[u'preserve_exact_body_bytes'] = True
class WorkerCertidaoTRT2:
session = requests.session()
def make_request(self, input_json):
with Betamax(self.session) as vcr:
vcr.use_cassette(u'google')
response = session.get('http://www.google.com')
https://betamax.readthedocs.io/en/latest/
Can you use requests-mock instead?
Suppose your myview function instead takes a requests.Session object, makes requests with it, and does something to the output:
# mypackage.py
def myview(session):
res1 = session.get("http://aurl")
res2 = session.get("http://burl")
res3 = session.get("http://curl")
return f"{res1.text}, {res2.text}, {res3.text}"
# test_myview.py
from mypackage import myview
import requests
def test_myview(requests_mock):
# set up requests
a_req = requests_mock.get("http://aurl", text="a response")
b_req = requests_mock.get("http://burl", text="b response")
c_req = requests_mock.get("http://curl", text="c response")
# test myview behaviour
session = requests.Session()
assert myview(session) == "a response, b response, c response"
# check that requests weren't called repeatedly
assert a_req.called_once
assert b_req.called_once
assert c_req.called_once
assert requests_mock.call_count == 3
You can also use requests_mock with frameworks other than Pytest - the documentation is great.
Using requests_mock is easy to patch any requests
pip install requests-mock
from unittest import TestCase
import requests_mock
from <yourmodule> import <method> (auth)
class TestApi(TestCase):
#requests_mock.Mocker()
def test_01_authentication(self, m):
"""Successful authentication using username password"""
token = 'token'
m.post(f'http://localhost/auth', json= {'token': token})
act_token =auth("user", "pass")
self.assertEqual(act_token, token)
I will add this information since I had a hard time figuring how to mock an async api call.
Here is what I did to mock an async call.
Here is the function I wanted to test
async def get_user_info(headers, payload):
return await httpx.AsyncClient().post(URI, json=payload, headers=headers)
You still need the MockResponse class
class MockResponse:
def __init__(self, json_data, status_code):
self.json_data = json_data
self.status_code = status_code
def json(self):
return self.json_data
You add the MockResponseAsync class
class MockResponseAsync:
def __init__(self, json_data, status_code):
self.response = MockResponse(json_data, status_code)
async def getResponse(self):
return self.response
Here is the test. The important thing here is I create the response before since init function can't be async and the call to getResponse is async so it all checked out.
#pytest.mark.asyncio
#patch('httpx.AsyncClient')
async def test_get_user_info_valid(self, mock_post):
"""test_get_user_info_valid"""
# Given
token_bd = "abc"
username = "bob"
payload = {
'USERNAME': username,
'DBNAME': 'TEST'
}
headers = {
'Authorization': 'Bearer ' + token_bd,
'Content-Type': 'application/json'
}
async_response = MockResponseAsync("", 200)
mock_post.return_value.post.return_value = async_response.getResponse()
# When
await api_bd.get_user_info(headers, payload)
# Then
mock_post.return_value.post.assert_called_once_with(
URI, json=payload, headers=headers)
If you have a better way of doing that tell me but I think it's pretty clean like that.
The simplest way so far:
from unittest import TestCase
from unittest.mock import Mock, patch
from .utils import method_foo
class TestFoo(TestCase):
#patch.object(utils_requests, "post") # change to desired method here
def test_foo(self, mock_requests_post):
# EXPLANATION: mocked 'post' method above will return some built-in mock,
# and its method 'json' will return mock 'mock_data',
# which got argument 'return_value' with our data to be returned
mock_data = Mock(return_value=[{"id": 1}, {"id": 2}])
mock_requests_post.return_value.json = mock_data
method_foo()
# TODO: asserts here
"""
Example of method that you can test in utils.py
"""
def method_foo():
response = requests.post("http://example.com")
records = response.json()
for record in records:
print(record.get("id"))
# do other stuff here
For those, who don't want to install additional libs for pytest, there is an example. I will duplicate it here with some extension, based on examples above:
import datetime
import requests
class MockResponse:
def __init__(self, json_data, status_code):
self.json_data = json_data
self.status_code = status_code
self.elapsed = datetime.timedelta(seconds=1)
# mock json() method always returns a specific testing dictionary
def json(self):
return self.json_data
def test_get_json(monkeypatch):
# Any arguments may be passed and mock_get() will always return our
# mocked object, which only has the .json() method.
def mock_get(*args, **kwargs):
return MockResponse({'mock_key': 'mock_value'}, 418)
# apply the monkeypatch for requests.get to mock_get
monkeypatch.setattr(requests, 'get', mock_get)
# app.get_json, which contains requests.get, uses the monkeypatch
response = requests.get('https://fakeurl')
response_json = response.json()
assert response_json['mock_key'] == 'mock_value'
assert response.status_code == 418
assert response.elapsed.total_seconds() == 1
============================= test session starts ==============================
collecting ... collected 1 item
test_so.py::test_get_json PASSED [100%]
============================== 1 passed in 0.07s ===============================
To avoid installing other dependencies you should create a fake response. This FakeResponse could be a child of Response (I think this is a good approach because it's more realistic) or just a simple class with the attributes you need.
Simple Fake class
class FakeResponse:
status_code = None
def __init__(self, *args, **kwargs):
self.status_code = 500
self.text = ""
Child of Response
class FakeResponse(Response):
encoding = False
_content = None
def __init__(*args, **kwargs):
super(FakeResponse).__thisclass__.status_code = 500
# Requests requires to be not be None, if not throws an exception
# For reference: https://github.com/psf/requests/issues/3698#issuecomment-261115119
super(FakeResponse).__thisclass__.raw = io.BytesIO()
Just a helpful hint to those that are still struggling, converting from urllib or urllib2/urllib3 to requests AND trying to mock a response- I was getting a slightly confusing error when implementing my mock:
with requests.get(path, auth=HTTPBasicAuth('user', 'pass'), verify=False) as url:
AttributeError: __enter__
Well, of course, if I knew anything about how with works (I didn't), I'd know it was a vestigial, unnecessary context (from PEP 343). Unnecessary when using the requests library because it does basically the same thing for you under the hood. Just remove the with and use bare requests.get(...) and Bob's your uncle.
For pytest users there is a convinient fixture from https://pypi.org/project/pytest-responsemock/
For example to mock GET to http://some.domain you can:
def test_me(response_mock):
with response_mock('GET http://some.domain -> 200 :Nice'):
response = send_request()
assert result.ok
assert result.content == b'Nice'
I will demonstrate how to detach your programming logic from the actual external library by swapping the real request with a fake one that returns the same data. In your view if external api call then this process is best
import pytest
from unittest.mock import patch
from django.test import RequestFactory
#patch("path(projectname.appname.filename).requests.post")
def test_mock_response(self, mock_get, rf: RequestFactory):
mock_get.return_value.ok = Mock(ok=True)
mock_get.return_value.status_code = 400
mock_get.return_value.json.return_value = {you can define here dummy response}
request = rf.post("test/", data=self.payload)
response = view_name_view(request)
expected_response = {
"success": False,
"status": "unsuccessful",
}
assert response.data == expected_response
assert response.status_code == 400
If using pytest:
>>> import pytest
>>> import requests
>>> def test_url(requests_mock):
... requests_mock.get('http://test.com', text='data')
... assert 'data' == requests.get('http://test.com').text
Taken from the official documentation

Why doesn't my tastypie cache get called?

I'm looking at the tastypie caching docs and trying to set up my own simple caching thing, but the cache doesn't seem to get called. When I visit http://localhost:8000/api/poll/?format=json, I get my tastypie generated json, but I don't get the output from the cache class.
from tastypie.resources import ModelResource
from tastypie.cache import NoCache
from .models import Poll
class JSONCache(NoCache):
def _load(self):
print 'loading cache'
data_file = open(settings.TASTYPIE_JSON_CACHE, 'r')
return json.load(data_file)
def _save(self, data):
print 'saving to cache'
data_file = open(settings.TASTYPIE_JSON_CACHE, 'w')
return json.dump(data, data_file)
def get(self, key):
print 'jsoncache.get'
data = self._load()
return data.get(key, None)
def set(self, key, value, timeout=60):
print 'jsoncache.set'
data = self._load()
data[key] = value
self._save(data)
class PollResource(ModelResource):
class Meta:
queryset = Poll.objects.all()
resource_name = 'poll'
cache = JSONCache()
It seems that Tastypie doesn't automatically cache lists, tastypie.resources around line 1027:
def get_list(self, request, **kwargs):
# ...
# TODO: Uncached for now. Invalidation that works for everyone may be
# impossible.
objects = self.obj_get_list(
request=request, **self.remove_api_resource_names(kwargs))
# ...
, whereas with details (around line 1050):
def get_detail(self, request, **kwargs):
# ...
try:
obj = self.cached_obj_get(
request=request, **self.remove_api_resource_names(kwargs))
# ...
... note that in the former snippet obj_get_list is called instead of cached_obj_get_list. Perhaps overriding get_list and using cached_obj_get_list would allow you to use cache here as well?
Now you probably would get output from your class for http://localhost:8000/api/poll/<pk>/?format=json (detail view) but not for http://localhost:8000/api/poll/?format=json (list view) by default.

Is there a better way to write this URL Manipulation in Python?

I'm curious if there's a simpler way to remove a particular parameter from a url. What I came up with is the following. This seems a bit verbose. Libraries to use or a more pythonic version appreciated.
parsed = urlparse(url)
if parsed.query != "":
params = dict([s.split("=") for s in parsed.query.split("&")])
if params.get("page"):
del params["page"]
url = urlunparse((parsed.scheme,
None,
parsed.path,
None,
urlencode(params.items()),
parsed.fragment,))
parsed = urlparse(url)
Use urlparse.parse_qsl() to crack the query string. You can filter this in one go:
params = [(k,v) for (k,v) in parse_qsl(parsed.query) if k != 'page']
I've created a small helper class to represent a url in a structured way:
import cgi, urllib, urlparse
class Url(object):
def __init__(self, url):
"""Construct from a string."""
self.scheme, self.netloc, self.path, self.params, self.query, self.fragment = urlparse.urlparse(url)
self.args = dict(cgi.parse_qsl(self.query))
def __str__(self):
"""Turn back into a URL."""
self.query = urllib.urlencode(self.args)
return urlparse.urlunparse((self.scheme, self.netloc, self.path, self.params, self.query, self.fragment))
Then you can do:
u = Url(url)
del u.args['page']
url = str(u)
More about this: Web development peeve.

Categories