Testing an async tornado RequestHandler method in a complex environment - python

I am trying to write unit testing code for a child of tornado.web.RequestHandler that runs an aggregate query against the database. I have already wasted several days trying to get the tests to work.
The tests use pytest and factory_boy. A lot of the important tornado classes have factories for the tests.
This is the class that is being tested:
class AggregateRequestHandler(StreamlyneRequestHandler):
    '''
    '''
    SUPPORTED_METHODS = (
        "GET", "POST", "OPTIONS")

    def get(self):
        self.aggregate()

    @auth.hmac_auth
    #@tornado.web.asynchronous
    @tornado.web.removeslash
    @tornado.gen.coroutine
    def aggregate(self):
        '''
        '''
        self.logger.info('api aggregate')
        data = self.data
        print("Data: {0}".format(data))
        pipeline = data['pipeline']
        self.logger.debug('pipeline : {0}'.format(pipeline))
        self.logger.debug('utc tz : {0}'.format(tz_util.utc))
        # execute pipeline query
        print(self.collection)
        try:
            cursor_future = self.collection.aggregate(pipeline, cursor={})
            print(cursor_future)
            cursor = yield cursor_future
            print("Cursor: {0}".format(cursor))
        except Exception as e:
            print(e)
        documents = yield cursor.to_list(length=None)
        self.logger.debug('results : {0}'.format(documents))
        # process MongoDB JSON extended
        results = json.loads(json_util.dumps(documents))
        pipeline = json.loads(json_util.dumps(pipeline))
        response_data = {
            'pipeline': pipeline,
            'results': results
        }
        self.respond(response_data)
The method used to test it is here:
#@tornado.testing.gen_test
def test_time_inside(self):
    current_time = gen_time()
    past_time = gen_time() - datetime.timedelta(minutes=20)
    test_query = copy.deepcopy(QUERY)
    oid = ObjectId("53a72de12fb05c0788545ed6")
    test_query[0]['$match']['attribute'] = oid
    test_query[0]['$match']['date_created']['$gte'] = past_time
    test_query[0]['$match']['date_created']['$lte'] = current_time
    request = produce.HTTPRequest(
        method="GET",
        headers=produce.HTTPHeaders(
            kwargs={
                "Content-Type": "application/json",
                "Accept": "application/json",
                "X-Sl-Organization": "test",
                "Hmac": "83275edec557e2a339e0ec624201db604645e1e1",
                "X-Sl-Username": "test@test.co",
                "X-Sl-Expires": 1602011725
            }
        ),
        uri="/api/v1/attribute-data/aggregate?{0}".format(json_util.dumps({
            "pipeline": test_query
        }))
    )
    self.ARH = produce.AggregateRequestHandler(request=request)
    #io_loop = tornado.ioloop.IOLoop.instance()
    self.io_loop.run_sync(self.ARH.get)
    #def stop_test():
    #    self.stop()
    #self.ARH.test_get(stop_test)
    #self.wait()
    output = self.ARH.get_written_output()
    assert output == ""
This is the way I set up the factory for the Request Handler:
class OutputTestAggregateRequestHandler(slapi.rest.AggregateRequestHandler, tornado.testing.AsyncTestCase):
    '''
    '''
    _written_output = []

    def write(self, chunk):
        print("Previously written: {0}".format(self._written_output))
        print("Len: {0}".format(len(self._written_output)))
        if self._finished:
            raise RuntimeError("Cannot write() after finish(). May be caused "
                               "by using async operations without the "
                               "@asynchronous decorator.")
        if isinstance(chunk, dict):
            print("Going to encode a chunk")
            chunk = escape.json_encode(chunk)
            self.set_header("Content-Type", "application/json; charset=UTF-8")
        chunk = escape.utf8(chunk)
        print("Writing")
        self._written_output = []
        self._written_output.append(chunk)
        print(chunk)

    def flush(self, include_footers=False, callback=None):
        pass

    def get_written_output(self):
        for_return = self._written_output
        self._written_output = []
        return for_return

class AggregateRequestHandler(StreamlyneRequestHandler):
    '''
    '''
    class Meta:
        model = OutputTestAggregateRequestHandler

    model = slapi.model.AttributeDatum
When running the tests, the test simply stops in def aggregate(self):, somewhere between print(cursor_future) and print("Cursor: {0}".format(cursor)).
Then in stdout you see
MotorCollection(Collection(Database(MongoClient([]), u'test'), u'attribute_datum'))
<tornado.concurrent.Future object at 0x7fbc737993d0>
and nothing else comes out of the test, which fails on
> assert output == ""
E AssertionError: assert [] == ''
After a lot of time looking at documentation, examples, and Stack Overflow, I managed to get a functioning test by adding the following code to OutputTestAggregateRequestHandler:
def set_io_loop(self):
    self.io_loop = tornado.ioloop.IOLoop.instance()

def ioloop(f):
    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        print(args)
        self.set_io_loop()
        return f(self, *args, **kwargs)
    return wrapper

def runTest(self):
    pass
Then I copied all of the code from AggregateRequestHandler.aggregate into OutputTestAggregateRequestHandler, but with different decorators:
@ioloop
@tornado.testing.gen_test
def _aggregate(self):
    ......
I then received the output:
assert output == ""
E AssertionError: assert ['{\n "pipeline": [\n {\n "$match": {\n "attribute": {\n "$oid"... "$oid": "53cec0e72dc9832c4c4185f2"\n }, \n "quality": 9001\n }\n ]\n}'] == ''
which is actually a success; I was just triggering an assertion error on purpose to see the output.
The big problem I have is how to achieve that desired outcome, the output I got by adding the extra code and copying the aggregate method, without the copy.
Obviously, once I copy the code out of the aggregate method, the test is no longer useful after I make changes to the actual method. How can I get the actual aggregate method to function properly in the tests instead of stopping, seemingly when it encounters asynchronous code?
Thanks for any help,
Cheers!
-Liam

In general, the intended way to test RequestHandlers is with AsyncHTTPTestCase, not AsyncTestCase. This will set up the HTTP client and server for you and everything will go through the HTTP plumbing. Using RequestHandlers outside of an Application and HTTP server is not fully supported, although in Tornado 4.0 it might be feasible to use a dummy HTTPConnection to avoid the full server stack. This might be faster, although it's kind of uncharted territory at this point.
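A minimal sketch of that approach (the app factory, route, and query parameter below are placeholders for whatever the slapi application actually defines, not a drop-in test):

import json

import tornado.testing
import tornado.web

import slapi.rest  # assumed module under test

class AggregateHandlerTest(tornado.testing.AsyncHTTPTestCase):
    def get_app(self):
        # Placeholder app: wire the real AggregateRequestHandler (plus any
        # settings or database objects it needs) into an Application here.
        return tornado.web.Application([
            (r"/api/v1/attribute-data/aggregate", slapi.rest.AggregateRequestHandler),
        ])

    def test_aggregate(self):
        # fetch() spins the IOLoop until the handler finishes, so coroutine
        # handlers run to completion instead of stopping at the first yield.
        response = self.fetch(
            "/api/v1/attribute-data/aggregate?pipeline=%5B%5D",  # urlencoded []
            method="GET",
            headers={"Accept": "application/json"},
        )
        self.assertEqual(response.code, 200)
        body = json.loads(response.body.decode())
        self.assertIn("results", body)

With the full HTTP stack in place, the write()/flush() overrides and handler factories above become unnecessary; assertions can run directly against response.body.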

Related

Is there a way to find and extract tables using OCR in python/Azure?

I'm working on writing an Azure custom skill to find and extract tables from large documents. Specifically, I'm trying to find something that will pull stat blocks out of Dungeons and Dragons PDF documents, which can then be used as a custom skill by Azure Cognitive Search.
So far, I've tried writing a custom skill which searches for something that should only be present in a stat block, the challenge rating. This works in the local test environment, but fails when applied as an Azure custom skill, with a message to the effect of "data does not contain text", even when it is paired with an OCR skill to extract and transform the data to text. All the documents are PDFs.
Is there a way to remove this validation without breaking the skill? Or is there a better way to write this skill? Currently it just pulls out instances of the challenge rating, which isn't quite perfect but is close enough.
import azure.functions as func
import json
import logging
import re

def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')
    try:
        body = json.dumps(req.get_json())
    except ValueError:
        return func.HttpResponse(
            "Invalid body",
            status_code=400
        )
    if body:
        result = compose_response(body, identify_stat_blocks)
        return func.HttpResponse(result, mimetype="application/json")
    else:
        return func.HttpResponse(
            "Invalid body",
            status_code=400
        )

def identify_stat_blocks(text):
    if type(text) is not str:
        raise TypeError('Input data is not text/str')
    challenge = get_all_matches(text, [r'(?i)Challenge[\s]+[0-9\s]+'])
    extracteds = {"Challenge Rating": challenge}
    return extracteds

def get_all_matches(text, reference_re):
    retlist = []
    for i in reference_re:
        matches = re.findall(i, text)
        if matches:
            retlist.extend(matches)
    if len(retlist) > 0:
        return retlist
    else:
        return None

def compose_response(json_data, func):
    values = json.loads(json_data)['values']
    # Prepare the output before the loop
    results = {}
    results["values"] = []
    for value in values:
        output_record = transform_value(value, func)
        if output_record is not None:
            results["values"].append(output_record)
    return json.dumps(results, ensure_ascii=False)

# Perform an operation on a record
def transform_value(value, func):
    try:
        recordId = value['recordId']
    except KeyError:
        # a missing recordId raises KeyError, not AssertionError
        return None
    # Validate the inputs
    try:
        assert ('data' in value), "'data' field is required."
        data = value['data']
        assert ('text' in data), "'text' field is required in 'data' object."
    except AssertionError as error:
        return (
            {
                "recordId": recordId,
                "errors": [{"message": "Error:" + error.args[0]}]
            })
    challenge = func(data['text'])
    return ({
        "recordId": recordId,
        "data": {
            "text": challenge
        }
    })
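For local testing, it may help to exercise the function with a payload in the shape Cognitive Search sends to a custom skill; this is a hypothetical example (record contents invented) that the code above accepts when appended below the function definitions:

sample = {
    "values": [
        {"recordId": "r1", "data": {"text": "Goblin. Challenge 1 (200 XP)"}},
        {"recordId": "r2", "data": {}},  # missing 'text' -> an error record
    ]
}
print(compose_response(json.dumps(sample), identify_stat_blocks))

If the deployed skillset's input mapping does not map the OCR/merge output into data.text, every record takes the error branch, which would be consistent with the "data does not contain text" message.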
I'd be super grateful for any help/guidance!

How to mock failed operations with moto, @mock_dynamodb2?

I am currently trying to write unit tests for my Python code using Moto & @mock_dynamodb2. So far it's been working for me to test my "successful operation" test cases, but I'm having trouble getting it to work for my "failure cases".
In my test code I have:
@mock_dynamodb2
class TestClassUnderTestExample(unittest.TestCase):
    def setUp(self):
        ddb = boto3.resource("dynamodb", "us-east-1")
        self.table = ddb.create_table(<the table definition>)
        self.example_under_test = ClassUnderTestExample(ddb)

    def test_some_thing_success(self):
        expected_response = {<some value>}
        assert expected_response == self.example_under_test.write_entry(<some value>)

    def test_some_thing_failure(self):
        response = self.example_under_test.write_entry(<some value>)
        # How to assert exception is thrown by forcing put item to fail?
The TestClassUnderTestExample would look something like this:
class ClassUnderTestExample:
    def __init__(self, ddb_resource=None):
        if not ddb_resource:
            ddb_resource = boto3.resource('dynamodb')
        self.table = ddb_resource.Table(.....)

    def write_entry(self, some_value):
        ddb_item = <do stuff with some_value to create sanitized item>
        response = self.table.put_item(
            Item=ddb_item
        )
        if pydash.get(response, "ResponseMetadata.HTTPStatusCode") != 200:
            raise SomeCustomErrorType("Unexpected response from DynamoDB when attempting to PutItem")
        return ddb_item
I've been completely stuck when it comes to actually mocking the .put_item operation to return a non-success value, so that I can test that ClassUnderTestExample handles it as expected and throws the custom error. I've tried things like deleting the table before running the test, but that just throws an exception when getting the table rather than an executed PutItem with an error code.
I've also tried putting a patch for pydash or for the table above the test, but I must be doing something wrong. I can't find anything in moto's documentation. Any help would be appreciated!
The goal of Moto is to completely mimic AWS' behaviour, including how it behaves when the user supplies erroneous inputs. In other words, a call to put_item() that fails against AWS would/should also fail against Moto.
There is no built-in way to force an error response on a valid input.
It's difficult to tell from your example how this can be forced, but it looks like it's worth playing around with this line to create an invalid input:
ddb_item = <do stuff with some_value to create sanitized item>
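For example, moto reproduces DynamoDB's own validation, so an item that omits the table's hash key raises the same botocore ClientError a real table would return. A minimal sketch (table name and key schema invented for illustration):

import boto3
import pytest
from botocore.exceptions import ClientError
from moto import mock_dynamodb2

@mock_dynamodb2
def test_put_item_with_invalid_input_fails():
    ddb = boto3.resource("dynamodb", "us-east-1")
    table = ddb.create_table(
        TableName="example",
        KeySchema=[{"AttributeName": "id", "KeyType": "HASH"}],
        AttributeDefinitions=[{"AttributeName": "id", "AttributeType": "S"}],
        BillingMode="PAY_PER_REQUEST",
    )
    # The item is missing the 'id' hash key, so put_item fails under moto
    # just as it would against AWS.
    with pytest.raises(ClientError):
        table.put_item(Item={"username": "John"})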
Yes, you can. Use mocking for this. Simple and runnable example:
from unittest import TestCase
from unittest.mock import Mock
from uuid import uuid4

import boto3
from moto import mock_dynamodb2

def create_user_table(table_name: str) -> dict:
    return dict(
        TableName=table_name,
        KeySchema=[
            {
                'AttributeName': 'id',
                'KeyType': 'HASH'
            },
        ],
        AttributeDefinitions=[
            {
                'AttributeName': 'id',
                'AttributeType': 'S'
            },
        ],
        BillingMode='PAY_PER_REQUEST'
    )

class UserRepository:
    table_name = 'users'

    def __init__(self, ddb_resource):
        if not ddb_resource:
            ddb_resource = boto3.resource('dynamodb')
        self.table = ddb_resource.Table(self.table_name)

    def create_user(self, username):
        return self.table.put_item(Item={'id': str(uuid4()), 'username': username})

@mock_dynamodb2
class TestUserRepository(TestCase):
    def setUp(self):
        ddb = boto3.resource("dynamodb", "us-east-1")
        self.table = ddb.create_table(**create_user_table('users'))
        self.test_user_repo = UserRepository(ddb)

    def tearDown(self):
        self.table.delete()

    def test_some_thing_success(self):
        user = self.test_user_repo.create_user(username='John')
        assert len(self.table.scan()['Items']) == 1

    def test_some_thing_failure(self):
        self.test_user_repo.table = table = Mock()
        table.put_item.side_effect = Exception('Boto3 Exception')
        with self.assertRaises(Exception) as exc:
            self.test_user_repo.create_user(username='John')
        self.assertTrue('Boto3 Exception' in str(exc.exception))

ValueError: not enough values to unpack while running unit tests Django ModelViewSet

I am testing an endpoint that retrieves data using a ModelViewSet, and I am passing a param via the URL to get the data, but I am getting this error when I run the unit tests:
File "/Users/lutaayaidris/Documents/workspace/project_sample/project_sample/financing_settings/tests.py", line 195, in test_get_blocks
self.block_get_data), content_type='application/json')
File "/Users/lutaayaidris/Documents/workspace/project_sample/lib/python3.6/site-packages/rest_framework/test.py", line 286, in get
response = super().get(path, data=data, **extra)
File "/Users/lutaayaidris/Documents/workspace/project_sample/lib/python3.6/site-packages/rest_framework/test.py", line 194, in get
'QUERY_STRING': urlencode(data or {}, doseq=True),
File "/Users/lutaayaidris/Documents/workspace/project_sample/lib/python3.6/site-packages/django/utils/http.py", line 93, in urlencode
for key, value in query:
ValueError: not enough values to unpack (expected 2, got 1)
This is how I have structured my tests, plus some dummy data for testing:
class TemplateData:
    """Template Mock data."""

    step_get_data = {
        "param": "step"
    }
    block_get_data = {
        "param": "block"
    }
    get_no_data = {
        "param_": "block"
    }
class TemplateViewTests(TestCase, TemplateData):
    """Template Tests (Block & Step)."""

    def setUp(self):
        """
        Initialize client, Step and Block id and data created.
        """
        self.client = APIClient()
        self.block_id = 0
        self.step_id = 0
        self.create_block_step_data()

    def create_block_step_data(self):
        """Create ProcessVersion, Step, & Block mock data."""
        self.process_version = ProcessVersion.objects.create(
            tag="TESTING_TAG",
            is_process_template=False,
            status="IN EDITING",
            attr_map="TESTING_ATTR",
            loan_options=None
        )
        self.step = Step.objects.create(
            version=self.process_version,
            is_process_template=True,
            title="TESTING",
            help_text="TESTING",
            order=1,
            slug="slug",
            can_be_duplicated=False,
            max_duplicated_number=2,
        )
        self.step_id = self.step.pk
        self.block_id = Block.objects.create(
            step=self.step,
            is_process_template=True,
            title="TESTING",
            information_text="This is testing information",
            order=1,
            depending_field="depending_field",
            visibility_value="visibility_value",
            slug="slug",
            can_be_duplicated=False,
            max_duplicated_number=2,
        ).pk
        self.process_version_1 = ProcessVersion.objects.create(
            tag="TESTING_TAG",
            is_process_template=False,
            status="IN EDITING",
            attr_map="TESTING_ATTR",
            loan_options=None
        )
        self.step_1 = Step.objects.create(
            version=self.process_version_1,
            is_process_template=True,
            title="TESTING",
            help_text="TESTING",
            order=1,
            slug="slug",
            can_be_duplicated=False,
            max_duplicated_number=2,
        )
        self.block_1 = Block.objects.create(
            step=self.step,
            is_process_template=True,
            title="TESTING",
            information_text="This is testing information",
            order=1,
            depending_field="depending_field",
            visibility_value="visibility_value",
            slug="slug",
            can_be_duplicated=False,
            max_duplicated_number=2,
        ).pk

    def test_get_blocks(self):
        """Test get list of Block."""
        response = self.client.get(
            "/api/v1/financing-settings/template/",
            data=json.dumps(self.block_get_data),
            content_type='application/json')
        self.assertEqual(response.status_code, 200)

    def test_get_steps(self):
        """Test get list of Step."""
        response = self.client.get(
            "/api/v1/financing-settings/template/",
            data=json.dumps(self.block_get_data),
            content_type='application/json')
        self.assertEqual(response.status_code, 200)

    def test_no_step_or_block(self):
        """Test get no list of Step or Block."""
        response = self.client.get(
            "/api/v1/financing-settings/template/",
            data=json.dumps(self.block_get_data),
            content_type='application/json')
        self.assertEqual(response.status_code, 204)
As you can see above, those are my tests. I have already set up the data; now I want to retrieve it back, but because of the exception above I can't.
Lastly, in my endpoint implementation, I used a ViewSet to handle this; below is the code:
class TemplateView(ModelViewSet):
    """ViewSet for Saving Block/ Step template."""

    def list(self, request, *args, **kwargs):
        """Get list of Blocks/Steps with is_process_template equal to True."""
        param = request.data['param']
        if param == "block":
            _block = Block.objects.filter(is_process_template=True).values()
            return JsonResponse({"data": list(_block)}, safe=False, status=200)
        elif param == "step":
            _step = Step.objects.filter(is_process_template=True)
            return JsonResponse({"data": list(_step)}, safe=False, status=200)
        return Response(status=status.HTTP_204_NO_CONTENT)
What is causing this? In my understanding, everything should work.
The function Client.get expects a dictionary as its data argument and tries to encode it into the URL using the function urlencode. You could do something like this:
from django.test import Client

c = Client()
block_get_data = {
    "param": "block"
}
c.get('path', block_get_data)
block_get_data will be sent in the URL as 'param=block'.
If you want to send JSON-formatted data in a GET method, you can use the Client.generic function as follows:
from django.test import Client
import json

c = Client()
block_get_data = {
    "param": "block"
}
c.generic('GET', 'path', json.dumps(block_get_data), 'application/json')
You are facing this error because of this dict:
block_get_data = {
    "param": "block"
}
You are effectively trying to use it this way:
for key, val in block_get_data:
which iterates over the keys and unpacks each key string, producing an error like:
ValueError: too many values to unpack (expected 2)
It is solved by looping through the dict with the .items() method:
for key, val in block_get_data.items():
I think passing the parameter as self.block_get_data.items() may solve your problem.
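On the view side (a sketch of an assumed fix, not part of the answers above): since a GET request carries no body, reading request.query_params instead of request.data matches what APIClient.get(path, data) actually sends:

def list(self, request, *args, **kwargs):
    """Get list of Blocks/Steps with is_process_template equal to True."""
    param = request.query_params.get('param')
    if param == "block":
        _block = Block.objects.filter(is_process_template=True).values()
        return JsonResponse({"data": list(_block)}, safe=False, status=200)
    elif param == "step":
        # .values() keeps the queryset JSON-serializable, mirroring the block branch
        _step = Step.objects.filter(is_process_template=True).values()
        return JsonResponse({"data": list(_step)}, safe=False, status=200)
    return Response(status=status.HTTP_204_NO_CONTENT)

With that change, self.client.get("/api/v1/financing-settings/template/", self.block_get_data) works without json.dumps or a content_type argument.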

Airflow SimpleHttpOperator

Hi, I am experiencing weird behavior from SimpleHttpOperator.
I have extended this operator like this:
class EPOHttpOperator(SimpleHttpOperator):
    """
    Operator for retrieving data from EPO API, performs token validity check,
    gets a new one, if old one close to not valid.
    """

    @apply_defaults
    def __init__(self, entity_code, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.entity_code = entity_code
        self.endpoint = self.endpoint + self.entity_code

    def execute(self, context):
        try:
            token_data = json.loads(Variable.get(key="access_token_data", deserialize_json=False))
            if (datetime.now() - datetime.strptime(token_data["created_at"],
                                                   '%Y-%m-%d %H:%M:%S.%f')).seconds >= 19 * 60:
                Variable.set(value=json.dumps(get_EPO_access_token(), default=str), key="access_token_data")
            self.headers = {
                "Authorization": f"Bearer {token_data['token']}",
                "Accept": "application/json"
            }
            super(EPOHttpOperator, self).execute(context)
        except HTTPError as http_err:
            logging.error(f'HTTP error occurred during getting EPO data: {http_err}')
            raise http_err
        except Exception as e:
            logging.error(e)
            raise e
And I have written a simple unit test:
def test_get_EPO_data(requests_mock):
    requests_mock.get('http://ops.epo.org/rest-services/published-data/publication/epodoc/EP1522668',
                      text='{"text": "test"}')
    requests_mock.post('https://ops.epo.org/3.2/auth/accesstoken',
                       text='{"access_token":"test", "status": "we just testing"}')
    dag = DAG(dag_id='test_data', start_date=datetime.now())
    task = EPOHttpOperator(
        xcom_push=True,
        do_xcom_push=True,
        http_conn_id='http_EPO',
        endpoint='published-data/publication/epodoc/',
        entity_code='EP1522668',
        method='GET',
        task_id='get_data_task',
        dag=dag,
    )
    ti = TaskInstance(task=task, execution_date=datetime.now())
    task.execute(ti.get_template_context())
    assert ti.xcom_pull(task_ids='get_data_task') == {"text": "test"}
The test doesn't pass, though; the XCom value from the HttpHook response is never pushed. I have checked that the code responsible for the push logic gets called:
....
if self.response_check:
    if not self.response_check(response):
        raise AirflowException("Response check returned False.")
if self.xcom_push_flag:
    return response.text
What did I do wrong? Is this a bug?
So I actually managed to make it work by saving the result of super(EPOHttpOperator, self).execute(context) (into a Variable, in my case):
def execute(self, context):
    try:
        .
        .
        .
        self.headers = {
            "Authorization": f"Bearer {token_data['token']}",
            "Accept": "application/json"
        }
        super(EPOHttpOperator, self).execute(context) -> Variable.set(value=super(EPOHttpOperator, self).execute(context), key='foo')
Documentation is kind of misleading on this one; or am I doing something wrong after all?
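One likely explanation, from Airflow's mechanics rather than from this thread: the XCom push of an operator's result happens in the TaskInstance machinery after execute() returns, so calling task.execute() directly in a test never touches XCom, and the override above additionally discards the value returned by the parent's execute(). A sketch of both adjustments (names as in the code above):

# Operator side: return the parent's result so do_xcom_push has a value to push.
def execute(self, context):
    ...  # token refresh and header setup as above
    return super(EPOHttpOperator, self).execute(context)

# Test side: push the returned value explicitly before pulling it. Note that
# SimpleHttpOperator returns the raw response text, not parsed JSON.
result = task.execute(ti.get_template_context())
ti.xcom_push(key='return_value', value=result)
assert ti.xcom_pull(task_ids='get_data_task') == '{"text": "test"}'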

Flask/mongodb webapp inconsistent test

I have a Flask integration test backed by a 1-node mongodb that randomly fails:
pytest/test_webapi.py:59: in test_register_test
> assert res.status_code == 302
E assert <Response streamed [404 NOT FOUND]>.status_code == 302
Fail rate is roughly 50%.
The test in test_webapi.py looks like this:
def test_register_user(self):
    res = self.client.get("/logout")

    class MySMTPServer(smtpd.SMTPServer):
        mails = []
        def process_message(self, peer, mailfrom, rcpttos, data):
            self.mails.append((rcpttos[0], data))

    server = MySMTPServer(('localhost', 12345), None)
    t = threading.Thread(target=asyncore.loop, args=(1,))
    t.start()
    time.sleep(.1)
    try:
        res = self.client.post("/register", data=self.registration)
        assert res.status_code == 200
        mail, hash = server.mails[0]
        self.conn.fsync()
        time.sleep(.1)
        res = self.client.get('/activate/' + hash)
        assert res.status_code == 302
    finally:
        server.close()
The relevant Flask methods from webapi.py:
#app.route("/register", methods=["POST"])
def register_user():
mail = flask.request.form['mail']
user = flask.request.form["user"]
pw = flask.request.form["pass"]
hash = users.register(user, pw, mail=mail)
return flask.jsonify({'_id': None}) # XXX
#app.route('/activate/<hash>', methods=['GET'])
def activate_user(hash):
key = users.activate(hash=hash)
if not key:
flask.abort(404)
return flask.redirect("/")
... are backed by action methods:
make_key = lambda: base64.encodestring(os.urandom(32)).strip()

def register(self, user, pw, **kw):
    hash = self.make_key()
    user = self.new(user, pw, activation=hash, **kw)
    self._send_mail(**user)
    return hash

def activate(self, hash):
    user = self.users.find_one({'activation': hash})
    if not user:
        return None
    key = self.make_key()
    activation = {
        '$unset': {'activation': 1},
        '$set': {'status': 'active', 'key': key}}
    self.users.update({'_id': user['_id']}, activation)
    return user
... where self.users is a mongodb collection.
self.new() persists the entity using safe=True.
Interestingly, several other tests doing similar things never seem to encounter this problem.
I had thought that this would be enough to make sure that the persisted object would be visible to other threads in the pymongo connection pool. What part of the mongodb/pymongo documentation should I have read more carefully? Or is there some weird interaction with asyncore?
(ported from comments thread)
In the context of the tests, is the data value just the base64-encoded string of the activation key? base64 contains letters and digits, but also "+" and "/", both of which will be misinterpreted by URL parsers (particularly "/").
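If that is the cause, one fix (a sketch in the Python 2 style of the code above, not from the original thread) is to generate the key with base64's URL-safe alphabet, or to percent-encode it when building the activation URL:

import base64, os, urllib

# URL-safe variant: '-' and '_' replace '+' and '/', so the key survives
# inside /activate/<hash> without being split by the URL parser.
make_key = lambda: base64.urlsafe_b64encode(os.urandom(32)).strip()

# Alternatively, percent-encode the existing key when building the URL:
res = self.client.get('/activate/' + urllib.quote(hash, safe=''))

This would also fit the observed fail rate: only keys containing '/' actually break the route, and roughly half of 43-character standard-base64 strings contain at least one.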
