Testing aiohttp & mongo with pytest - python

I have a simple coroutine register that accepts a login and password as POST arguments, then queries the database, and so on.
The problem is that I do not know how to test the coroutine.
I followed examples from
https://aiohttp.readthedocs.io/en/latest/testing.html.
And everything seemed easy until I started writing tests myself.
Code for test_register.py
import pytest

from main import make_app

pytest_plugins = 'aiohttp.pytest_plugin'

@pytest.fixture
def cli(loop, test_client):
    return loop.run_until_complete(test_client(make_app))

async def test_register(cli):
    resp = await cli.post('/register', data={'login': 'emil', 'password': 'qwerty'})
    assert resp.status == 200
    text = await resp.text()
And register.py
from settings import db

async def register(request):
    post_data = await request.post()
    print('Gotta: ', post_data)
    login, password = post_data['login'], post_data['password']
    matches = await db.users.find({'login': login}).count()
    ...
main.py
from aiohttp import web

from routes import routes

def make_app(loop=None):
    app = web.Application(loop=loop)
    for route in routes:
        app.router.add_route(route.method, route.url, route.handler)
    return app

def main():
    web.run_app(make_app())

if __name__ == "__main__":
    main()
settings.py
from motor.motor_asyncio import AsyncIOMotorClient
DBNAME = 'testdb'
db = AsyncIOMotorClient()[DBNAME]
And then I ran py.test test_register.py, and it got stuck on the database operation
matches = await db.users.find({'login': login}).count()

The root of your problem is global variable usage.
I suggest the following changes:
from aiohttp import web
from motor.motor_asyncio import AsyncIOMotorClient

from routes import routes

def make_app(loop=None):
    app = web.Application(loop=loop)

    DBNAME = 'testdb'
    mongo = AsyncIOMotorClient(io_loop=loop)
    db = mongo[DBNAME]
    app['db'] = db

    async def cleanup(app):
        mongo.close()

    app.on_cleanup.append(cleanup)

    for route in routes:
        app.router.add_route(route.method, route.url, route.handler)
    return app
register.py
async def register(request):
    post_data = await request.post()
    print('Gotta: ', post_data)
    login, password = post_data['login'], post_data['password']
    matches = await request.app['db'].users.find(
        {'login': login}).count()
    ...
Pushing commonly used objects into the application's storage is the recommended way to handle database connections and similar shared resources.
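With this change the handlers no longer rely on a module-level db created at import time, so the test fixture can build the app on the test loop and, if you like, point it at a throwaway database. A rough sketch of the adjusted fixture (the 'testdb_for_tests' name is made up; in newer aiohttp the test_client fixture is called aiohttp_client):

import pytest
from motor.motor_asyncio import AsyncIOMotorClient

from main import make_app

pytest_plugins = 'aiohttp.pytest_plugin'

@pytest.fixture
def cli(loop, test_client):
    app = make_app(loop)
    # optional: swap in a disposable database so tests never touch real data
    app['db'] = AsyncIOMotorClient(io_loop=loop)['testdb_for_tests']
    return loop.run_until_complete(test_client(app))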

Related

Async script in Python that runs all the time

How can I run the following script asynchronously so that it is always running, and so that you can pass in arguments and a new thread is started each time, so to speak?
Example Script
from fastapi import FastAPI, Response

app = FastAPI()

@app.get("/")
async def home():
    return {"Data": "Test"}

@app.get("/live-check/")
async def live_check(response: Response):
    response.headers["Status"] = "OK"
    return "live check successful"

@app.get("/get-latest-file-user/", status_code=200)
async def show_file():
    ...
My test-script:
from configparser import ConfigParser

def get_config():
    '''get the arguments'''
    config_object = ConfigParser()
    try:
        ...
    except KeyError:
        print("INI File not found!")
        quit()

def main(conf_args: dict):
    '''main function - Controlling and starting of most functions'''
    serialNumber = get_serial(conf_args['cert'])

if __name__ == "__main__":
    conf_args = get_args(DictIni)  # Get Input(args)
    main(conf_args)
Can anyone show me how to turn this into that kind of always-running service?
I know the script makes no sense as it stands; I just want to understand the principle.
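One common way to get this behaviour is to keep the existing logic and let a FastAPI endpoint schedule it as a background task on each request, while uvicorn keeps the server running. A rough sketch under those assumptions (the myscript module name and the /start-job/ path are made up for illustration):

from fastapi import FastAPI, BackgroundTasks

from myscript import main  # hypothetical module holding the question's script

app = FastAPI()

@app.post("/start-job/")
async def start_job(cert: str, background_tasks: BackgroundTasks):
    # schedule the existing blocking main() and return immediately;
    # the server stays up and each request starts a fresh job
    background_tasks.add_task(main, {'cert': cert})
    return {"status": "job scheduled", "cert": cert}

Started with uvicorn, every POST to /start-job/ kicks off another run of main without blocking the server.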

how to overwrite a route in sanic when using blueprint.copy?

from sanic import Blueprint
from sanic.response import json
from sanic import Sanic

app = Sanic('test')

bpv1 = Blueprint('bpv1', version=1)

@bpv1.route('/hello')
async def root(request):
    return json('hello v1')

app.blueprint(bpv1)

bpv2 = bpv1.copy('bpv2', version=2)

@bpv2.route('/hello')
async def root(request):
    return json('hello v2')

app.blueprint(bpv2)
I want to partially override the implementation of a route when the routes belong to different blueprints, but this raises sanic_routing.exceptions.RouteExists.
How can I achieve this?
I got the answer from the forum: the copy carries over the original blueprint's not-yet-registered routes, so removing the /hello entry from _future_routes before adding the new handler avoids the RouteExists error.
bpv2 = bpv1.copy("bpv2", version=2)

bpv2._future_routes = {
    route for route in bpv2._future_routes if route.uri != "/hello"
}

@bpv2.route("/hello")
async def root2(request):
    return json("hello v2")
link
https://community.sanicframework.org/t/how-to-overwrite-a-route-when-using-blueprint-copy/1067

Check if url matches mask in aiohttp

Here is the aiohttp server initialising:
self.app = web.Application()
self.app.add_routes([
web.get('/{endpoint:.*}', self.handle),
web.post('/{endpoint:.*}', self.handle),
web.options('/{endpoint:.*}', self.handle),
web.put('/{endpoint:.*}', self.handle),
])
runner = web.AppRunner(self.app)
await runner.setup()
site = web.TCPSite(runner, **self.config)
await site.start()
and the handle function looks something like this:
async def handle(self, request):
    endpoint = request.match_info['endpoint']
    if endpoint == 'api/user/{user_id}/':
        some_stuff()
How can I check whether the request URL matches the endpoint mask? It would be easy to add this mask to add_routes, but I'm looking for an alternative solution that doesn't change the initialisation.
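Since the routing setup should stay as it is, one option is to turn the mask into a regular expression inside the handler and match against that. A rough sketch (matches_mask is just an illustrative helper name, and it assumes the mask contains no regex metacharacters apart from the {placeholders}):

import re

def matches_mask(endpoint: str, mask: str) -> bool:
    # replace every {placeholder} with a pattern for a single path segment
    pattern = re.sub(r'\{[^/}]+\}', r'[^/]+', mask)
    return re.fullmatch(pattern, endpoint) is not None

print(matches_mask('api/user/42/', 'api/user/{user_id}/'))        # True
print(matches_mask('api/user/42/posts/', 'api/user/{user_id}/'))  # False

Inside handle the check then becomes if matches_mask(endpoint, 'api/user/{user_id}/'): instead of the literal comparison.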

TypeError: encoding without a string argument on twitter account activity api

I am setting up a Flask server that will act as a webhook for the Twitter Account Activity API. However, I ran into this issue that I have no idea how to solve; I'm fairly new to programming, so please bear with me. I just used this repository: https://github.com/RickRedSix/twitter-webhook-boilerplate-python/blob/master/Main.py
This is the error:
line 28, in twitterCrcValidation
key=bytes(CONSUMER_SECRET, encoding ='utf-8'),
TypeError: encoding without a string argument
Here's the code:
#!/usr/bin/env python
from flask import Flask, request, send_from_directory, make_response
from http import HTTPStatus
import Twitter, hashlib, hmac, base64, os, logging, json
from dotenv import load_dotenv

load_dotenv('.env')
CONSUMER_SECRET = os.getenv('CONSUMER_SECRET')
CURRENT_USER_ID = os.getenv('CURRENT_USER_ID')

app = Flask(__name__)

# generic index route
@app.route('/')
def default_route():
    return send_from_directory('www', 'index.html')

# The GET method for webhook should be used for the CRC check
# TODO: add header validation (compare_digest https://docs.python.org/3.6/library/hmac.html)
@app.route("/webhook", methods=["GET"])
def twitterCrcValidation():
    crc = request.args['crc_token']
    validation = hmac.new(
        key=bytes(CONSUMER_SECRET, encoding='utf-8'),
        msg=bytes(crc, encoding='utf-8'),
        digestmod=hashlib.sha256
    )
    digested = base64.b64encode(validation.digest())
    response = {
        'response_token': 'sha256=' + format(str(digested)[2:-1])
    }
    print('responding to CRC call')
    return json.dumps(response)

# The POST method for webhook should be used for all other API events
# TODO: add event-specific behaviours beyond Direct Message and Like
@app.route("/webhook", methods=["POST"])
def twitterEventReceived():
    requestJson = request.get_json()
    # dump to console for debugging purposes
    print(json.dumps(requestJson, indent=4, sort_keys=True))
    if 'favorite_events' in requestJson.keys():
        # Tweet Favourite Event, process that
        likeObject = requestJson['favorite_events'][0]
        userId = likeObject.get('user', {}).get('id')
        # event is from myself so ignore (Favourite event fires when you send a DM too)
        if userId == CURRENT_USER_ID:
            return ('', HTTPStatus.OK)
        Twitter.processLikeEvent(likeObject)
    elif 'direct_message_events' in requestJson.keys():
        # DM received, process that
        eventType = requestJson['direct_message_events'][0].get("type")
        messageObject = requestJson['direct_message_events'][0].get('message_create', {})
        messageSenderId = messageObject.get('sender_id')
        # event type isn't new message so ignore
        if eventType != 'message_create':
            return ('', HTTPStatus.OK)
        # message is from myself so ignore (Message create fires when you send a DM too)
        if messageSenderId == CURRENT_USER_ID:
            return ('', HTTPStatus.OK)
        Twitter.processDirectMessageEvent(messageObject)
    else:
        # Event type not supported
        return ('', HTTPStatus.OK)
    return ('', HTTPStatus.OK)

if __name__ == '__main__':
    # Bind to PORT if defined, otherwise default to 65010.
    port = int(os.environ.get('PORT', 65010))
    gunicorn_logger = logging.getLogger('gunicorn.error')
    app.logger.handlers = gunicorn_logger.handlers
    app.logger.setLevel(gunicorn_logger.level)
    app.run(host='0.0.0.0', port=port, debug=True)
You need to verify input to your program, and environment variables are no exception. As a minimum, check that these variables actually exist. os.getenv returns None if the environment variable doesn't exist; None is not a string, hence your error. You could avoid this with a slightly different os call.
CONSUMER_SECRET = os.environ['CONSUMER_SECRET']
CURRENT_USER_ID = os.environ['CURRENT_USER_ID']
Now an exception is raised on failure. This could be wrapped in an exception handler if you want different error reporting than the standard traceback.
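For example, a small sketch of such a wrapper (the message wording is arbitrary):

import os
import sys

try:
    CONSUMER_SECRET = os.environ['CONSUMER_SECRET']
    CURRENT_USER_ID = os.environ['CURRENT_USER_ID']
except KeyError as missing:
    # name the missing variable and stop before the app starts
    sys.exit(f'Missing required environment variable: {missing}')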

Tornado: How to get and return large data with less memory usage?

I have a web crawler and an HTTP interface for it.
The crawler receives grouped URLs as a dictionary, and I need to return the result in the same format as JSON. But I ran into large memory usage that is never returned to the operating system. How can I implement this without the large memory usage?
Code:
#!/usr/bin/env python
# coding=utf-8
import collections

import tornado.web
import tornado.ioloop
import tornado.queues
import tornado.httpclient


class ResponseError(Exception):
    pass


class Crawler(object):
    client = tornado.httpclient.AsyncHTTPClient()

    def __init__(self, groups, concurrency=10, retries=3, validators=None):
        self.groups = groups
        self.concurrency = concurrency
        self.retries = retries
        self.validators = validators or []
        self.requests = tornado.queues.Queue()
        self.responses = collections.defaultdict(list)

    async def worker(self):
        while True:
            await self.consume()

    async def validate(self, response):
        for validator in self.validators:
            validator(response)

    async def save(self, response):
        self.responses[response.request.group].append(response.body.decode('utf-8'))

    async def consume(self):
        async for request in self.requests:
            try:
                response = await self.client.fetch(request, raise_error=False)
                await self.validate(response)
                await self.save(response)
            except ResponseError:
                if request.retries < self.retries:
                    request.retries += 1
                    await self.requests.put(request)
            finally:
                self.requests.task_done()

    async def produce(self):
        for group, urls in self.groups.items():
            for url in urls:
                request = tornado.httpclient.HTTPRequest(url)
                request.group = group
                request.retries = 0
                await self.requests.put(request)

    async def fetch(self):
        await self.produce()
        for __ in range(self.concurrency):
            tornado.ioloop.IOLoop.current().spawn_callback(self.worker)
        await self.requests.join()


class MainHandler(tornado.web.RequestHandler):
    async def get(self):
        urls = []
        with open('urls') as f:  # mock
            for line in f:
                urls.append(line.strip())
        crawler = Crawler({'default': urls})
        await crawler.fetch()
        self.write(crawler.responses)


if __name__ == '__main__':
    app = tornado.web.Application(
        (tornado.web.url(r'/', MainHandler),), debug=True
    )
    app.listen(8000)
    tornado.ioloop.IOLoop.current().start()
It looks to me like most of the memory usage is devoted to self.responses. Since you seem to be ordering responses by "group" before writing them out, I can understand why you do it this way. One idea is to store them in a database (MySQL or MongoDB or whatever) with the "group" as a column or field value in the database record.
The database might be the final destination of your data, or else it might be a temporary place to store the data until crawler.fetch completes. Then, query all the data from the database, ordered by "group", and write it to the file.
This doesn't solve the problem, it just means that the database process is responsible for most of your memory usage, instead of the Python process. This may be preferable for you, however.
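A minimal sketch of that idea with Motor, the asynchronous MongoDB driver that works with Tornado (the crawler database and responses collection names are made up):

import motor.motor_tornado

collection = motor.motor_tornado.MotorClient()['crawler']['responses']

async def save(response):
    # one small document per response instead of an ever-growing dict in memory
    await collection.insert_one({
        'group': response.request.group,
        'body': response.body.decode('utf-8'),
    })

async def dump_grouped(out_path):
    # stream the results back ordered by group, holding one document at a time
    with open(out_path, 'w') as out:
        async for doc in collection.find().sort('group'):
            out.write(doc['body'] + '\n')

The save coroutine is a drop-in replacement for Crawler.save (minus self), and dump_grouped shows how the grouped output can be produced after crawler.fetch completes without rebuilding the whole dictionary in memory.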
