Provide contextvars.Context with a ContextManager - python

I'm trying to manage transactions in my DB framework (I use MongoDB with umongo over pymongo).
To use transactions, one must pass a session kwarg along the whole call chain. I would like to provide a context manager that would isolate the transaction; only the function at the end of the call chain would need to be aware of the session object.
I found out about context variables and I'm close to something but not totally there.
What I would like to have:
with Transaction():
    # Do stuff
    d = MyDocument.find_one()
    d.attr = 12
    d.commit()
Here's what I came up with for now:
from contextlib import AbstractContextManager
from contextvars import ContextVar, copy_context

s = ContextVar('session', default=None)

class Transaction(AbstractContextManager):
    def __init__(self):
        self.ctx = copy_context()
        # Create a new DB session
        session = db.create_session()
        # Set session in context
        self.ctx.run(s.set, session)

    def __exit__(self, *args, **kwargs):
        pass

    # Adding a run method for convenience
    def run(self, func, *args, **kwargs):
        return self.ctx.run(func, *args, **kwargs)
def func():
    d = MyDocument.find_one()
    d.attr = 12
    d.commit()

with Transaction() as t:
    t.run(func)
But I don't get the nice context manager syntax. The point of the context manager would be to say "everything in there should be run in that context".
What I wrote above is not really better than just using a function:
def run_transaction(func, *args, **kwargs):
    ctx = copy_context()
    session = 12  # dummy session value for the example
    ctx.run(s.set, session)
    ctx.run(func, *args, **kwargs)

run_transaction(func)
Am I on the wrong track?
Am I misusing context variables?
Any other way to achieve what I'm trying to do?
Basically, I'd like to be able to open a context like a context manager:
session = ContextVar('session', default=None)

with copy_context() as ctx:
    session = db.create_session()
    # Do stuff
    d = MyDocument.find_one()
    d.attr = 12
    d.commit()
I'd embed this in a Transaction context manager to manage the session stuff and only keep operations on d in user code.
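In other words, something like this rough sketch of the Transaction manager (db.create_session() is a placeholder for whatever the driver actually provides):

from contextvars import ContextVar

session_var = ContextVar('session', default=None)

class Transaction:
    def __enter__(self):
        self._session = db.create_session()  # placeholder for the real session factory
        # ContextVar.set() applies to the *current* context,
        # so no copy_context() would be needed here
        self._token = session_var.set(self._session)
        return self._session

    def __exit__(self, exc_type, exc, tb):
        session_var.reset(self._token)  # restore the previous value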

You can use a @contextmanager to create the session and transaction, and store the session in the ContextVar for use by the other functions.
from contextlib import contextmanager
from contextvars import ContextVar
import argparse
import pymongo

SESSION = ContextVar("session", default=None)

@contextmanager
def transaction(client):
    with client.start_session() as session:
        with session.start_transaction():
            token = SESSION.set(session)
            try:
                yield
            finally:
                SESSION.reset(token)

def insert1(client):
    client.test.txtest1.insert_one({"data": "insert1"}, session=SESSION.get())

def insert2(client):
    client.test.txtest2.insert_one({"data": "insert2"}, session=SESSION.get())

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", default="mongodb://localhost:27017")
    args = parser.parse_args()
    client = pymongo.MongoClient(args.url)
    # Create and clear collections; collections must be created outside the transaction
    insert1(client)
    client.test.txtest1.delete_many({})
    insert2(client)
    client.test.txtest2.delete_many({})
    with transaction(client):
        insert1(client)
        insert2(client)
    for doc in client.test.txtest1.find({}):
        print(doc)
    for doc in client.test.txtest2.find({}):
        print(doc)

if __name__ == "__main__":
    main()
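Note that SESSION.set() returns a token recording the previous value, and SESSION.reset(token) in the finally block restores it. This is what makes the pattern safe to nest: an inner transaction restores the outer session on exit instead of clearing the variable.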

Related

SQLAlchemy session cleared in celery job and on_success function

I am building a tool that fetches data from a different database, transforms it, and stores it in my own database. I'm migrating from APScheduler to Celery, but I ran into the following problem:
I use a class I call JobRecords to store when a job ran, whether it was successful, and which errors it encountered. I use this to know not to look too far back for updated entries, especially since some tables have multiple millions of rows.
Since the system is the same for all jobs, I created a subclass of the celery Task object. I make sure the job is executed within the Flask app context, and I fetch the latest time this job finished successfully. I also make sure I record a value for now up front, to avoid timing issues between querying the database and adding the job record.
# Imports reconstructed from usage in this excerpt; `app` is the Flask
# application object, imported from elsewhere in the project.
from datetime import datetime
from traceback import format_exception

from celery import Task, current_app as current_celery_app
from sqlalchemy import func
from sqlalchemy.orm import Session, scoped_session

class RecordedTask(Task):
    """
    Task subclass that uses JobRecords to get the last run date
    and add new JobRecords on completion
    """
    now: datetime = None
    ignore_result = True
    _session: scoped_session = None
    success: bool = True
    info: dict = None

    @property
    def session(self) -> Session:
        """Making sure we have one global session instance"""
        if self._session is None:
            from app.extensions import db
            self._session = db.session
        return self._session

    def __call__(self, *args, **kwargs):
        from app.models import JobRecord
        kwargs['last_run'] = (
            self.session.query(func.max(JobRecord.run_at_))
            .filter(JobRecord.job_id == self.name, JobRecord.success)
            .first()
        )[0] or datetime.min
        self.now = kwargs['now'] = datetime.utcnow()
        with app.app_context():
            super(RecordedTask, self).__call__(*args, **kwargs)

    def on_failure(self, exc, task_id, args: list, kwargs: dict, einfo):
        self.session.rollback()
        self.success = False
        self.info = dict(
            args=args,
            kwargs=kwargs,
            error=exc.args,
            exc=format_exception(exc.__class__, exc, exc.__traceback__),
        )
        app.logger.error(f"Error executing job '{self.name}': {exc}")

    def on_success(self, retval, task_id, args: list, kwargs: dict):
        app.logger.info(f"Executed job '{self.name}' successfully, adding JobRecord")
        for entry in self.to_trigger:  # self.to_trigger is defined elsewhere in the full class
            if len(entry) == 2:
                job, kwargs = entry
            else:
                job, = entry
                kwargs = {}
            app.logger.info(f"Scheduling job '{job}'")
            current_celery_app.signature(job, **kwargs).delay()

    def after_return(self, *args, **kwargs):
        from app.models import JobRecord
        record = JobRecord(
            job_id=self.name,
            run_at_=self.now,
            info=self.info,
            success=self.success
        )
        self.session.add(record)
        self.session.commit()
        self.session.remove()
I added an example of a job to update a model called Location, but there are a lot of jobs just like this one.
@celery.task(bind=True, name="update_locations")
def update_locations(self, last_run: datetime = datetime.min, **_):
    """Get the locations from the external database and check for updates"""
    locations: List[ExternalLocation] = ExternalLocation.query.filter(
        ExternalLocation.updated_at_ >= last_run
    ).order_by(ExternalLocation.id).all()
    app.logger.info(f"ExternalLocation: collected {len(locations)} updated locations")
    for update_location in locations:
        existing_location: Location = Location.query.filter(
            Location.external_id == update_location.id
        ).first()
        if existing_location is None:
            self.session.add(Location.from_worker(update_location))
        else:
            existing_location.update_from_worker(update_location)
The problem is that when I run this job, the Location objects are not committed with the JobRecord, so only the latter is created. If I track it with the debugger, Location.query.count() returns the correct value inside the function, but as soon as it enters the on_success callback, it's back to 0, and self._session.new returns an empty dict.
I already tried adding the session as a property to make sure it's the same instance everywhere, but the problem still persists. Maybe it has something to do with it being a scoped_session because of Flask-SQLAlchemy?
Sorry about the large amount of code, I did try to strip as much away as possible. Any help is welcome!
I found out that the culprit was the combination of scoped_session and the Flask app context. Like any context manager, leaving the with app.app_context(): block triggered the __exit__ function, which in turn caused the ScopedRegistry, where the scoped_session was stored, to be cleared. A new session was then created, the JobRecords were added to that session, and that session was committed. Therefore, the locations were never written to the database.
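For illustration, the mechanism is roughly this (a sketch of Flask-SQLAlchemy's teardown behavior; exact internals vary by version):

# Flask-SQLAlchemy registers a teardown handler roughly equivalent to:
@app.teardown_appcontext
def shutdown_session(exception=None):
    db.session.remove()  # clears the ScopedRegistry holding the scoped_session

# so leaving `with app.app_context():` discards the session that the
# task body had been adding objects to.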
There are two possible solutions. If you don't use sessions in files other than your task, you can add a session property to the task. This way, you avoid the scoped_session altogether, and you can clean up in your after_return function.
@property
def session(self):
    if self._session is None:
        from dashboard.extensions import db
        self._session = db.create_session(options={})()
    return self._session
However, I was accessing the session in my model definition files as well, through from extensions import db, so I was using two different sessions. I ended up using app.app_context().push() instead of the context manager, thus avoiding the __exit__ call:
app.app_context().push()
super(RecordedTask, self).__call__(*args, **kwargs)
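One thing to be aware of with this workaround: a pushed context is never popped automatically. A sketch of one way to keep it balanced (storing the context on the task; _app_ctx is a name I'm introducing, not from the original code):

def __call__(self, *args, **kwargs):
    # ... build kwargs as before ...
    self._app_ctx = app.app_context()
    self._app_ctx.push()
    super(RecordedTask, self).__call__(*args, **kwargs)

def after_return(self, *args, **kwargs):
    # ... add and commit the JobRecord first, as above ...
    self._app_ctx.pop()  # the teardown-driven session removal now runs after the commit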

Run a subcommand inside a context manager

In the context of a python click CLI application, I would like to run a subcommand inside of a context manager that would be setup in a higher level command. How is it possible to do that with click? My pseudo-code looks something like:
import click
from contextlib import contextmanager

@contextmanager
def database_context(db_url):
    try:
        print(f'setup db connection: {db_url}')
        yield
    finally:
        print('teardown db connection')

@click.group
@click.option('--db', default='local')
def main(db):
    print(f'running command against {db} database')
    db_url = get_db_url(db)
    connection_manager = database_context(db_url)
    # here comes the mysterious part that makes all subcommands
    # run inside the connection manager

@main.command
def do_this_thing():
    print('doing this thing')

@main.command
def do_that_thing():
    print('doing that thing')
And this would be called like:
> that_cli do_that_thing
running command against local database
setup db connection: db://user:pass@localdb:db_name
doing that thing
teardown db connection
> that_cli --db staging do_this_thing
running command against staging database
setup db connection: db://user:pass@123.456.123.789:db_name
doing this thing
teardown db connection
Edit: note that the above example is contrived to better illustrate the missing functionality of click, not because I want to solve this particular problem. I know I could repeat the same code in all commands and achieve the same effect, which I already do in my real use case. My question is precisely: what could I do in the main function alone that would transparently run all subcommands inside a context manager?
Decorating commands
Define a context manager decorator using contextlib.ContextDecorator
Use the click.pass_context decorator on main(), so you can access the click context
Create an instance db_context of the context manager
Iterate over the commands defined for group main using ctx.command.commands
For each command, replace the original callback (the function called by the command) with the same callback decorated with the context manager, db_context(cmd.callback)
This way you will programmatically modify each command to behave just like:
@main.command()
@db_context
def do_this_thing():
    print('doing this thing')
But without requiring any change to your code beyond your main() function.
See the code below for a working example:
import click
from contextlib import ContextDecorator

class Database_context(ContextDecorator):
    """Decorator context manager."""
    def __init__(self, db_url):
        self.db_url = db_url

    def __enter__(self):
        print(f'setup db connection: {self.db_url}')

    def __exit__(self, type, value, traceback):
        print('teardown db connection')

@click.group()
@click.option('--db', default='local')
@click.pass_context
def main(ctx, db):
    print(f'running command against {db} database')
    db_url = db  # get_db_url(db)
    # here comes the mysterious part that makes all subcommands
    # run inside the connection manager
    db_context = Database_context(db_url)  # Init context manager decorator
    for name, cmd in ctx.command.commands.items():  # Iterate over main.commands
        cmd.allow_extra_args = True  # Seems to be required, not sure why
        cmd.callback = db_context(cmd.callback)  # Decorate command callback with context manager

@main.command()
def do_this_thing():
    print('doing this thing')

@main.command()
def do_that_thing():
    print('doing that thing')

if __name__ == "__main__":
    main()
It does what you describe in your question, hope it will work as expected in real code.
Using click.pass_context
This code below will give you an idea of how to do it using click.pass_context.
import click
from contextlib import contextmanager

@contextmanager
def database_context(db_url):
    try:
        print(f'setup db connection: {db_url}')
        yield
    finally:
        print('teardown db connection')

@click.group()
@click.option('--db', default='local')
@click.pass_context
def main(ctx, db):
    ctx.ensure_object(dict)
    print(f'running command against {db} database')
    db_url = db  # get_db_url(db)
    # Initiate context manager
    ctx.obj['context'] = database_context(db_url)

@main.command()
@click.pass_context
def do_this_thing(ctx):
    with ctx.obj['context']:
        print('doing this thing')

@main.command()
@click.pass_context
def do_that_thing(ctx):
    with ctx.obj['context']:
        print('doing that thing')

if __name__ == "__main__":
    main(obj={})
Another solution to avoid the explicit with statement could be passing the context manager as a decorator using contextlib.ContextDecorator, but it would likely be more complex to set up with click.
This use case is supported natively in Click from v8.0 by using
ctx.with_resource(context_manager)
https://click.palletsprojects.com/en/8.0.x/api/#click.Context.with_resource
There is a worked example in the Click advanced documentation
https://click.palletsprojects.com/en/8.0.x/advanced/#managing-resources
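Applied to the example in the question, that reduces main() to something like this sketch (requires Click >= 8.0; database_context and get_db_url as defined above):

@click.group()
@click.option('--db', default='local')
@click.pass_context
def main(ctx, db):
    print(f'running command against {db} database')
    # Enter the context manager now; Click exits it automatically
    # when this context is torn down, after the subcommand returns.
    ctx.with_resource(database_context(get_db_url(db)))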

Mock entire python class

I'm trying to write a simple test in Python, but I'm not able to figure out how to accomplish the mocking process.
This is the class and method code:
class FileRemoveOp(...):
    @apply_defaults
    def __init__(
            self,
            source_conn_keys,
            source_conn_id='conn_default',
            *args, **kwargs):
        super(FileRemoveOp, self).__init__(*args, **kwargs)
        self.source_conn_keys = source_conn_keys
        self.source_conn_id = source_conn_id

    def execute(self, context):
        source_conn = Connection(conn_id)
        try:
            for source_conn_key in self.source_keys:
                if not source_conn.check_for_key(source_conn_key):
                    logging.info("The source key does not exist")
                source_conn.remove_file(source_conn_key, '')
        finally:
            logging.info("Remove operation successful.")
And this is my test for the execute function:
@mock.patch('main.Connection')
def test_remove_execute(self, MockConn):
    mock_conn = MockConn.return_value
    mock_conn.value = ...  # I'm not sure what to put here
    remove_operator = FileRemoveOp(...)
    remove_operator.execute(self)
Since the execute method tries to make a connection, I need to mock that; I don't want to make a real connection, just return something mocked. How can I do that? I'm used to testing in Java, but I've never done it in Python.
First, it is very important to understand that you always need to mock the thing where it is used, as stated in the unittest.mock documentation:
The basic principle is that you patch where an object is looked up,
which is not necessarily the same place as where it is defined.
Next, what you need to do is return a MagicMock instance as the return_value of the patched object. To summarize, you need the following sequence:
Patch the object
Prepare a MagicMock to be used
Return the MagicMock we've just created as the return_value
Here is a quick example of a project.
connection.py (Class we would like to Mock)
class Connection(object):
    def execute(self):
        return "Connection to server made"
file.py (Where the Class is used)
from project.connection import Connection

class FileRemoveOp(object):
    def __init__(self, foo):
        self.foo = foo

    def execute(self):
        conn = Connection()
        result = conn.execute()
        return result
tests/test_file.py
import unittest
from unittest.mock import patch, MagicMock
from project.file import FileRemoveOp

class TestFileRemoveOp(unittest.TestCase):
    def setUp(self):
        self.fileremoveop = FileRemoveOp('foobar')

    @patch('project.file.Connection')
    def test_execute(self, connection_mock):
        # Create a new MagicMock instance which will be the
        # `return_value` of our patched object
        connection_instance = MagicMock()
        connection_instance.execute.return_value = "testing"
        # Return the above created `connection_instance`
        connection_mock.return_value = connection_instance
        result = self.fileremoveop.execute()
        expected = "testing"
        self.assertEqual(result, expected)

    def test_not_mocked(self):
        # No mocking involved; this will execute the real `Connection.execute` method
        result = self.fileremoveop.execute()
        expected = "Connection to server made"
        self.assertEqual(result, expected)
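As a side note: since the patched class already returns a MagicMock automatically, the same test can be written more compactly by configuring return_value in place (a sketch, equivalent to test_execute above):

@patch('project.file.Connection')
def test_execute_compact(self, connection_mock):
    # return_value is an auto-created MagicMock, so configure it directly
    connection_mock.return_value.execute.return_value = "testing"
    self.assertEqual(self.fileremoveop.execute(), "testing")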
I found that this simple solution works in Python 3: you can substitute a whole class before it is imported for the first time. Say I have to mock the class Manager from real.manager:
class MockManager:
    ...

import real.manager
real.manager.Manager = MockManager
It is possible to do this substitution in __init__.py if there is no better place.
It may work in python2 too but I did not check.
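One caveat, since this depends on import order: any module that has already run from real.manager import Manager keeps its own reference to the real class, so the substitution must happen first, or you have to patch that module's attribute instead (consumer is a hypothetical module name):

# If `consumer` (hypothetical) was imported before the substitution and did
# `from real.manager import Manager`, it still holds the real class:
import consumer
consumer.Manager = MockManager  # patch the name where it is looked up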

Celery creates several instances of Task

I'm creating a task (by subclassing celery.task.Task) that creates a connection to Twitter's streaming API. For the Twitter API calls, I am using tweepy. As I've read from the celery-documentation, 'a task is not instantiated for every request, but is registered in the task registry as a global instance.' I was expecting that whenever I call apply_async (or delay) for the task, I will be accessing the task that was originally instantiated but that doesn't happen. Instead, a new instance of the custom task class is created. I need to be able to access the original custom task since this is the only way I can terminate the original connection created by the tweepy API call.
Here's some piece of code if this would help:
from celery import registry
from celery.task import Task

class FollowAllTwitterIDs(Task):
    def __init__(self):
        # requirements for creation of the customstream
        # goes here. The CustomStream class is a subclass
        # of tweepy.streaming.Stream class
        self._customstream = CustomStream(*args, **kwargs)

    @property
    def customstream(self):
        if self._customstream:
            # terminate existing connection to Twitter
            self._customstream.running = False
        self._customstream = CustomStream(*args, **kwargs)

    def run(self):
        self._to_follow_ids = function_that_gets_list_of_ids_to_be_followed()
        self.customstream.filter(follow=self._to_follow_ids, async=False)

follow_all_twitterids = registry.tasks[FollowAllTwitterIDs.name]
And for the Django view
def connect_to_twitter(request):
    if request.method == 'POST':
        do_stuff_here()
        # ...
        follow_all_twitterids.apply_async(args=[], kwargs={})
    return
Any help would be appreciated. :D
EDIT:
For additional context for the question, the CustomStream object creates an httplib.HTTPSConnection instance whenever the filter() method is called. This connection needs to be closed whenever there is another attempt to create one. The connection is closed by setting customstream.running to False.
The task should only be instantiated once; if you think it is not for some reason, I suggest you add
    print("INSTANTIATE")
    import traceback
    traceback.print_stack()
to the Task.__init__ method, so you can tell where this is happening.
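For example, placed at the top of the existing __init__:

class FollowAllTwitterIDs(Task):
    def __init__(self):
        print("INSTANTIATE")
        import traceback
        traceback.print_stack()
        # ... the existing __init__ body follows unchanged ...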
I think your task could be better expressed like this:
from celery.task import Task, task

class TwitterTask(Task):
    _stream = None
    abstract = True

    def __call__(self, *args, **kwargs):
        try:
            return super(TwitterTask, self).__call__(*args, **kwargs)
        finally:
            if self._stream:
                self._stream.running = False

    @property
    def stream(self):
        if self._stream is None:
            self._stream = CustomStream()
        return self._stream

@task(base=TwitterTask)
def follow_all_ids():
    ids = get_list_of_ids_to_follow()
    follow_all_ids.stream.filter(follow=ids, async=False)
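Note that because follow_all_ids is created with base=TwitterTask, the decorated function is itself the single registered task instance, which is why it can reach the shared stream property as follow_all_ids.stream.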

How can I mix decorators with the @contextmanager decorator?

Here is the code I'm working with:
from contextlib import contextmanager
from functools import wraps
import logging
import shutil
import tempfile

log = logging.getLogger(__name__)

class with_report_status(object):
    def __init__(self, message):
        self.message = message

    def __call__(self, f):
        @wraps(f)
        def wrapper(_self, *a, **kw):
            try:
                return f(_self, *a, **kw)
            except:
                log.exception("Handling exception in reporting operation")
                if not (hasattr(_self, 'report_status') and _self.report_status):
                    _self.report_status = self.message
                raise
        return wrapper

class MyClass(object):
    @contextmanager
    @with_report_status('unable to create export workspace')
    def make_workspace(self):
        temp_dir = tempfile.mkdtemp()
        log.debug("Creating working directory in %s", temp_dir)
        self.workspace = temp_dir
        yield self.workspace
        log.debug("Cleaning up working directory in %s", temp_dir)
        shutil.rmtree(temp_dir)

    @with_report_status('working on step 1')
    def step_one(self):
        # do something that isn't a context manager
        ...
The problem is, @with_report_status does not yield, as expected by @contextmanager. However, I can't wrap it the other way around either, because @contextmanager returns a generator object (I think!) instead of the value itself.
How can I make @contextmanager play nice with decorators?
Try moving @contextmanager to the bottom of the decorator list.
That is kind of a weird question: @contextmanager returns a context manager, not a generator. But for some reason you want to treat that context manager like a function? That's not something you can make work; they have nothing in common.
I think what you want is a MyClass.make_workspace that is a context manager and also has a report_status field in case of exceptions. For that you need to write a context manager yourself that sets this field in its __exit__ method; @contextmanager can't help you here.
You can subclass contextlib.GeneratorContextManager to avoid most of the work. It's not documented, so use the source, Luke.
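For completeness, a hand-written version along those lines might look like this (a sketch; ReportingWorkspace, the log object, and the cleanup policy are my own names and assumptions, not the original code):

import logging
import shutil
import tempfile

log = logging.getLogger(__name__)

class ReportingWorkspace(object):
    """Context manager that records a status message on the owner if the body fails."""
    def __init__(self, owner, message):
        self.owner = owner
        self.message = message

    def __enter__(self):
        self.temp_dir = tempfile.mkdtemp()
        self.owner.workspace = self.temp_dir
        return self.temp_dir

    def __exit__(self, exc_type, exc, tb):
        if exc_type is not None:
            log.exception("Handling exception in reporting operation")
            if not getattr(self.owner, 'report_status', None):
                self.owner.report_status = self.message
            return False  # propagate the exception
        shutil.rmtree(self.temp_dir)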
