In Celery, I want to use AsyncSession to operate on a MySQL database. This is my code:
db.py
# Async engine for MySQL via an asyncio DBAPI (asyncmy, per the error below).
async_engine = create_async_engine(
    ASYNC_DATABASE_URL,
    future=True,
    echo=True,  # logs every SQL statement; disable in production
    pool_size=1000,  # NOTE(review): very large pool -- confirm this is intended
    pool_pre_ping=True,
    # Recycle connections shortly before MySQL's default 8h wait_timeout
    # (28800s) so the server never closes them first.
    pool_recycle=28800-300,
    max_overflow=0, )
# Session factory producing AsyncSession objects; expire_on_commit=False
# keeps ORM attributes readable after commit in async code.
async_session = sessionmaker(
    async_engine, expire_on_commit=False, class_=AsyncSession)
celery.py
# Celery application; task modules are discovered via `include`.
app = Celery(
    'tasks',
    broker=f"{config.REDIS_URL}/{config.CELERY_DB}",
    # NOTE(review): old-style upper-case settings passed as constructor
    # kwargs -- confirm Celery applies them; the documented route is
    # app.conf.update(...) or the lower-case setting names.
    CELERY_TASK_SERIALIZER='pickle',
    CELERY_RESULT_SERIALIZER='pickle',
    CELERYD_CONCURRENCY=20,
    CELERY_ACCEPT_CONTENT=[
        'pickle',
        'json'],
    include=['celery_task.user_permission', 'celery_task.repair_gpu']
)
@app.task(name='repair_gpu')
def repair_gpu(task_id, repair_map: list):
    """Celery entry point: drive the async repair coroutine on a private loop.

    A fresh event loop is created per invocation and closed in ``finally``
    so pooled asyncmy connections are cleaned up deterministically instead
    of being left to the garbage collector (asyncio DBAPIs cannot be closed
    safely at GC time, which is the warning observed under concurrency).
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(repair_gpu_task(task_id, repair_map))
    finally:
        loop.close()
async def repair_gpu_task(task_id, repair_map: list):
status = True
async with async_session() as session:
query = select(module.Task).where(module.Task.id == task_id)
task: module.Task = (await session.execute(query)).scalars().first()
....many codes......
The program works fine when I run only one task; running multiple tasks at the same time reports the following error:
The garbage collector is trying to clean up connection <AdaptedConnection <asyncmy.connection.Connection object at 0x000001DECDAA37C0>>. This feature is unsupported on asyncio dbapis that lack a "terminate" feature, since no IO can be performed at this stage to reset the connection. Please close out all connections when they are no longer used, calling close() or using a context manager to manage their lifetime.
How can I use it correctly? How do I fix this error?
Thank you.
I don't know how to fix it, despite several attempts.
Related
I have also asked this question on behave GitHub discussions and SQLAlchemy GitHub discussions.
I am trying to hookup a SQLAlchemy 1.4 engine and global scoped asynchronous session in behave before_all and before_scenario hooks to model testing similar to that outlined in the following blog article
The approach is to have a parent transaction and each test running in a nested transaction that gets rolled back when the test completes.
Unfortunately the before_all, before_scenario hooks are synchronous.
The application under test uses an asynchronous engine and asynchronous session created using sessionmaker:
def _create_session_factory(engine) -> sessionmaker[AsyncSession]:
    """Build an AsyncSession factory bound to *engine*.

    ``expire_on_commit=False`` keeps ORM objects usable after commit,
    which matters when they outlive the session in async code.
    """
    # The engine is already bound via the positional argument; the previous
    # extra ``factory.configure(bind=engine)`` call was redundant.
    return sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
In the before_scenario test hook the following line raises an error when I try to create a scoped session.
"""THIS RAISES AN ERROR RuntimeError: no running event loop"""
context.session = context.Session(bind=context.connection, loop=loop)
The full code listing for setting up the test environment is listed below.
How do I get an asynchronous scoped session created in the synchronous before_all, before_scenario test hooks of behave?
import asyncio
import logging
from behave.api.async_step import use_or_create_async_context
from behave.log_capture import capture
from behave.runner import Context
from behave.model import Scenario
from sqlalchemy import event
from sqlalchemy.ext.asyncio import AsyncSession, async_scoped_session
from sqlalchemy.ext.asyncio.engine import async_engine_from_config
from sqlalchemy.orm.session import sessionmaker
from fastapi_graphql.server.config import Settings
logger = logging.getLogger()
@capture(level=logging.INFO)
def before_all(context: Context) -> None:
    """Set up the database engine, connection and scoped session factory."""
    logging.info("Setting up logging for behave tests...")
    context.config.setup_logging()
    logging.info("Setting up async context...")
    use_or_create_async_context(context)
    loop = context.async_context.loop
    asyncio.set_event_loop(loop)
    logging.info("Configuring db engine...")
    settings = Settings()
    config = settings.dict()
    config["sqlalchemy_url"] = settings.db_url
    engine = async_engine_from_config(config, prefix="sqlalchemy_")
    logging.info(f"Db engine configured for connecting to: {settings.db_url}")
    logging.info("Creating a global session instance")
    factory = sessionmaker(bind=engine, class_=AsyncSession, expire_on_commit=False)
    # async_scoped_session expects the session *factory* itself, not an
    # already constructed session -- the old code passed ``factory()``.
    Session = async_scoped_session(factory, scopefunc=asyncio.current_task)
    context.engine = engine
    context.connection = loop.run_until_complete(engine.connect())
    context.factory = factory
    context.Session = Session
@capture(level=logging.INFO)
def after_all(context: Context) -> None:
    """Teardown: close the shared connection, then dispose the engine."""
    loop = context.async_context.loop
    logging.info("Closing connection")
    loop.run_until_complete(context.connection.close())
    logging.info("Closing database engine...")
    loop.run_until_complete(context.engine.dispose())
    logging.info("Database engine closed")
@capture(level=logging.INFO)
def before_scenario(context: Context, scenario: Scenario) -> None:
    """Open an outer transaction and a session running inside a SAVEPOINT."""
    loop = context.async_context.loop
    logging.info("Starting a transaction...")
    context.transaction = loop.run_until_complete(context.connection.begin())
    logging.info("Transaction started...")
    logging.info("Creating a db session...")

    async def _create_session():
        # The scopefunc (asyncio.current_task) only works while a loop is
        # running, so the scoped session must be created from inside a
        # coroutine; calling context.Session(...) synchronously raised
        # "RuntimeError: no running event loop". AsyncSession also takes
        # no ``loop`` keyword, so that argument was dropped.
        return context.Session(bind=context.connection)

    context.session = loop.run_until_complete(_create_session())
    logging.info("Db session created")
    logging.info("Starting a nested transaction...")
    # NOTE(review): on an AsyncSession this begins the SAVEPOINT lazily --
    # confirm whether it must be awaited/run on the loop in this setup.
    context.session.begin_nested()
    logging.info("Nested transaction started...")

    @event.listens_for(context.session, "after_transaction_end")
    def restart_savepoint(db_session, transaction):
        """Support tests with rollbacks.

        This is required for tests that call some services that issue
        rollbacks in try-except blocks. With this event the Session always
        runs all operations within the scope of a SAVEPOINT, established at
        the start of each transaction, so tests can roll back the
        "transaction" while remaining inside a larger, never-committed one.
        """
        if context.transaction.nested and not context.transaction._parent.nested:
            # ensure that state is expired the way session.commit() at
            # the top level normally does
            context.session.expire_all()
            context.session.begin_nested()
@capture(level=logging.INFO)
def after_scenario(context: Context, scenario: Scenario) -> None:
    """Close the database session and roll back the outer transaction."""
    logging.info("Closing db session...")
    # Use the same loop the session/connection were created on in
    # before_all; a fresh asyncio.get_event_loop() call could return a
    # different loop and fail.
    loop = context.async_context.loop
    loop.run_until_complete(context.Session.remove())
    logging.info("Db session closed")
    logging.info("Rolling back transaction...")
    loop.run_until_complete(context.transaction.rollback())
    logging.info("Rolled back transaction")
I am facing multiple issues using an async ODM inside my celery worker
First i wasn't able to init my database models using celery worker signal
i am using beanie for the db connection.
First Implementation
from asyncer import syncify
from asgiref.sync import async_to_sync
# Shared Motor client/database for the worker process.
client = AsyncIOMotorClient(
    DATABASE_URL, uuidRepresentation="standard")
db = client[DB_NAME]


async def db_session():
    """Initialize beanie document models on the shared Motor database."""
    await init_beanie(
        database=db,
        document_models=[Project, User],
    )


@worker_ready.connect
def startup_celery_ecosystem(**kwargs):
    """Run beanie initialization once the Celery worker signals readiness.

    NOTE(review): with the prefork pool this signal fires in the parent
    process only; child worker processes do not inherit the initialized
    models (see the discussion further below).
    """
    logger.info('Startup celery worker process')
    async_to_sync(db_session)()
    logger.info('FINISHED : Startup celery worker process')
async def get_users():
    """Return all User documents as a list."""
    users = User.find()
    users_list = await users.to_list()
    return users_list


@celery_app.task
def pool_db():
    """Synchronous Celery task that drives the async query."""
    async_to_sync(get_users)()
    # syncify(get_users)() fails the same way: the User model is not
    # initialized yet (init_beanie should already have set up the models).
With this implementation i could not access my database using the User and Project class and it raises an error as if User and Project haven't been instantiated yet
The workaround is to call db_session() at the module level, which solves the problem with database model instantiation. But now, when querying the database, I get the following error from my Celery task:
RuntimeError: Event loop is closed
Second Implementation
from asyncer import syncify
from asgiref.sync import async_to_sync

# Shared Motor client/database for the worker process.
client = AsyncIOMotorClient(
    DATABASE_URL, uuidRepresentation="standard")
db = client[DB_NAME]


async def db_session():
    """Initialize beanie document models on the shared Motor database."""
    await init_beanie(
        database=db,
        document_models=[Project, User],
    )

# Initialize beanie at module (import) level so the models are set up in
# every process that imports this module.
async_to_sync(db_session)()


async def get_users():
    """Return all User documents as a list."""
    users = User.find()
    users_list = await users.to_list()
    return users_list


@celery_app.task
def pool_db():
    """Synchronous Celery task that drives the async query.

    NOTE(review): this raised RuntimeError('Event loop is closed') -- the
    loop used by async_to_sync at import time no longer exists at task time.
    """
    async_to_sync(get_users)()
    # syncify(get_users)() fails with the same error.
I am not very familiar with how asyncio is implemented and how asyncer and asgiref allow running async code inside a sync thread, which left me confused; any help would be appreciated.
After much investigation using Flower for monitoring workers and logging the worker (process) IDs, it turns out that the Celery worker itself does not process any tasks: it spawns other child processes (in my case because I am using the default executor pool, which is prefork), while the signal (worker_ready.connect) is only run on the supervisor Celery worker process and not the children. Since processes are isolated memory-wise, this means you can't access the db connection or any initialized resources from the child processes.
Now i am using celery with gevent which only spawn 1 process, because initially my project doesn't require CPU heavy tasks which means i don't need all the cpu power provided by the prefork pool
The existing posts didn't provide a useful answer to me.
I'm trying to run asynchronous database tests using Pytest (db is Postgres with asyncpg), and I'd like to initialize my database using my Alembic migrations so that I can verify that they work properly in the meantime.
My first attempt was this:
@pytest.fixture(scope="session")
async def tables():
    """Initialize a database before the tests, and then tear it down again.

    NOTE(review): as written the migrations were never applied; an async
    fixture body only runs when pytest-asyncio manages it -- confirm the
    plugin configuration, or make this a plain sync fixture since both
    alembic commands are synchronous.
    """
    alembic_config: config.Config = config.Config('alembic.ini')
    command.upgrade(alembic_config, "head")
    yield
    command.downgrade(alembic_config, "base")
which didn't actually do anything at all (migrations were never applied to the database, tables not created).
Both Alembic's documentation & Pytest-Alembic's documentation say that async migrations should be run by configuring your env like this:
async def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    In this scenario we need to create an Engine
    and associate a connection with the context.
    """
    # `engine` and `do_run_migrations` are assumed to be defined elsewhere
    # in this Alembic env module -- TODO confirm.
    connectable = engine
    async with connectable.connect() as connection:
        # run_sync bridges the synchronous Alembic migration runner onto
        # the async connection.
        await connection.run_sync(do_run_migrations)
    await connectable.dispose()

# Entry point of the Alembic env script: drive the coroutine to completion.
asyncio.run(run_migrations_online())
but this doesn't resolve the issue (however it does work for production migrations outside of pytest).
I stumbled upon a library called pytest-alembic that provides some built-in tests for this.
When running pytest --test-alembic, I get the following exception:
got Future attached to a different loop
A few comments on pytest-asyncio's GitHub repository suggest that the following fixture might fix it:
@pytest.fixture(scope="session")
def event_loop() -> Generator:
    """Provide one session-scoped event loop for pytest-asyncio tests."""
    loop = asyncio.get_event_loop_policy().new_event_loop()
    yield loop
    # Close the loop once the whole test session is done.
    loop.close()
but it doesn't (same exception remains).
Next I tried to run the upgrade test manually, using:
async def test_migrations(alembic_runner):
    # NOTE(review): migrate_up_to is a synchronous call; the async wrapper
    # presumably exists only so pytest-asyncio supplies a running loop --
    # TODO confirm.
    alembic_runner.migrate_up_to("revision_tag_here")
which gives me
alembic_runner.migrate_up_to("revision_tag_here")
venv/lib/python3.9/site-packages/pytest_alembic/runner.py:264: in run_connection_task
return asyncio.run(run(engine))
RuntimeError: asyncio.run() cannot be called from a running event loop
However this is an internal call by pytest-alembic, I'm not calling asyncio.run() myself, so I can't apply any of the online fixes for this (try-catching to check if there is an existing event loop to use, etc.). I'm sure this isn't related to my own asyncio.run() defined in the alembic env, because if I add a breakpoint - or just raise an exception above it - the line is actually never executed.
Lastly, I've also tried nest-asyncio.apply(), which just hangs forever.
A few more blog posts suggest to use this fixture to initialize database tables for tests:
async with engine.begin() as connection:
await connection.run_sync(Base.metadata.create_all)
which works for the purpose of creating a database to run tests against, but this doesn't run through the migrations so that doesn't help my case.
I feel like I've tried everything there is & visited every docs page, but I've got no luck so far. Running an async migration test surely can't be this difficult?
If any extra info is required I'm happy to provide it.
I got this up and running pretty easily with the following
env.py - the main idea here is that the migration can be run synchronously
import asyncio
from logging.config import fileConfig
from alembic import context
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from sqlalchemy.ext.asyncio import AsyncEngine
# Alembic Config object, giving access to the values in alembic.ini.
config = context.config
if config.config_file_name is not None:
    fileConfig(config.config_file_name)
# Metadata used for autogenerate support; `mymodel` must be importable
# from this env module -- TODO confirm the import exists.
target_metadata = mymodel.Base.metadata
def run_migrations_online():
    """Run migrations, reusing an externally supplied connection if present.

    When a caller (e.g. a test) stores a connection in
    ``config.attributes["connection"]``, migrations run synchronously on
    it; otherwise a throwaway AsyncEngine is built from alembic.ini.
    """
    connectable = context.config.attributes.get("connection", None)
    if connectable is None:
        connectable = AsyncEngine(
            engine_from_config(
                context.config.get_section(context.config.config_ini_section),
                prefix="sqlalchemy.",
                poolclass=pool.NullPool,
                future=True
            )
        )
    if isinstance(connectable, AsyncEngine):
        # We own the engine: drive the async migration coroutine ourselves.
        asyncio.run(run_async_migrations(connectable))
    else:
        # Borrowed (sync) connection: run directly, no event loop needed.
        do_run_migrations(connectable)
async def run_async_migrations(connectable):
    """Open a connection on *connectable*, run the sync migration body,
    then dispose of the engine's connection pool."""
    async with connectable.connect() as conn:
        await conn.run_sync(do_run_migrations)
    await connectable.dispose()
def do_run_migrations(connection):
    # Bind the Alembic migration context to the (sync) connection and run
    # all pending migration scripts inside one transaction.
    context.configure(
        connection=connection,
        target_metadata=target_metadata,
        compare_type=True,  # also detect column type changes in autogenerate
    )
    with context.begin_transaction():
        context.run_migrations()

# env.py is executed by Alembic at import time; kick off the run here.
run_migrations_online()
then I added a simple db init script
init_db.py
from alembic import command
from alembic.config import Config
from sqlalchemy.ext.asyncio import create_async_engine
# Paths to the Alembic config file and the migrations folder (the one
# containing env.py); override script_location so env.py is found.
__config_path__ = "/path/to/alembic.ini"
__migration_path__ = "/path/to/folder/with/env.py"
cfg = Config(__config_path__)
cfg.set_main_option("script_location", __migration_path__)
async def migrate_db(conn_url: str):
    """Upgrade the database at *conn_url* to the latest Alembic revision."""
    engine = create_async_engine(conn_url, echo=True)
    async with engine.begin() as connection:
        await connection.run_sync(__execute_upgrade)
def __execute_upgrade(connection):
    # Hand the live connection to env.py via config attributes so the
    # migration runs on it instead of creating its own engine.
    cfg.attributes["connection"] = connection
    command.upgrade(cfg, "head")
then your pytest fixture can look like this
conftest.py
...
@pytest_asyncio.fixture(autouse=True)
async def migrate():
    """Migrate the database to head before every test (autouse).

    Deliberately not session-scoped: the database is dropped and
    re-migrated around each test.
    """
    await migrate_db(conn_url)
    yield
...
Note: I don't scope my migrate fixture to the test session, I tend to drop and migrate after each test.
I'm trying to mimic Django behavior when running tests on FastAPI: I want to create a test database in the beginning of each test, and destroy it in the end. The problem is the async nature of FastAPI is breaking everything. When I did a sanity check and turned everything synchronous, everything worked beautifully. When I try to run things async though, everything breaks. Here's what I have at the moment:
The fixture:
@pytest.fixture(scope="session")
def event_loop():
    """Session-scoped loop shared by all async fixtures and tests.

    NOTE(review): asyncio.get_event_loop() is deprecated outside a running
    loop; consider new_event_loop() with an explicit close -- confirm.
    """
    return asyncio.get_event_loop()
@pytest.fixture(scope="session")
async def session():
    """Create the test database and schema, then yield an AsyncSession.

    The FastAPI dependency is overridden so the app under test uses the
    same session, and the database is dropped after the session closes.
    """
    # URL credentials use the standard user:password@host form.
    sync_test_db = "postgresql://postgres:postgres@postgres:5432/test"
    if not database_exists(sync_test_db):
        create_database(sync_test_db)
    async_test_db = "postgresql+asyncpg://postgres:postgres@postgres:5432/test"
    engine = create_async_engine(url=async_test_db, echo=True, future=True)
    async with engine.begin() as conn:
        await conn.run_sync(SQLModel.metadata.create_all)
    Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
    async with Session() as session:
        def get_session_override():
            return session

        app.dependency_overrides[get_session] = get_session_override
        yield session
    drop_database(sync_test_db)
The test:
class TestSomething:
    @pytest.mark.asyncio
    async def test_create_something(self, session):
        """POST creates a row that is visible through the test session."""
        data = {"some": "data"}
        response = client.post(
            "/", json=data
        )
        assert response.ok
        results = await session.execute(select(Something))  # <- This line fails
        assert len(results.all()) == 1
The error:
E sqlalchemy.exc.PendingRollbackError: This Session's transaction has been rolled back due to a previous exception during flush. To begin a new transaction with this Session, first issue Session.rollback(). Original exception was: Task <Task pending name='anyio.from_thread.BlockingPortal._call_func' coro=<BlockingPortal._call_func() running at /usr/local/lib/python3.9/site-packages/anyio/from_thread.py:187> cb=[TaskGroup._spawn.<locals>.task_done() at /usr/local/lib/python3.9/site-packages/anyio/_backends/_asyncio.py:629]> got Future <Future pending cb=[Protocol._on_waiter_completed()]> attached to a different loop (Background on this error at: https://sqlalche.me/e/14/7s2a)
/usr/local/lib/python3.9/site-packages/sqlalchemy/orm/session.py:601: PendingRollbackError
Any ideas what I might be doing wrong?
Check if other statements in your test-cases involving the database might fail before this error is raised.
For me the PendingRollbackError was caused by an InsertionError that was raised by a prior test.
All my tests were (async) unit tests that involved database insertions into a postgres database.
After the tests, the database session was supposed to do a rollback of its entries.
The InsertionError was caused by Insertions to the database that failed a unique constraint. All subsequent tests raised the PendingRollbackError.
I have a function that has a semi-long running session that I use for a bunch of database rows... and at a certain point I want to reload or "refresh" one of the rows to make sure none of the state has changed. most of the time this code works fine, but every now and then I get this error
sqlalchemy.exc.InvalidRequestError: Instance '<Event at 0x58cb790>' is not persistent within this Session
I've been reading up on state but cannot understand why an object would stop being persistent? I'm still within a session, so I'm not sure why I would stop being persistent.
Can someone explain what could cause my object to be "not persistent" within the session? I'm not doing any writing to the object prior to this point.
db_event below is the object that is becoming "not persistent"
async def event_white_check_mark_handler(
    self: Events, ctx, channel: TextChannel, member: discord.Member, message: Message
):
    """
    This reaction is for completing an event
    """
    # Session from a scoped_session registry; with coroutines several
    # handlers may share the same session (race discussed below the code)
    # -- TODO confirm the registry's scopefunc.
    session = database_objects.SESSION()
    try:
        message_id = message.id
        db_event = self.get_event(session, message_id)
        if not db_event:
            # No event row tracked for this message: nothing to do.
            return
        logger.debug(f"{member.display_name} wants to complete an event {db_event.id}")
        db_guild = await db.get_or_create(
            session, db.Guild, name=channel.guild.name, discord_id=channel.guild.id
        )
        db_member = await db.get_or_create(
            session,
            db.Member,
            name=member.name,
            discord_id=member.id,
            nick=member.display_name,
            guild_id=db_guild.discord_id,
        )
        db_scheduler_config: db.SchedulerConfig = (
            session.query(db.SchedulerConfig)
            .filter(db.SchedulerConfig.guild_id == channel.guild.id)
            .one()
        )
        # reasons to not complete the event
        # NOTE(review): len(db_event) on a single event row presumably
        # counts its members -- confirm the Event model defines __len__.
        if len(db_event) == 0:
            await channel.send(
                f"{member.display_name} you cannot complete an event with no one on it!"
            )
        elif (
            db_member.discord_id == db_event.creator_id
            or await db_scheduler_config.check_permission(
                ctx, db_event.event_name, member, db_scheduler_config.MODIFY
            )
        ):
            async with self.EVENT_LOCKS[db_event.id]:
                # Re-load current row state; this is the line that raises
                # InvalidRequestError when the instance is no longer
                # persistent in this session (see discussion below).
                session.refresh(db_event)
                db_event.status = const.COMPLETED
                session.commit()
                self.DIRTY_EVENTS.add(db_event.id)
            member_list = ",".join(
                filter(
                    lambda x: x not in const.MEMBER_FIELD_DEFAULT,
                    [str(x.mention) for x in db_event.members],
                )
            )
            await channel.send(f"Congrats on completing a event {member_list}!")
            logger.info(f"Congrats on completing a event {member_list}!")
            # await self.stop_tracking_event(db_event)
            del self.REMINDERS_BY_EVENT_ID[db_event.id]
        else:
            await channel.send(
                f"{member.display_name} you did not create this event and do not have permission to delete the event!"
            )
            logger.warning(f"{member.display_name} you did not create this event!")
    except Exception as _e:
        # Log the full traceback and undo any partial changes.
        logger.error(format_exc())
        session.rollback()
    finally:
        # Remove the session from the scoped registry (closes it).
        database_objects.SESSION.remove()
I am fairly certain that the root cause in this case is a race condition. Using a scoped session in its default configuration manages scope based on the thread only. Using coroutines on top can mean that 2 or more end up sharing the same session, and in case of event_white_check_mark_handler they then race to commit/rollback and to remove the session from the scoped session registry, effectively closing it and expunging all remaining instances from the now-defunct session, making the other coroutines unhappy.
A solution is to not use scoped sessions at all in event_white_check_mark_handler, because it fully manages its session's lifetime, and seems to pass the session forward as an argument. If on the other hand there are some paths that use the scoped session database_objects.SESSION instead of receiving the session as an argument, define a suitable scopefunc when creating the registry:
https://docs.sqlalchemy.org/en/13/orm/contextual.html#using-custom-created-scopes
SQLAlchemy+Tornado: How to create a scopefunc for SQLAlchemy's ScopedSession?
Correct usage of sqlalchemy scoped_session with python asyncio
I experienced this issue when retrieving a session from a generator, and try to run the exact same query again from different yielded sessions:
# Factory for plain (sync) Session objects bound to the module engine.
SessionLocal = sessionmaker(bind=engine, class_=Session)


def get_session() -> Generator:
    """Yield one session; the context manager closes it when the
    generator is exhausted or closed."""
    with SessionLocal() as session:
        yield session
The solution was to use session directly (in my case).
Perhaps in your case I would commit the session, before executing a new query.
def get_data():
with Session(engine) as session:
statement = select(Company)
results = session.exec(statement)