I am using Flask-SQLAlchemy, with autocommit set to False and autoflush set to True. It's connecting to a mysql database.
I have 3 methods like this:
def insert_something():
insert_statement = <something>
db.session.execute(insert_statement);
db.session.commit()
def delete_something():
delete_statement = <something>
db.session.execute(delete_statement);
db.session.commit()
def delete_something_else():
delete_statement = <something>
db.session.execute(delete_statement);
db.session.commit()
Sometimes I want to run these methods individually; no problems there — but sometimes I want to run them together in a nested transaction. I want insert_something to run first, and delete_something to run afterwards, and delete_something_else to run last. If any of those methods fail then I want everything to be rolled back.
I've tried the following:
db.session.begin_nested()
insert_something()
delete_something()
delete_something_else()
db.session.commit()
This doesn't work, though, because insert_something exits the nested transaction (and releases the savepoint). Then, when delete_something runs db.session.commit() it actually commits the deletion to the database because it is in the outermost transaction.
That final db.session.commit() in the code block above doesn't do anything..everything is already committed by that point.
Maybe I can do something like this, but it's ugly as hell:
db.session.begin_nested()
db.session.begin_nested()
db.session.begin_nested()
db.session.begin_nested()
insert_something()
delete_something()
delete_something_else()
db.session.commit()
There's gotta be a better way to do it without touching the three methods..
Edit:
Now I'm doing it like this:
with db.session.begin_nested():
insert_something()
with db.session.begin_nested():
delete_something()
with db.session.begin_nested():
delete_something_else()
db.session.commit()
Which is better, but still not great.
I'd love to be able to do something like this:
with db.session.begin_nested() as nested:
insert_something()
delete_something()
delete_something_else()
nested.commit() # though I feel like you shouldn't need this in a with block
The docs discuss avoiding this pattern in arbitrary-transaction-nesting-as-an-antipattern and session-faq-whentocreate.
But there is an example in the docs that is similar to this but it is for testing.
https://docs.sqlalchemy.org/en/14/orm/session_transaction.html?highlight=after_transaction_end#joining-a-session-into-an-external-transaction-such-as-for-test-suites
Regardless, here is a gross transaction manager based on the example that "seems" to work but don't do this. I think there are a lot of gotchas in here.
import contextlib
from sqlalchemy import (
create_engine,
Integer,
String,
)
from sqlalchemy.schema import (
Column,
MetaData,
)
from sqlalchemy.orm import declarative_base, Session
from sqlalchemy import event
from sqlalchemy.sql import delete, select
db_uri = 'postgresql+psycopg2://username:password#/database'
engine = create_engine(db_uri, echo=True)
metadata = MetaData()
Base = declarative_base(metadata=metadata)
class Device(Base):
__tablename__ = "devices"
id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(String(50))
def get_devices(session):
return [d.name for (d,) in session.execute(select(Device)).all()]
def create_device(session, name):
session.add(Device(name=name))
session.commit()
def delete_device(session, name):
session.execute(delete(Device).filter(Device.name == name))
session.commit()
def almost_create_device(session, name):
session.add(Device(name=name))
session.flush()
session.rollback()
#contextlib.contextmanager
def force_nested_transaction_forever(session, commit_on_complete=True):
"""
Keep re-entering a nested transaction everytime a transaction ends.
"""
d = {
'nested': session.begin_nested()
}
#event.listens_for(session, "after_transaction_end")
def end_savepoint(session, transaction):
# Start another nested trans if the prior one is no longer active.
if not d['nested'].is_active:
d['nested'] = session.begin_nested()
try:
yield
finally:
# Stop trapping us in perpetual nested transactions.
# Is this the right place for this ?
event.remove(session, "after_transaction_end", end_savepoint)
# This seems like it would be error prone.
if commit_on_complete and d['nested'].is_active:
d.pop('nested').commit()
if __name__ == '__main__':
metadata.create_all(engine)
with Session(engine) as session:
with session.begin():
# THIS IS NOT RECOMMENDED
with force_nested_transaction_forever(session):
create_device(session, "0")
create_device(session, "a")
delete_device(session, "a")
almost_create_device(session, "a")
create_device(session, "b")
assert len(get_devices(session)) == 2
assert len(get_devices(session)) == 2
Related
Description of the problem
I wanted to use ormar with SQLite for my project but ran into the problem that ormar doesn't save changes to the database. Although everything seems to be done according to the documentation. (Additionally, I used the faker to generate unique names to fill db and loguru to logging)
My code
import asyncio
from databases import Database
from faker import Faker
from loguru import logger as log
from ormar import ModelMeta, Model, Integer, String
from sqlalchemy import create_engine, MetaData
fake = Faker()
DB_PATH = 'sqlite:///db.sqlite3'
database = Database(DB_PATH)
metadata = MetaData()
class BaseMeta(ModelMeta):
database = database
metadata = metadata
class User(Model):
class Meta(BaseMeta):
tablename = 'users'
id: int = Integer(primary_key=True)
name: str = String(max_length=64)
# Also I tried without `with_connect` function, but it also doesn't work
async def with_connect(function):
async with database:
await function()
async def create():
return f"User created: {await User(name=fake.name()).save()}"
# Also I tried this: `User.objects.get_or_create(name=fake.name())`
# but it also doesn't work:
async def read():
return f"All data from db: {await User.objects.all()}"
async def main():
log.info(await create())
log.info(await create())
log.info(await read())
if __name__ == '__main__':
engine = create_engine(DB_PATH)
metadata.drop_all(engine)
metadata.create_all(engine)
try:
asyncio.run(with_connect(main))
finally:
metadata.drop_all(engine)
Results
As a result, I expected that after each run of the code, data would be printed during previous runs. That is so that the created objects are saved to the file db.sqlite3.
The actual result is that after each run of the code, only the data generated during that run is printed.
Conclusion
Why is the data not saved to the database file? Maybe I misunderstood how ORMs work?
I'm working with sqlalchemy and postgresql using python modules.
There is a database module which creates the objects that I need in order to work with postgresql tables. The init method is the following:
class Database:
is_instantiated = False
def __init__(self):
if not Database.is_instantiated:
self.engine = create_engine("postgresql+psycopg2://name:pwd#localhost:5432/udemy")
self.metadata = MetaData()
self.create_table()
Database.is_instantiated = True
print("Database connection success")
else:
print("Already connected")
The create_table() method just creates tables objects like:
def create_table(self):
self.tbl_employee = Table("employee", self.metadata,
Column("id", Integer(), primary_key=True),
Column("first_name", String(), nullable=False),
Column("last_name", String(), nullable=False),
Column("birthday", String(10), nullable=False),
Column("department_name", String(), nullable=False)
)
Now, there is a start module with a button that if pressed it opens another window using the following method:
def manage_employees(self):
self.hide()
self.employee_window = EmployeeMenu(self)
self.employee_window.show()
The employee module, when instanciated, populate the QTableWidget using this method:
def populate_tbl(self):
self.db = Database()
headers, data = self.db.get_data_for_tab()
tbl_nr_rows = len(data)
tbl_nr_columns = len(headers)
self.tbl_center.setRowCount(tbl_nr_rows)
self.tbl_center.setColumnCount(tbl_nr_columns)
self.tbl_center.setHorizontalHeaderLabels(tuple(headers))
self.tbl_center.setSelectionMode(qw.QAbstractItemView.SelectionMode.SingleSelection)
self.tbl_center.horizontalHeader().setSectionResizeMode(qw.QHeaderView.Stretch)
self.tbl_center.setSelectionBehavior(qw.QAbstractItemView.SelectRows)
for row in range(tbl_nr_rows):
for col in range(tbl_nr_columns):
self.tbl_center.setItem(row, col, qw.QTableWidgetItem(str(data[row][col])))
The self.db.get_data_for_tab() simply takes data from the postgresql in order to populate the table widget.
The first time that I open the employee window everything works fine, but I have also a button that can bring me back to the start menu:
def show_start(self):
self.hide()
self.start_menu.show()
If I use this method and then try to reopen the employee window (using the manage_employees), it will raise an error because the Database.is_instantiated attribute is True now and therefore it will not run the code for creating the engine, metadata, ...
My first question is if it is correct to try to create the engine and others database's objects only one time, and in that case how can I set up that properly in order to avoid this kind of issues.
If it is not necessary, I have seen that if I remove the initial check in the init method of the Database class, the employee table will be populated properly, but I don't know if this is the right way to work with sqlalchmey objects.
Many thanks in advance for your help.
Andrea
I am studying the "Cosmic Python" book and chapter 6 explains how to use the Unit of Work pattern to change the interaction with the database/repository.
Chapter 6 of the book can be accessed here:
https://www.cosmicpython.com/book/chapter_06_uow.html
The code provided by the author is the following:
from __future__ import annotations
import abc
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm.session import Session
from allocation import config
from allocation.adapters import repository
class AbstractUnitOfWork(abc.ABC):
products: repository.AbstractRepository
def __enter__(self) -> AbstractUnitOfWork:
return self
def __exit__(self, *args):
self.rollback()
#abc.abstractmethod
def commit(self):
raise NotImplementedError
#abc.abstractmethod
def rollback(self):
raise NotImplementedError
DEFAULT_SESSION_FACTORY = sessionmaker(bind=create_engine(
config.get_postgres_uri(),
isolation_level="REPEATABLE READ",
))
class SqlAlchemyUnitOfWork(AbstractUnitOfWork):
def __init__(self, session_factory=DEFAULT_SESSION_FACTORY):
self.session_factory = session_factory
def __enter__(self):
self.session = self.session_factory() # type: Session
self.products = repository.SqlAlchemyRepository(self.session)
return super().__enter__()
def __exit__(self, *args):
super().__exit__(*args)
self.session.close()
def commit(self):
self.session.commit()
def rollback(self):
self.session.rollback()
I am trying to test my endpoints on Flask but I could not make it rollback the data inserted after each test.
To solve that I tried to install the package pytest-flask-sqlalchemy but with the following error:
'SqlAlchemyUnitOfWork' object has no attribute 'engine'
I do not quite understand how pytest-flask-sqlalchemy works and I have no clue on how to make the Unit of Work rollback transactions after a test.
Is it possible to make it work the way the author implemented it?
Edited
It is possible to replicate my situation through the following repository:
https://github.com/Santana94/CosmicPythonRollbackTest
You should get that the test is not rolling back previous actions by cloning it and running make all.
Finally, I got to make the rollback functionality happen after every test.
I got that working when I saw a package called pytest-postgresql implementing it on itself. I just made my adjustments to make tests rollback the database data that I was working with. For that, I just had to implement this function on conftest.py:
#pytest.fixture(scope='function')
def db_session():
engine = create_engine(config.get_postgres_uri(), echo=False, poolclass=NullPool)
metadata.create_all(engine)
pyramid_basemodel.Session = scoped_session(sessionmaker(extension=ZopeTransactionExtension()))
pyramid_basemodel.bind_engine(
engine, pyramid_basemodel.Session, should_create=True, should_drop=True)
yield pyramid_basemodel.Session
transaction.commit()
metadata.drop_all(engine)
After that, I had to place the db_session as a parameter of a test if I wanted to rollback transactions:
#pytest.mark.usefixtures('postgres_db')
#pytest.mark.usefixtures('restart_api')
def test_happy_path_returns_202_and_batch_is_allocated(db_session):
orderid = random_orderid()
sku, othersku = random_sku(), random_sku('other')
earlybatch = random_batchref(1)
laterbatch = random_batchref(2)
otherbatch = random_batchref(3)
api_client.post_to_add_batch(laterbatch, sku, 100, '2011-01-02')
api_client.post_to_add_batch(earlybatch, sku, 100, '2011-01-01')
api_client.post_to_add_batch(otherbatch, othersku, 100, None)
r = api_client.post_to_allocate(orderid, sku, qty=3)
assert r.status_code == 202
r = api_client.get_allocation(orderid)
assert r.ok
assert r.json() == [
{'sku': sku, 'batchref': earlybatch},
]
It is possible to check out the requirements for that and other aspects of that implementation on my GitHub repository.
https://github.com/Santana94/CosmicPythonRollbackTest
In my application I'm using SQLAlchemy for storing most persistent data across app restarts. For this I have a db package containing my mapper classes (like Tag, Group etc.) and a support class creating a single engine instance using create_engine and a single, global, Session factory using sessionmaker.
Now my understanding of how to use SQLAlchemys sessions is, that I don't pass them around in my app but rather create instances using the global factory whenever I need database access.
This leads to situations were a record is queried in one session and then passed on to another part of the app, which uses a different session instance. This gives me exceptions like this one:
Traceback (most recent call last):
File "…", line 29, in delete
session.delete(self.record)
File "/usr/lib/python3.3/site-packages/sqlalchemy/orm/session.py", line 1444, in delete
self._attach(state, include_before=True)
File "/usr/lib/python3.3/site-packages/sqlalchemy/orm/session.py", line 1748, in _attach
state.session_id, self.hash_key))
sqlalchemy.exc.InvalidRequestError: Object '<Group at 0x7fb64c7b3f90>' is already attached to session '1' (this is '3')
Now my question is: did I get the usage of Session completly wrong (so I should use one session only at a time and pass that session around to other components together with records from the database) or could this result from actual code issue?
Some example code demonstrating my exact problem:
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base, declared_attr
Base = declarative_base()
class Record(Base):
__tablename__ = "record"
id = Column(Integer, primary_key=True)
name = Column(String)
def __init__(self, name):
self.name = name
def __repr__(self):
return "<%s('%s')>" % (type(self).__name__, self.name)
engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
s1 = Session()
record = Record("foobar")
s1.add(record)
s1.commit()
# This would be a completly different part of app
s2 = Session()
record = s2.query(Record).filter(Record.name == "foobar").first()
def delete_record(record):
session = Session()
session.delete(record)
session.commit()
delete_record(record)
For now I switched over to using a single, global session instance. That's neither nice nor clean in my opinion, but including lots and lots of boiler plate code to expunge objects from one session just to add them back to their original session after handing it over to some other application part was no realistic option, either.
I suppose this will completely blow up if I start using multiple threads to access the database via the very same session…
I am trying to setup a simple server on heroku using bottle and sqlalchemy but I keep getting the error that I have not given my methods enough arguments because the "db" keyword is not getting injected.
Here is what I tried:
import os
from bottle import *
import bottle
from bottle.ext import sqlalchemy
from sqlalchemy import create_engine, Column, Integer, Sequence, String
from sqlalchemy.ext.declarative import declarative_base
import json
app = bottle.Bottle()
database_url = os.environ["DATABASE_URL"]
Base = declarative_base()
engine = create_engine(database_url, echo=True)
plugin = sqlalchemy.Plugin(
engine,
Base.metadata,
keyword='db',
create=True,
commit=True,
use_kwargs=False
)
app.install(plugin)
class Path(Base):
__tablename__ = "path"
id = Column(Integer, Sequence("id_seq"), primary_key=True)
start_node = Column(String)
end_node = Column(String)
path = Column(String)
def __init__(self, start_node, end_node, path):
self.start_node = start_node
self.end_node = end_node
self.path = path
def __repr__(self):
return "<Path(id: '%d', start_node: '%s', end_node: '%s', path: '%s')>" & (self.id, self.start_node, self.end_node, self.path)
#app.get("/get", db)
def node_value(db):
node = "face"
start_path = db.query(Path).filter_by(start_node=node)
end_path = db.query(Path).filter_by(end_node=node)
if start_path and end_path:
data = start_node
data.append(end_path)
if start_path:
return start_node
if end_path:
return end_path
else:
return "ERROR"
#route("/")
def hello_world():
return "Hello, World!"
run(host="0.0.0.0", port=int(os.environ.get("PORT", 3000)))
Thanks for all your help!
I ran your code and it worked for me. The only things I changed seem inconsistent with what you describe as your experience, but maybe they will help:
Removed the db param from your app.get decoration.
You need to call your app's run method, not bottle's. I.e.,
So these two lines changed from:
#app.get("/get", db)
...
run(host="0.0.0.0", port=int(os.environ.get("PORT", 3000)))
to
#app.get("/get")
...
app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 3000)))
Then it works as expected. If these changes don't solve your problem, please post exactly what you're doing to generate the error message and what the message is, and I'll see if I can reproduce it on my end.
PS, When I say it "works for me," I mean that the node_value method gets called, per original Q. I'm not saying it actually does anything. :) Oh, FYI I also set DATABASE_URL to "sqlite:///:memory:"