Where are rows created with pytest-postgresql - python

I inherited a FastAPI project that uses PostgreSQL and pytest-postgresql to provide pytest fixtures for testing against PostgreSQL. Out of curiosity I placed breakpoint() statements in several places after model row creation and commit() calls, but before any cleanup. With breakpoint() holding the process, I then looked at the database server to see if I could find the data that was inserted through pytest-postgresql. I could find nothing. Where would this data be?
In my conftest.py file, I have the following pytest-postgresql setup.
from pytest_postgresql import factories
...
postgresql_proc = factories.postgresql_proc(
    host="localhost",
    user="REDACTED",
    port="5432",
    password="REDACTED",
)
pg_fixture = factories.postgresql("postgresql_proc", db_name="REDACTED")

@pytest.fixture(scope="function")
def db_session(pg_fixture):
    """
    A session object to a non persistent db.
    Will clean up the database after each test run, in its cleanup stage
    """
    sqlalchemy_uri = (
        f"postgresql://{pg_fixture.info.user}:{pg_fixture.info.password}@"
        f"{pg_fixture.info.host}:{pg_fixture.info.port}"
        f"/{pg_fixture.info.dbname}"
    )
    engine = get_engine(sqlalchemy_uri)
    models.base.Base.metadata.create_all(engine)  # CREATES VARIOUS MODELS
    Session = sessionmaker(bind=engine)
    yield Session()
    models.base.Base.metadata.drop_all(engine)
The tests work, so I assume it is set up correctly. And by "work" I mean they pass when they should and fail when they should not. But for the life of me I cannot understand where pytest-postgresql is putting the row data inserted during model creation in the test setup.
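For reference, below is a throwaway debugging fixture (the name db_session_debug is just for illustration; it reuses pg_fixture from above) that prints the connection parameters the fixture actually hands out while breakpoint() holds the process, so psql can be pointed at exactly that host, port and database:

@pytest.fixture(scope="function")
def db_session_debug(pg_fixture):
    # pg_fixture is a live DB-API connection; .info carries the server details.
    info = pg_fixture.info
    print(f"host={info.host} port={info.port} dbname={info.dbname} user={info.user}")
    breakpoint()  # hold the process here and inspect the server with psql
    yield pg_fixture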

Related

Flask/FastAPI SQLite pytest fixture returns None unless row ID is specified

I'm testing a FastAPI app with pytest. I've created a client fixture which includes a sqlite DB created from CSVs:
import pytest
from os import path, listdir, remove
from pandas import read_csv
from fastapi.testclient import TestClient
from api.main import app
from api.db import engine, db_url

@pytest.fixture(scope="session")
def client():
    db_path = db_url.split("///")[-1]
    if path.exists(db_path):
        remove(db_path)
    file_path = path.dirname(path.realpath(__file__))
    table_path = path.join(file_path, "mockdb")
    for table in listdir(table_path):
        df = read_csv(path.join(table_path, table))
        df.to_sql(table.split('.')[0], engine, if_exists="append", index=False)
    client = TestClient(app)
    yield client
My DB setup in the FastAPI app:
import os
from sys import modules

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

dirname = os.path.dirname(__file__)
if "pytest" in modules:
    mock_db_path = os.path.join(dirname, '../test/mockdb/test.db')
    db_url = f"sqlite:///{mock_db_path}"
else:
    db_url = os.environ.get("DATABASE_URL", None)
if "sqlite" in db_url:
    engine = create_engine(db_url, connect_args={"check_same_thread": False})
else:
    engine = create_engine(db_url)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
This works: I can set up tests for app endpoints which query the DB and the data I put in the CSVs is returned, e.g. after adding one row to mockdb/person.csv:
from api.db import SessionLocal
db = SessionLocal()
all = db.query(Person).all()
print(all)
[<tables.Person object at 0x7fc829f81430>]
I am now trying to test code which adds new rows to tables in the database.
This only works if I specify the ID (assume this occurs during the pytest run):
db.add(Person(id=2, name="Alice"))
db.commit()
all = db.query(Person).all()
print(all)
[<tables.Person object at 0x7fc829f81430>, <tables.Person object at 0x7fc829f3bdc0>]
The above result is as I'd expect the program to behave. However, if I don't specify the ID, then the result is None:
db.add(Person(name="Alice"))
db.commit()
all = db.query(Person).all()
print(all)
[<tables.Person object at 0x7fc829f81430>, None]
This result is not how I expect the program to behave.
The code that I want to test does not specify IDs; it relies on autoincrement, as is good practice. Thus, I am unable to test this code: it simply creates these Nones.
At first, I thought the culprit was not creating tables with Base.metadata.create_all(). However, I have tried placing this both in my client fixture and after my DB setup (i.e. the first two code blocks above), but the result is the same: Nones.
Stepping through with the debugger, when the Person row is added, the following error appears:
sqlalchemy.orm.exc.ObjectDeletedError: Instance '<Person at 0x7fc829f3bdc0>' has been deleted, or its row is otherwise not present.
Why is the resulting row None and how do I solve this error?
The cause of the error was that I had a column type in my DB that was not compatible with SQLite, namely PostgreSQL's ARRAY type. Unfortunately there was no error message hinting at this. The simplest solution is to remove or change the type of this column.
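For example, the change could look roughly like this (a sketch only; the model is made up, but the table and column names match the snippet below), swapping the PostgreSQL ARRAY for the generic JSON type, which SQLAlchemy supports on both SQLite and PostgreSQL:

from sqlalchemy import Column, Integer, JSON
# from sqlalchemy.dialects.postgresql import ARRAY  # the type SQLite cannot create

class MyOffendingTable(Base):
    __tablename__ = "my_offending_table"
    id = Column(Integer, primary_key=True)
    # was: my_offending_column = Column(ARRAY(Integer))
    my_offending_column = Column(JSON)  # works on both SQLite and PostgreSQL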
It is also possible to retain the column and the SQLite fixture by changing client() as follows:
from mytableschema import MyOffendingTable

@pytest.fixture(scope="session")
def client():
    # SBEvent.metadata is the shared MetaData; any model on the same Base would do
    table_meta = SBEvent.metadata.tables[MyOffendingTable.__tablename__]
    table_meta._columns.remove(table_meta._columns["my_offending_column"])
    Base.metadata.create_all(bind=engine)
    db_path = db_url.split("///")[-1]
    if path.exists(db_path):
        remove(db_path)
    file_path = path.dirname(path.realpath(__file__))
    table_path = path.join(file_path, "mockdb")
    for table in listdir(table_path):
        df = read_csv(path.join(table_path, table))
        df.to_sql(table.split('.')[0], engine, if_exists="append", index=False)
    client = TestClient(app)
    yield client
It is now possible to proceed as normal if you remove my_offending_column from the MyOffendingTable CSV. No more Nones!
Sadly querying the offending table during the test run will still run into issues as the SELECT statement will look for the nonexistent my_offending_column. For those needing to query said table, I recommend using dialect-specific compilation rules.
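As a rough illustration of that suggestion (just a sketch using SQLAlchemy's compiler extension, not code from my project), you can register a SQLite-only DDL compilation for the ARRAY type so the column can stay in the model and create_all() still succeeds against the SQLite fixture:

from sqlalchemy.ext.compiler import compiles
from sqlalchemy.dialects.postgresql import ARRAY

@compiles(ARRAY, "sqlite")
def compile_array_for_sqlite(element, compiler, **kw):
    # When DDL targets SQLite, render ARRAY columns as plain TEXT;
    # PostgreSQL compilation is left untouched.
    return "TEXT"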

Django data leak between 2 separated tests

Across my whole test base, I experience weird behaviour with two tests. They are completely isolated, yet I can find data from the first test in the second one. Here are the tests:
file1 (services.tests)
class ServiceTestCase(TestCase):
    @patch('categories.models.ArticlesByCategory.objects.has_dish_type')
    def test_build_dishtype_conflicts(self, mock_has_dish_type):
        # WARN: create interference in tests
        restaurant = RestaurantFactory()
        dt_1 = DishTypeFactory(restaurant=restaurant)
        cat_1 = CategoryFactory(restaurant=restaurant)
        art_1 = ArticleFactory(name='fooA1', restaurant=restaurant)
        art_2 = ArticleFactory(name='fooA2', restaurant=restaurant)
        abc_1 = ArticlesByCategory.objects.create(category=cat_1, article=art_1, is_permanent=True,
                                                  dish_type=dt_1)
        abc_2 = ArticlesByCategory.objects.create(category=cat_1, article=art_2, is_permanent=True,
                                                  dish_type=dt_1)
        mock_has_dish_type.return_value = [abc_1, abc_2]
        abcs_to_check = ArticlesByCategory.objects.filter(pk__in=[abc_1.pk, abc_2.pk])
        conflicts = ServiceFactory()._build_dishtype_conflicts(abcs_to_check)
        self.assertDictEqual(conflicts, {dt_1.pk: 2})
file2 (products.tests)
class ArticleQuerySetTestCase(TestCase):
    def test_queryset_usable_for_category(self):
        restaurant = RestaurantFactory()
        category_1 = CategoryFactory(name='fooB1', restaurant=restaurant)
        category_2 = CategoryFactory(name='fooB2', restaurant=restaurant)
        article_1 = ArticleFactory(restaurant=restaurant)
        article_2 = ArticleFactory(restaurant=restaurant)
        ArticlesByCategory.objects.create(article=article_1, category=category_1, is_permanent=True)
        queryset_1 = Article.objects.usable_for_category(category_1)
        # This line is used for debug
        for art in Article.objects.all():
            print(art.name)
When running test_build_dishtype_conflicts THEN test_queryset_usable_for_category in the same command, here are the results of the print in the second test:
fooA1
fooA2
fooB1
fooB2
I suspect I did something wrong but can't find what.
OK, I found the problem in the Django documentation:
If your tests rely on database access such as creating or querying models, be sure to create your test classes as subclasses of django.test.TestCase rather than unittest.TestCase.
Using unittest.TestCase avoids the cost of running each test in a transaction and flushing the database, but if your tests interact with the database their behavior will vary based on the order that the test runner executes them. This can lead to unit tests that pass when run in isolation but fail when run in a suite.
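In practice that means making sure the test classes subclass django.test.TestCase rather than unittest.TestCase, so every test runs in a transaction that is rolled back afterwards. A minimal sketch (assuming the stray import was in products.tests):

# products/tests.py
from django.test import TestCase  # not: from unittest import TestCase

class ArticleQuerySetTestCase(TestCase):
    def test_queryset_usable_for_category(self):
        ...  # rows created here are rolled back when the test finishes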

Django TestCase: recreate database in self.subTest(...)

I need to test a function with different parameters, and the most proper way for this seems to be using the with self.subTest(...) context manager.
However, the function writes something to the db, and it ends up in an inconsistent state. I can delete the things I write, but it would be cleaner if I could recreate the whole db completely. Is there a way to do that?
Not sure how to recreate the database in self.subTest(), but I have another technique I am currently using which might be of interest to you. You can use fixtures to create a "snapshot" of your database, which will basically be copied into a second database used only for testing purposes. I currently use this method to test code on a big project I'm working on at work.
I'll post some example code to give you an idea of what this will look like in practice, but you might have to do some extra research to tailor the code to your needs (I've added links to guide you).
The process is rather straightforward. You would be creating a copy of your database with only the data needed by using fixtures, which will be stored in a .yaml file and accessed only by your test unit.
Here is what the process would look like:
List the items you want to copy into your test database to populate it using fixtures. This creates a db with only the data you need instead of blindly copying the entire db. It will be stored in a .yaml file.
generate.py
import sys

import django
from django.core.management import call_command

django.setup()

stdout = sys.stdout
conf = [
    {
        'file': 'myfile.yaml',
        'models': [
            dict(model='your.model', pks='your, primary, keys'),
            dict(model='your.model', pks='your, primary, keys')
        ]
    }
]
for fixture in conf:
    print('Processing: %s' % fixture['file'])
    with open(fixture['file'], 'w') as f:
        sys.stdout = FixtureAnonymiser(f)  # project-specific stdout wrapper (not shown here)
        for model in fixture['models']:
            call_command('dumpdata', model.pop('model'), format='yaml', indent=4, **model)
        sys.stdout.flush()
    sys.stdout = stdout
In your test unit, import your generated .yaml file as a fixture, and your tests will automatically use the data from the fixture to carry out the tests, keeping your main database untouched.
test_class.py
from django.test import TestCase

class classTest(TestCase):
    fixtures = ('myfile.yaml',)

    def setUp(self):
        """setup tests cases"""
        # create the object you want to test here, which will use data from the fixtures

    def test_function(self):
        self.assertEqual(True, True)
        # write your test here
You can read up more here:
Django
YAML
If you have any questions because things are unclear just ask, I'd be happy to help you out.
Maybe my solution will help someone
I used transactions to roll back to the database state that I had at the start of the test.
I use Eric Cousineau's decorator function for parametrizing tests.
More about database transactions at django documentation page
import functools

from django.db import transaction
from django.test import TransactionTestCase
from django.contrib.auth import get_user_model

User = get_user_model()


def sub_test(param_list):
    """Decorates a test case to run it as a set of subtests."""
    def decorator(f):
        @functools.wraps(f)
        def wrapped(self):
            for param in param_list:
                with self.subTest(**param):
                    f(self, **param)
        return wrapped
    return decorator


class MyTestCase(TransactionTestCase):

    @sub_test([
        dict(email="new@user.com", password='12345678'),
        dict(email="new@user.com", password='password'),
    ])
    def test_passwords(self, email, password):
        # open a transaction
        with transaction.atomic():
            # Creates a new savepoint. Returns the savepoint ID (sid).
            sid = transaction.savepoint()
            # create user and check that there is only one with this email in the DB
            user = User.objects.create(email=email, password=password)
            self.assertEqual(User.objects.filter(email=user.email).count(), 1)
            # Rolls back the transaction to savepoint sid.
            transaction.savepoint_rollback(sid)

django database inserts not getting picked up

We have a little bit of a complicated setup:
In our normal code, we connect manually to a MySQL db. We're doing this because, as I understand it, the connections Django normally uses are not thread-safe. So we let Django make the connection, extract the information from it, and then use a MySQLdb connection to do the actual querying.
Our code is largely an update process, so we have autocommit turned off to save time.
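A simplified sketch of what that looks like (not our actual code; only the 'ourdb' alias below is real): pull the connection parameters out of Django's settings and open a raw MySQLdb connection with autocommit disabled:

import MySQLdb
from django.db import connections

# Reuse the parameters Django already has for the 'ourdb' alias
cfg = connections['ourdb'].settings_dict
raw_conn = MySQLdb.connect(
    host=cfg['HOST'], user=cfg['USER'],
    passwd=cfg['PASSWORD'], db=cfg['NAME'],
)
raw_conn.autocommit(False)  # updates are batched; commits are explicit
cursor = raw_conn.cursor()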
For ease of creating test data, I created django models that represent the tables, and use them to create rows to test on. So I have functions like:
from copy import deepcopy

from django.db import transaction, reset_queries

def make_thing(**overrides):
    fields = deepcopy(DEFAULT_THING)
    fields.update(overrides)
    s = Thing(**fields)
    s.save()
    transaction.commit(using='ourdb')
    reset_queries()
    return s
However, it doesn't seem to actually be committing! After I make an object, I later have code that executes raw sql against the mysqldb connection:
def get_information(self, value):
    print self.api.rawSql("select count(*) from thing")[0][0]
    query = 'select info from thing where column = %s' % value
    return self.api.rawSql(query)[0][0]
This print statement prints 0! Why?
Also, if I turn autocommit off, I get
TransactionManagementError: This is forbidden when an 'atomic' block is active.
when we try to alter the autocommit level later.
EDIT: I also just tried https://groups.google.com/forum/#!topic/django-users/4lzsQAWYwG0, which did not help.
EDIT2: I checked from a shell against the database--the commit is working, it's just not getting picked up. I've tried setting the transaction isolation level but it isn't helping. I should add that a function further up from get_information uses this decorator:
import gc

import django.db

def single_transaction(fn):
    from django.db import transaction
    from django.db import connection

    def wrapper(*args, **kwargs):
        prior_autocommit = transaction.get_autocommit()
        transaction.set_autocommit(False)
        connection.cursor().execute('set transaction isolation level read committed')
        connection.cursor().execute("SELECT @@session.tx_isolation")
        try:
            result = fn(*args, **kwargs)
            transaction.commit()
            return result
        finally:
            transaction.set_autocommit(prior_autocommit)
            django.db.reset_queries()
            gc.collect()

    wrapper.__name__ = fn.__name__
    return wrapper

How can I copy an in-memory SQLite database to another in-memory SQLite database in Python?

I'm writing a test suite for Django that runs tests in a tree-like fashion. For example, Testcase A might have 2 outcomes, and Testcase B might have 1, and Testcase C might have 3. The tree looks like this
        X
       /
  A-B-C-X
   \   \
    B   X
     \   X
      \ /
       C-X
        \
         X
For each path in the tree above, the database contents may be different. So at each fork, I'm thinking of creating an in-memory copy of the current state of the database, and then feeding that parameter into the next test.
Anyone have an idea about how to essentially copy the in-memory database to another one, and then get a reference to pass that database around?
Thanks!
Alright, after a fun adventure I figured this one out.
from django.db import connections
import sqlite3
# Create a Django database connection for our test database
connections.databases['test'] = {'NAME': ":memory:", 'ENGINE': "django.db.backends.sqlite3"}
# We assume that the database under the source_wrapper hasn't been created
source_wrapper = connections['default'] # put alias of source db here
target_wrapper = connections['test']
# Create the tables for the source database
source_wrapper.creation.create_test_db()
# Dump the database into a single text query
query = "".join(line for line in source_wrapper.connection.iterdump())
# Generate an in-memory sqlite connection
target_wrapper.connection = sqlite3.connect(":memory:")
target_wrapper.connection.executescript(query)
And now the database called test will be a carbon copy of the default database. Use target_wrapper.connection as a reference to the newly created database.
Here is a function that copies databases. Both the source and destination can be in-memory or on-disk (the default destination is a copy in-memory):
import sqlite3

def copy_database(source_connection, dest_dbname=':memory:'):
    '''Return a connection to a new copy of an existing database.

    Raises an sqlite3.OperationalError if the destination already exists.
    '''
    script = ''.join(source_connection.iterdump())
    dest_conn = sqlite3.connect(dest_dbname)
    dest_conn.executescript(script)
    return dest_conn
And here is an example of how it applies to your use case:
from contextlib import closing

with closing(sqlite3.connect('root_physical.db')) as on_disk_start:
    in_mem_start = copy_database(on_disk_start)

a1 = testcase_a_outcome1(copy_database(in_mem_start))
a2 = testcase_a_outcome2(copy_database(in_mem_start))
a1b = test_case_b(a1)
a2b = test_case_b(a2)
a1bc1 = test_case_c_outcome1(copy_database(a1b))
a1bc2 = test_case_c_outcome2(copy_database(a1b))
a1bc3 = test_case_c_outcome3(copy_database(a1b))
a2bc1 = test_case_c_outcome1(copy_database(a2b))
a2bc2 = test_case_c_outcome2(copy_database(a2b))
a2bc3 = test_case_c_outcome3(copy_database(a2b))
