Validating SQLAlchemy Fields - python

I have a dictionary that gets created by a programmatic process and looks like
{'field1': 3, 'field2': 'TEST'}
I feed this dictionary into the model as its fields (for example: Model(**dict))
I want to run a series of unit tests that determine whether the fields are of valid data type.
How do I validate that these data types are valid for my database (MySQL) without having to do an insertion and rollback? That would introduce flakiness into my tests, since I would be interacting with an actual database, correct?

I do not have much experience with sqlalchemy but if you use data-types in Columns of your models, won't that work?
This link might help you :

Here's a rudimentary way to do what you asked
class Sample_Table(Base):
    """Example table whose constructor type-checks every supplied column value.

    The check runs in Python while the object is being built, so a value of
    the wrong type is rejected before anything is sent to the database.
    """

    __tablename__ = 'Sample_Table'
    __table_args__ = {'sqlite_autoincrement': True}

    id = Column(Integer, primary_key=True, nullable=False)
    col1 = Column(Integer)
    col2 = Column(Integer)

    def __init__(self, **kwargs):
        """Assign each keyword to its column after validating the value's type.

        Raises:
            KeyError: if a keyword does not name a column of the table.
            TypeError: if a value is not an instance of the Python type that
                the column's SQLAlchemy type maps to.
        """
        for k, v in kwargs.items():
            # python_type is the Python class SQLAlchemy associates with the
            # column type; comparing type *names* as substrings only happened
            # to work for int/Integer.
            expected = self.__table__.c[k].type.python_type
            # bool is a subclass of int, so reject it explicitly for
            # non-boolean columns.
            stray_bool = isinstance(v, bool) and expected is not bool
            if stray_bool or not isinstance(v, expected):
                raise TypeError("BAD COLUMN TYPE FOR COL " + k)
            setattr(self, k, v)
If you try to use the above to insert a record with a column type that is different than what you specified, it will throw an error, i.e. it will not perform an insertion and rollback.
To prove that this works, try the following full-working code:
from sqlalchemy import Column, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
Base = declarative_base()
class Sample_Table(Base):
    """Example table whose constructor type-checks every supplied column value.

    The check runs in Python while the object is being built, so a value of
    the wrong type is rejected before anything is sent to the database.
    """

    __tablename__ = 'Sample_Table'
    __table_args__ = {'sqlite_autoincrement': True}

    id = Column(Integer, primary_key=True, nullable=False)
    col1 = Column(Integer)
    col2 = Column(Integer)

    def __init__(self, **kwargs):
        """Assign each keyword to its column after validating the value's type.

        Raises:
            KeyError: if a keyword does not name a column of the table.
            TypeError: if a value is not an instance of the Python type that
                the column's SQLAlchemy type maps to.
        """
        for k, v in kwargs.items():
            # python_type is the Python class SQLAlchemy associates with the
            # column type; comparing type *names* as substrings only happened
            # to work for int/Integer.
            expected = self.__table__.c[k].type.python_type
            # bool is a subclass of int, so reject it explicitly for
            # non-boolean columns.
            stray_bool = isinstance(v, bool) and expected is not bool
            if stray_bool or not isinstance(v, expected):
                raise TypeError("BAD COLUMN TYPE FOR COL " + k)
            setattr(self, k, v)
engine = create_engine('sqlite:///')
session = sessionmaker()
s = session()
data = {"col1" : 1, "col2" : 2}
record = Sample_Table(**data)
s.add(record) #works
data = {"col1" : 1, "col2" : "2"}
record = Sample_Table(**data)
s.add(record) #doesn't work!
(Even though I used SQLite, it will work for a MySQL database alike.)


SQLAlchemy + mysql / mariadb: bulk upsert with composite keys

Using SQLAlchemy and a MariaDB backend, I need to bulk upsert data. Using this answer I was able to make it work for model with a single primary key. However, I can't make it work with composite keys.
The key part of the code is this one:
# for single pk
primary_key = [ for key in inspect(model).primary_key][0]
# get all entries to be updated
for each in DBSession.query(model).filter(getattr(model, primary_key).in_(entries.keys())).all():
entry = entries.pop(str(getattr(each, primary_key)))
I tried to change it to make it work with composite keys:
primary_keys = tuple([ for key in inspect(model).primary_key])
# get all entries to be updated
for each in DBSession.query(model).filter(and_(*[getattr(model, col).in_(entries.keys()) for col in primary_keys])).all():
print("This is never printed :(")
I guess this DBSession.query(model).filter(and_(*[getattr(model, col).in_(entries.keys()) for col in primary_keys])).all() doesn't work as intended.
For reference, here is a fully working snippet:
from sqlalchemy import Column, create_engine, and_, or_
from sqlalchemy.types import String
from sqlalchemy.inspection import inspect
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy import inspect, tuple_
DBSession = scoped_session(sessionmaker())
Base = declarative_base()
class Accounts(Base):
__tablename__ = 'accounts'
account = Column(String(50), primary_key=True)
comment = Column(String(50))
class Users(Base):
__tablename__ = 'users'
user = Column(String(50), primary_key=True)
account = Column(String(50), primary_key=True)
comment = Column(String(50))
accounts_data = {"account1": {"account": "account1", "comment": "test"}, "account2": {"account": "account2", "comment": None}}
users_data = {("user1", "account1"): {"user": "user1", "account": "account1", "comment": ""}, ("user1", "account2"): {"user": "user1", "account": "account2", "comment": ""}}
def upsert_data_single_pk(entries, model):
    """Bulk-upsert ``entries`` into a model that has one primary-key column.

    Rows whose key is already present in the table are sent through
    ``bulk_update_mappings``; all remaining rows go through
    ``bulk_insert_mappings``.

    Args:
        entries: mapping of primary-key value (as a string) to the row
            mapping to store.
        model: declarative model class to write to.

    The caller's ``entries`` mapping is left unmodified.
    """
    if not entries:
        return

    # NOTE(review): assumes the mapped attribute is named like the column,
    # as is the case for the models in this script.
    primary_key = [key.name for key in inspect(model).primary_key][0]
    pk_attr = getattr(model, primary_key)

    # Work on a copy so popping matched rows does not empty the caller's dict.
    pending = dict(entries)

    # get all entries to be updated
    entries_to_update = []
    for each in DBSession.query(model).filter(pk_attr.in_(list(pending))).all():
        entries_to_update.append(pending.pop(str(getattr(each, primary_key))))

    # whatever was not found in the table has to be inserted
    entries_to_insert = list(pending.values())

    DBSession.bulk_insert_mappings(model, entries_to_insert)
    DBSession.bulk_update_mappings(model, entries_to_update)
def upsert_data_multiple_pk(entries, model):
    """Bulk-upsert ``entries`` into a model that has a composite primary key.

    Rows whose key tuple is already present in the table are sent through
    ``bulk_update_mappings``; all remaining rows go through
    ``bulk_insert_mappings``.

    Args:
        entries: mapping of primary-key tuple to the row mapping to store.
            The tuple values are presumably in the order the primary-key
            columns are declared on the model -- verify against the caller.
        model: declarative model class to write to.

    The caller's ``entries`` mapping is left unmodified.
    """
    if not entries:
        return

    # NOTE(review): assumes each mapped attribute is named like its column,
    # as is the case for the models in this script.
    primary_keys = tuple(key.name for key in inspect(model).primary_key)
    pk_attrs = [getattr(model, col) for col in primary_keys]

    # Work on a copy so popping matched rows does not empty the caller's dict.
    pending = dict(entries)

    # get all entries to be updated.  The key must be compared as a whole:
    # and_() of one IN clause per column tests every single column against
    # the key *tuples*, which never matches.
    matches = DBSession.query(model).filter(
        tuple_(*pk_attrs).in_(list(pending))
    ).all()
    entries_to_update = []
    for each in matches:
        key = tuple(getattr(each, col) for col in primary_keys)
        entries_to_update.append(pending.pop(key))

    # whatever was not found in the table has to be inserted
    entries_to_insert = list(pending.values())

    DBSession.bulk_insert_mappings(model, entries_to_insert)
    DBSession.bulk_update_mappings(model, entries_to_update)
db_connection_uri = "mysql+pymysql://XXXX:XXXX#XXXX:XXXX/XXXX?charset=utf8mb4"
engine = create_engine(db_connection_uri, echo=False)
DBSession.configure(bind=engine, autoflush=False, expire_on_commit=False)
#Base.metadata.drop_all(engine, checkfirst=True)
#upsert_data_single_pk(accounts_data, Accounts)
upsert_data_multiple_pk(users_data, Users)
I wrote a different function to do what I needed:
def upsert(self, model: Type[Base], data: List[Dict[str, Any]]) -> None:
"""Upsert a record into the database.
If the record already exists, it will be updated. If it does not exist, it will be inserted.
model: The SQLAlchemy model representing the table.
data: The data to be inserted or updated, as a list of dictionaries.
if not data:"No data to insert")
return None"{len(data)} rows to insert/update to {model.__table__}")
insert_stmt = insert(model.__table__).values(data)
primary_keys = ModelTools.get_primary_keys(model)
to_update = {
k: getattr(insert_stmt.inserted, k)
for k in data[0].keys()
if k not in primary_keys
on_conflict_stmt = insert_stmt.on_duplicate_key_update(**to_update)
It is probably not the most time-efficient approach, but it works as intended, so for now I'm keeping it.

How to test if a class object was created using Pytest

I wrote a habit tracker app and used SQLAlchemy to store the data in an SQLite3 database. Now I'm writing the unit tests using Pytest for all the functions I wrote. Besides functions returning values, there are functions that create entries in the database by creating objects. Here's my object-relational mapper setup and the two main classes:
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, Date
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
# Setting up SQLAlchemy to connect to the local SQLite3 database
Base = declarative_base()
engine = create_engine('sqlite:///:main:', echo=True)
Session = sessionmaker(bind=engine)
session = Session()
class Habit(Base):
__tablename__ = 'habit'
habit_id = Column('habit_id', Integer, primary_key=True)
name = Column('name', String, unique=True)
periodicity = Column('periodicity', String)
start_date = Column('start_date', Date)
class HabitEvent(Base):
__tablename__ = 'habit_event'
event_id = Column('event_id', Integer, primary_key=True)
date = Column('date', Date)
habit_id = Column('fk_habit_id', Integer, ForeignKey(Habit.habit_id))
One of the creating functions is the following:
def add_habit(name, periodicity):
if str(periodicity) not in ['d', 'w']:
print('Wrong periodicity. \nUse d for daily or w for weekly.')
h = Habit() = str(name)
if str(periodicity) == 'd':
h.periodicity = 'Daily'
if str(periodicity) == 'w':
h.periodicity = 'Weekly'
h.start_date =
print('Habit added.')
Here's my question: since this function doesn't return a value that can be matched against an expected result, I don't know how to test whether the object was created. The same problem occurs when I want to check whether all objects were deleted using the following function:
def delete_habit(habitID):
id_list = []
id_query = session.query(Habit).all()
for i in id_query:
if habitID in id_list:
delete_id = int(habitID)
HabitEvent.habit_id == delete_id).delete()
session.query(Habit).filter(Habit.habit_id == delete_id).delete()
print('Habit deleted.')
print('Non existing Habit ID.')
If I understand correctly, you can utilize the get_habits function as part of the test for add_habit.
def test_add_habit():
name = 'test_add_habit'
periodicity = 'd'
add_habit(name, periodicity)
# not sure of the input or output from get_habits, but possibly:
results = get_habits(name)
assert name in results['name']

SQLAlchemy cascading polymorphic column updates

I have a parent Employee table and a child Engineer table. From a client perspective I only want to interact with the Employee model. This is easily implemented for READ and DELETE, but issues arise when trying to UPDATE or INSERT.
The sqlalchemy docs state:
Currently, only one discriminator column may be set, typically on the base-most class in the hierarchy. “Cascading” polymorphic columns are not yet supported.
So it would seem that by default this is not going to work. I'm looking for ideas on how to make this work.
Here's a complete test setup using postgres with psycopg2. The SQL might work with other SQL databases, but I have not tested any others.
SQL script to create test database (testdb) and tables (employee, engineer):
\c testdb;
CREATE TABLE employee(
name TEXT,
type TEXT
CREATE TABLE engineer(
engineer_name TEXT,
employee_id INT REFERENCES employee(id)
Python test script:
As-is the INSERT test will fail, but the DELETE will pass. If you change the code (comment/uncomment) to use the child Engineer model it will pass both cases.
import sqlalchemy as sa
import sqlalchemy.orm as orm
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import (
Base = declarative_base()
class Employee(Base):
__tablename__ = 'employee'
id = Column(Integer, primary_key=True)
name = Column(Text(), default='John')
type = Column(Text, default='engineer')
__mapper_args__ = {
'with_polymorphic': '*',
class Engineer(Employee):
__tablename__ = 'engineer'
id = Column(Integer, ForeignKey('',
ondelete='CASCADE', onupdate='CASCADE'), primary_key=True)
engineer_name = Column(Text(), default='Eugine')
__mapper_args__ = {
def count(session, Model):
    """Return how many ``Model`` objects ``session.query(Model).all()`` yields.

    Args:
        session: object exposing ``query(Model)``.
        Model: the mapped class to count.
    """
    # No local named ``count``: it would shadow this function inside its body.
    return len(session.query(Model).all())
url = 'postgresql+psycopg2://postgres#localhost/testdb'
engine = sa.create_engine(url)
Base.metadata.bind = engine
Session = orm.sessionmaker(engine)
session = Session()
if __name__ == '__main__':
print '#'*30, 'INSERT', '#'*30
id += id
# I only want to interact with the Employee table
e = Employee(id=id)
# Use the child model to see the INSERT test pass
# e = Engineer(id=id)
print 'pass' if count(session, Employee) == count(session, Engineer) else 'fail'
print '#'*30, 'DELETE', '#'*30
# e = session.query(Employee).first()
print 'pass' if count(session, Employee) == count(session, Engineer) else 'fail'
Any ideas on how to accomplish this through the sqlalchemy model definitions without having to use explicit controller code?
Well I'm not getting any love for this one. Anybody have ideas on how to accomplish this with controller code?
Using controller logic this can be accomplished by getting the polymorphic subclass using the polymorphic identity.
I'm adding two functions to encapsulate some basic logic.
def get_polymorphic_class(klass, data):
    """Return the mapped class that ``data``'s discriminator value selects.

    Args:
        klass: mapped class; its ``__mapper__`` is inspected.
        data: dict of column values for the row about to be created.

    Returns:
        ``klass`` itself when its mapper has no ``polymorphic_on`` column,
        otherwise the ``class_`` of the mapper registered in
        ``polymorphic_map`` under the discriminator value.

    Raises:
        ValueError: if ``data`` has no (truthy) discriminator value, or the
            value has no entry in ``polymorphic_map``.
    """
    base_mapper = klass.__mapper__
    column = base_mapper.polymorphic_on
    if column is None:
        # The column is not polymorphic so the Class can be returned as-is
        return klass

    # NOTE(review): the lookup key was lost from the published snippet;
    # the discriminator column's name is assumed -- confirm.
    identity = data.get(column.name)
    if not identity:
        raise ValueError('Missing value for "' + column.name + '"', data)

    sub_mapper = base_mapper.polymorphic_map.get(identity)
    if sub_mapper is None:
        # str() keeps the message buildable for non-string identities
        raise ValueError(
            'Missing polymorphic_identity definition for "' + str(identity) + '"'
        )
    return sub_mapper.class_
def insert(klass, data):
klass = get_polymorphic_class(klass, data)
e = klass(**data)
return e
Now I update main to use the insert function and everything works as expected:
if __name__ == '__main__':
print '#'*30, 'INSERT', '#'*30
id += id
e = insert(Employee, {'id': id, 'type': 'engineer'})
print 'pass' if count(session, Employee) == count(session, Engineer) else 'fail'
print '#'*30, 'DELETE', '#'*30
print 'pass' if count(session, Employee) == count(session, Engineer) else 'fail'
There's some extra code in my encapsulation for reusability, but the important part is doing Employee.__mapper__.polymorphic_map['engineer'].class_ which returns the Engineer class so we can do a proper cascading INSERT.

Dynamic Datasets and SQLAlchemy

I am refactoring some old SQLite3 SQL statements in Python into SQLAlchemy. In our framework, we have the following SQL statements that takes in a dict with certain known keys and potentially any number of unexpected keys and values (depending what information was provided).
import sqlite3
import sys
def dict_factory(cursor, row):
    """Row factory that turns a result row into a ``{column_name: value}`` dict.

    Args:
        cursor: cursor whose ``description`` lists the result columns; the
            first element of each entry is used as the key.
        row: sequence of values, positionally aligned with
            ``cursor.description``.

    Returns:
        dict mapping each column name to the value at the same position.
    """
    return {col[0]: row[idx] for idx, col in enumerate(cursor.description)}
def Create_DB(db):
    """Reset the database file ``db`` to an empty ``Listings`` table.

    Any existing file at ``db`` is deleted first, then a new database holding
    only the ``Listings`` table is created.

    Args:
        db: filesystem path of the SQLite database file.
    """
    from contextlib import closing, suppress
    from os import remove

    # Delete the database; a missing file just means there is nothing to reset
    with suppress(FileNotFoundError):
        remove(db)

    # Recreate it and format it as needed.  closing() is required because a
    # sqlite3 connection used as a context manager only commits or rolls
    # back; it does not close.  No rows are read here, so no row factory is
    # installed.
    with closing(sqlite3.connect(db)) as conn:
        conn.text_factory = str
        with conn:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE [Listings] ([ID] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL UNIQUE, [timestamp] REAL NOT NULL DEFAULT(( datetime ( 'now' , 'localtime' ) )), [make] VARCHAR, [model] VARCHAR, [year] INTEGER);")
def Add_Record(db, data):
    """Insert ``data`` as one row of ``Listings``, adding columns as needed.

    Every key of ``data`` that is not yet a column of ``Listings`` is added
    with ``ALTER TABLE`` (``INT`` for int values, ``VARCHAR`` otherwise)
    before the row is inserted.

    Args:
        db: filesystem path of the SQLite database file.
        data: dict mapping column name to the value to store.
    """
    from contextlib import closing

    def quote_identifier(name):
        # Identifiers cannot be bound as parameters, so quote them and
        # double any embedded quote character.
        return '"{}"'.format(str(name).replace('"', '""'))

    # closing() is required because a sqlite3 connection used as a context
    # manager only commits or rolls back; it does not close.  No rows are
    # read here, so no row factory is installed.
    with closing(sqlite3.connect(db)) as conn:
        conn.text_factory = str
        with conn:
            cursor = conn.cursor()

            # get column names already in table
            cursor.execute("SELECT * FROM Listings LIMIT 0")
            col_names = {col[0] for col in cursor.description}

            # check if column doesn't exist in table, then add it
            for key, value in data.items():
                if key in col_names:
                    continue
                is_int = isinstance(value, int) and not isinstance(value, bool)
                cursor.execute(
                    "ALTER TABLE Listings ADD COLUMN {col} {type}".format(
                        col=quote_identifier(key),
                        type='INT' if is_int else 'VARCHAR',
                    )
                )
                col_names.add(key)

            # Insert record into table; values are bound as parameters so
            # quotes or SQL fragments inside them are stored verbatim.
            if data:
                cursor.execute(
                    "INSERT INTO Listings({cols}) VALUES({vals});".format(
                        cols=", ".join(quote_identifier(key) for key in data),
                        vals=", ".join("?" for _ in data),
                    ),
                    list(data.values()),
                )
            else:
                # An empty column list is not valid SQL
                cursor.execute("INSERT INTO Listings DEFAULT VALUES;")
#Database filename
db = 'test.db'
data = {'make': 'Chevy',
'model' : 'Corvette',
'year' : 1964,
'price' : 50000,
'color' : 'blue',
'doors' : 2}
Add_Record(db, data)
data = {'make': 'Chevy',
'model' : 'Camaro',
'year' : 1967,
'price' : 62500,
'condition' : 'excellent'}
Add_Record(db, data)
This level of dynamicism is necessary because there's no way we can know what additional information will be provided, but, regardless, it's important that we store all information provided to us. This has never been a problem because in our framework, as we've never expected an unwieldy number of columns in our tables.
While the above code works, it's obvious that it's not a clean implementation and thus why I'm trying to refactor it into SQLAlchemy's cleaner, more robust ORM paradigm. I started going through SQLAlchemy's official tutorials and various examples and have arrived at the following code:
from sqlalchemy import Column, String, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
Base = declarative_base()
class Listing(Base):
__tablename__ = 'Listings'
id = Column(Integer, primary_key=True)
make = Column(String)
model = Column(String)
year = Column(Integer)
engine = create_engine('sqlite:///')
session = sessionmaker()
data = {'make':'Chevy',
'model' : 'Corvette',
'year' : 1964}
record = Listing(**data)
s = session()
and it works beautifully with that data dict. Now, when I add a new keyword, such as
data = {'make':'Chevy',
'model' : 'Corvette',
'year' : 1964,
'price' : 50000}
I get a TypeError: 'price' is an invalid keyword argument for Listing error. To try and solve the issue, I modified the class to be dynamic, too:
class Listing(Base):
__tablename__ = 'Listings'
id = Column(Integer, primary_key=True)
make = Column(String)
model = Column(String)
year = Column(Integer)
def __checker__(self, data):
for i in data.keys():
if i not in [a for a in dir(self) if not a.startswith('__')]:
if type(i) is int:
setattr(self, i, Column(Integer))
setattr(self, i, Column(String))
self[i] = data[i]
But I quickly realized this would not work at all for several reasons, e.g. the class was already initialized, the data dict cannot be fed into the class without reinitializing it, it's a hack more than anything, et al.). The more I think about it, the less obvious the solution using SQLAlchemy seems to me. So, my main question is, how do I implement this level of dynamicism using SQLAlchemy?
I've researched a bit to see if anyone has had a similar issue. The closest I've found was Dynamic Class Creation in SQLAlchemy, but it only talks about the constant attributes ("tablename" et al.). I believe the unanswered question may be asking the same thing. While Python is not my forte, I consider myself a highly skilled programmer (C++ and JavaScript are my strongest languages) in the context of scientific/engineering applications, so I may not be hitting the correct Python-specific keywords in my searches.
I welcome any and all help.
class Listing(Base):
__tablename__ = 'Listings'
id = Column(Integer, primary_key=True)
make = Column(String)
model = Column(String)
year = Column(Integer)
def __init__(self,**kwargs):
for k,v in kwargs.items():
if hasattr(self,k):
engine.execute("ALTER TABLE %s AD COLUMN %s"%(self.__tablename__,k)
setattr(self.__class__,Column(k, String))
This might work ... maybe ... I am not entirely sure, as I did not test it.
a better solution would be to use a relational table
class Attribs(Base):
listing_id = Column(Integer,ForeignKey("Listing"))
name = Column(String)
val = Column(String)
class Listing(Base):
id = Column(Integer,primary_key = True)
attributes = relationship("Attribs",backref="listing")
def __init__(self,**kwargs):
for k,v in kwargs.items():
def __str__(self):
return "\n".join(["A LISTING",] + ["%s:%s"%(,a.val) for a in self.attribs])
another solution would be to store json
class Listing(Base):
__tablename__ = 'Listings'
id = Column(Integer, primary_key=True)
data = Column(String)
def __init__(self,**kwargs): = json.dumps(kwargs)
self.data_dict = kwargs
the best solution would be to use a no-sql key,value store (maybe even just a simple json file? or perhaps shelve? or even pickle I guess)

Delete children after parent is deleted in SQLAlchemy

My problem is the following:
I have the two models Entry and Tag linked by a many-to-many relationship in SQLAlchemy. Now I want to delete every Tag that doesn't have any corresponding Entry after an Entry is deleted.
Example to illustrate what I want:
Entry 1 with tags python, java
Entry 2 with tags python, c++
With these two entries the database contains the tags python, java, and c++. If I now delete Entry 2 I want SQLAlchemy to automatically delete the c++ tag from the database. Is it possible to define this behavior in the Entry model itself or is there an even more elegant way?
this question was asked awhile back here: Setting delete-orphan on SQLAlchemy relationship causes AssertionError: This AttributeImpl is not configured to track parents
This is the "many-to-many orphan" problem. jadkik94 is close in that you should use events to catch this, but I try to recommend against using the Session inside of mapper events, though it works in this case.
Below, I take the answer verbatim from the other SO question, and replace the word "Role" with "Entry":
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import event
from sqlalchemy.orm import attributes
Base= declarative_base()
tagging = Table('tagging',Base.metadata,
Column('tag_id', Integer, ForeignKey('', ondelete='cascade'), primary_key=True),
Column('entry_id', Integer, ForeignKey('', ondelete='cascade'), primary_key=True)
class Tag(Base):
__tablename__ = 'tag'
id = Column(Integer, primary_key=True)
name = Column(String(100), unique=True, nullable=False)
def __init__(self, name=None): = name
class Entry(Base):
__tablename__ = 'entry'
id = Column(Integer, primary_key=True)
tag_names = association_proxy('tags', 'name')
tags = relationship('Tag',
#event.listens_for(Session, 'after_flush')
def delete_tag_orphans(session, ctx):
# optional: look through Session state to see if we want
# to emit a DELETE for orphan Tags
flag = False
for instance in session.dirty:
if isinstance(instance, Entry) and \
attributes.get_history(instance, 'tags').deleted:
flag = True
for instance in session.deleted:
if isinstance(instance, Entry):
flag = True
# emit a DELETE for all orphan Tags. This is safe to emit
# regardless of "flag", if a less verbose approach is
# desired.
if flag:
e = create_engine("sqlite://", echo=True)
s = Session(e)
r1 = Entry()
r2 = Entry()
r3 = Entry()
t1, t2, t3, t4 = Tag("t1"), Tag("t2"), Tag("t3"), Tag("t4")
r1.tags.extend([t1, t2])
r2.tags.extend([t2, t3])
s.add_all([r1, r2, r3])
assert s.query(Tag).count() == 4
assert s.query(Tag).count() == 4
assert s.query(Tag).count() == 3
assert s.query(Tag).count() == 2
two almost identical SO questions qualifies this as something to have on hand so I've added it to the wiki at
I will let code speak for me:
from sqlalchemy import create_engine, exc, event
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import func, Table, Column, Integer, String, Float, Boolean, MetaData, ForeignKey
from sqlalchemy.orm import relationship, backref
# Connection
engine = create_engine('sqlite:///', echo=True)
Base = declarative_base(bind=engine)
Session = sessionmaker(bind=engine)
# Models
entry_tag_link = Table('entry_tag', Base.metadata,
Column('entry_id', Integer, ForeignKey('')),
Column('tag_id', Integer, ForeignKey(''))
class Entry(Base):
__tablename__ = 'entries'
id = Column(Integer, primary_key=True)
name = Column(String(255), nullable=False, default='')
tags = relationship("Tag", secondary=entry_tag_link, backref="entries")
def __repr__(self):
return '<Entry %s>' % (,)
class Tag(Base):
__tablename__ = 'tags'
id = Column(Integer, primary_key=True)
name = Column(String(255), nullable=False)
def __repr__(self):
return '<Tag %s>' % (,)
# Delete listener
def delete_listener(mapper, connection, target):
print "---- DELETING %s ----" % (target,)
print '-' * 20
for t in target.tags:
if len(t.entries) == 0:
print ' ' * 5, t, 'is to be deleted'
print '-' * 20
event.listen(Entry, 'before_delete', delete_listener)
# Utility functions
def dump(session):
entries = session.query(Entry).all()
tags = session.query(Tag).all()
print '*' * 20
print 'Entries', entries
print 'Tags', tags
print '*' * 20
session = Session()
t1, t2, t3 = Tag(name='python'), Tag(name='java'), Tag(name='c++')
e1, e2 = Entry(name='Entry 1', tags=[t1, t2]), Entry(name='Entry 2', tags=[t1, t3])
raw_input("---- Press return to delete the second entry and see the result ----")
The code above uses the before_delete event of the SQLAlchemy ORM events. This line does the magic:
event.listen(Entry, 'before_delete', delete_listener)
This says to listen to all deletes to an Entry item, and call our listener which will do what we want. However, the docs do not recommend changing the session inside the events (see the warning in the link I added). But as far as I can see, it works, so it's up to you to see if this works for you.
