Can't override method in Sqlalchemy inherited class

Can't override method in Sqlalchemy inherited class - python

I'm struggling with something which I suspect is really simple, because it's an obvious use-case and I can't see anything relevant in the sqlalchemy docs.
I have set up a SourceArchive class, which inherits from SourceFile. They use a joined table approach, as per this page: http://docs.sqlalchemy.org/en/latest/orm/inheritance.html.
import os
import json
from sqlalchemy import Column, ForeignKey, Integer, String, orm
import py7zlib
import filesys_ops
from db_base import Base
# Base = declarative_base()
class SourceDir(Base):
__tablename__ = 'source_dirs'
id = Column(Integer, primary_key=True)
dir_path = Column(String)
# files = relationship(SourceFile)
def __init__(self, dir_path):
self.dir_path = dir_path
def get_files(self):
return filesys_ops.read_files(self.dir_path)
class SourceFile(Base):
__tablename__ = 'source_file'
# Create a 'rom_names' variable, verify on instantiation and then add to obj.
# Then have consistent 'get_names' method across subclasses
id = Column(Integer, primary_key=True)
full_path = Column(String)
name = Column(String)
# type = Column(String)
parent_dir_id = Column(Integer, ForeignKey('source_dirs.id'))
type = Column(String(50))
def __init__(self, file_path, parent_dir):
self.full_path = file_path
# path, self.name = os.path.split(self.full_path)
# self.type = magic.from_file(self.full_path)
self.parent_dir_id = parent_dir.id
def ext(self):
base_name, extension = os.path.splitext(self.name)
return extension
# #orm.reconstructor
# def init_on_load(self):
# pass
def get_rom_names(self):
print('super')
return [self.name]
__mapper_args__ = {
'polymorphic_identity': 'source_file',
'polymorphic_on': type
}
class SourceArchive(SourceFile):
__tablename__ = 'source_archive'
id = Column(Integer, ForeignKey('source_file.id'), primary_key=True)
members_json = Column(String)
def __init__(self, file_path, parent_dir):
super().__init__(file_path, parent_dir)
file = open(file_path, 'rb')
archive = py7zlib.Archive7z(file)
members = archive.getnames()
self.members_json = json.dumps(members)
# print(self.members_json)
# print(type(self.members_json))
# print(self.members_json)
def get_rom_names(self):
print('or')
# print('**** - ' + self.members)
if type(json.loads(self.members_json)) is list:
return json.loads(self.members_json)
else:
return [json.loads(self.members_json)]
def extractall(self, target_dir):
for name in self.members:
self.extract_member(name, target_dir)
def extract_member(self, member_name, path):
output_path = os.path.join(path, member_name)
output_dir = os.path.dirname(output_path)
filesys_ops.validate_dir(output_dir, create=True)
outfile = open(output_path, 'wb')
outfile.write(self.archive.getmember(member_name).read())
outfile.close()
__mapper_args__ = {
'polymorphic_identity': 'source_archive',
}
Once the database has been populated with a combination of SourceFile and SourceArchive objects I am using the following code to retrieve them:
all_files = with_polymorphic(SourceFile, SourceArchive)
query = session.query(all_files)
source_files = query.all()
This only returns SourceFile objects though, with the result that any call to the 'get_roms' method calls the SourceFile implementation.
In a previous version of the code I didn't use the boilerplate from the linked tutorial and just approached the class inheritance as I would with general Python, in some cases sharing a table, in others not. I could then return all objects as SourceFiles or all as SourceArchives, just depending on which I queried, with the same results (though it was also possible to call SourceArchive's implementation of get_roms on a SourceFile object too).
If anybody can help I'd be very grateful,

Related

Preserve key naming when creating a joinedload sql_query in SQLAlchemy

I am extracting a table row and the corresponding row from all referenced tables via SQLAlchemy.
Given the following object structure:
class DNAExtractionProtocol(Base):
__tablename__ = 'dna_extraction_protocols'
id = Column(Integer, primary_key=True)
code = Column(String, unique=True)
name = Column(String)
sample_mass = Column(Float)
mass_unit_id = Column(String, ForeignKey('measurement_units.id'))
mass_unit = relationship("MeasurementUnit", foreign_keys=[mass_unit_id])
digestion_buffer_id = Column(String, ForeignKey("solutions.id"))
digestion_buffer = relationship("Solution", foreign_keys=[digestion_buffer_id])
digestion_buffer_volume = Column(Float)
digestion_id = Column(Integer, ForeignKey("incubations.id"))
digestion = relationship("Incubation", foreign_keys=[digestion_id])
lysis_buffer_id = Column(String, ForeignKey("solutions.id"))
lysis_buffer = relationship("Solution", foreign_keys=[lysis_buffer_id])
lysis_buffer_volume = Column(Float)
lysis_id = Column(Integer, ForeignKey("incubations.id"))
lysis = relationship("Incubation", foreign_keys=[lysis_id])
proteinase_id = Column(String, ForeignKey("solutions.id"))
proteinase = relationship("Solution", foreign_keys=[proteinase_id])
proteinase_volume = Column(Float)
inactivation_id = Column(Integer, ForeignKey("incubations.id"))
inactivation = relationship("Incubation", foreign_keys=[inactivation_id])
cooling_id = Column(Integer, ForeignKey("incubations.id"))
cooling = relationship("Incubation", foreign_keys=[cooling_id])
centrifugation_id = Column(Integer, ForeignKey("incubations.id"))
centrifugation = relationship("Incubation", foreign_keys=[centrifugation_id])
volume_unit_id = Column(String, ForeignKey('measurement_units.id'))
volume_unit = relationship("MeasurementUnit", foreign_keys=[volume_unit_id])
I am using:
sql_query = session.query(DNAExtractionProtocol).options(Load(DNAExtractionProtocol).joinedload("*")).filter(DNAExtractionProtocol.code == code)
for item in sql_query:
pass
mystring = str(sql_query)
mydf = pd.read_sql_query(mystring,engine,params=[code])
print(mydf.columns)
This gives me:
Index([u'dna_extraction_protocols_id', u'dna_extraction_protocols_code',
u'dna_extraction_protocols_name',
u'dna_extraction_protocols_sample_mass',
u'dna_extraction_protocols_mass_unit_id',
u'dna_extraction_protocols_digestion_buffer_id',
u'dna_extraction_protocols_digestion_buffer_volume',
u'dna_extraction_protocols_digestion_id',
u'dna_extraction_protocols_lysis_buffer_id',
u'dna_extraction_protocols_lysis_buffer_volume',
u'dna_extraction_protocols_lysis_id',
u'dna_extraction_protocols_proteinase_id',
u'dna_extraction_protocols_proteinase_volume',
u'dna_extraction_protocols_inactivation_id',
u'dna_extraction_protocols_cooling_id',
u'dna_extraction_protocols_centrifugation_id',
u'dna_extraction_protocols_volume_unit_id', u'measurement_units_1_id',
u'measurement_units_1_code', u'measurement_units_1_long_name',
u'measurement_units_1_siunitx', u'solutions_1_id', u'solutions_1_code',
u'solutions_1_name', u'solutions_1_supplier',
u'solutions_1_supplier_id', u'incubations_1_id', u'incubations_1_speed',
u'incubations_1_duration', u'incubations_1_temperature',
u'incubations_1_movement', u'incubations_1_speed_unit_id',
u'incubations_1_duration_unit_id', u'incubations_1_temperature_unit_id',
u'solutions_2_id', u'solutions_2_code', u'solutions_2_name',
u'solutions_2_supplier', u'solutions_2_supplier_id',
u'incubations_2_id', u'incubations_2_speed', u'incubations_2_duration',
u'incubations_2_temperature', u'incubations_2_movement',
u'incubations_2_speed_unit_id', u'incubations_2_duration_unit_id',
u'incubations_2_temperature_unit_id', u'solutions_3_id',
u'solutions_3_code', u'solutions_3_name', u'solutions_3_supplier',
u'solutions_3_supplier_id', u'incubations_3_id', u'incubations_3_speed',
u'incubations_3_duration', u'incubations_3_temperature',
u'incubations_3_movement', u'incubations_3_speed_unit_id',
u'incubations_3_duration_unit_id', u'incubations_3_temperature_unit_id',
u'incubations_4_id', u'incubations_4_speed', u'incubations_4_duration',
u'incubations_4_temperature', u'incubations_4_movement',
u'incubations_4_speed_unit_id', u'incubations_4_duration_unit_id',
u'incubations_4_temperature_unit_id', u'incubations_5_id',
u'incubations_5_speed', u'incubations_5_duration',
u'incubations_5_temperature', u'incubations_5_movement',
u'incubations_5_speed_unit_id', u'incubations_5_duration_unit_id',
u'incubations_5_temperature_unit_id', u'measurement_units_2_id',
u'measurement_units_2_code', u'measurement_units_2_long_name',
u'measurement_units_2_siunitx', u'dna_extractions_1_id',
u'dna_extractions_1_code', u'dna_extractions_1_protocol_id',
u'dna_extractions_1_source_id'],
dtype='object')
This indeed contains all the columns I want - but the naming does not help me select what I want.
Isit possible to preserve the key names from the original table in this dataframe? e.g. instead of measurement_units_1_code I would like to have mass_unit_code.

This is not what joinedload is supposed to be used for. You want to do an explicit join in this case:
session.query(DNAExtractionProtocol.id.label("id"),
...,
MeasurementUnit.id.label("mass_unit_id"),
...) \
.join(DNAExtractionProtocol.mass_unit) \
.join(DNAExtractionProtocol.digestion_buffer) \
... \
.filter(...)
If you don't want to type out all those names, you can inspect the DNAExtractionProtocol class to find all relationships and dynamically construct the query and labels. An example:
cols = []
joins = []
insp = inspect(DNAExtractionProtocol)
for name, col in insp.columns.items():
cols.append(col.label(name))
for name, rel in insp.relationships.items():
alias = aliased(rel.mapper.class_, name=name)
for col_name, col in inspect(rel.mapper).columns.items():
aliased_col = getattr(alias, col.key)
cols.append(aliased_col.label("{}_{}".format(name, col_name)))
joins.append((alias, rel.class_attribute))
query = session.query(*cols).select_from(DNAExtractionProtocol)
for join in joins:
query = query.join(*join)
EDIT: Depending on your data structure you might need to use outerjoin instead of join on the last line.
You'll probably need to tweak this to your liking. For example, this doesn't take into account potential naming conflicts, e.g. for mass_unit_id, is it DNAExtractionProtocol.mass_unit_id or is it MeasurementUnit.id?
In addition, you'll probably want to execute sql_query.statement instead of str(sql_query). str(sql_query) is for printing purposes, not for execution. I believe you don't need to pass params=[code] if you use sql_query.statement because code will already have been bound to the appropriate parameter in the query.

SqlAlchemy Classes Declaration of two dependent classes

I have a problem in the file where I declare all my classes mappers.
class Application(AbstractId):
.........
key_event_id = ORM.column_property(
SA.select([ApplicationEvent.id],
correlate = True,
from_obj = [Application.__table__.join(ApplicationEvent.__table__)]
).as_scalar().label("tag").where(ApplicationEvent.key_event == 1)
)
SA.select([ApplicationEvent]).filter(
ApplicationEvent.key_event)
class ApplicationEvent(AbstractId):
__tablename__ = 'applications_events'
application_id = SA.Column(SA.Integer, SA.ForeignKey(Application.id), primary_key = True)
application = ORM.relationship(Application, backref = 'events')
event_id = SA.Column(SA.Integer, SA.ForeignKey(Event.id), primary_key = True)
event = ORM.relationship(Event)
This won't work since ApplicationEvent is declared before Application. How can I make this work ? I need key_event_id as a column of Application.
This won't work either:
#declarative.declared_attr
def key_event_id(cls):
return ORM.column_property(
SA.select(['ApplicationEvent.id'],
correlate = True,
from_obj = ['Application.__table__'.join('ApplicationEvent.__table__')]
).as_scalar().where('ApplicationEvent.key_event' == 1).label("key_event_id")
)

You can simply pass the model name as a string to the relationship() call.
argument
a mapped class, or actual Mapper instance, representing the target of
the relationship.
argument may also be passed as a callable function which is evaluated
at mapper initialization time, and may be passed as a Python-evaluable
string when using Declarative.

You can do
application = ORM.relationship("Application", backref = 'events')
and
event = ORM.relationship("Event" , order_by="Event.id")
You can write like this way
application_id = SA.Column("id", SA.ForeignKey("Application.id"), primary_key = True)
application = ORM.relationship("Application", backref = 'events')
event_id = SA.Column("id", SA.ForeignKey("Event.id"), primary_key = True)
event = ORM.relationship("Event")

SQLAlchemy filter for files in a folder-file type hierarchy

I have seen some pure SQL queries that could be solutions to this, but my SQL isn't really on point so a SQLAlchemy solution would be appreciated. I'm also using sqlite.
I have a hierarchy type structure in SQLAlchemy similar to a normal filesystem - nested nodes with leaves at the end. I have a solution for finding files using only Python, but it is quite slow so I'm hoping there's a faster way.
The folders are defined as such:
class folder(Base):
__tablename__ = 'folder'
id = Column(Integer, primary_key=True)
parentid = Column(Integer, ForeignKey('folder.id', ondelete='CASCADE'),index=True)
type = Column(Text(50))
children = relationship('folder',
cascade='all',
backref=backref('parent', remote_side='folder.id'),
)
__mapper_args__ = {
'polymorphic_identity': 'folder',
'polymorphic_on': type
}
def getFiles(self):
""" returns a list of the files in this folder
"""
flist = []
for child in self.children:
flist += child.getFiles()
return flist
And the files:
class file(folder):
__tablename__ = 'file'
id = Column(Integer, ForeignKey('folder.id', ondelete='CASCADE'), primary_key=True)
name = Column(Text(50))
__mapper_args__ = {
'polymorphic_identity': 'file',
'inherit_condition': (id == folder.id),
}
def getFiles(self):
""" return self as the only file
"""
return [self]
Currently I find files by filename like so:
files = some_folder.getFiles()
return [f for f in files if file_name in f.name.lower()]
Is there any better way to query for files in a folder?

I wasn't able to build a query exactly how I wanted, but playing around I found a solution that was much faster. Instead of finding all the files and then searching, I traverse to each folder and query for files in it. It resulted in quite a performance boost.
def search_files(folderid, search):
files = session.query(file).filter(
file.parentid==folderid,
file.name.ilike('%{}%'.format(search))).all()
folders = session.query(folder.ID).filter_by(parentid=folderid).all()
for folder in folders:
files+=search_files(folder.id, search)
return files

SQLAlchemy audit logging; how to handle deletes?

I'm using a modified version of the versioning code example that comes with SQLAlchemy to record a user id and date on changes. However, I also want to modify it so deletes are done by marking a is_deleted type flag instead of running an actual SQL DELETE. My problem is I'm not sure how to capture the delete and replace it with an update.
Here's what I have so far:
''' http://docs.sqlalchemy.org/en/rel_0_8/orm/examples.html?highlight=versioning#versioned-objects '''
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy.orm import mapper, class_mapper, attributes, object_mapper, scoping
from sqlalchemy.orm.session import Session
from sqlalchemy.orm.exc import UnmappedClassError, UnmappedColumnError
from sqlalchemy import Table, Column, ForeignKeyConstraint, DateTime, String, Boolean
from sqlalchemy import event
from sqlalchemy.orm.properties import RelationshipProperty
from datetime import datetime
from sqlalchemy.schema import ForeignKey
from sqlalchemy.sql.expression import false
def col_references_table(col, table):
for fk in col.foreign_keys:
if fk.references(table):
return True
return False
def _history_mapper(local_mapper):
cls = local_mapper.class_
# set the "active_history" flag
# on on column-mapped attributes so that the old version
# of the info is always loaded (currently sets it on all attributes)
for prop in local_mapper.iterate_properties:
getattr(local_mapper.class_, prop.key).impl.active_history = True
super_mapper = local_mapper.inherits
super_history_mapper = getattr(cls, '__history_mapper__', None)
polymorphic_on = None
super_fks = []
if not super_mapper or local_mapper.local_table is not super_mapper.local_table:
cols = []
for column in local_mapper.local_table.c:
if column.name.startswith('version_'):
continue
col = column.copy()
col.unique = False
if super_mapper and col_references_table(column, super_mapper.local_table):
super_fks.append((col.key, list(super_history_mapper.local_table.primary_key)[0]))
cols.append(col)
if column is local_mapper.polymorphic_on:
polymorphic_on = col
if super_mapper:
super_fks.append(('version_datetime', super_history_mapper.base_mapper.local_table.c.version_datetime))
super_fks.append(('version_userid', super_history_mapper.base_mapper.local_table.c.version_userid))
super_fks.append(('version_deleted', super_history_mapper.base_mapper.local_table.c.version_deleted))
cols.append(Column('version_datetime', DateTime, default=datetime.now, nullable=False, primary_key=True, info={'colanderalchemy': {'exclude': True}}))
cols.append(Column('version_userid', String(60), ForeignKey("user.login"), nullable=True, info={'colanderalchemy': {'exclude': True}}))
cols.append(Column('version_deleted', Boolean, server_default=false(), nullable=False, info={'colanderalchemy': {'exclude': True}}))
else:
cols.append(Column('version_datetime', DateTime, default=datetime.now, nullable=False, primary_key=True, info={'colanderalchemy': {'exclude': True}}))
cols.append(Column('version_userid', String(60), ForeignKey("user.login"), nullable=True, info={'colanderalchemy': {'exclude': True}}))
cols.append(Column('version_deleted', Boolean, server_default=false(), nullable=False, info={'colanderalchemy': {'exclude': True}}))
if super_fks:
cols.append(ForeignKeyConstraint(*zip(*super_fks)))
table = Table(local_mapper.local_table.name + '_history', local_mapper.local_table.metadata,
*cols
)
else:
# single table inheritance. take any additional columns that may have
# been added and add them to the history table.
for column in local_mapper.local_table.c:
if column.key not in super_history_mapper.local_table.c:
col = column.copy()
col.unique = False
super_history_mapper.local_table.append_column(col)
table = None
if super_history_mapper:
bases = (super_history_mapper.class_,)
else:
bases = local_mapper.base_mapper.class_.__bases__
versioned_cls = type.__new__(type, "%sHistory" % cls.__name__, bases, {})
m = mapper(
versioned_cls,
table,
inherits=super_history_mapper,
polymorphic_on=polymorphic_on,
polymorphic_identity=local_mapper.polymorphic_identity
)
cls.__history_mapper__ = m
if not super_history_mapper:
local_mapper.local_table.append_column(
Column('version_datetime', DateTime, default=datetime.now, nullable=False, primary_key=False, info={'colanderalchemy': {'exclude': True}})
)
local_mapper.add_property("version_datetime", local_mapper.local_table.c.version_datetime)
local_mapper.local_table.append_column(
Column('version_userid', String(60), ForeignKey("user.login"), nullable=True, info={'colanderalchemy': {'exclude': True}})
)
local_mapper.add_property("version_userid", local_mapper.local_table.c.version_userid)
local_mapper.local_table.append_column(
Column('version_deleted', Boolean, server_default=false(), nullable=False, info={'colanderalchemy': {'exclude': True}})
)
local_mapper.add_property("version_deleted", local_mapper.local_table.c.version_deleted)
class Versioned(object):
#declared_attr
def __mapper_cls__(cls):
def map(cls, *arg, **kw):
mp = mapper(cls, *arg, **kw)
_history_mapper(mp)
return mp
return map
def versioned_objects(iter):
for obj in iter:
if hasattr(obj, '__history_mapper__'):
yield obj
def create_version(obj, session, deleted = False):
obj_mapper = object_mapper(obj)
history_mapper = obj.__history_mapper__
history_cls = history_mapper.class_
obj_state = attributes.instance_state(obj)
attr = {}
obj_changed = False
for om, hm in zip(obj_mapper.iterate_to_root(), history_mapper.iterate_to_root()):
if hm.single:
continue
for hist_col in hm.local_table.c:
if hist_col.key.startswith('version_'):
continue
obj_col = om.local_table.c[hist_col.key]
# get the value of the
# attribute based on the MapperProperty related to the
# mapped column. this will allow usage of MapperProperties
# that have a different keyname than that of the mapped column.
try:
prop = obj_mapper.get_property_by_column(obj_col)
except UnmappedColumnError:
# in the case of single table inheritance, there may be
# columns on the mapped table intended for the subclass only.
# the "unmapped" status of the subclass column on the
# base class is a feature of the declarative module as of sqla 0.5.2.
continue
# expired object attributes and also deferred cols might not be in the
# dict. force it to load no matter what by using getattr().
if prop.key not in obj_state.dict:
getattr(obj, prop.key)
a, u, d = attributes.get_history(obj, prop.key)
if d:
attr[hist_col.key] = d[0]
obj_changed = True
elif u:
attr[hist_col.key] = u[0]
else:
# if the attribute had no value.
attr[hist_col.key] = a[0]
obj_changed = True
if not obj_changed:
# not changed, but we have relationships. OK
# check those too
for prop in obj_mapper.iterate_properties:
if isinstance(prop, RelationshipProperty) and \
attributes.get_history(obj, prop.key).has_changes():
obj_changed = True
break
if not obj_changed and not deleted:
return
attr['version_datetime'] = obj.version_datetime
attr['version_userid'] = obj.version_userid
attr['version_deleted'] = obj.version_deleted
hist = history_cls()
for key, value in attr.items():
setattr(hist, key, value)
session.add(hist)
obj.version_datetime = datetime.now()
obj.version_userid = getattr(session, 'userid', None)
obj.version_deleted = deleted
def versioned_session(session):
#event.listens_for(session, 'before_flush')
def before_flush(session, flush_context, instances):
for obj in versioned_objects(session.deleted):
create_version(obj, session, deleted = True)
for obj in versioned_objects(session.dirty):
create_version(obj, session)
def add_userid_to_session(userid, session):
if isinstance(session, scoping.scoped_session):
thread_local_session = session.registry()
thread_local_session.userid = userid
elif isinstance(session, Session):
session.userid = userid
else:
raise TypeError("Not sure how to add the userid into session of type {}".format(type(session)))
And here's how I'm using it (all non-essential parts have been cut out):
Base = declarative_base()
class User(Versioned, Base):
__tablename__ = 'user'
login = Column(String(60), primary_key=True, nullable=False)
groups = association_proxy('user_to_groups', 'group', creator=lambda group: UserToGroup(group_name=group.name))
def __init__(self, login, groups=None):
self.login = login
if groups:
for group in groups:
self.groups.append(group)
class Group(Versioned, Base):
__tablename__ = 'group'
name = Column(String(100), primary_key=True, nullable=False)
description = Column(String(100), nullable=True)
users = association_proxy('group_to_user', 'user', creator=lambda user: UserToGroup(user_login=user.login))
def __eq__(self, other):
return self.name == other.name
class UserToGroup(Versioned, Base):
__tablename__ = 'user_to_group'
user_login = Column(String(60), ForeignKey(User.login), primary_key=true)
group_name = Column(String(100), ForeignKey(Group.name), primary_key=true)
user = relationship(User, backref=backref('user_to_groups', cascade='all, delete-orphan'))
group = relationship(Group, backref=backref('group_to_user', cascade='all, delete-orphan'))
session.configure(bind=engine)
add_userid_to_session("test", session.registry())
versioned_session(session)
user = session.query(User).filter(User.login=='test').one()
user.groups.remove(Group(name ="g:admin"))
Before running that code the database currently has one user called 'test' and two groups that the user is attached to called 'g:admin' and 'g:superadmin'.
What it currently does is: Copy the existing user_to_group entry for the 'test' => 'g:admin' mapping and copy it to the history table. Then delete the entry from user_to_group.
What I'd like it to do is copy the value to the history table and then update the entry in user_to_group to have version_deleted set to true.
I'm thinking the way to do that is to snatch the entry out of the session.deleted (that's why I changed the order from the original code) and modify it put it into session.dirty. I'm just not sure what the "safest" way of doing this.
Another issue (which will likely require another question) is how to detect relationships which are covered in another table as currently the system makes a copy of the 'user' row into the history table and then updates the version information despite no real changes being made to the row.
EDIT: I've decided to do things a bit differently, but still have a problem... Instead of having a "deleted" flag in the live tables I actually delete the content and record another history item indicating when the deletion occurred. If I'm deleting an object directly then this works correctly. If I delete an object off of a relationship I'm not able to do it properly. A DELETE get's issued to the relationship table to remove the link, but I can't seem to figure out how to detect that deletion in the "create_version" method.
For example, if I do:
group = session.query(Group).filter(Group.name=='g:admin').one()
group.users.remove(group.users[0])
No objects are placed in session.deleted. I can detect some sort of deletion via attributes.get_history(obj, prop.key), but it seems to indicate a deletion of a UserToGroup object from Group (which I want to detect and record a history item on), but then also indicates a deletion of a Group from the UserToGroup object (which I don't want to do anything about because the actual Group is not being deleted).

SQLAlchemy logging of changes with date and user

This is very similar to another question that's over 3 years old: What's a good general way to look SQLAlchemy transactions, complete with authenticated user, etc?
I'm working on an application where I'd like to log all changes to particular tables. There's currently a really good "recipe" that does versioning, but I need to modify it to instead record a datetime when the change occurred and a user id of who made the change. I took the history_meta.py example that's packaged with SQLAlchemy and made it record times instead of version numbers, but I'm having trouble figuring out how to pass in a user id.
The question I referenced above suggests including the user id in the session object. That makes a lot of sense, but I'm not sure how to do that. I've tried something simple like session.userid = authenticated_userid(request) but in history_meta.py that attribute doesn't seem to be on the session object any more.
I'm doing all of this in the Pyramid framework and the session object that I'm using is defined as DBSession = scoped_session(sessionmaker(extension=ZopeTransactionExtension())). In a view I do session = DBSession() and then proceed to use session. (I'm not really sure if that's necessary, but that's what's going on)
Here's my modified history_meta.py in case someone might find it useful:
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy.orm import mapper, class_mapper, attributes, object_mapper
from sqlalchemy.orm.exc import UnmappedClassError, UnmappedColumnError
from sqlalchemy import Table, Column, ForeignKeyConstraint, DateTime
from sqlalchemy import event
from sqlalchemy.orm.properties import RelationshipProperty
from datetime import datetime
def col_references_table(col, table):
for fk in col.foreign_keys:
if fk.references(table):
return True
return False
def _history_mapper(local_mapper):
cls = local_mapper.class_
# set the "active_history" flag
# on on column-mapped attributes so that the old version
# of the info is always loaded (currently sets it on all attributes)
for prop in local_mapper.iterate_properties:
getattr(local_mapper.class_, prop.key).impl.active_history = True
super_mapper = local_mapper.inherits
super_history_mapper = getattr(cls, '__history_mapper__', None)
polymorphic_on = None
super_fks = []
if not super_mapper or local_mapper.local_table is not super_mapper.local_table:
cols = []
for column in local_mapper.local_table.c:
if column.name == 'version_datetime':
continue
col = column.copy()
col.unique = False
if super_mapper and col_references_table(column, super_mapper.local_table):
super_fks.append((col.key, list(super_history_mapper.local_table.primary_key)[0]))
cols.append(col)
if column is local_mapper.polymorphic_on:
polymorphic_on = col
if super_mapper:
super_fks.append(('version_datetime', super_history_mapper.base_mapper.local_table.c.version_datetime))
cols.append(Column('version_datetime', DateTime, default=datetime.now, nullable=False, primary_key=True))
else:
cols.append(Column('version_datetime', DateTime, default=datetime.now, nullable=False, primary_key=True))
if super_fks:
cols.append(ForeignKeyConstraint(*zip(*super_fks)))
table = Table(local_mapper.local_table.name + '_history', local_mapper.local_table.metadata,
*cols
)
else:
# single table inheritance. take any additional columns that may have
# been added and add them to the history table.
for column in local_mapper.local_table.c:
if column.key not in super_history_mapper.local_table.c:
col = column.copy()
col.unique = False
super_history_mapper.local_table.append_column(col)
table = None
if super_history_mapper:
bases = (super_history_mapper.class_,)
else:
bases = local_mapper.base_mapper.class_.__bases__
versioned_cls = type.__new__(type, "%sHistory" % cls.__name__, bases, {})
m = mapper(
versioned_cls,
table,
inherits=super_history_mapper,
polymorphic_on=polymorphic_on,
polymorphic_identity=local_mapper.polymorphic_identity
)
cls.__history_mapper__ = m
if not super_history_mapper:
local_mapper.local_table.append_column(
Column('version_datetime', DateTime, default=datetime.now, nullable=False, primary_key=False)
)
local_mapper.add_property("version_datetime", local_mapper.local_table.c.version_datetime)
class Versioned(object):
#declared_attr
def __mapper_cls__(cls):
def map(cls, *arg, **kw):
mp = mapper(cls, *arg, **kw)
_history_mapper(mp)
return mp
return map
def versioned_objects(iter):
for obj in iter:
if hasattr(obj, '__history_mapper__'):
yield obj
def create_version(obj, session, deleted = False):
obj_mapper = object_mapper(obj)
history_mapper = obj.__history_mapper__
history_cls = history_mapper.class_
obj_state = attributes.instance_state(obj)
attr = {}
obj_changed = False
for om, hm in zip(obj_mapper.iterate_to_root(), history_mapper.iterate_to_root()):
if hm.single:
continue
for hist_col in hm.local_table.c:
if hist_col.key == 'version_datetime':
continue
obj_col = om.local_table.c[hist_col.key]
# get the value of the
# attribute based on the MapperProperty related to the
# mapped column. this will allow usage of MapperProperties
# that have a different keyname than that of the mapped column.
try:
prop = obj_mapper.get_property_by_column(obj_col)
except UnmappedColumnError:
# in the case of single table inheritance, there may be
# columns on the mapped table intended for the subclass only.
# the "unmapped" status of the subclass column on the
# base class is a feature of the declarative module as of sqla 0.5.2.
continue
# expired object attributes and also deferred cols might not be in the
# dict. force it to load no matter what by using getattr().
if prop.key not in obj_state.dict:
getattr(obj, prop.key)
a, u, d = attributes.get_history(obj, prop.key)
if d:
attr[hist_col.key] = d[0]
obj_changed = True
elif u:
attr[hist_col.key] = u[0]
else:
# if the attribute had no value.
attr[hist_col.key] = a[0]
obj_changed = True
if not obj_changed:
# not changed, but we have relationships. OK
# check those too
for prop in obj_mapper.iterate_properties:
if isinstance(prop, RelationshipProperty) and \
attributes.get_history(obj, prop.key).has_changes():
obj_changed = True
break
if not obj_changed and not deleted:
return
attr['version_datetime'] = obj.version_datetime
hist = history_cls()
for key, value in attr.items():
setattr(hist, key, value)
session.add(hist)
print(dir(session))
obj.version_datetime = datetime.now()
def versioned_session(session):
#event.listens_for(session, 'before_flush')
def before_flush(session, flush_context, instances):
for obj in versioned_objects(session.dirty):
create_version(obj, session)
for obj in versioned_objects(session.deleted):
create_version(obj, session, deleted = True)
UPDATE:
Okay, it seems that in the before_flush() method the session I get is of type sqlalchemy.orm.session.Session where the session I attached the user_id to was sqlalchemy.orm.scoping.scoped_session. So, at some point an object layer is stripped off. Is it safe to assign the user_id to the Session within the scoped_session? Can I be sure that it won't be there for other requests?

Old question, but still very relevant.
You should avoid trying to place web session information on the database session. It's combining unrelated concerns and each has it's own lifecycle (which don't match). Here's an approach I use in Flask with SQLAlchemy (not Flask-SQLAlchemy, but that should work too). I've tried to comment where Pyramid would be different.
from flask import has_request_context # How to check if in a Flask session
from sqlalchemy import inspect
from sqlalchemy.orm import class_mapper
from sqlalchemy.orm.attributes import get_history
from sqlalchemy.event import listen
from YOUR_SESSION_MANAGER import get_user # This would be something in Pyramid
from my_project import models # Where your models are defined
def get_object_changes(obj):
""" Given a model instance, returns dict of pending
changes waiting for database flush/commit.
e.g. {
'some_field': {
'before': *SOME-VALUE*,
'after': *SOME-VALUE*
},
...
}
"""
inspection = inspect(obj)
changes = {}
for attr in class_mapper(obj.__class__).column_attrs:
if getattr(inspection.attrs, attr.key).history.has_changes():
if get_history(obj, attr.key)[2]:
before = get_history(obj, attr.key)[2].pop()
after = getattr(obj, attr.key)
if before != after:
if before or after:
changes[attr.key] = {'before': before, 'after': after}
return changes
def my_model_change_listener(mapper, connection, target):
changes = get_object_changes(target)
changes.pop("modify_ts", None) # remove fields you don't want to track
user_id = None
if has_request_context():
# Call your function to get active user and extract id
user_id = getattr(get_user(), 'id', None)
if user_id is None:
# What do you want to do if user can't be determined
pass
# You now have the model instance (target), the user_id who is logged in,
# and a dictionary of changes.
# Either do somthing "quick" with it here or call an async task (e.g.
# Celery) to do something with the information that may take longer
# than you want the request to take.
# Add the listener
listen(models.MyModel, 'after_update', my_model_change_listener)

After a bunch of fiddling I seem to able to set values on the session object within the scoped_session by doing the following:
DBSession = scoped_session(sessionmaker(extension=ZopeTransactionExtension()))
session = DBSession()
inner_session = session.registry()
inner_session.user_id = "test"
versioned_session(session)
Now the session object being passed around in history_meta.py has a user_id attribute on it which I set. I'm a little concerned about whether this is the right way of doing this as the object in the registry is a thread-local one and the threads are being re-used for different http requests.

I ran into this old question recently. My requirement is to log all changes to a set of tables.
I'll post the code I ended up with here in case anyone finds it useful. It has some limitations, especially around deletes, but works for my purposes. The code supports logging audit records for selected tables to either a log file, or an audit table in the db.
from app import db
import datetime
from flask import current_app, g
# your own session user goes here
# you'll need an id and an email in that model
from flask_user import current_user as user
import importlib
import logging
from sqlalchemy import event, inspect
from sqlalchemy.orm.attributes import get_history
from sqlalchemy.orm import ColumnProperty, class_mapper
from uuid import uuid4
class AuditManager (object):
config = {'storage': 'log',
#define class for Audit model for your project, if saving audit records in db
'auditModel': 'app.models.user_models.Audit'}
def __init__(self, app):
if 'AUDIT_CONFIG' in app.config:
app.before_request(self.before_request_handler)
self.config.update(app.config['AUDIT_CONFIG'])
event.listen(
db.session,
'after_flush',
self.db_after_flush
)
event.listen(
db.session,
'before_flush',
self.db_before_flush
)
event.listen(
db.session,
'after_bulk_delete',
self.db_after_bulk_delete
)
if self.config['storage'] == 'log':
self.logger = logging.getLogger(__name__)
elif self.config['storage'] == 'db':
# Load Audit model class at runtime, so that log file users dont need to define it
module_name, class_name = self.config['auditModel'].rsplit(".", 1)
self.AuditModel = getattr(importlib.import_module(module_name), class_name)
#Create a global request id
# Use this to group transactions together
def before_request_handler(self):
g.request_id = uuid4()
def db_after_flush(self, session, flush_context):
for instance in session.new:
if instance.__tablename__ in self.config['tables']:
# Record the inserts for this table
data = {}
auditFields = getattr(instance.__class__, 'Meta', None)
auditFields = getattr(auditFields,\
'auditFields', #Prefer to list auditable fields explicitly in the model's Meta class
self.get_fields(instance)) # or derive them otherwise
for attr in auditFields:
data[attr] = str(getattr(instance, attr, 'not set')) #Make every value a string in audit
self.log_it (session, 'insert', instance, data)
def db_before_flush(self, session, flush_context, instances):
for instance in session.dirty:
# Record the changes for this table
if instance.__tablename__ in self.config['tables']:
inspection = inspect(instance)
data = {}
auditFields = getattr(instance.__class__, 'Meta', None)
auditFields = getattr(auditFields,\
'auditFields',
self.get_fields(instance))
for attr in auditFields:
if getattr(inspection.attrs, attr).history.has_changes(): #We only log the new data
data[attr] = str(getattr(instance, attr, 'not set'))
self.log_it (session, 'change', instance, data)
for instance in session.deleted:
# Record the deletes for this table
# for this to be triggered, you must use this session based delete object construct.
# Eg: session.delete({query}.first())
if instance.__tablename__ in self.config['tables']:
data = {}
auditFields = getattr(instance.__class__, 'Meta', None)
auditFields = getattr(auditFields,\
'auditFields',
self.get_fields(instance))
for attr in auditFields:
data[attr] = str(getattr(instance, attr, 'not set'))
self.log_it (session, 'delete', instance, data)
def db_after_bulk_delete(self, delete_context):
instance = delete_context.query.column_descriptions[0]['type'] #only works for single table deletes
if delete_context.result.returns_rows:
# Not sure exactly how after_bulk_delete is expected work, since the context.results is empty,
# as delete statement return no results
for row in delete_context.result:
data = {}
auditFields = getattr(instance.__class__, 'Meta', None)
auditFields = getattr(auditFields,\
'auditFields',
self.get_fields(instance))
for attr in auditFields:
data[attr] = str(getattr(row, attr, 'not set')) #Make every value a string in audit
self.log_it (delete_context.session, 'delete', instance, data)
else:
# Audit what we can when we don't have indiividual rows to look at
self.log_it (delete_context.session, 'delete', instance,\
{"rowcount": delete_context.result.rowcount})
def log_it (self, session, action, instance, data):
if self.config['storage'] == 'log':
self.logger.info("request_id: %s, table: %s, action: %s, user id: %s, user email: %s, date: %s, data: %s" \
% (getattr(g, 'request_id', None), instance.__tablename__, action, getattr(user, 'id', None), getattr(user, 'email', None),\
datetime.datetime.now(), data))
elif self.config['storage'] == 'db':
audit = self.AuditModel(request_id=str(getattr(g, 'request_id', None)),
table=str(instance.__tablename__),
action=action,
user_id=getattr(user, 'id', None),
user_email=getattr(user, 'email', None),
date=datetime.datetime.now(),
data=data
)
session.add(audit)
def get_fields(self, instance):
fields = []
for attr in class_mapper(instance.__class__).column_attrs:
fields.append(attr.key)
return fields
Suggested Model, if you want to store audit records in the database.
class Audit(db.Model):
__tablename__ = 'audit'
id = db.Column(db.Integer, primary_key=True)
request_id = db.Column(db.Unicode(50), nullable=True, index=True, server_default=u'')
table = db.Column(db.Unicode(50), nullable=False, index=True, server_default=u'')
action = db.Column(db.Unicode(20), nullable=False, server_default=u'')
user_id = db.Column(db.Integer, db.ForeignKey('user.id', ondelete='SET NULL'), nullable=True, )
user_email = db.Column(db.Unicode(255), nullable=False, server_default=u'')
date = db.Column(db.DateTime, default=db.func.now())
data = db.Column(JSON)
In settings:
AUDIT_CONFIG = {
"tables": ['user', 'order', 'batch']
}

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Can't override method in Sqlalchemy inherited class - python

Related

Preserve key naming when creating a joinedload sql_query in SQLAlchemy

SqlAlchemy Classes Declaration of two dependent classes

SQLAlchemy filter for files in a folder-file type hierarchy

SQLAlchemy audit logging; how to handle deletes?

SQLAlchemy logging of changes with date and user

Categories

Resources