I'm trying to model an entity that has one or more one-to-many relationships, such that its last_modified attribute is updated when
a child is added or removed
a child is modified
the entity itself is modified
I've put together the following minimal example:
class Config(Base):
    """Parent entity; last_modified should track changes to the row and its children."""
    __tablename__ = 'config'

    ID = Column('ID', Integer, primary_key=True)
    name = Column('name', String)
    # 'now' is presumably a datetime-returning callable (e.g. datetime.utcnow)
    # defined elsewhere — TODO confirm.
    last_modified = Column('last_modified', DateTime, default=now, onupdate=now)
    # One-to-many: ConfigParam rows gain a .config backref to this row.
    params = relationship('ConfigParam', backref='config')
class ConfigParam(Base):
    """Key/value child row belonging to exactly one Config."""
    __tablename__ = 'config_params'

    ID = Column('ID', Integer, primary_key=True)
    ConfigID = Column('ConfigID', Integer, ForeignKey('config.ID'), nullable=False)
    key = Column('key', String)
    value = Column('value', Float)
#event.listens_for(Config.params, 'append')
#event.listens_for(Config.params, 'remove')
# NOTE(review): the leading '#' on the two lines above is almost certainly a
# formatting-mangled '@' — as written the listener is never registered.
def receive_append_or_remove(target, value, initiator):
    """Touch the parent Config's last_modified when a child is appended/removed."""
    target.last_modified = now()
#event.listens_for(ConfigParam.key, 'set')
#event.listens_for(ConfigParam.value, 'set')
# NOTE(review): the leading '#' above is likely a formatting-mangled '@'.
def receive_attr_change(target, value, oldvalue, initiator):
    """Propagate a child attribute change to the parent Config's last_modified."""
    if target.config:
        # don't act if the parent config isn't yet set
        # i.e. during __init__
        target.config.last_modified = now()
This seems to work, but I'm wondering if there's a better way to do this?
Specifically, this becomes very verbose since my actual ConfigParam implementation has more attributes and I'm having multiple one-to-many relations configured on the parent Config class.
Take this with a huge grain of salt, it "seems" to work, could explode:
def rel_listener(t, v, i):
    """Collection append/remove hook: touch the owning object's last_modified."""
    t.last_modified = now()
def listener(t, v, o, i):
    """Attribute 'set' hook: touch the parent config's last_modified."""
    # Skip while t.config is not yet populated (e.g. during __init__).
    if t.config:
        t.config.last_modified = now()
from sqlalchemy import inspect

# Wire the same listeners onto every relationship of Config and every column
# attribute of ConfigParam. Caveat (noted below): this also binds listeners to
# columns like ID and ConfigID.
for rel in inspect(Config).relationships:
    event.listen(rel, 'append', rel_listener)
    event.listen(rel, 'remove', rel_listener)

for col in inspect(ConfigParam).column_attrs:
    event.listen(col, 'set', listener)
Problem is that the inspections make no exceptions and columns such as 'ID' and 'ConfigID' will be bound to event listeners.
Another perhaps slightly less tedious form would be to just use a list of attributes to bind events to in a similar fashion:
# Explicit allow-list variant: bind only the attributes named here.
for attr in ['key', 'value']:
    event.listen(getattr(ConfigParam, attr), 'set', listener)
This gives you control over what is bound to events and what is not.
Related
This seems like a real beginner question, but I'm having trouble finding a simple answer. I have simplified this down to just the bare bones with a simple data model representing a one-to-many relationship:
class Room(db.Model):
    """A room with a (mutable) seating capacity."""
    __tablename__ = 'rooms'

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), unique=True)
    capacity = db.Column(db.Integer)
    events = db.relationship('Event', backref='room')
class Event(db.Model):
    """An event held in a room; spare capacity is frozen at creation time."""
    # NOTE(review): table name 'counts' does not match the class name — confirm
    # it is intentional.
    __tablename__ = 'counts'

    id = db.Column(db.Integer, primary_key=True)
    # Room.capacity - attendance, snapshotted at insert time (Room.capacity
    # may change later).
    unusedCapacity = db.Column(db.Integer)
    attendance = db.Column(db.Integer)
    room_id = db.Column(db.Integer, db.ForeignKey('rooms.id'))
Event.unusedCapacity is calculated as Room.capacity - Event.attendance, but I need to store the value in the column — Room.capacity may change over time, but the Event.unusedCapacity needs to reflect the actual unused capacity at the time of the Event.
I am currently querying the Room and then creating the event:
room = Room.query.get(room_id) # using Flask sqlAlchemy
# NOTE(review): pseudo-code — lowercase 'event(' is presumably the Event class,
# and '...etc' stands in for the remaining keyword arguments.
event = event(unusedCapacity = room.capacity - attendance, ...etc)
My question is: is there a more efficient way to do this in one step?
As noted in the comments by @SuperShoot, a query on insert can calculate the unused capacity in the database without having to fetch first. An explicit constructor, such as shown by @tooTired, could pass a scalar subquery as unusedCapacity:
class Event(db.Model):
    ...

    def __init__(self, **kwgs):
        """Default unusedCapacity to a scalar subquery evaluated at INSERT time."""
        if 'unusedCapacity' not in kwgs:
            # Let the database compute capacity - attendance for the given room.
            # Until flush, the attribute holds this SQL expression, not a number.
            kwgs['unusedCapacity'] = \
                db.select([Room.capacity - kwgs['attendance']]).\
                where(Room.id == kwgs['room_id']).\
                as_scalar()
        super().__init__(**kwgs)
Though it is possible to use client-invoked SQL expressions as defaults, I'm not sure how one could refer to the values to be inserted in the expression without using a context-sensitive default function, but that did not quite work out: the scalar subquery was not inlined and SQLAlchemy tried to pass it using placeholders instead.
A downside of the __init__ approach is that you cannot perform bulk inserts that would handle unused capacity using the table created for the model as is, but will have to perform a manual query that does the same.
Another thing to look out for is that until a flush takes place the unusedCapacity attribute of a new Event object holds the SQL expression object, not the actual value. The solution by @tooTired is more transparent in this regard, since a new Event object will hold the numeric value of unused capacity from the get go.
SQLAlchemy adds an implicit constructor to all model classes which accepts keyword arguments for all its columns and relationships. You can override this and pass the kwargs without unusedCapacity and get the room capacity in the constructor:
class Event(db.Model):
    # ...

    def __init__(self, **kwargs):
        """Build an Event, deriving unusedCapacity from the room's current
        capacity (kwargs arrive without unusedCapacity).

        Fixes vs. the original snippet: ``self`` was missing from the
        signature, and ``room_id`` / ``attendance`` were written as bare
        names instead of string keys into ``kwargs``.
        """
        room = Room.query.get(kwargs.get('room_id'))
        super(Event, self).__init__(
            unusedCapacity=room.capacity - kwargs.get('attendance'),
            **kwargs)
# Create a new event normally; unusedCapacity is filled in by __init__.
event = Event(id = 1, attendance = 1, room_id = 1)
I have been playing around with SQLAlchemy and found out that I cannot track reliably what is being changed within database.
I have created an example that explains what my concern is:
import re
import datetime
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import (
declarative_base,
declared_attr,
)
from sqlalchemy import (
create_engine,
event,
Column,
Boolean,
Integer,
String,
Unicode,
DateTime,
Index,
ForeignKey,
CheckConstraint,
)
from sqlalchemy.orm import (
scoped_session,
sessionmaker,
Session,
relationship,
backref,
)
import transaction
from zope.sqlalchemy import ZopeTransactionExtension
class ExtendedSession(Session):
    """Session subclass carrying one extra application-specific attribute."""
    # Arbitrary per-session payload; None until assigned by application code.
    my_var = None
# Thread-local session factory integrated with Zope's transaction manager.
# NOTE(review): ZopeTransactionExtension is deprecated in newer zope.sqlalchemy
# releases (replaced by register()) — confirm the installed version.
DBSession = scoped_session(
    sessionmaker(extension=ZopeTransactionExtension(),
                 class_=ExtendedSession
                 )
)
class BaseModel(object):
    """Shared declarative base: query property, surrogate PK, auto tablename."""
    query = DBSession.query_property()
    id = Column(
        Integer,
        primary_key=True,
    )

    #declared_attr
    # NOTE(review): '#declared_attr' above is almost certainly a
    # formatting-mangled '@declared_attr' decorator.
    def __tablename__(cls):
        # CamelCase -> snake_case, e.g. "MyModel" -> "my_model".
        class_name = re.sub(r"([A-Z])", r"_\1", cls.__name__).lower()[1:]
        return "{0}".format(
            class_name,
        )
# All models inherit BaseModel's id/query/__tablename__ helpers.
Base = declarative_base(cls=BaseModel)
def initialize_sql(engine):
    """Bind the scoped session and the declarative metadata to *engine*."""
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine
# In-memory SQLite for the demo.
engine = create_engine("sqlite://")
initialize_sql(engine)
class Parent(Base):
    """Demo parent row with a single required string column."""

    # *** Columns
    col1 = Column(String, nullable=False)

    # *** Relationships
    # ("child_elements" backref is configured on Child.parent)

    # *** Methods
    def __repr__(self):
        return "<Parent(id: '{0}', col1: '{1}')>".format(self.id, self.col1)
class Child(Base):
    """Demo child row; deleting a Parent cascades to its children."""
    # *** Columns
    col1 = Column (
        String,
        nullable=False,
    )
    parent_id = Column (
        Integer,
        ForeignKey (
            Parent.id,
            ondelete="CASCADE",
        ),
        nullable=False,
    )

    # *** Relationships
    parent = relationship (
        Parent,
        backref=backref(
            "child_elements",
            uselist=True,
            cascade="save-update, delete",
            # dynamic: collection is a query object, loaded on demand.
            lazy="dynamic",
        ),
        # If below is uncommented then instance of Parent won't appear in session.dirty
        # However this relationship will never be loaded (even if needed)
        #lazy="noload",
    )

    # *** Methods
    def __repr__(self):
        return "<Child(id: '{0}', col1: '{1}', parent_id: '{2}')>".format(
            self.id,\
            self.col1,\
            self.parent_id,\
        )
#event.listens_for(DBSession, 'before_flush')
# NOTE(review): the '#' above is likely a formatting-mangled '@'.
def before_flush(session, flush_context, instances):
    """Print every new, dirty and deleted instance queued for this flush."""
    time_stamp = datetime.datetime.utcnow()
    for obj in session.new:
        print(" ### NEW {0}".format(repr(obj)))
    for obj in session.dirty:
        print(" ### DIRTY {0}".format(repr(obj)))
    for obj in session.deleted:
        print(" ### DELETED {0}".format(repr(obj)))
# Rebuild the schema from scratch for the demo.
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

with transaction.manager:
    parent = Parent(col1="parent")
    DBSession.add(parent)
    DBSession.flush()
    # Below loop is to demonstrate that
    # each time child object is created and linked to parent
    # parent is also marked as modified
    # how to avoid that?
    # or optionally is it possible to detect this in before_flush event
    # without issuing additional SQL query?
    for i in range(0, 10):
        parent = Parent.query.filter(Parent.col1 == "parent").first()
        child = Child(col1="{0}".format(i))
        child.parent = parent
        DBSession.add(child)
        DBSession.flush()
    # Below update will not cause associated instance of Parent appearing in session.dirty
    child = Child.query.filter(Child.col1 == "3").first()
    child.col1 = "updated"
    DBSession.add(child)
    DBSession.flush()
In short - there are two objects:
Parent
Child - linked to Parent
Each time I add new instance of Child and link it with instance of Parent that instance of Parent also appears within session.dirty of before_flush event.
The SQLAlchemy community advised that this behavior is expected (although I think there should be an option to change the default behavior - I could not find it in the documentation)
So here is my question: is it possible to configure relationship such way that when I add a new instance of Child and link it to instance of Parent then that instance of Parent won't appear within session.dirty?
I have tried setting relationship as lazy="noload" and it is not an option since I may need to use that relationship (so I may need to load it)
I would also accept a solution that would allow me to detect that the Parent has not been changed within the before_flush event handler - however I do not want to trigger an additional query to achieve this.
I would appreciate your help,
Greg
After hours of research and a hint from SQLAlchemy community I found solution that seems to work the way I need (notice additional condition within session.dirty block).
#event.listens_for(DBSession, 'before_flush')
# NOTE(review): the '#' above is likely a formatting-mangled '@'.
def before_flush(session, flush_context, instances):
    """Print flush contents, skipping objects dirty only via collections."""
    time_stamp = datetime.datetime.utcnow()
    for obj in session.new:
        print(" ### NEW {0}".format(repr(obj)))
    for obj in session.dirty:
        # include_collections=False filters out parents that are dirty only
        # because a child collection changed — the fix this answer describes.
        if session.is_modified(obj, include_collections=False):
            print(" ### DIRTY {0}".format(repr(obj)))
    for obj in session.deleted:
        print(" ### DELETED {0}".format(repr(obj)))
The documentation related to my solution can be found here: http://docs.sqlalchemy.org/en/latest/orm/session_api.html#sqlalchemy.orm.session.Session.is_modified
In short - specifying include_collections=False within session.is_modified makes SQLAlchemy ignore situations where multivalued collections have been changed (in my case, if a child was changed then its parent would be filtered out by that additional check).
I need to implement a "related items" feature, i.e. to allow items from the same table to be arbitrarily linked to each other in a many-to-many fashion. Something similar to how news websites show related articles.
Also, I need the relationship to be bi-directional, something like this:
a = Item()
b = Item()
a.related.append(b)
assert a in b.related # True
Now, on SQL level I imagine this could be solved by modifying the "standard" many-to-many relationship so 2 records are inserted into the association table each time an association is made, so (a -> b) and (b -> a) are two separate records.
Alternatively, the join condition for the many-to-many table could somehow check both sides of the association, so roughly instead of ... JOIN assoc ON a.id = assoc.left_id ... SQLAlchemy would produce something like ... JOIN assoc ON a.id = assoc.left_id OR a.id = assoc.right_id ...
Is there a way to configure this with SQLAlchemy so the relation works similar to a "normal" many-to-many relationship?
It's likely that I just don't know the correct terminology - everything I came up with - "self-referential", "bidirectional", "association" - is used to describe something else in SQLAlchemy.
Using Attribute Events should do the job. See the sample code below, where a little ugly piece of code exists solely to avoid endless recursion:
class Item(Base):
    """Item with a self-referential many-to-many 'related' collection."""
    __tablename__ = "item"

    id = Column(Integer, primary_key=True)
    name = Column(String(255), nullable=False)

    # relationships
    # One-directional on its own (follows t_links from from_id to to_id);
    # the append listener below mirrors each link to get bidirectionality.
    related = relationship('Item',
        secondary = t_links,
        primaryjoin = (id == t_links.c.from_id),
        secondaryjoin = (id == t_links.c.to_id),
    )
# Pairs (value, target) for mirror-appends we scheduled ourselves; used to
# swallow the re-entrant event instead of recursing forever.
_OTHER_SIDE = set()

from sqlalchemy import event
def Item_related_append_listener(target, value, initiator):
    """Mirror an append on one side of the self-referential m2m to the other.

    A module-level set records the mirror append this listener triggers, so
    the resulting re-entrant event is consumed rather than mirrored again.
    """
    global _OTHER_SIDE
    if (target, value) in _OTHER_SIDE:
        # This is the mirror event we scheduled — consume the marker.
        _OTHER_SIDE.remove((target, value))
    else:
        _OTHER_SIDE.add((value, target))
        if target not in value.related:
            value.related.append(target)
# Register for appends on Item.related.
event.listen(Item.related, 'append', Item_related_append_listener)

# ...
a = Item()
b = Item()
a.related.append(b)
assert a in b.related # True
For completeness sake, here's the code I ended up with; the listener method is slightly different to avoid using a global variable, an also there's a listener for remove event.
import sqlalchemy as sa
# Association table: one row per directed link between two items.
related_items = sa.Table(
    "related_items",
    Base.metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column("from_id", sa.ForeignKey("items.id")),
    sa.Column("to_id", sa.ForeignKey("items.id")),
)
class Item(Base):
    """Item with a self-referential many-to-many 'related' collection."""
    __tablename__ = 'items'
    ...

    # Directed from_id -> to_id; the listeners below mirror links both ways.
    related = sa.orm.relationship('Item',
        secondary = related_items,
        primaryjoin = (id == related_items.c.from_id),
        secondaryjoin = (id == related_items.c.to_id),
    )
def item_related_append_listener(target, value, initiator):
    """Mirror target -> value appends, using per-object bookkeeping sets
    (instead of a module-level global) to break the event recursion."""
    seen = getattr(target, "__related_to__", None)
    if seen is None:
        seen = set()
        target.__related_to__ = seen
    seen.add(value)
    if target not in getattr(value, "__related_to__", set()):
        value.related.append(target)
# Mirror plain appends on Item.related.
sa.event.listen(Item.related, 'append', item_related_append_listener)
def item_related_remove_listener(target, value, initiator):
    """Mirror a removal: drop target from value.related if still linked."""
    try:
        value.related.remove(target)
    except ValueError:
        # Already unlinked on the other side (the mirrored event) — nothing to do.
        pass
# Mirror removals on Item.related.
sa.event.listen(Item.related, 'remove', item_related_remove_listener)
This is the first time I've used ORM, so I'm not sure the best way to handle this. I have a one-to-many relationship where each Parent can have many Children:
class Parent(Base):
    """Team-like parent; gid is an externally supplied string key."""
    __tablename__ = 'Parent'

    name = Column(String(50))
    gid = Column(String(16), primary_key = True)
    lastUpdate = Column(DateTime)

    def __init__(self, name, gid):
        self.name = name
        self.gid = gid
        # Stamp creation time; meant to be refreshed when updates arrive.
        self.lastUpdate = datetime.datetime.now()
class Child(Base):
    """Child row keyed to a Parent by its gid."""
    __tablename__ = 'Child'

    id = Column(Integer, primary_key = True)
    loc = Column(String(50))
    status = Column(String(50))
    parent_gid = Column(String(16), ForeignKey('Parent.gid'))
    parent = relationship("Parent", backref=backref('children'))
Now, updates are coming in over the network. When an update comes in, I want to UPDATE the appropriate Parent row (updating lastUpdate column) and INSERT new children rows into the database. I don't know how to do that with ORM. Here is my failed attempt:
# File-backed SQLite; 'module=dbapi2' pins the DBAPI driver explicitly.
engine = create_engine('sqlite+pysqlite:///file.db',
                       module=dbapi2)
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
def addChildren(parent):
    """Insert *parent* if it is new; otherwise attach its children to the
    already-persisted row and insert them.

    Fixes vs. the original: the function referenced the global ``p1``
    instead of its ``parent`` argument, and assigned ``parent.chlidren``
    (typo), which silently created a new attribute instead of clearing
    the children collection.
    """
    existing = session.query(Parent).filter(Parent.gid == parent.gid).all()
    if len(existing) == 0:
        session.add(parent)
        session.commit()
    else:
        # Detach the children from the transient parent, point them at the
        # persisted row via the FK, and insert them directly.
        updateChildren = parent.children[:]
        parent.children = []
        for c in updateChildren:
            c.parent_gid = parent.gid
        session.add_all(updateChildren)
        session.commit()
if __name__ == '__main__':
    # first update from the 'network'
    p1 = Parent(name='team1', gid='t1')
    p1.children = [Child(loc='x', status='a'), Child(loc='y', status='b')]
    addChildren(p1)

    import time
    time.sleep(1)

    # here comes another network update
    p1 = Parent(name='team1', gid='t1')
    p1.children = [Child(loc='z', status='a'), Child(loc='k', status='b')]
    # this fails
    addChildren(p1)
I initially tried to do a merge, but that caused the old children to be disassociated with the parent (the foreign IDs were set to null). What is the best way to approach this with ORM? Thanks
EDIT
I guess it doesn't really make sense to create entirely new objects when updates come in over the network. I should just query the session for the appropriate parent, then create new children if necessary and merge? E.g.
def addChildren(pname, pid, cloc, cstat):
    """Append a new Child to the Parent identified by *pid*, creating the
    Parent first when it does not exist yet."""
    matches = session.query(Parent).filter(Parent.gid == pid).all()
    if matches:
        existing = matches[0]
        existing.children.append(Child(loc=cloc, status=cstat))
        session.merge(existing)
        session.commit()
    else:
        fresh = Parent(pname, pid)
        fresh.children = [Child(loc=cloc, status=cstat)]
        session.add(fresh)
        session.commit()
You are right - you should not create the same parent twice. In terms of adding children, ... well, you really need only to add them and you do not care about the existing ones... So your edited code should do the job just fine. You can make it shorter and more readable though:
def addChildren(pname, pid, cloc, cstat):
    """Get-or-create the Parent for *pid*, append one Child, and commit."""
    p = session.query(Parent).get(pid)  # Parent instance or None
    if p is None:
        p = Parent(pname, pid)
        session.add(p)
    p.children.append(Child(loc=cloc, status=cstat))
    session.commit()
The disadvantage of this way is that for existing Parent the whole collection of Children will be loaded into memory before a new Child is added and later saved to the database. If this is the case (many and increasing number of children for each parent), then the lazy='noload' might be useful:
# noload: appending to p.children never SELECTs the existing collection.
parent = relationship("Parent", backref=backref('children', lazy='noload'))
This might dramatically improve the speed of inserts, but in this case the access to p.children will never load the existing objects from the database. In such scenarios it is enough to define another relationship. In these situations I prefer to use Building Query-Enabled Properties, so you end up with one property only for adding objects, and the other only for quering persisted results, which often are used by different parts of the system.
Lets say that I have a database structure with three tables that look like this:
items
- item_id
- item_handle
attributes
- attribute_id
- attribute_name
item_attributes
- item_attribute_id
- item_id
- attribute_id
- attribute_value
I would like to be able to do this in SQLAlchemy:
item = Item('item1')
item.foo = 'bar'  # desired: arbitrary attribute persisted to item_attributes
session.add(item)
session.commit()

item1 = session.query(Item).filter_by(handle='item1').one()
print item1.foo # => 'bar'
I'm new to SQLAlchemy and I found this in the documentation (http://www.sqlalchemy.org/docs/05/mappers.html#mapping-a-class-against-multiple-tables):
# Map Item against a three-way join; FK columns shared between tables must be
# listed together so the mapper keeps them in sync.
j = join(items, item_attributes, items.c.item_id == item_attributes.c.item_id). \
    join(attributes, item_attributes.c.attribute_id == attributes.c.attribute_id)

mapper(Item, j, properties={
    'item_id': [items.c.item_id, item_attributes.c.item_id],
    'attribute_id': [item_attributes.c.attribute_id, attributes.c.attribute_id],
})
It only adds item_id and attribute_id to Item and it's not possible to add attributes to the Item object.
Is what I'm trying to achieve possible with SQLAlchemy? Is there a better way to structure the database to get the same behaviour of "dynamic columns"?
This is called the entity-attribute-value pattern. There is an example about this under the SQLAlchemy examples directory: vertical/.
If you are using PostgreSQL, then there is also the hstore contrib module that can store a string to string mapping. If you are interested then I have some code for a custom type that makes it possible to use that to store extended attributes via SQLAlchemy.
Another option to store custom attributes is to serialize them to a text field. In that case you will lose the ability to filter by attributes.
The link to vertical/vertical.py is broken. The example had been renamed to dictlike-polymorphic.py and dictlike.py.
I am pasting in the contents of dictlike.py:
"""Mapping a vertical table as a dictionary.
This example illustrates accessing and modifying a "vertical" (or
"properties", or pivoted) table via a dict-like interface. These are tables
that store free-form object properties as rows instead of columns. For
example, instead of::
# A regular ("horizontal") table has columns for 'species' and 'size'
Table('animal', metadata,
Column('id', Integer, primary_key=True),
Column('species', Unicode),
Column('size', Unicode))
A vertical table models this as two tables: one table for the base or parent
entity, and another related table holding key/value pairs::
Table('animal', metadata,
Column('id', Integer, primary_key=True))
# The properties table will have one row for a 'species' value, and
# another row for the 'size' value.
Table('properties', metadata,
Column('animal_id', Integer, ForeignKey('animal.id'),
primary_key=True),
Column('key', UnicodeText),
Column('value', UnicodeText))
Because the key/value pairs in a vertical scheme are not fixed in advance,
accessing them like a Python dict can be very convenient. The example below
can be used with many common vertical schemas as-is or with minor adaptations.
"""
class VerticalProperty(object):
    """A key/value pair.

    This class models rows in the vertical table.
    """

    def __init__(self, key, value):
        self.key = key
        self.value = value

    def __repr__(self):
        # Uses the runtime class name so subclasses (e.g. AnimalFact) show theirs.
        return '<%s %r=%r>' % (self.__class__.__name__, self.key, self.value)
class VerticalPropertyDictMixin(object):
    """Adds obj[key] access to a mapped class.

    This is a mixin class. It can be inherited from directly, or included
    with multiple inheritance.

    Classes using this mixin must define two class properties::

    _property_type:
      The mapped type of the vertical key/value pair instances. Will be
      invoked with two positional arguments: key, value

    _property_mapping:
      A string, the name of the Python attribute holding a dict-based
      relationship of _property_type instances.

    Using the VerticalProperty class above as an example,::

      class MyObj(VerticalPropertyDictMixin):
          _property_type = VerticalProperty
          _property_mapping = 'props'

      mapper(MyObj, sometable, properties={
        'props': relationship(VerticalProperty,
                              collection_class=attribute_mapped_collection('key'))})

    Dict-like access to MyObj is proxied through to the 'props' relationship::

      myobj['key'] = 'value'
      # ...is shorthand for:
      myobj.props['key'] = VerticalProperty('key', 'value')

      myobj['key'] = 'updated value'
      # ...is shorthand for:
      myobj.props['key'].value = 'updated value'

      print myobj['key']
      # ...is shorthand for:
      print myobj.props['key'].value
    """

    _property_type = VerticalProperty
    _property_mapping = None

    # Name-mangled to _VerticalPropertyDictMixin__map, so subclasses cannot
    # accidentally shadow it; resolves to the configured relationship dict.
    __map = property(lambda self: getattr(self, self._property_mapping))

    def __getitem__(self, key):
        return self.__map[key].value

    def __setitem__(self, key, value):
        # Update an existing property in place, or create a new one.
        property = self.__map.get(key, None)
        if property is None:
            self.__map[key] = self._property_type(key, value)
        else:
            property.value = value

    def __delitem__(self, key):
        del self.__map[key]

    def __contains__(self, key):
        return key in self.__map

    # Implement other dict methods to taste. Here are some examples:

    def keys(self):
        return self.__map.keys()

    def values(self):
        return [prop.value for prop in self.__map.values()]

    def items(self):
        return [(key, prop.value) for key, prop in self.__map.items()]

    def __iter__(self):
        return iter(self.keys())
if __name__ == '__main__':
    # NOTE(review): this demo uses Python 2 print statements throughout.
    from sqlalchemy import (MetaData, Table, Column, Integer, Unicode,
                            ForeignKey, UnicodeText, and_, not_)
    from sqlalchemy.orm import mapper, relationship, create_session
    from sqlalchemy.orm.collections import attribute_mapped_collection

    metadata = MetaData()

    # Here we have named animals, and a collection of facts about them.
    animals = Table('animal', metadata,
                    Column('id', Integer, primary_key=True),
                    Column('name', Unicode(100)))

    facts = Table('facts', metadata,
                  Column('animal_id', Integer, ForeignKey('animal.id'),
                         primary_key=True),
                  Column('key', Unicode(64), primary_key=True),
                  Column('value', UnicodeText, default=None),)

    class AnimalFact(VerticalProperty):
        """A fact about an animal."""

    class Animal(VerticalPropertyDictMixin):
        """An animal.

        Animal facts are available via the 'facts' property or by using
        dict-like accessors on an Animal instance::

          cat['color'] = 'calico'
          # or, equivalently:
          cat.facts['color'] = AnimalFact('color', 'calico')
        """
        _property_type = AnimalFact
        _property_mapping = 'facts'

        def __init__(self, name):
            self.name = name

        def __repr__(self):
            return '<%s %r>' % (self.__class__.__name__, self.name)

    # 'facts' is a dict keyed by AnimalFact.key, as the mixin requires.
    mapper(Animal, animals, properties={
        'facts': relationship(
            AnimalFact, backref='animal',
            collection_class=attribute_mapped_collection('key')),
        })
    mapper(AnimalFact, facts)

    metadata.bind = 'sqlite:///'
    metadata.create_all()
    session = create_session()

    stoat = Animal(u'stoat')
    stoat[u'color'] = u'reddish'
    stoat[u'cuteness'] = u'somewhat'

    # dict-like assignment transparently creates entries in the
    # stoat.facts collection:
    print stoat.facts[u'color']

    session.add(stoat)
    session.flush()
    session.expunge_all()

    critter = session.query(Animal).filter(Animal.name == u'stoat').one()
    print critter[u'color']
    print critter[u'cuteness']

    critter[u'cuteness'] = u'very'

    print 'changing cuteness:'
    metadata.bind.echo = True
    session.flush()
    metadata.bind.echo = False

    marten = Animal(u'marten')
    marten[u'color'] = u'brown'
    marten[u'cuteness'] = u'somewhat'
    session.add(marten)

    shrew = Animal(u'shrew')
    shrew[u'cuteness'] = u'somewhat'
    shrew[u'poisonous-part'] = u'saliva'
    session.add(shrew)

    loris = Animal(u'slow loris')
    loris[u'cuteness'] = u'fairly'
    loris[u'poisonous-part'] = u'elbows'
    session.add(loris)
    session.flush()

    # Query animals by fact key/value via relationship.any().
    q = (session.query(Animal).
         filter(Animal.facts.any(
             and_(AnimalFact.key == u'color',
                  AnimalFact.value == u'reddish'))))
    print 'reddish animals', q.all()

    # Save some typing by wrapping that up in a function:
    with_characteristic = lambda key, value: and_(AnimalFact.key == key,
                                                  AnimalFact.value == value)

    q = (session.query(Animal).
         filter(Animal.facts.any(
             with_characteristic(u'color', u'brown'))))
    print 'brown animals', q.all()

    q = (session.query(Animal).
         filter(not_(Animal.facts.any(
             with_characteristic(u'poisonous-part', u'elbows')))))
    print 'animals without poisonous-part == elbows', q.all()

    q = (session.query(Animal).
         filter(Animal.facts.any(AnimalFact.value == u'somewhat')))
    print 'any animal with any .value of "somewhat"', q.all()

    # Facts can be queried as well.
    q = (session.query(AnimalFact).
         filter(with_characteristic(u'cuteness', u'very')))
    print 'just the facts', q.all()

    metadata.drop_all()