sqlalchemy add child in one-to-many relationship - python

This is the first time I've used ORM, so I'm not sure the best way to handle this. I have a one-to-many relationship where each Parent can have many Children:
class Parent(Base):
    """ORM model for the ``Parent`` table; ``gid`` is the primary key."""

    __tablename__ = 'Parent'

    name = Column(String(50))
    gid = Column(String(16), primary_key=True)
    lastUpdate = Column(DateTime)

    def __init__(self, name, gid):
        """Store ``name`` and ``gid`` and stamp ``lastUpdate`` with the current local time."""
        self.name = name
        self.gid = gid
        self.lastUpdate = datetime.datetime.now()
class Child(Base):
    """ORM model for the ``Child`` table; each row references one ``Parent`` by ``gid``."""

    __tablename__ = 'Child'

    id = Column(Integer, primary_key=True)
    loc = Column(String(50))
    status = Column(String(50))
    parent_gid = Column(String(16), ForeignKey('Parent.gid'))
    # The backref exposes the reverse side as ``Parent.children``.
    parent = relationship("Parent", backref=backref('children'))
Now, updates are coming in over the network. When an update comes in, I want to UPDATE the appropriate Parent row (updating lastUpdate column) and INSERT new children rows into the database. I don't know how to do that with ORM. Here is my failed attempt:
# SQLite database stored in file.db; the DB-API module is passed in explicitly.
engine = create_engine('sqlite+pysqlite:///file.db',
module=dbapi2)
# Emit CREATE TABLE for the models declared on Base.
Base.metadata.create_all(engine)
# Single module-level session used by addChildren().
session = sessionmaker(bind=engine)()
def addChildren(parent):
    """Asker's failing attempt: insert ``parent`` if unknown, else add its children.

    The logic is reproduced unchanged (only the indentation is restored)
    because the surrounding discussion is about why this version fails.
    """
    # NOTE(review): this reads the module-level ``p1`` rather than the
    # ``parent`` argument; it only works because the caller passes ``p1``.
    p = session.query(Parent).filter(Parent.gid == p1.gid).all()
    if len(p) == 0:
        session.add(p1)
        session.commit()
    else:
        updateChildren = parent.children[:]
        # NOTE(review): ``chlidren`` looks like a typo for ``children``; as
        # written this sets an unrelated attribute and the children stay
        # attached to the new, unsaved ``parent`` object.
        parent.chlidren = []
        for c in updateChildren:
            c.parent_gid = parent.gid
        session.add_all(updateChildren)
        session.commit()
if __name__ == '__main__':
    # First update from the 'network'.
    p1 = Parent(name='team1', gid='t1')
    p1.children = [Child(loc='x', status='a'), Child(loc='y', status='b')]
    addChildren(p1)

    import time
    time.sleep(1)

    # Here comes another network update for the same gid.
    p1 = Parent(name='team1', gid='t1')
    p1.children = [Child(loc='z', status='a'), Child(loc='k', status='b')]
    # This fails.
    addChildren(p1)
I initially tried to do a merge, but that caused the old children to be disassociated with the parent (the foreign IDs were set to null). What is the best way to approach this with ORM? Thanks
EDIT
I guess it doesn't really make sense to create entirely new objects when updates come in over the network. I should just query the session for the appropriate parent, then create new children if necessary and merge? E.g.
def addChildren(pname, pid, cloc, cstat):
    """Add one child (``cloc``, ``cstat``) to the parent with gid ``pid``.

    A new ``Parent(pname, pid)`` is created when no row with that gid exists;
    otherwise the child is appended to the first matching parent.
    """
    p = session.query(Parent).filter(Parent.gid == pid).all()
    if len(p) == 0:
        p = Parent(pname, pid)
        p.children = [Child(loc=cloc, status=cstat)]
        session.add(p)
        session.commit()
    else:
        p = p[0]
        p.children.append(Child(loc=cloc, status=cstat))
        session.merge(p)
        session.commit()

You are right - you should not create the same parent twice. In terms of adding children, ... well, you really need only to add them and you do not care about the existing ones... So your edited code should do the job just fine. You can make it shorter and more readable though:
def addChildren(pname, pid, cloc, cstat):
    """Append a new child to the parent identified by ``pid``, creating the parent if needed."""
    # Primary-key lookup: returns either the Parent or None.
    p = session.query(Parent).get(pid)
    if p is None:
        p = Parent(pname, pid)
        session.add(p)
    p.children.append(Child(loc=cloc, status=cstat))
    session.commit()
The disadvantage of this way is that for existing Parent the whole collection of Children will be loaded into memory before a new Child is added and later saved to the database. If this is the case (many and increasing number of children for each parent), then the lazy='noload' might be useful:
# lazy='noload' on the backref: Parent.children is never loaded from the database.
parent = relationship("Parent", backref=backref('children', lazy='noload'))
This might dramatically improve the speed of inserts, but in this case the access to p.children will never load the existing objects from the database. In such scenarios it is enough to define another relationship. In these situations I prefer to use Building Query-Enabled Properties, so you end up with one property only for adding objects, and the other only for querying persisted results, which are often used by different parts of the system.

Related

SQLAlchemy Replicate Client Side Method on SQL-Side

Note: This is a simplified example of what I'm actually trying to do here.
I have the following Parent-Child relationship both driven off a declarative_base.
class Parent(declarative_base):
    """Parent row whose children are exposed as a dynamic (query-returning) relationship."""

    __tablename__ = 'parents'

    id = Column(Integer, primary_key=True)
    _children = relationship("Child", lazy='dynamic')

    def total_for_date(self, date):
        """Return the sum of ``num`` over this parent's children dated ``date`` (summed in Python)."""
        return sum(child.num for child in self._children.filter(Child.date == date))

    @classmethod
    def total_for_date_query(cls, date):
        # TODO: return a query that represents total_for_date on the SQL side.
        pass
class Child(declarative_base):
    """Child row carrying a number and a date, linked to one parent."""

    __tablename__ = 'children'

    id = Column(Integer, primary_key=True)
    num = Column(Integer)
    date = Column(Date)
    parent_id = Column(Integer, ForeignKey('parents.id'))
    _parent = relationship("Parent")
I'd like to calculate a total of a certain number associated with a child given a parent query. This can be performed via python as such
# Python-side total: every parent filters its own children, then the
# per-parent results are summed in the interpreter.
q = session.query(Parent).filter(Parent.id.in_([4, 5, 10]))  # ...and any further ids
total = sum(parent.total_for_date(datetime.date(2018, 1, 2)) for parent in q.all())
However, the computation here is done in python and given a large amount of data, won't perform as well compared to SQL.
I'm trying to figure out a way using hybrid expressions, selects, sqlalchemy queries etc. to have an equivalent method on the parent that returns a query/selectable/expression that will allow me to perform the computation on the SQL side, but maintain a similar interface compared to the other method.
In this example, I'd like to do the following instead.
# Desired SQL-side counterpart of the Python-side sum.
q = session.query(Parent).filter(Parent.id.in_([4,5,10]))
total = q.select_entity_from(Parent.total_for_date_query(datetime.date(2018, 1, 2))).scalar()
# NOTE: I am not sure whether "select_entity_from" is the right call here.
But I don't know how to fill out the SQL-side method equivalent total_for_date_query. I just can't seem to wrap my head around when to use a Query vs. Selectable, hybrid property expressions vs. hybrid method expressions etc.

Make parent object not appearing within session.dirty of before_flush event listener

I have been playing around with SQLAlchemy and found out that I cannot track reliably what is being changed within database.
I have created an example that explains what my concern is:
import re
import datetime
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import (
declarative_base,
declared_attr,
)
from sqlalchemy import (
create_engine,
event,
Column,
Boolean,
Integer,
String,
Unicode,
DateTime,
Index,
ForeignKey,
CheckConstraint,
)
from sqlalchemy.orm import (
scoped_session,
sessionmaker,
Session,
relationship,
backref,
)
import transaction
from zope.sqlalchemy import ZopeTransactionExtension
class ExtendedSession(Session):
    """Session subclass carrying one extra class-level attribute."""

    my_var = None
# Scoped session registry that produces ExtendedSession objects and is
# wired to the zope transaction machinery through ZopeTransactionExtension.
DBSession = scoped_session(
sessionmaker(extension=ZopeTransactionExtension(),
class_=ExtendedSession
)
)
class BaseModel(object):
    """Declarative mixin: integer primary key, ``query`` property and derived table name."""

    query = DBSession.query_property()

    id = Column(
        Integer,
        primary_key=True,
    )

    @declared_attr
    def __tablename__(cls):
        """Derive the table name from the class name (CamelCase -> snake_case)."""
        # Prefix each capital with "_", lowercase, then drop the leading "_".
        return re.sub(r"([A-Z])", r"_\1", cls.__name__).lower()[1:]
Base = declarative_base(cls=BaseModel)
def initialize_sql(engine):
    """Bind both the scoped session and the declarative metadata to ``engine``."""
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine
engine = create_engine("sqlite://")
initialize_sql(engine)
class Parent(Base):
    """Parent row with a single mandatory string column."""

    # *** Columns
    col1 = Column(
        String,
        nullable=False,
    )

    # *** Relationships

    # *** Methods
    def __repr__(self):
        return "<Parent(id: '{0}', col1: '{1}')>".format(
            self.id,
            self.col1,
        )
class Child(Base):
    """Child row that must reference a ``Parent`` (``parent_id`` is non-nullable)."""

    # *** Columns
    col1 = Column(
        String,
        nullable=False,
    )
    parent_id = Column(
        Integer,
        ForeignKey(
            Parent.id,
            ondelete="CASCADE",
        ),
        nullable=False,
    )

    # *** Relationships
    parent = relationship(
        Parent,
        backref=backref(
            "child_elements",
            uselist=True,
            cascade="save-update, delete",
            lazy="dynamic",
        ),
        # If the line below is uncommented, the Parent instance won't appear
        # in session.dirty. However, this relationship will then never be
        # loaded (even if needed).
        # lazy="noload",
    )

    # *** Methods
    def __repr__(self):
        return "<Child(id: '{0}', col1: '{1}', parent_id: '{2}')>".format(
            self.id,
            self.col1,
            self.parent_id,
        )
@event.listens_for(DBSession, 'before_flush')
def before_flush(session, flush_context, instances):
    """Print every object the session reports as new, dirty or deleted before a flush."""
    time_stamp = datetime.datetime.utcnow()
    if session.new:
        for elem in session.new:
            print(" ### NEW {0}".format(repr(elem)))
    if session.dirty:
        for elem in session.dirty:
            print(" ### DIRTY {0}".format(repr(elem)))
    if session.deleted:
        for elem in session.deleted:
            print(" ### DELETED {0}".format(repr(elem)))
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

with transaction.manager:
    parent = Parent(col1="parent")
    DBSession.add(parent)
    DBSession.flush()

    # The loop below demonstrates that each time a child object is created
    # and linked to the parent, the parent is also marked as modified.
    # How can that be avoided? Or, alternatively, is it possible to detect
    # this in the before_flush event without issuing an additional SQL query?
    for i in range(0, 10):
        parent = Parent.query.filter(Parent.col1 == "parent").first()
        child = Child(col1="{0}".format(i))
        child.parent = parent
        DBSession.add(child)
        DBSession.flush()

    # The update below does not cause the associated Parent instance to
    # appear in session.dirty.
    child = Child.query.filter(Child.col1 == "3").first()
    child.col1 = "updated"
    DBSession.add(child)
    DBSession.flush()
In short - there are two objects:
Parent
Child - linked to Parent
Each time I add new instance of Child and link it with instance of Parent that instance of Parent also appears within session.dirty of before_flush event.
The SQLAlchemy community advised that this behavior is expected (although I think there must be an option to change the default behavior - I could not find it in the documentation).
So here is my question: is it possible to configure relationship such way that when I add a new instance of Child and link it to instance of Parent then that instance of Parent won't appear within session.dirty?
I have tried setting relationship as lazy="noload" and it is not an option since I may need to use that relationship (so I may need to load it)
I would also accept a solution that would allow me to detect that Parent has not been changed within the before_flush event handler - however I do not want to trigger an additional query to achieve this.
I would appreciate your help,
Greg
After hours of research and a hint from the SQLAlchemy community I found a solution that seems to work the way I need (notice the additional condition within the session.dirty block).
@event.listens_for(DBSession, 'before_flush')
def before_flush(session, flush_context, instances):
    """Print new, dirty and deleted objects, skipping collection-only changes for dirty ones."""
    time_stamp = datetime.datetime.utcnow()
    if session.new:
        for elem in session.new:
            print(" ### NEW {0}".format(repr(elem)))
    if session.dirty:
        for elem in session.dirty:
            # The check below was added to solve the problem: objects whose
            # only change is in a collection are not reported as dirty.
            if session.is_modified(elem, include_collections=False):
                print(" ### DIRTY {0}".format(repr(elem)))
    if session.deleted:
        for elem in session.deleted:
            print(" ### DELETED {0}".format(repr(elem)))
The documentation related to my solution can be found here: http://docs.sqlalchemy.org/en/latest/orm/session_api.html#sqlalchemy.orm.session.Session.is_modified
In short - specifying include_collections=False within session.is_modified makes SQLAlchemy ignore situations where multivalued collections have been changed (in my case, if a child was changed then the parent would be filtered out by that additional check).

SQLAlchemy update parent when related child changes

I'm trying to model an entity that has one or more one-to-many relationships, such that its last_modified attribute is updated when
a child is added or removed
a child is modified
the entity itself is modified
I've put together the following minimal example:
class Config(Base):
    """Configuration entity owning a collection of ``ConfigParam`` rows."""

    __tablename__ = 'config'

    ID = Column('ID', Integer, primary_key=True)
    name = Column('name', String)
    # ``now`` is used both as the insert default and as the on-update value.
    last_modified = Column('last_modified', DateTime, default=now, onupdate=now)
    params = relationship('ConfigParam', backref='config')
class ConfigParam(Base):
    """Single key/value parameter that belongs to exactly one ``Config``."""

    __tablename__ = 'config_params'

    ID = Column('ID', Integer, primary_key=True)
    ConfigID = Column('ConfigID', Integer, ForeignKey('config.ID'), nullable=False)
    key = Column('key', String)
    value = Column('value', Float)
@event.listens_for(Config.params, 'append')
@event.listens_for(Config.params, 'remove')
def receive_append_or_remove(target, value, initiator):
    """Refresh the config's ``last_modified`` when a param is added or removed."""
    target.last_modified = now()
@event.listens_for(ConfigParam.key, 'set')
@event.listens_for(ConfigParam.value, 'set')
def receive_attr_change(target, value, oldvalue, initiator):
    """Refresh the owning config's ``last_modified`` when a param's key or value is set."""
    if target.config:
        # Don't act if the parent config isn't yet set,
        # i.e. during __init__.
        target.config.last_modified = now()
This seems to work, but I'm wondering if there's a better way to do this?
Specifically, this becomes very verbose since my actual ConfigParam implementation has more attributes and I'm having multiple one-to-many relations configured on the parent Config class.
Take this with a huge grain of salt, it "seems" to work, could explode:
def rel_listener(t, v, i):
    """Collection append/remove listener: stamp the target ``t`` as modified now."""
    t.last_modified = now()
def listener(t, v, o, i):
    """Attribute 'set' listener: stamp the owning config of ``t`` as modified, if it has one."""
    if t.config:
        t.config.last_modified = now()
from sqlalchemy import inspect
# Every relationship on Config bumps last_modified on append and remove.
for rel in inspect(Config).relationships:
    event.listen(rel, 'append', rel_listener)
    event.listen(rel, 'remove', rel_listener)
# Every column attribute on ConfigParam bumps the owning config when set.
for col in inspect(ConfigParam).column_attrs:
    event.listen(col, 'set', listener)
Problem is that the inspections make no exceptions and columns such as 'ID' and 'ConfigID' will be bound to event listeners.
Another perhaps slightly less tedious form would be to just use a list of attributes to bind events to in a similar fashion:
# Bind the 'set' listener only to the explicitly listed attributes.
for attr in ['key', 'value']:
    event.listen(getattr(ConfigParam, attr), 'set', listener)
This gives you control over what is bound to events and what is not.

SQLALchemy adjacency list get all parents

Here's an adjacency list example:
class TreeNode(Base):
    """Adjacency-list node: each row optionally points at its parent row in the same table."""

    __tablename__ = 'tree'

    id = Column(Integer, primary_key=True)
    parent_id = Column(Integer, ForeignKey(id))
    name = Column(String(50), nullable=False)

    # ``children`` is the one-to-many side; the backref adds ``parent``.
    children = relationship(
        "TreeNode",
        cascade="all",
        backref=backref("parent", remote_side=id),
    )
Supposing I've got a simple linear structure:
(0)---->(1)---->(2)---->(3)
How do I get all ancestor nodes of a certain node? Something like node2.parents.all() that returns a list of nodes 0 and 1.
I tried to do this:
# Attempted "all ancestors" relationship; as reported below it yields children instead.
parents = relationship("TreeNode", cascade="all", primaryjoin="TreeNode.parent_id==TreeNode.id")
with no luck - it returns children instead of parents.
Thanks.
You can not do it using simple relationship.
If you use MSSQL or PostgreSQL, try instead to create a (hybrid) attribute which would leverage Query.cte.
Thank you, I'll look it up - for now it seems a little bit obscure. If someone else stumbles on this, it's possible to use a rather more expensive approach which still does what I want:
@property
def parents(self):
    """Return all ancestors of this node, nearest first, by walking ``parent`` links."""
    allparents = []
    p = self.parent
    while p is not None:
        allparents.append(p)
        p = p.parent
    return allparents

Implement "related items" feature using SQLAlchemy

I need to implement a "related items" feature, i.e. to allow items from the same table to be arbitrarily linked to each other in a many-to-many fashion. Something similar to how news websites show related articles.
Also, I need the relationship to be bi-directional, something like this:
# Linking a -> b must make a visible from b's side as well.
a = Item()
b = Item()
a.related.append(b)
assert a in b.related # True
Now, on SQL level I imagine this could be solved by modifying the "standard" many-to-many relationship so 2 records are inserted into the association table each time an association is made, so (a -> b) and (b -> a) are two separate records.
Alternatively, the join condition for the many-to-many table could somehow check both sides of the association, so roughly instead of ... JOIN assoc ON a.id = assoc.left_id ... SQLAlchemy would produce something like ... JOIN assoc ON a.id = assoc.left_id OR a.id = assoc.right_id ...
Is there a way to configure this with SQLAlchemy so the relation works similar to a "normal" many-to-many relationship?
It's likely that I just don't know the correct terminology - everything I came up with - "self-referential", "bidirectional", "association" - is used to describe something else in SQLAlchemy.
Using Attribute Events should do the job. See the sample code below, where the slightly ugly piece of code is there solely to avoid endless recursion:
class Item(Base):
    """Item that can be linked to other items through the ``t_links`` association table."""

    __tablename__ = "item"

    id = Column(Integer, primary_key=True)
    name = Column(String(255), nullable=False)

    # relationships
    related = relationship(
        'Item',
        secondary=t_links,
        primaryjoin=(id == t_links.c.from_id),
        secondaryjoin=(id == t_links.c.to_id),
    )
_OTHER_SIDE = set()
from sqlalchemy import event
def Item_related_append_listener(target, value, initiator):
    """Mirror ``target.related.append(value)`` onto ``value.related``.

    Appending to ``value.related`` fires this listener again with the
    arguments swapped. The module-level ``_OTHER_SIDE`` set holds a
    ``(value, target)`` marker for that mirrored call so it is recognised
    and not mirrored back, which would recurse endlessly.
    """
    if (target, value) in _OTHER_SIDE:
        # This is the mirrored call triggered below: consume the marker.
        _OTHER_SIDE.remove((target, value))
        return
    if target not in value.related:
        # Register the marker only when the mirrored append really happens,
        # so no stale marker is left behind for already-linked items.
        _OTHER_SIDE.add((value, target))
        value.related.append(target)
event.listen(Item.related, 'append', Item_related_append_listener)
# ...
# With the listener registered, appending on one side links both sides.
a = Item()
b = Item()
a.related.append(b)
assert a in b.related # True
For completeness' sake, here's the code I ended up with; the listener method is slightly different to avoid using a global variable, and there's also a listener for the remove event.
import sqlalchemy as sa
# Association table: each row links one item (from_id) to another (to_id).
related_items = sa.Table(
"related_items",
Base.metadata,
sa.Column("id", sa.Integer, primary_key=True),
sa.Column("from_id", sa.ForeignKey("items.id")),
sa.Column("to_id", sa.ForeignKey("items.id")),
)
class Item(Base):
    """Item linked to other items through the ``related_items`` association table."""

    __tablename__ = 'items'

    ...  # remaining columns elided in the original snippet

    related = sa.orm.relationship(
        'Item',
        secondary=related_items,
        primaryjoin=(id == related_items.c.from_id),
        secondaryjoin=(id == related_items.c.to_id),
    )
def item_related_append_listener(target, value, initiator):
    """Mirror an append on ``target.related`` onto ``value.related``.

    Each target remembers, in a per-instance ``__related_to__`` set, the
    values it has been linked to. The mirrored append is skipped when
    ``value`` already recorded ``target``, which stops the two listeners
    from calling each other forever.
    """
    if not hasattr(target, "__related_to__"):
        target.__related_to__ = set()
    target.__related_to__.add(value)
    if target not in getattr(value, "__related_to__", set()):
        value.related.append(target)
sa.event.listen(Item.related, 'append', item_related_append_listener)
def item_related_remove_listener(target, value, initiator):
    """Mirror a removal: drop ``target`` from ``value.related`` if it is still there."""
    if target in value.related:
        value.related.remove(target)
sa.event.listen(Item.related, 'remove', item_related_remove_listener)

Categories