SQLAlchemy: How to properly use Relationship()? - python

I went through the docs and think I've structured everything correctly, but I'm struggling to implement it.
There are two pieces to this: the use case is that when I look at a resume, each resume has multiple jobs. The sum of all of those jobs then determines the value of the entire resume.
I've set up two tables & corresponding classes.
from sqlalchemy import Column, Integer, String
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import relationship
from sqlalchemy import ForeignKey
# Declarative base that the mapped classes below inherit from.
Base = declarative_base()
# SQLite engine backed by the local file candidate.db; echo=True logs the emitted SQL.
engine = create_engine('sqlite:///candidate.db', echo=True)
class Candidate_Evaluation(Base):
    """Evaluation of one candidate's resume, stored in ``candidate_evaluation``."""

    __tablename__ = 'candidate_evaluation'

    id = Column(Integer, primary_key=True)
    label = Column(String)
    url = Column(String)
    string_of_job_evals = Column(String)
    # NOTE(review): back_populates names 'candidate_evalution', but
    # Job_Evaluation declares no attribute of that name. This mismatch is what
    # the InvalidRequestError quoted in the question reports; it is left as
    # posted because it is the subject of the question.
    job_evaluation_relationship = relationship("Job_Evaluation", back_populates="candidate_evalution")

    def __repr__(self):
        """Return a debug string with the URL, label and job-evaluation string."""
        # The string must be returned; a bare expression makes repr() fail
        # because __repr__ would return None.
        return "<Candidate(URL = '%s', label = '%s', job evaluations = '%s')>" % (
            self.url, self.label, self.string_of_job_evals)
class Job_Evaluation(Base):
    """Evaluation of a single job on a resume, stored in ``job_evaluation``."""

    __tablename__ = 'job_evaluation'

    id = Column(Integer, primary_key=True)
    # Many-to-one link to the owning candidate evaluation; nullable, so a job
    # row can be inserted before it is attached to a candidate.
    candidate_evaluation_id = Column(Integer, ForeignKey('candidate_evaluation.id')) #
    details = Column(String)
    label = Column(String)
    # NOTE(review): back_populates names 'job_evaluation', but
    # Candidate_Evaluation declares no attribute of that name. Left as posted
    # because the mismatched names are the subject of the question.
    candidate_evaluation_relationship = relationship("Candidate_Evaluation", back_populates='job_evaluation')

    def __repr__(self):
        """Return a debug string with the job's label and details."""
        # The string must be returned; a bare expression makes repr() fail
        # because __repr__ would return None.
        return "<Job(label = '%s', details = '%s')>" % (self.label, self.details)
# Create the tables for every class mapped on Base.
Base.metadata.create_all(engine)
# Session factory bound to the engine, and one working session.
Session = sessionmaker(bind=engine)
session = Session()
# Insert a single job evaluation that is not linked to any candidate evaluation.
job = Job_Evaluation(label = 'front_end', details = 'javascript jquery html css')
session.add(job)
session.commit()
However, I'm running into a problem when I try to add records to the job_evaluation table. I think it has to do with how I've set up the relationship between them.
The goal is that I can add job evaluation to the database and then link it to the candidate evaluation. It's a Many to One relationship, but the Many comes first. Is that possible?
I'm getting the error:
sqlalchemy.exc.InvalidRequestError: Mapper 'Mapper|Job_Evaluation|job_evaluation' has no property 'candidate_evalution'
What am I doing wrong?

I struggled with this in my early days of using sqlalchemy.
As stated in the docs for the back_populates parameter of relationship():
Takes a string name and has the same meaning as backref, except the complementing property is not created automatically, and instead must be configured explicitly on the other mapper. The complementing property should also indicate back_populates to this relationship to ensure proper functioning.
class Candidate_Evaluation(Base):
    """Evaluation of one candidate's resume, stored in ``candidate_evaluation``."""

    __tablename__ = 'candidate_evaluation'

    id = Column(Integer, primary_key=True)
    label = Column(String)
    url = Column(String)
    string_of_job_evals = Column(String)
    # One-to-many side. backref creates the complementing many-to-one
    # attribute Job_Evaluation.candidate_evaluation_relationship automatically,
    # so the relationship is declared on this class only.
    job_evaluation_relationship = relationship(
        "Job_Evaluation", backref="candidate_evaluation_relationship")

    def __repr__(self):
        """Return a debug string with the URL, label and job-evaluation string."""
        return "<Candidate(URL = '%s', label = '%s', job evaluations = '%s')>" % (
            self.url, self.label, self.string_of_job_evals)


class Job_Evaluation(Base):
    """Evaluation of a single job on a resume, stored in ``job_evaluation``."""

    __tablename__ = 'job_evaluation'

    id = Column(Integer, primary_key=True)
    # Nullable foreign key: a job can be stored first and linked to its
    # candidate evaluation later.
    candidate_evaluation_id = Column(Integer, ForeignKey('candidate_evaluation.id'))
    details = Column(String)
    label = Column(String)
    # candidate_evaluation_relationship is supplied by the backref declared on
    # Candidate_Evaluation.job_evaluation_relationship; declaring a second
    # backref here would create a separate, unpaired relationship.

    def __repr__(self):
        """Return a debug string with the job's label and details."""
        return "<Job(label = '%s', details = '%s')>" % (self.label, self.details)
Just replace back_populates with backref.

Related

Self referencing many-to-many relationship with extra column in association object

I am new in Sqlalchemy and trying to achieve the following goal with relationship():
There is an User table which stores user data.
Every user is able to invite other user with an invite_code.
Every user keeps a list of invitation, every invitation includes the invite_code and the invitee User
I think the relationship between User and Invitation is one-to-many. Since an Invitation contains a User, I think it is probably better to use a self-referential relationship to represent the inviter-to-invitations (invitees) relationship, and to use an association object to store the invite_code.
I checked the SQLAlchemy documentation and the related question, and tried to implement the classes like this:
from sqlalchemy import Column, Integer, ForeignKey, create_engine, String
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
# Declarative base that the mapped classes below inherit from.
Base = declarative_base()
class Invitation(Base):
    """Association row between an inviter and an invitee, carrying the invite code."""

    __tablename__ = 'invitation'

    invite_code = Column(Integer)
    # Composite primary key; both columns are foreign keys to user.id.
    inviter_id = Column(Integer, ForeignKey('user.id'), primary_key=True)
    invitee_id = Column(Integer, ForeignKey('user.id'), primary_key=True)
    # Two foreign keys point at the same table, which is what the
    # AmbiguousForeignKeysError quoted in the question is about.
    invitee = relationship('User') #Need HELP here
class User(Base):
    """A user who can invite other users and be invited by them."""

    __tablename__ = 'user'

    id = Column(Integer, primary_key=True)
    name = Column(String)

    # Self-referential many-to-many through the invitation table: the users
    # who invited this one. The backref exposes the other direction as
    # ``invitees``.
    inviters = relationship(
        'User',
        secondary='invitation',
        primaryjoin=id==Invitation.invitee_id,
        secondaryjoin=id==Invitation.inviter_id,
        backref='invitees',
    )
    invitations = relationship('Invitation')# Need HELP here

    def __repr__(self):
        """Return a short label containing the user's name."""
        return f'User: {self.name}'
if __name__ == '__main__':
    # In-memory SQLite database with a fresh schema.
    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    Session = sessionmaker(engine)
    db = Session()

    inviter1 = User(name='inviter1')
    inviter2 = User(name='inviter2')
    invitee1 = User(name='invitee1')
    invitee2 = User(name='invitee2')

    # Link users through the ``invitees`` backref of User.inviters.
    inviter1.invitees = [invitee1, invitee2]
    inviter2.invitees = [invitee1]

    db.add_all([inviter1, inviter2, invitee1, invitee2])
    db.commit()

    # Print both directions of the relationship for every stored user.
    for user in db.query(User).all():
        print(user)
        print(' Inviter: ', user.inviters)
        print(' Invitee: ', user.invitees)
        print()
If the lines with the comment #Need HELP here are deleted, I can get the corresponding inviters and invitees, but cannot get the invite_code. If the #Need HELP here lines are added, the error is:
Exception has occurred: AmbiguousForeignKeysError
Could not determine join condition between parent/child tables on relationship Invitation.invitee - there are multiple foreign key paths linking the tables. Specify the 'foreign_keys' argument, providing a list of those columns which should be counted as containing a foreign key reference to the parent table.
Is there a way to add extra data column in association object like association object for many-to-many relationship for self referential table?
Sorry for the too much text, I didn't find any reference document on the web.
Finally, I figured it out with the help of foreign_keys:
from sqlalchemy import Column, Integer, ForeignKey, create_engine, String
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
# Declarative base that the mapped classes below inherit from.
Base = declarative_base()
class User(Base):
    """A user who can send invitations and receive them."""

    __tablename__ = 'user'

    id = Column(Integer, primary_key=True)
    name = Column(String)

    # Invitation has two foreign keys to user.id, so each relationship names
    # the column it follows via foreign_keys.
    sent_invitations = relationship(
        'Invitation',
        foreign_keys='Invitation.inviter_id',
        back_populates='inviter',
        cascade='all, delete',
    )
    received_invitations = relationship(
        'Invitation',
        foreign_keys='Invitation.invitee_id',
        back_populates='invitee',
        cascade='all, delete',
    )

    def __repr__(self):
        """Return a short label containing the user's name."""
        return f'User: {self.name}'
class Invitation(Base):
    """One invitation: who invited whom, and with which invite code."""

    __tablename__ = 'invitation'

    id = Column(Integer, primary_key=True)
    invite_code = Column(Integer)
    inviter_id = Column(Integer, ForeignKey('user.id'))
    invitee_id = Column(Integer, ForeignKey('user.id'))

    # Each side is pinned to its own foreign key and paired with the matching
    # collection on User through back_populates.
    inviter = relationship(
        'User',
        foreign_keys=[inviter_id],
        back_populates='sent_invitations',
    )
    invitee = relationship(
        'User',
        foreign_keys=[invitee_id],
        back_populates='received_invitations',
    )

    def __repr__(self):
        """Return a sentence naming the inviter, the invitee and the code."""
        return f'Invitation: {self.inviter} invited {self.invitee} with {self.invite_code}'
if __name__ == '__main__':
    # In-memory SQLite database with a fresh schema.
    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    Session = sessionmaker(engine)
    db = Session()

    inviter1 = User(name='inviter1')
    inviter2 = User(name='inviter2')
    invitee1 = User(name='invitee1')
    invitee2 = User(name='invitee2')

    # The invitations are never added to the session directly; they are only
    # attached to the users that are added below.
    invitation1 = Invitation(invite_code=50, inviter=inviter1, invitee=invitee1)
    invitation2 = Invitation(invite_code=20, inviter=inviter2, invitee=invitee2)
    invitation3 = Invitation(invite_code=22, inviter=inviter1, invitee=inviter2)
    invitation4 = Invitation(invite_code=44, inviter=invitee1, invitee=inviter2)

    db.add_all([inviter1, inviter2, invitee1, invitee2])
    db.commit()

    # Show what every user sent and received.
    for user in db.query(User).all():
        print(user)
        print(' sent_invitation: ', user.sent_invitations)
        print(' received_invitation: ', user.received_invitations)
        print()

    # Show every stored invitation.
    for invitation in db.query(Invitation).all():
        print(invitation)

    # Delete two users and commit.
    db.delete(inviter1)
    db.delete(invitee2)
    db.commit()

SQLAlchemy 1.4 warnings on overlapping relationships with a many-to-many relationship with association table

I have a model in SQLAlchemy which defines a many-to-many relationship using an association table (automap is being used here because I'm using an existing database):
from sqlalchemy import (Column, Table, MetaData, Integer, Text, LargeBinary,
ForeignKey, Float, Boolean, Index)
from sqlalchemy.ext.automap import automap_base, AutomapBase
from sqlalchemy.orm import Session, deferred, relationship
# Automap base; the classes below are declared explicitly on top of it.
Base: AutomapBase = automap_base()
class VariantAssociation(Base):
    """Association object between a variant and a sample, with per-pair data."""

    __tablename__ = "sample_variant_association"

    # Composite primary key made of the two foreign keys.
    vid = Column(Integer, ForeignKey("variants.variant_id"), primary_key=True)
    sid = Column(Integer, ForeignKey("samples.sample_id"), primary_key=True)

    vdepth = Column(Integer)
    valt_depth = Column(Integer)
    gt = Column(Text)
    gt_type = Column(Integer)
    fraction = Column(Float)

    # Paired with Variant.samples and Sample.variants respectively.
    variant = relationship("Variant", back_populates="samples")
    sample = relationship("Sample", back_populates="variants")

    __table_args__ = (
        Index('ix_sample_variant_association_valt_depth', "valt_depth"),
        Index('ix_sample_variant_association_vdepth', "vdepth"),
        Index('ix_sample_variant_association_vid', 'vid'),
        Index('ix_sample_variant_association_sid', 'sid'),
        Index('ix_sample_variant_association_fraction', 'fraction'),
    )
class Variant(Base):
    """A variant row from the ``variants`` table."""

    __tablename__ = "variants"

    variant_id = Column(Integer, primary_key=True)
    # Binary payload wrapped in deferred(), so it is not loaded by default.
    info = deferred(Column(LargeBinary))
    # Association rows for this variant; paired with VariantAssociation.variant.
    samples = relationship("VariantAssociation", back_populates="variant")
class Sample(Base):
    """A sample row from the ``samples`` table."""

    __tablename__ = "samples"

    sample_id = Column(Integer, primary_key=True, index=True)
    name = Column(Text, index=True)
    # Association rows for this sample; paired with VariantAssociation.sample.
    variants = relationship("VariantAssociation", back_populates="sample")
class SampleGenotypeCount(Base):
    """Genotype counters per sample, from ``sample_genotype_counts``."""

    __tablename__ = 'sample_genotype_counts'

    # Primary key only; no ForeignKey to samples is declared here.
    sample_id = Column(Integer, primary_key=True)
    num_hom_ref = Column(Integer)
    num_het = Column(Integer)
    num_hom_alt = Column(Integer)
    num_unknown = Column(Integer)
class DataMigration(Base):
    """Row of the ``datamigration`` table; its only column is the boolean key."""

    __tablename__ = "datamigration"

    done = Column(Boolean, primary_key=True)
On querying, this eventually generates this warning:
Query:
# Join Variant to its association rows through Variant.samples, then on to Sample.
query = session.query(Variant).join(
Variant.samples).join(Sample)
Warning:
/usr/local/lib/python3.9/site-packages/sqlalchemy/orm/relationships.py:3441: SAWarning:
relationship 'Variant.variantassociation_collection' will copy column variants.variant_id to
column sample_variant_association.vid, which conflicts with relationship(s): 'Variant.samples'
(copies variants.variant_id to sample_variant_association.vid). If this is not the intention,
consider if these relationships should be linked with back_populates, or if viewonly=True
should be applied to one or more if they are read-only. For the less common case that foreign
key constraints are partially overlapping, the orm.foreign() annotation can be used to isolate
the columns that should be written towards. The 'overlaps' parameter may be used to remove
this warning. (Background on this error at: http://sqlalche.me/e/14/qzyx)
I've been looking through SO and the SQLAlchemy documentation but I was unable to find what could cause this issue since (in my view) the back_populates parameters are in the right places.
Where would the error in the model be? SQLAlchemy 1.3.23 did not generate one, FTR.
In order to set your own relationship names, you need to prevent Automap from generating relationships by itself. You can achieve this by setting 'generate_relationship' to a function that returns None.
def generate_relationships(base, direction, return_fn, attrname, local_cls, referred_cls, **kw):
    """Automap ``generate_relationship`` hook that generates nothing.

    All arguments are accepted only to match the signature automap calls the
    hook with; none of them is inspected.

    Returns:
        Always ``None``, which per the accompanying explanation stops automap
        from creating its own relationship, so only the explicitly declared
        ``relationship()`` attributes remain.
    """
    return None
# Hand the hook to automap when preparing the mapped classes.
Base.prepare(generate_relationship=generate_relationships)

Sqlalchemy association proxy and no_autoflush

I'm trying to figure out why I need to use a no_autoflush block when inserting data into an association proxy if the association proxy data has been accessed first. An example of this is below (using MySQL):
from sqlalchemy import create_engine, Integer, Column, String, ForeignKey, UniqueConstraint
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship, scoped_session
# Declarative base that the mapped classes below inherit from.
Base = declarative_base()
# Database URL layout is dialect://user:password@host/database. The '@'
# separates the credentials from the host. The five values are elided in the
# question; '...' is a placeholder to be replaced with real arguments.
engine = create_engine('{}://{}:{}@{}/{}'.format(...))
session_factory = sessionmaker(bind=engine)
# scoped_session wraps the factory; calling Session() below yields the session.
Session = scoped_session(session_factory)
class DomainModel(Base):
    """A domain, linked to servers through DomainServerModel rows."""

    __tablename__ = 'domains'

    id = Column(Integer, primary_key=True)
    name = Column(String(255), nullable=False, unique=True)

    # Association rows for this domain; paired with DomainServerModel.domain.
    domains_to_servers = relationship("DomainServerModel", back_populates="domain")
    # Proxy over the ``server`` attribute of the association rows. Appending a
    # server builds a new DomainServerModel through the creator.
    servers = association_proxy(
        'domains_to_servers',
        'server',
        creator=lambda s: DomainServerModel(server=s),
    )
class ServerModel(Base):
    """A server, linked to domains through DomainServerModel rows."""

    __tablename__ = 'servers'

    id = Column(Integer, primary_key=True)
    name = Column(String(128), nullable=False, unique=True, index=True)

    # Association rows for this server; paired with DomainServerModel.server.
    domains_to_servers = relationship("DomainServerModel", back_populates="server")
    # Proxy over the ``domain`` attribute of the association rows. Appending a
    # domain builds a new DomainServerModel through the creator.
    domains = association_proxy(
        'domains_to_servers',
        'domain',
        creator=lambda d: DomainServerModel(domain=d),
    )
class DomainServerModel(Base):
    """Association object mapping one domain to one server."""

    __tablename__ = 'domains_to_servers'

    id = Column(Integer, primary_key=True)
    # Both foreign keys are NOT NULL, so a row needs its domain and its server
    # before it can be inserted.
    domain_id = Column(Integer, ForeignKey('domains.id'), nullable=False)
    server_id = Column(Integer, ForeignKey('servers.id'), nullable=False)

    server = relationship('ServerModel', back_populates="domains_to_servers")
    domain = relationship('DomainModel', back_populates="domains_to_servers")
def test():
    """Attach a new domain to the 'test.com' server through the association proxy."""
    session = Session()
    # NOTE(review): the extent of the with-block was lost with the original
    # indentation. The question says the append fails without no_autoflush, so
    # the append is assumed to be inside it.
    with session.no_autoflush:
        server = session.query(ServerModel).filter_by(name='test.com').one()
        # Reading the proxy before the insert is what triggers the behaviour
        # asked about.
        print(server.domains)

        domain = DomainModel(name='test1.com')
        session.add(domain)
        session.commit()

        server.domains.append(domain)
        session.commit()
if __name__ == '__main__':
    # Rebuild the schema from scratch so every run starts empty.
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)

    # Seed the server row that test() looks up by name.
    session = Session()
    session.add(ServerModel(name='test.com'))
    session.commit()

    test()
I'm trying to add a new domain_to_server mapping via the server/domain association proxy. If I don't access the association proxy first, ie remove the print statement in test(), then I can add the domain without needing the session.no_autoflush block. But with the print statement in there, it will fail without the session.no_autoflush block with an IntegrityError, saying that server_id cannot be null in the domains to servers table.
I'm trying to figure out why the no_autoflush block is needed here. I don't see any mention of it in the association_proxy docs. Is this simply the way it is, and should all inserts into an association_proxy happen in a no_autoflush block in case it has been accessed prior to the insert?

SQLAlchemy: order by a relationship field in a relationship

In a Pyramid application I'm working on, I have the following scenario:
class Widget(Base):
    """A dashboard widget with a name and a sidebar number."""

    __tablename__ = 'widgets'

    id = Column(Integer, primary_key=True)
    name = Column(String(50))
    sidebar = Column(mysql.TINYINT(2))

    def __init__(self, name, sidebar):
        """Store the widget's name and sidebar value."""
        self.name = name
        self.sidebar = sidebar
class Dashboard(Base):
    """Placement of one widget on one user's dashboard."""

    __tablename__ = 'dashboard'

    # Composite primary key: one row per (user, widget) pair.
    user_id = Column(Integer, ForeignKey('users.id'), primary_key=True)
    widget_id = Column(Integer, ForeignKey('widgets.id'), primary_key=True)
    delta = Column(mysql.TINYINT)

    widget = relationship('Widget')

    def __init__(self, user_id, widget_id, delta):
        """Store the user id, widget id and delta for this placement."""
        self.user_id = user_id
        self.widget_id = widget_id
        self.delta = delta
class User(Base):
    """An application user together with their ordered dashboard rows."""

    __tablename__ = 'users'

    id = Column(Integer, primary_key=True)
    login = Column(Unicode(255), unique=True)
    password = Column(Unicode(60))
    fullname = Column(Unicode(100))

    # 'Dashboard.widget' is a relationship, not a column; this order_by is the
    # expression that raises the InvalidRequestError quoted in the question.
    dashboard = relationship(
        'Dashboard',
        order_by='Dashboard.widget.sidebar, Dashboard.delta',
    )

    def __init__(self, login, password, fullname):
        """Store the login and full name; the password goes through crypt.encode."""
        self.login = login
        self.password = crypt.encode(password)
        self.fullname = fullname
So, I want the User 'dashboard' relationship to have the dashboard records for the user but ordered by 'sidebar' (which is a relationship property of Dashboard). Currently I am getting this error:
sqlalchemy.exc.InvalidRequestError: Property 'widget' is not an instance of ColumnProperty (i.e. does not correspond directly to a Column).
Is this ordering possible in a relationship declaration?
Thanks!
With this, try to think what SQL SQLAlchemy should emit when it tries to load User.dashboard. Like SELECT * FROM dashboard JOIN widget ... ORDER BY widget.sidebar ? Or SELECT * FROM dashboard ORDER BY (SELECT sidebar FROM widget... ? ordering the results by a different table is too open-ended of a job for relationship() to decide on its own. The way this can be done is by providing a column expression in terms of Dashboard that can provide this ordering, when the ORM emits a simple SELECT against dashboard's table, as well as when it refers to it in a not-so-simple SELECT where it might be joining across User, Dashboard tables at once (e.g. eager loading).
We provide custom SQL expressions, particularly those that involve other tables, using column_property(), or alternatively with deferred() when we don't want that expression to be loaded by default (as is likely the case here). Example:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
# Declarative base that the mapped classes below inherit from.
Base = declarative_base()
class Widget(Base):
    """A dashboard widget with a name and a sidebar number."""

    __tablename__ = 'widgets'

    id = Column(Integer, primary_key=True)
    name = Column(String(50))
    sidebar = Column(Integer)
class Dashboard(Base):
    """Placement of one widget on one user's dashboard."""

    __tablename__ = 'dashboard'

    user_id = Column(Integer, ForeignKey('users.id'), primary_key=True)
    widget_id = Column(Integer, ForeignKey('widgets.id'), primary_key=True)
    delta = Column(Integer)

    widget = relationship('Widget')
    # SELECT of the related widget's sidebar, correlated on widget_id. Wrapped
    # in deferred() so it is not loaded by default; it exists so that
    # User.dashboard can order by it.
    widget_sidebar = deferred(
        select([Widget.sidebar]).where(Widget.id == widget_id)
    )
class User(Base):
    """An application user together with their ordered dashboard rows."""

    __tablename__ = 'users'

    id = Column(Integer, primary_key=True)
    login = Column(Unicode(255), unique=True)

    # Order by the widget's sidebar (through Dashboard.widget_sidebar), then
    # by delta.
    dashboard = relationship(
        'Dashboard',
        order_by='Dashboard.widget_sidebar, Dashboard.delta',
    )
# In-memory SQLite engine; echo=True prints the emitted SQL.
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

s = Session(e)

# One user whose dashboard holds two widgets on different sidebars.
w1, w2 = Widget(name='w1', sidebar=1), Widget(name='w2', sidebar=2)
s.add_all([
    User(login='u1', dashboard=[
        Dashboard(delta=1, widget=w1),
        Dashboard(delta=2, widget=w2),
    ]),
])
s.commit()

# Loading .dashboard emits the ordered SELECT. The call form of print works on
# Python 2 and Python 3; the statement form is a syntax error on Python 3.
print(s.query(User).first().dashboard)
the final SQL emitted by the load of ".dashboard" is:
SELECT dashboard.user_id AS dashboard_user_id, dashboard.widget_id AS dashboard_widget_id, dashboard.delta AS dashboard_delta
FROM dashboard
WHERE ? = dashboard.user_id ORDER BY (SELECT widgets.sidebar
FROM widgets
WHERE widgets.id = dashboard.widget_id), dashboard.delta
Keep in mind that MySQL does a terrible job optimizing for subqueries like the one above. If you need high performance here, you might consider copying the value of "sidebar" into "dashboard", even though that makes consistency more difficult to maintain.

SQLAlchemy declarative property from join (single attribute, not whole object)

I wish to create a mapped attribute of an object which is populated from another table.
Using the SQLAlchemy documentation example, I wish to make a user_name field exist on the Address class such that it can be both easily queried and easily accessed (without a second round trip to the database)
For example, I wish to be able to query and filter by user_name Address.query.filter(Address.user_name == 'wcdolphin').first()
And also access the user_name attribute of all Address objects, without performance penalty, and have it properly persist writes as would be expected of an attribute in the __tablename__
class User(Base):
    """A user that owns any number of addresses."""

    __tablename__ = 'users'

    id = Column(Integer, primary_key=True)
    name = Column(String(50))
    # One-to-many to Address; the backref adds ``Address.user``.
    addresses = relation("Address", backref="user")
class Address(Base):
    """An e-mail address; the question wants the owner's name exposed on it."""

    __tablename__ = 'addresses'

    id = Column(Integer, primary_key=True)
    email = Column(String(50))
    user_name = Column(Integer, ForeignKey('users.name'))#This line is wrong
How do I do this?
I found the documentation relatively difficult to understand, as it did not seem to conform to most examples, especially the Flask-SQLAlchemy examples.
You can do this with a join on the query object, no need to specify this attribute directly. So your model would look like:
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey
from sqlalchemy.orm import sessionmaker, relation
from sqlalchemy.ext.declarative import declarative_base
# Declarative base that the mapped classes below inherit from.
Base = declarative_base()
# SQLite engine with no database file given in the URL.
engine = create_engine('sqlite:///')
# Session factory bound to that engine.
Session = sessionmaker(bind=engine)
class User(Base):
    """A user that owns any number of addresses."""

    __tablename__ = 'users'

    id = Column(Integer, primary_key=True)
    name = Column(String(50))
    # One-to-many to Address; the backref adds ``Address.user``.
    addresses = relation("Address", backref="user")
class Address(Base):
    """An e-mail address belonging to one user."""

    __tablename__ = 'addresses'

    id = Column(Integer, primary_key=True)
    email = Column(String(50))
    # Foreign key to the owning user's primary key.
    user_id = Column(Integer, ForeignKey("users.id"))
# Create the tables for every class mapped on Base.
Base.metadata.create_all(engine)
A query for addresses, filtered by the user name, looks like:
>>> session = Session()
>>> session.add(Address(user=User(name='test')))
>>> session.query(Address).join(User).filter(User.name == 'test').first()
<__main__.Address object at 0x02DB3730>
Edit: As you can directly access the user from an address object, there is no need for directly referencing an attribute to the Address class:
>>> a = session.query(Address).join(User).filter(User.name == 'test').first()
>>> a.user.name
'test'
If you truly want Address to have a SQL enabled version of "User.name" without the need to join explicitly, you need to use a correlated subquery. This will work in all cases but tends to be inefficient on the database side (particularly with MySQL), so there is possibly a performance penalty on the SQL side versus using a regular JOIN. Running some EXPLAIN tests may help to analyze how much of an effect there may be.
Another example of a correlated column_property() is at http://docs.sqlalchemy.org/en/latest/orm/mapped_sql_expr.html#using-column-property.
For the "set" event, a correlated subquery represents a read-only attribute, but an event can be used to intercept changes and apply them to the parent User row. Two approaches to this are presented below, one using regular identity map mechanics, which will incur a load of the User row if not already present, the other which emits a direct UPDATE to the row:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
# Declarative base that the mapped classes below inherit from.
Base= declarative_base()
class User(Base):
    """A user that owns any number of addresses."""

    __tablename__ = 'users'

    id = Column(Integer, primary_key=True)
    name = Column(String(50))
    # One-to-many to Address; the backref adds ``Address.user``.
    addresses = relation("Address", backref="user")
class Address(Base):
    """An e-mail address belonging to one user."""

    __tablename__ = 'addresses'

    id = Column(Integer, primary_key=True)
    # Foreign key to the owning user's primary key.
    user_id = Column(Integer, ForeignKey('users.id'))
    email = Column(String(50))
# Read-only SQL expression: the name of the user that owns this address.
# It must correlate on the foreign key Address.user_id. Comparing User.id with
# Address.id only matches when the two primary keys happen to coincide.
Address.user_name = column_property(
    select([User.name]).where(User.id == Address.user_id)
)
from sqlalchemy import event
# The decorator registers the function as the "set" listener. Left as a
# comment, the function would never be called.
@event.listens_for(Address.user_name, "set")
def _set_address_user_name(target, value, oldvalue, initiator):
    """Forward an assignment to ``Address.user_name`` to the parent user's name.

    Args:
        target: the Address instance being assigned to.
        value: the new user name.
        oldvalue: the previous value (unused).
        initiator: the event token supplied by SQLAlchemy (unused).
    """
    # use ORM identity map + flush
    target.user.name = value

    # use direct UPDATE
    #object_session(target).query(User).with_parent(target).update({'name':value})
# In-memory SQLite engine; echo=True prints the emitted SQL.
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
s = Session(e)
# One user named 'u1' with two addresses.
s.add_all([
User(name='u1', addresses=[Address(email='e1'), Address(email='e2')])
])
s.commit()
# Filter on the mapped user_name expression and read it back.
a1 = s.query(Address).filter(Address.user_name=="u1").first()
assert a1.user_name == "u1"
# Assign a new name through Address.user_name and commit.
a1.user_name = 'u2'
s.commit()
# Query again by the new name to check that the change was persisted.
a1 = s.query(Address).filter(Address.user_name=="u2").first()
assert a1.user_name == "u2"

Categories