Fetching collection inside a subquery - python

Supposing we have two tables, linked by a many-to-many relationship.
class Student(db.Model):
    """A student, linked to courses through the ``student_courses`` table."""

    id = db.Column(UUIDType, primary_key=True)
    name = db.Column(db.String(255))
    # Many-to-many link to Course; the backref exposes ``Course.students``.
    courses = db.relationship(
        'Course',
        secondary=student_courses,
        backref=db.backref('students'),
    )
class Course(db.Model):
    """A course, identified by a ``UUIDType`` primary key and carrying a name."""

    id = db.Column(UUIDType, primary_key=True)
    name = db.Column(db.String(255))
I am trying to query the name of the students with the names of the courses s/he is subscribed to using a subquery, but it only shows the name of the first matching course (not all of them). In other words, I would like to retrieve (student_id, student_name, [list of course_names]).
# Subquery yielding one (student_id, course_id, course_name) row per
# student/course pair.
sq = (
    db.session.query(
        Student.id.label('student_id'),
        Course.id.label('course_id'),
        Course.name.label('course_name'),
    )
    .join(Student.courses)
    .group_by(Student.id, Course.id)
    .subquery('pattern_links_sq')
)

# Wrapped in parentheses so the chained calls can span several lines
# without relying on trailing backslashes.
(
    db.session.query(Student.id, Student.name, sq.c.course_name)
    .join(Student.courses)
    .filter(Student.id == sq.c.student_id)
    .all()
)

You can use the array_agg function in PostgreSQL:
from sqlalchemy import func

# One row per student; array_agg folds that student's course names into a
# single array column.
(
    db.session.query(Student.id, Student.name, func.array_agg(Course.name))
    .join(Student.courses)
    .group_by(Student.id)
    .all()
)

Related

Return the Table whose primary key is another table's foreign key, SQL Alchemy

I can return the name of the table that a foreign key references, but I want to do the opposite.
Suppose I have two tables, Users and Address, and Address has a foreign key to Users (one-to-many). I also made this bidirectional so that I can get the User from the address table.
class User(Base):
    """Row of the ``user`` table; the side that Address rows point back to."""

    __tablename__ = 'user'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    # Paired with Address.user through back_populates.
    address = relationship("Address", back_populates="user")
class Address(Base):
    """Row of the ``address`` table; holds the foreign key to ``user.id``."""

    __tablename__ = 'address'

    id = Column(Integer, primary_key=True, autoincrement=True)
    company_name = Column(String)
    user_id = Column(Integer, ForeignKey('user.id'))
    # Paired with User.address through back_populates.
    user = relationship("User", back_populates="address")
If I wanted to figure out the name of the parent table, in a way that works for any other table in a similar relationship, I can do this:
# Variant 1: read the constraints straight off the mapped class's Table.
for fk_constraint in Address.__table__.foreign_key_constraints:
    print(fk_constraint.referred_table)

# Variant 2: build the Table with autoload_with=engine and read them there.
tab = Table(
    Address.__tablename__,
    Base.metadata,
    autoload_with=engine,
    extend_existing=True,
)
for fk_constraint in tab.foreign_key_constraints:
    print(fk_constraint.referred_table)
# Both loops output "user", which is the __tablename__ of User.
This was me accessing the User table using the foreign key constraint attribute. But how do I do the opposite, using an attribute from User to access Address? I mainly want to know so I can handle relationships in a bulk core insert.
tab = Table(
    User.__tablename__,
    Base.metadata,
    autoload_with=engine,
    extend_existing=True,
)
records = df.to_dict(orient="records")
insert_stmt = sqlalchemy.dialects.sqlite.insert(tab).values(records)

# Names of the primary-key columns: they form the conflict target and are
# left out of the columns that get overwritten.
pks = [pk.name for pk in tab.primary_key]
update_columns = {
    col.name: col
    for col in insert_stmt.excluded
    if col.name not in pks
}
update_statement = insert_stmt.on_conflict_do_update(
    index_elements=pks,
    set_=update_columns,
)
The above works once I execute the on_conflict_do_update statement, but it does not generate any Address rows.
I found one way of doing it, but it's not as clean as I wanted.
tab = list(inspect(User).relationships)[0].entity.local_table
For any declarative table (classes), you can inspect them to get a Mapper Object. The Mapper Object has relationships as an attribute, which returns a collection of relationships.
inspect(User).relationships
#Get the relationship, note this will return the relationship attribute from the class
list(inspect(User).relationships)[0]
So I then get the mapper/class this relationship belongs to with .entity. Then from the declarative table, a table object is returned with the local_table attribute
entity = list(inspect(User).relationships)[0].entity
tab = entity.local_table
This statement should evaluate to True
tab == Address.__table__

SQLAlchemy filtering Children in one-to-many relationships

I have defined my models as:
class Row(Base):
    """A key/value entry with a status flag, attached to a ``table`` row."""

    __tablename__ = "row"

    id = Column(Integer, primary_key=True)
    key = Column(String(32))
    value = Column(String(32))
    status = Column(Boolean, default=True)
    # Foreign key to the owning Table row.
    parent_id = Column(Integer, ForeignKey("table.id"))
class Table(Base):
    """A uniquely named table that owns a collection of Row objects."""

    __tablename__ = "table"

    id = Column(Integer, primary_key=True)
    name = Column(String(32), nullable=False, unique=True)
    # Row objects reachable from this table; configured with the
    # "all, delete-orphan" cascade.
    rows = relationship("Row", cascade="all, delete-orphan")
to read a table from the db I can simply query Table and it loads all the rows owned by the table. But if I want to filter rows by 'status == True' it does not work. I know this is not a valid query but I want to do something like:
session.query(Table).filter(Table.name == name, Table.row.status == True).one()
As I was not able to make the above query work, I came up with a new solution to query table first without loading any rows, then use the Id to query Rows with filters and then assign the results to the Table object:
# Load the table without populating ``rows``; the filtered rows are attached
# explicitly below.
table_res = (
    session.query(Table)
    .options(noload(Table.rows))
    .filter(Table.name == 'test')
    .one()
)
# Filter on the id of the table just loaded instead of a hard-coded value,
# and execute the query so that a list (not a Query object) is assigned.
rows_res = (
    session.query(Row)
    .filter(Row.parent_id == table_res.id, Row.status == True)
    .all()
)
table_res.rows = rows_res
But I believe there has to be a better way to do this in one shot. Suggestions?
You could try this SQLAlchemy query:
from sqlalchemy.orm import contains_eager

# contains_eager tells the ORM to populate Table.rows from the Row columns
# selected by this same joined statement.
result = (
    session.query(Table)
    .options(contains_eager(Table.rows))
    .join(Row)
    .filter(Table.name == 'abc', Row.status == True)
    .one()
)
print(result)
print(result.rows)
Which leads to this SQL:
SELECT "row".id AS row_id,
"row"."key" AS row_key,
"row".value AS row_value,
"row".status AS row_status,
"row".parent_id AS row_parent_id,
"table".id AS table_id,
"table".name AS table_name
FROM "table" JOIN "row" ON "table".id = "row".parent_id
WHERE "table".name = ?
AND "row".status = 1
It does a join but also includes the contains_eager option to do it in one query. Otherwise the rows would be fetched on demand in a second query (you could specify this in the relationship as well, but this is one method of solving it).

Flask-SqlAlchemy Many-To-Many relationship with duplicates allowed

I have an Order - FoodItem Many-To-Many relationship that is as follows:
# Link table between orders and food items; neither column is declared as
# part of a primary key.
association_table = db.Table(
    'association',
    db.Model.metadata,
    db.Column('left_id', db.Integer, db.ForeignKey('orders.order_id')),
    db.Column('right_id', db.Integer, db.ForeignKey('fooditems.fooditem_id')),
)
class OrderModel(ReviewableModel):
    """An order: a ReviewableModel subtype stored in the ``orders`` table."""

    __tablename__ = 'orders'

    # Primary key that is also a foreign key to reviewables.id.
    order_id = db.Column(
        db.Integer,
        db.ForeignKey('reviewables.id'),
        primary_key=True,
    )
    food_items = db.relationship("FoodItemModel", secondary=association_table)

    __mapper_args__ = {'polymorphic_identity': 'orders'}
class FoodItemModel(ReviewableModel):
    """A food item: a ReviewableModel subtype stored in ``fooditems``."""

    __tablename__ = 'fooditems'

    # Primary key that is also a foreign key to reviewables.id.
    fooditem_id = db.Column(
        db.Integer,
        db.ForeignKey('reviewables.id'),
        primary_key=True,
    )

    __mapper_args__ = {'polymorphic_identity': 'fooditems'}
The user can request an order with duplicate foodItems. This is created properly, but when I save the changes to the database, the duplicates are removed. For e.g., I order 3 Pizzas:
def save_to_db(self):
    """Add this object to the session and commit it.

    Prints ``self.food_items`` before the add and again after the commit.
    """
    print('before: {}'.format(self.food_items))
    db.session.add(self)
    db.session.commit()
    print('after: {}'.format(self.food_items))
The output is like this:
before: [<FoodItemModel u'Pizza'>, <FoodItemModel u'Pizza'>, <FoodItemModel u'Pizza'>]
after: [<FoodItemModel u'Pizza'>]
The association table is updated properly:
"left_id" "right_id"
"6" "3"
"6" "3"
"6" "3"
However, the food_items in the OrderModel only contains 1 item
What Juan Mellado was getting at in his answer is that the relational data (RD) and object relational mapping (ORM) clash: the ORM cannot distinguish separate objects that have the same data. To solve this, simply add an id column as primary key to the association_table - that way the ORM has something to distinguish different records with the same left_id and right_id.
But that would be a workaround and not a solution.
The solution is in thinking about what it means when "The user can request an order with duplicate foodItems". The relation from the order to the food is not direct, it is indirect via an order-item. Each order-item belongs to an order (which in turn belongs to a customer or a dining-table) and each order-item can have a relation with a food item. By making each order-item unique, the problem of "duplicate food-items" disappears. At the same time, we can now have an infinite amount of variations of the food-item by adding an optional "customer request" to each order item. E.g. "food: fries, request: easy on the salt".
Below is a demonstration in code where customer "I scream" places 1 order with 3 portions of "ice cream", of which 1 portion is "with sprinkles on top".
from sqlalchemy import Column, Integer, String, ForeignKey, create_engine
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.pool import StaticPool
Base = declarative_base()
class Order(Base):
    """An order placed by a customer; its lines are the related OrderItems."""

    __tablename__ = 'order'

    id = Column(Integer, primary_key=True)
    customer = Column(String(127))
    items = relationship("OrderItem")

    def __repr__(self):
        template = "<Order(id='{}', customer='{}', items='{}')>"
        return template.format(self.id, self.customer, self.items)
class Food(Base):
    """A food that order items can refer to."""

    __tablename__ = 'food'

    id = Column(Integer, primary_key=True)
    name = Column(String(127))

    def __repr__(self):
        template = "<Food(id='{}', name='{}')>"
        return template.format(self.id, self.name)
class OrderItem(Base):
    """One line of an order: a reference to a food plus an optional comment.

    Each line has its own primary key, so the same food can appear several
    times in one order as distinct rows.
    """

    __tablename__ = 'order_item'

    id = Column(Integer, primary_key=True)
    order_id = Column(Integer, ForeignKey(Order.id))
    order = relationship(Order)
    food_id = Column(Integer, ForeignKey(Food.id))
    food = relationship(Food)
    comment = Column(String(127))

    def __repr__(self):
        # Every field is individually quoted, matching the Order and Food
        # representations.
        return (
            "<OrderItem(id='{}', order_id='{}', food_id='{}', comment='{}')>"
            .format(self.id, self.order_id, self.food_id, self.comment)
        )
def orderFood():
    """Run the demo against an in-memory SQLite database.

    Creates the tables, stores one food and one order, attaches three
    order items (one with a comment) to the order, and prints the order,
    its items, and the food rows joined through the order items.
    """
    engine = create_engine(
        'sqlite:///:memory:',
        echo=True,
        connect_args={'check_same_thread': False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        food = Food(name='ice cream')
        session.add(food)
        order = Order(customer='I scream')
        session.add(order)
        session.commit()
        print("Food: {}".format(food))
        print("Order: {}".format(order))

        order.items = [
            OrderItem(order=order, food=food),
            OrderItem(order=order, food=food),
            OrderItem(order=order, food=food, comment='with sprinkles on top'),
        ]
        session.merge(order)
        session.commit()
        print("Order: {}".format(order))

        print("Order.items")
        for item in order.items:
            print(item)

        print("OrderItems for order")
        orderFilter = OrderItem.order_id == order.id
        for order_item in session.query(OrderItem).filter(orderFilter).all():
            print(order_item)

        print("Food in order")
        for row in session.query(Food).join(OrderItem).filter(orderFilter).all():
            print(row)
    finally:
        # Close the session even if one of the steps above raises.
        session.close()


if __name__ == "__main__":
    orderFood()
You must declare a primary key for the association table.
Flask-SQLAlchemy is an ORM, and it needs a set of columns that uniquely identifies each row.
Take a look at this part of the documentation; it is a bit outdated, but still valid:
http://docs.sqlalchemy.org/en/rel_1_1/faq/ormconfiguration.html#faq-mapper-primary-key
Flask-SQLAlchemy is using all the fields (left_id, right_id) to identify the rows, and all the rows have the same values (6, 3). So all the rows are stored in the database (as there is no constraint declared on the table), but only one is retained in the context (memory).

SQLAlchemy ignore filter when related column does not exist

I am having trouble constructing a query using SQLalchemy. Here is a simplified representation of the models I have defined:
Models
Project
class Project(Base):
    """A uniquely named project that can be associated with many users."""

    __tablename__ = 'project'

    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False, unique=True)

    # Users reached through the user_project_association table.
    users = relationship('User', secondary='user_project_association')
User
class User(Base):
    """A uniquely named user that can be associated with many projects."""

    __tablename__ = 'user'

    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False, unique=True)

    # Projects reached through the user_project_association table.
    projects = relationship('Project', secondary='user_project_association')
User <-> Project (association)
class UserProjectAssociation(Base):
    """Association row linking one user to one project.

    The two foreign keys together form the composite primary key.
    """

    __tablename__ = 'user_project_association'

    # User side; the backref adds ``User.project_associations``.
    user_id = Column(Integer, ForeignKey('user.id'), primary_key=True)
    user = relationship('User', backref='project_associations')

    # Project side; the backref adds ``Project.user_associations``.
    project_id = Column(Integer, ForeignKey('project.id'), primary_key=True)
    project = relationship('Project', backref='user_associations')
Query
I want to perform a query on the projects table such that the result contains information about the projects as well as information about the associated users - if there are any. I am including a filter based on the user name. I am eventually going to send the result as JSON via a REST API so I would prefer the results as python {dict} objects rather than SQLAlchemy objects. The query I am performing looks like:
# Build the whole statement as one chain: returned columns, then the outer
# join to users, then the filters.
query = (
    session.query(
        Project.id,
        Project.name,
        User.id.label('users.id'),
        User.name.label('users.name'),
    )
    .outerjoin(User, Project.users)
    .filter(
        Project.name == 'proj1',
        User.name != 'jane.doe',  # <--- I think this is causing the issue.
    )
)

# Execute and turn each result row into a plain dict.
results = query.all()
data = [result._asdict() for result in results]
print(data)
Results
The database contains a project called proj1 which doesn't have any associated users. In this particular scenario, I am filtering on a user column and the user association does not exist. However, I am still expecting to get a row for the project in my results but the query returns an empty list. The result I am expecting would look something like this:
[{'id': 1, 'name': 'proj1', 'users.id': None, 'users.name': None}]
Can someone explain where I am going wrong?
You have to account for the NULL values that result from the left join, since != compares values and NULL is the absence of value, so the result of NULL != 'jane.doe' is NULL, not true:
from sqlalchemy import or_

# The outer join yields NULL user columns for projects without users, so
# the NULL case is accepted explicitly alongside the inequality test.
query = query.filter(
    Project.name == 'proj1',
    or_(User.name == None, User.name != 'jane.doe')
)
Note that SQLAlchemy handles equality with None in a special way and produces IS NULL. If you want to be less ambiguous you could also use User.name.is_(None).

SqlAlchemy Table and Query Issues

Still wrapping my head around SqlAlchemy and have run into a few issues. Not sure if it is because I am creating the relationships incorrectly, querying incorrectly, or both.
The general idea is...
one-to-many from location to user (a location can have many users but users can only have one location).
many-to-many between group and user (a user can be a member of many groups and a group can have many members).
Same as #2 above for desc and user.
My tables are created as follows:
Base = declarative_base()
class Location(Base):
    """A named location; users refer to it through ``User.location_id``."""

    __tablename__ = 'location'

    id = Column(Integer, primary_key=True)
    name = Column(String)
# Link table for the many-to-many relationship between groups and users.
group_user_association_table = Table(
    'group_user_association_table',
    Base.metadata,
    Column('group_id', Integer, ForeignKey('group.id')),
    Column('user_id', Integer, ForeignKey('user.id')),
)
class Group(Base):
    """A named group of users."""

    __tablename__ = 'group'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    # Many-to-many link to User; the backref adds ``User.group``.
    users = relationship(
        'User',
        secondary=group_user_association_table,
        backref='group',
    )
# Link table for the many-to-many relationship between descs and users.
desc_user_association_table = Table(
    'desc_user_association',
    Base.metadata,
    Column('desc_id', Integer, ForeignKey('desc.id')),
    Column('user_id', Integer, ForeignKey('user.id')),
)
class Desc(Base):
    """A named desc entry that users can be linked to."""

    __tablename__ = 'desc'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    # Many-to-many link to User; the backref adds ``User.desc``.
    users = relationship(
        'User',
        secondary=desc_user_association_table,
        backref='desc',
    )
class User(Base):
    """Row of the ``user`` table, with a foreign key to its location."""

    __tablename__ = 'user'

    id = Column(Integer, primary_key=True)
    user_name = Column(String)
    location_id = Column(Integer, ForeignKey('location.id'))
    # NOTE(review): these two columns are String while group.id and desc.id
    # are Integer - confirm this is intended.
    groups = Column(String, ForeignKey('group.id'))
    descs = Column(String, ForeignKey('desc.id'))
    # Many-to-one link to Location; the backref adds ``Location.user``.
    location = relationship('Location', backref='user')
Here are some examples as to how I am creating the data (all being scraped from the web):
# Excerpts from the scraping code. city, span2class, gid and group are
# produced by parts of the scraper that are not shown here.
location = Location(id=city[1], name=city[0]) #city = ('name', id)
profile = User()
# The id is the link's href with its first 7 characters dropped.
profile.id = int(str(span2class[0].a['href'][7:]))
profile.user_name = str(span2class[0].a.img['alt'])
profile.location_id = location.id
g = Group(id=gid, name=str(group.contents[0])) # add the group to the Group table
self.db_session.add(g)
# Now add the gid to a list that will be added to the profile that eventually gets added to the user table
# NOTE(review): User.groups is declared as Column(String, ForeignKey('group.id'))
# and nothing in this excerpt turns profile.groups into a list before
# .append() is called - confirm how it is initialised.
profile.groups.append(str(gid)) # stick the gid into the list
profile.groups = ','.join(profile.groups) # convert list to csv string
# Repeat basically same thing above for desc
self.db_session.add(profile)
self.db_session.commit()
As far as queries go, I've got some of the basic ones working such as:
# Print the id and user name of every user, separated by a single space.
# str.format with print(...) gives the same output on Python 2 and 3.
for instance in db_session.query(User).all():
    print("{} {}".format(instance.id, instance.user_name))
But when it comes to performing a join to get (for example) group.id and group.name for a specific user.id... nothing I've tried has worked. I am guessing that the form would be something like the following:
db_session.query(User, Group).join('users').filter(User.id==42)
but that didn't work.
Joins work from left to right, so you should join on the relationship from User to Group:
db_session.query(User, Group).join(User.group).filter(User.id == 42)
But this returns a list of tuples (<User>, <Group>), so if the user belongs to 2 or more groups, you will receive 2 or more rows.
If you really want to load both the user and its groups in one (SQL) query, a better way would be to load a user, but configure query to preload groups:
from sqlalchemy.orm import joinedload

# Load user 42 with the User.group collection eagerly joined in the same
# query.
u = (
    db_session.query(User)
    .options(joinedload(User.group))
    .get(42)
)
print("User = {}".format(u))
for g in u.group:
    print(" Group = {}".format(g))

Categories