Scrapy - SQLalchemy Foreign Key not created in SQLite - python

I tried to run Scrapy using itemLoader to collect all the data and put them into SQLite 3. I am success in gathering all the info I wanted but I cannot get the foreign keys to be generated in my ThreadInfo and PostInfo tables using back_populates with foreign key. I did try with back_ref but it also did not work.
All the other info was inserted to SQLite database after my Scrapy finished.
My goal is to have four tables, boardInfo, threadInfo, postInfo, and authorInfo linked to each others.
boardInfo will have one-to-many relationship with threadInfo
threadInfo will have one-to-many relationship with postInfo
authorInfo will have one-to-many relationship with threadInfo and
postInfo.
I used DB Browser for SQLite and found that the values of my foreign keys are Null.
I tried query for the value (threadInfo.boardInfos_id), and it displayed None. I try to fix this for many days and read through the document but cannot solve the issue.
How can I have the foriegn keys generated in my threadInfo and postInfo tables?
Thank you for all guidances and comments.
Here is my models.py
from sqlalchemy import create_engine, Column, Table, ForeignKey, MetaData
from sqlalchemy import Integer, String, Date, DateTime, Float, Boolean, Text
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
from scrapy.utils.project import get_project_settings
Base = declarative_base()
def db_connect():
'''
Performs database connection using database settings from settings.py.
Returns sqlalchemy engine instance
'''
return create_engine(get_project_settings().get('CONNECTION_STRING'))
def create_table(engine):
Base.metadata.create_all(engine)
class BoardInfo(Base):
__tablename__ = 'boardInfos'
id = Column(Integer, primary_key=True)
boardName = Column('boardName', String(100))
threadInfosLink = relationship('ThreadInfo', back_populates='boardInfosLink') # One-to-Many with threadInfo
class ThreadInfo(Base):
__tablename__ = 'threadInfos'
id = Column(Integer, primary_key=True)
threadTitle = Column('threadTitle', String())
threadLink = Column('threadLink', String())
threadAuthor = Column('threadAuthor', String())
threadPost = Column('threadPost', Text())
replyCount = Column('replyCount', Integer)
readCount = Column('readCount', Integer)
boardInfos_id = Column(Integer, ForeignKey('boardInfos.id')) # Many-to-One with boardInfo
boardInfosLink = relationship('BoardInfo', back_populates='threadInfosLink') # Many-to-One with boardInfo
postInfosLink = relationship('PostInfo', back_populates='threadInfosLink') # One-to-Many with postInfo
authorInfos_id = Column(Integer, ForeignKey('authorInfos.id')) # Many-to-One with authorInfo
authorInfosLink = relationship('AuthorInfo', back_populates='threadInfosLink') # Many-to-One with authorInfo
class PostInfo(Base):
__tablename__ = 'postInfos'
id = Column(Integer, primary_key=True)
postOrder = Column('postOrder', Integer, nullable=True)
postAuthor = Column('postAuthor', Text(), nullable=True)
postContent = Column('postContent', Text(), nullable=True)
postTimestamp = Column('postTimestamp', Text(), nullable=True)
threadInfos_id = Column(Integer, ForeignKey('threadInfos.id')) # Many-to-One with threadInfo
threadInfosLink = relationship('ThreadInfo', back_populates='postInfosLink') # Many-to-One with threadInfo
authorInfos_id = Column(Integer, ForeignKey('authorInfos.id')) # Many-to-One with authorInfo
authorInfosLink = relationship('AuthorInfo', back_populates='postInfosLink') # Many-to-One with authorInfo
class AuthorInfo(Base):
__tablename__ = 'authorInfos'
id = Column(Integer, primary_key=True)
threadAuthor = Column('threadAuthor', String())
postInfosLink = relationship('PostInfo', back_populates='authorInfosLink') # One-to-Many with postInfo
threadInfosLink = relationship('ThreadInfo', back_populates='authorInfosLink') # One-to-Many with threadInfo
Here is my pipelines.py
from sqlalchemy import exists, event
from sqlalchemy.orm import sessionmaker
from scrapy.exceptions import DropItem
from .models import db_connect, create_table, BoardInfo, ThreadInfo, PostInfo, AuthorInfo
from sqlalchemy.engine import Engine
from sqlite3 import Connection as SQLite3Connection
import logging
#event.listens_for(Engine, "connect")
def _set_sqlite_pragma(dbapi_connection, connection_record):
if isinstance(dbapi_connection, SQLite3Connection):
cursor = dbapi_connection.cursor()
cursor.execute("PRAGMA foreign_keys=ON;")
# print("####### PRAGMA prog is running!! ######")
cursor.close()
class DuplicatesPipeline(object):
def __init__(self):
'''
Initializes database connection and sessionmaker.
Creates tables.
'''
engine = db_connect()
create_table(engine)
self.Session = sessionmaker(bind=engine)
logging.info('****DuplicatesPipeline: database connected****')
def process_item(self, item, spider):
session = self.Session()
exist_threadLink = session.query(exists().where(ThreadInfo.threadLink == item['threadLink'])).scalar()
exist_thread_replyCount = session.query(ThreadInfo.replyCount).filter_by(threadLink = item['threadLink']).scalar()
if exist_threadLink is True: # threadLink is in DB
if exist_thread_replyCount < item['replyCount']: # check if replyCount is more?
return item
session.close()
else:
raise DropItem('Duplicated item found and replyCount is not changed')
session.close()
else: # New threadLink to be added to BoardPipeline
return item
session.close()
class BoardPipeline(object):
def __init__(self):
'''
Initializes database connection and sessionmaker
Creates tables
'''
engine = db_connect()
create_table(engine)
self.Session = sessionmaker(bind=engine)
def process_item(self, item, spider):
'''
Save scraped info in the database
This method is called for every item pipeline component
'''
session = self.Session()
# Input info to boardInfos
boardInfo = BoardInfo()
boardInfo.boardName = item['boardName']
# Input info to threadInfos
threadInfo = ThreadInfo()
threadInfo.threadTitle = item['threadTitle']
threadInfo.threadLink = item['threadLink']
threadInfo.threadAuthor = item['threadAuthor']
threadInfo.threadPost = item['threadPost']
threadInfo.replyCount = item['replyCount']
threadInfo.readCount = item['readCount']
# Input info to postInfos
# Due to info is in list, so we have to loop and add it.
for num in range(len(item['postOrder'])):
postInfoNum = 'postInfo' + str(num)
postInfoNum = PostInfo()
postInfoNum.postOrder = item['postOrder'][num]
postInfoNum.postAuthor = item['postAuthor'][num]
postInfoNum.postContent = item['postContent'][num]
postInfoNum.postTimestamp = item['postTimestamp'][num]
session.add(postInfoNum)
# Input info to authorInfo
authorInfo = AuthorInfo()
authorInfo.threadAuthor = item['threadAuthor']
# check whether the boardName exists
exist_boardName = session.query(exists().where(BoardInfo.boardName == item['boardName'])).scalar()
if exist_boardName is False: # the current boardName does not exists
session.add(boardInfo)
# check whether the threadAuthor exists
exist_threadAuthor = session.query(exists().where(AuthorInfo.threadAuthor == item['threadAuthor'])).scalar()
if exist_threadAuthor is False: # the current threadAuthor does not exists
session.add(authorInfo)
try:
session.add(threadInfo)
session.commit()
except:
session.rollback()
raise
finally:
session.close()
return item

From the code I can see, it doesn't look to me like you are setting ThreadInfo.authorInfosLink or ThreadInfo.authorInfos_id anywhere (the same goes for all of your FK/relationships).
For the related objects to be attached to a ThreadInfo instance, you need to create them and then attach them something like:
# Input info to authorInfo
authorInfo = AuthorInfo()
authorInfo.threadAuthor = item['threadAuthor']
threadInfo.authorInfosLink = authorInfo
You probably don't want to session.add() each object if it's related via FK. You'll want to:
instantiate a BoardInfo object bi
then instantiate attach your related ThreadInfo object ti
attach your the related object eg bi.threadInfosLink = ti
At the end of all of your chained relationships, you can simply add bi to the session using session.add(bi) -- all of the related objects will be added through their relationships and the FKs will be correct.

Per the discussion in the comments of my other answer, below is how I would rationalize your models to make them make more sense to me.
Notice:
I have removed the unnecessary "Info" everywhere
I have removed explicit column names from your model definitions and will rely instead on SQLAlchemy's ability to infer those for me based on my attribute names
In a "Post" object I do not name the attribute PostContent, it's implied that the content relates to the Post because that's how we're accessing it -- instead simply call the attribute "Post"
I've removed all "Link" terminology -- in places where I think you want a reference to a collection of related objects I've provided a plural attribute of that object as the relationship.
I've left a line in the Post model for you to remove. As you can see, you don't need "author" twice -- once as a related object and once on the Post, that defeats the purpose of the FKs.
With these changes, when you attempt to use these models from your other code it becomes obvious where you need to use .append() and where you simply assign the related object. For a given Board object you know that 'threads' is a collection just based on the attribute name, so you're going to do something like b.threads.append(thread)
from sqlalchemy import create_engine, Column, Table, ForeignKey, MetaData
from sqlalchemy import Integer, String, Date, DateTime, Float, Boolean, Text
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
class Board(Base):
__tablename__ = 'board'
id = Column(Integer, primary_key=True)
name = Column(String(100))
threads = relationship(back_populates='board')
class Thread(Base):
__tablename__ = 'thread'
id = Column(Integer, primary_key=True)
title = Column(String())
link = Column(String())
author = Column(String())
post = Column(Text())
reply_count = Column(Integer)
read_count = Column(Integer)
board_id = Column(Integer, ForeignKey('Board.id'))
board = relationship('Board', back_populates='threads')
posts = relationship('Post', back_populates='threads')
author_id = Column(Integer, ForeignKey('Author.id'))
author = relationship('Author', back_populates='threads')
class Post(Base):
__tablename__ = 'post'
id = Column(Integer, primary_key=True)
order = Column(Integer, nullable=True)
author = Column(Text(), nullable=True) # remove this line and instead use the relationship below
content = Column(Text(), nullable=True)
timestamp = Column(Text(), nullable=True)
thread_id = Column(Integer, ForeignKey('Thread.id'))
thread = relationship('Thread', back_populates='posts')
author_id = Column(Integer, ForeignKey('Author.id'))
author = relationship('Author', back_populates='posts')
class AuthorInfo(Base):
__tablename__ = 'author'
id = Column(Integer, primary_key=True)
name = Column(String())
posts = relationship('Post', back_populates='author')
threads = relationship('Thread', back_populates='author')

Related

Self referencing many-to-many relationship with extra column in association object

I am new in Sqlalchemy and trying to achieve the following goal with relationship():
There is an User table which stores user data.
Every user is able to invite other user with an invite_code.
Every user keeps a list of invitation, every invitation includes the invite_code and the invitee User
I think the relationship between User and Invitation is one-to-many. Since Invitation contains User, then I think it is probably better to use self-referential relationship to represent the inviter-to-invitaions(invitees) relationship and use an association object to store the invite_code.
I checked the sqlalchemy documentation and the question, tried to implement the classed like this:
from sqlalchemy import Column, Integer, ForeignKey, create_engine, String
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Invitation(Base):
__tablename__ = 'invitation'
invite_code = Column(Integer)
inviter_id = Column(Integer, ForeignKey('user.id'), primary_key=True)
invitee_id = Column(Integer, ForeignKey('user.id'), primary_key=True)
invitee = relationship('User') #Need HELP here
class User(Base):
__tablename__ = 'user'
id = Column(Integer, primary_key=True)
name = Column(String)
inviters = relationship('User',
secondary='invitation',
primaryjoin=id==Invitation.invitee_id,
secondaryjoin=id==Invitation.inviter_id,
backref='invitees')
invitations = relationship('Invitation')# Need HELP here
def __repr__(self):
return f'User: {self.name}'
if __name__ == '__main__':
engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
Session = sessionmaker(engine)
db = Session()
inviter1 = User(name='inviter1')
inviter2 = User(name='inviter2')
invitee1= User(name='invitee1')
invitee2 = User(name='invitee2')
inviter1.invitees = [invitee1, invitee2]
inviter2.invitees = [invitee1]
db.add(inviter1)
db.add(inviter2)
db.add(invitee1)
db.add(invitee2)
db.commit()
users = db.query(User).all()
for user in users:
print(user)
print(' Inviter: ', user.inviters)
print(' Invitee: ', user.invitees)
print()
If the lines with comment #Need HELP here are deleted, I can get the corresponding inviters and invitees, but cannot get the invite_code. If the #Need HELP here code are added, the error is:
Exception has occurred: AmbiguousForeignKeysError
Could not determine join condition between parent/child tables on relationship Invitation.invitee - there are multiple foreign key paths linking the tables. Specify the 'foreign_keys' argument, providing a list of those columns which should be counted as containing a foreign key reference to the parent table.
Is there a way to add extra data column in association object like association object for many-to-many relationship for self referential table?
Sorry for the too much text, I didn't find any reference document on the web.
Finally, I figured it out with the help of foreign_keys:
from sqlalchemy import Column, Integer, ForeignKey, create_engine, String
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class User(Base):
__tablename__ = 'user'
id = Column(Integer, primary_key=True)
name = Column(String)
sent_invitations = relationship('Invitation', foreign_keys='Invitation.inviter_id', back_populates='inviter', cascade='all, delete')
received_invitations=relationship('Invitation', foreign_keys='Invitation.invitee_id', back_populates='invitee', cascade='all, delete')
def __repr__(self):
return f'User: {self.name}'
class Invitation(Base):
__tablename__ = 'invitation'
id = Column(Integer, primary_key=True)
invite_code = Column(Integer)
inviter_id = Column(Integer, ForeignKey('user.id'))
invitee_id = Column(Integer, ForeignKey('user.id'))
inviter=relationship('User', foreign_keys=[inviter_id], back_populates='sent_invitations')
invitee=relationship('User', foreign_keys=[invitee_id], back_populates='received_invitations')
def __repr__(self):
return f'Invitation: {self.inviter} invited {self.invitee} with {self.invite_code}'
if __name__ == '__main__':
engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
Session = sessionmaker(engine)
db = Session()
inviter1 = User(name='inviter1')
inviter2 = User(name='inviter2')
invitee1= User(name='invitee1')
invitee2 = User(name='invitee2')
invitation1 = Invitation(invite_code=50, inviter=inviter1, invitee=invitee1)
invitation2 = Invitation(invite_code=20, inviter=inviter2, invitee=invitee2)
invitation3 = Invitation(invite_code=22, inviter=inviter1, invitee=inviter2)
invitation4 = Invitation(invite_code=44, inviter=invitee1, invitee=inviter2)
db.add(inviter1)
db.add(inviter2)
db.add(invitee1)
db.add(invitee2)
db.commit()
users = db.query(User).all()
for user in users:
print(user)
print(' sent_invitation: ', user.sent_invitations)
print(' received_invitation: ', user.received_invitations)
print()
invitations = db.query(Invitation).all()
for invitation in invitations:
print(invitation)
db.delete(inviter1)
db.delete(invitee2)
db.commit()

sqlalchemy: AttributeError: type object 'customer' has no attribute 'invoices'

I am new to sqlalchemy. I can create database tables by declarative mapping like this:
engine = create_engine("--engine works---")
Base = declarative_base()
class Customer(Base):
__tablename__ = 'customer'
customer_id = Column(Integer, primary_key=True)
name = Column(String(30))
email = Column(String(30))
invoices = relationship(
'Invoice',
order_by="Invoice.inv_id",
back_populates='customer',
cascade="all, delete, delete-orphan"
)
class Invoice(Base):
__tablename__ = 'invoice'
inv_id = Column(Integer, primary_key=True)
name = Column(String(30))
created = Column(Date)
customer_id = Column(ForeignKey('customer.customer_id'))
customer = relationship('Customer', back_populates='invoices')
Base.metadata.create_all(engine)
This is fine. I added some data into both customer and invoice tables.
So far so good. Next, I would try out automap_base on this existing database like this:
from sqlalchemy import select, text
from sqlalchemy.orm import declarative_base, sessionmaker
from sqlalchemy.ext.automap import automap_base
engine = create_engine('--engine works---')
Base = automap_base()
# reflect
Base.prepare(engine, reflect=True)
Customer = Base.classes.customer
Invoice = Base.classes.invoice
Session = sessionmaker(bind=engine, future=True)
session = Session()
# query invoice
stmt = select(Customer, Invoice).join(Customer.invoices).order_by(Customer.customer_id, Invoice.inv_id)
res = session.execute(stmt)
for c in res:
print(c.customer_id)
When I ran the code, I got:
AttributeError: type object 'customer' has no attribute 'invoices'
What did I miss for the relationship on the Customer (one side) or Invoice (many side) in this case so that when I query for customers with its invoices attibute and for invoices with customer attribute? Thanks for any help.
By default, automap will create the relation in the parent by appending "_collection" the lower-cased classname, so the name will be Customer.invoice_collection.
While answering this, I found that the join would raise an AttributeError on Customer.invoice_collection unless I performed a query on Customer beforehand, for example
session.execute(sa.select(Customer).where(False))
I'm not sure why that happens, however you don't necessarily need the join as you can iterate over Customer.invoice_collection directly, or join against the invoice table:
stmt = sa.select(Customer, Invoice).join(Invoice)
res = session.execute(stmt)
for c, i in res:
print(c.customer_id, i.inv_id)

SQLAlchemy listen for attribute change in many-to-many relationship and change other attributes of initiator

I'm new to the SQLAlchemy and I wrote a simple CRUD database layout like this: I have three tables, Customer, Product, and Template. The idea is this: Each customer can have a template of the products he usually orders. When I fetch a particular customer from the database, his template along with all the products should be fetched as well. So I have a one-to-one relationship (customer-template) and one many-to-many relationship (template-product). Now, a template should contain fields such as quantity of a particular product, along with its net, gross and tax values. I'd like to have a listener on the quantity column, such that when the quantity of a particular product is changed, other attributes I mentioned will be changed too. So the code I wrote is as follows (please, if you can, also verify whether all the relationships are written appropriately)
from sqlalchemy import event
from sqlalchemy.orm import relationship, exc, column_property, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm.query import Query as _Query
from sqlalchemy import Table, Column, Integer, String, Boolean, ForeignKey, UniqueConstraint, create_engine, Numeric
from decimal import *
# decimal operations settings
getcontext().prec = 6
getcontext().rounding = ROUND_HALF_UP
engine = create_engine('sqlite:///test.db')
Base = declarative_base()
# Initalize the database if it is not already.
Base.metadata.create_all(engine)
# Create a session to handle updates.
Session = sessionmaker(bind=engine)
# customer has a one-to-one relationship with template, where customer is the parent
class Customer(Base):
__tablename__ = "customers"
id = Column(Integer, primary_key=True)
alias = Column(String)
firm_name = Column(String)
last_name = Column(String)
first_name = Column(String)
tax_id = Column(String, nullable=False)
address = Column(String)
postal_code = Column(String)
city = Column(String)
payment = Column(Boolean)
template = relationship("Template", uselist=False, back_populates="customer")
# product has many-to-many relationship with template, where template is the parent
class Product(Base):
__tablename__ = "products"
id = Column(Integer, primary_key=True)
product_name = Column(String, nullable=False)
symbol = Column(String)
unit = Column(String, nullable=False)
unit_net_price = Column(Numeric, nullable=False)
vat_rate = Column(Numeric, nullable=False)
per_month = Column(Boolean, nullable=False)
# association table for the products-template many-to-many relationship
association_table = Table('association', Base.metadata,
Column('product_id', Integer, ForeignKey('product.id')),
Column('template_id', Integer, ForeignKey('template.id'))
)
# template implements one-to-one relationship with customer and many-to-many relationship with product
class Template(Base):
id = Column(Integer, primary_key=True)
customer_id = Column(Integer, ForeignKey("customer.id"))
customer = relationship("Customer", back_populates="template")
products = relationship("Product", secondary=association_table)
quantity = Column(Numeric)
net_val = Column(Numeric)
tax_val = Column(Numeric)
gross_val = Column(Numeric)
# TODO: implement constructor
def __init__(self, **kwargs):
self.quantity = Decimal(0.0)
self.net_val = Decimal(0.0)
self.tax_val = Decimal(0.0)
self.gross_val = Decimal(0.0)
#event.listens_for(Template.quantity, "set")
def quantity_listener(target, value, oldvalue, initiator):
print(target)
print(initiator)
# target.net_val =
# target.tax_val =
# target.gross_val =
Now, I'm unsure how should I get a particular initiator and set its values, since products in Template table is a list (?)
I'd probably do it like this. It's hard to use Sqlalchemy orm in such cases as you can't access the Session object we all got used to use.
#event.listens_for(Template, "after_update")
def quantity_listener(mapper, connection, target):
field = 'quantity'
added, _, deleted = get_history(target, field)
# added is a new values of the specified field
# deleted is the old values
# so we basically check if 'quantity' has changed
# its a tuple btw
if added and deleted and added[0] != deleted[0]:
stmt = Template.update(). \
values(Template.something=Template.something * Template.other_something). \
where(Template.id == target.id)
conn.execute(stmt)
# here goes another updates for products or you can have another
# listener for them
There might be a better way to accomplish this. I can't debug this right now and I can't manage to get your examples working. Hope it helps.

Python / SQLAlchemy: How to delete records IN allocation tables?

below I have brought you an executable program. There are comments in this program to make the situation easier to understand. Please read the comments. Well What I want? I want the porgram to delete/remove only record(s) in an allocation table (Allocation_Film_Genre) - not rows in the tables like Film or Genre. As you can see, in the example I assigned the movie "Saw" to the genre "Comedy". A (deliberate) mistake. Now I want to resolve this constellation, but I don't want to delete "comedy" and "Saw" from the database, but only the assignment. But how?
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship, backref
sqlite_url = 'sqlite:///test.sqlite'
engine = sqlalchemy.create_engine(sqlite_url, echo = True)
Base = declarative_base()
class Allocation_Film_Genre(Base):
__tablename__ = 'allocation_film_genre'
genre_id = Column(Integer, ForeignKey('genre.id'), primary_key=True)
film_id = Column(Integer, ForeignKey('film.id'), primary_key=True)
genre = relationship("Genre", backref=backref("allocation_film_genre", lazy='dynamic', cascade="all, delete-orphan" ))
film = relationship("Film", backref=backref("allocation_film_genre", lazy='dynamic', cascade="all, delete-orphan" ))
class Film(Base):
__tablename__ = 'film'
id = Column(Integer, primary_key=True, unique=True)
name = Column(String(80))
class Genre(Base):
__tablename__ = 'genre'
id = Column(Integer, primary_key=True, unique=True)
name = Column(String(80))
# Let us create all tables with certain columns
Base.metadata.create_all(engine)
# Now we have to create a session to work with.
Session = sessionmaker(bind=engine)
session = Session()
# We want to save some movies
film1 = Film(name="Saw")
film2 = Film(name="Amageddon")
film3 = Film(name="Little Stuart")
film4 = Film(name="Doom Day")
session.add_all([film1, film2, film3, film4])
session.commit()
# By the way we also want to save some genres
genre1 = Genre( name = "Horror")
genre2 = Genre( name = "Comedy")
genre3 = Genre( name = "Psycho")
genre4 = Genre( name = "Thriller")
session.add_all([genre1, genre2, genre3, genre4])
session.commit()
# Hold on, we know we created an allocation table, because
# one movie can contains one or more genre, otherwise, one genre
# also can contains one or more movie, right? Let take us a look.
# For simulate we use the movie named 'Saw".
film_obj1 = session.query(Film).filter(Film.name=="Saw").one()
genre_obj1 = session.query(Genre).filter(Genre.name=="Horror").one()
film_obj2 = session.query(Film).filter(Film.name=="Saw").one()
genre_obj2 = session.query(Genre).filter(Genre.name=="Psycho").one()
film_obj3 = session.query(Film).filter(Film.name=="Saw").one()
genre_obj3 = session.query(Genre).filter(Genre.name=="Comedy").one()
allocation1 = Allocation_Film_Genre(film=film_obj1, genre=genre_obj1)
allocation2 = Allocation_Film_Genre(film=film_obj2, genre=genre_obj2)
allocation3 = Allocation_Film_Genre(film=film_obj3, genre=genre_obj3)
session.add_all([allocation1, allocation2, allocation3])
session.commit()
# Ok, we are done. Alle movies and genre are saved, and we also saved all
# allocation records. But wait! There is a mistake. Saw isn't a comedy. Damn!
# Shame on me!
# And now, I don't know what I have to do.
from sqlalchemy import and_
allocation_obj_to_delete _list=session.query(Allocation_Film_Genre).join(Film).join(Genre).filter(and_(Film.name=='Saw',Genre.name=="Comedy")).all()
for obj in allocation_obj_to_delete:
session.delete(obj)
session.commit()
Joined query and delete the objects

Sqlalchemy association proxy and no_autoflush

I'm trying to figure out why I need to use a no_autoflush block when inserting data into an association proxy if the association proxy data has been accessed first. An example of this is bellow (using MySQL):
from sqlalchemy import create_engine, Integer, Column, String, ForeignKey, UniqueConstraint
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship, scoped_session
Base = declarative_base()
engine = create_engine('{}://{}:{}#{}/{}'.format(...))
session_factory = sessionmaker(bind=engine)
Session = scoped_session(session_factory)
class DomainModel(Base):
__tablename__ = 'domains'
id = Column(Integer, primary_key=True)
name = Column(String(255), nullable=False, unique=True)
domains_to_servers = relationship("DomainServerModel", back_populates="domain")
servers = association_proxy('domains_to_servers', 'server',
creator=lambda s: DomainServerModel(server=s))
class ServerModel(Base):
__tablename__ = 'servers'
id = Column(Integer, primary_key=True)
name = Column(String(128), nullable=False, unique=True, index=True)
domains_to_servers = relationship("DomainServerModel", back_populates="server")
domains = association_proxy('domains_to_servers', 'domain',
creator=lambda d: DomainServerModel(domain=d))
class DomainServerModel(Base):
__tablename__ = 'domains_to_servers'
id = Column(Integer, primary_key=True)
domain_id = Column(Integer, ForeignKey('domains.id'), nullable=False)
server_id = Column(Integer, ForeignKey('servers.id'), nullable=False)
server = relationship('ServerModel', back_populates="domains_to_servers")
domain = relationship('DomainModel', back_populates="domains_to_servers")
def test():
session = Session()
with session.no_autoflush:
s = session.query(ServerModel).filter_by(name='test.com').one()
print(s.domains)
d = DomainModel(name='test1.com')
session.add(d)
session.commit()
s.domains.append(d)
session.commit()
if __name__ == '__main__':
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
session = Session()
session.add(ServerModel(name='test.com'))
session.commit()
test()
I'm trying to add a new domain_to_server mapping via the server/domain association proxy. If I don't access the association proxy first, ie remove the print statement in test(), then I can add the domain without needing the session.no_autoflush block. But with the print statement in there, it will fail without the session.no_autoflush block with an IntegrityError, saying that server_id cannot be null in the domains to servers table.
I'm trying to figure out why the no_autoflush block is needed here. I don't see any mention of it in the association_proxy docs. Is this simply the way it is, and all inserts into an association_proxy should to happen in a no_autoflush bock in case it has been accessed prior to the insert?

Categories