alembic migrations with sqlmodel attempts to alter primary key column - python

Given this model:
from typing import Optional
from sqlmodel import SQLModel, Field
class SongBase(SQLModel):
    """Fields common to every song model; no table is created for this class."""

    name: str
    artist: str = Field(index=False)
    # label: Optional[str] = Field(default=None, index=False)
    year: Optional[int] = Field(default=None, index=False)
class Song(SongBase, table=True):
    """Table model: the ``SongBase`` fields plus an integer primary key."""

    # No explicit ``nullable`` argument is given for the primary key here.
    id: int = Field(default=None, primary_key=True, index=False)


class SongCreate(SongBase):
    """Creation schema: exactly the ``SongBase`` fields, without ``id``."""
I create an initial alembic revision using alembic revision --autogenerate -m "init" and then apply it using alembic upgrade head.
Now I uncomment the label field, and run alembic revision --autogenerate -m "label".
My migration shows up like this:
revision = '083a8e84f047'
down_revision = 'c1b2ad7d0a39'
branch_labels = None
depends_on = None
def upgrade():
    """Apply the autogenerated "label" revision to the ``song`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
        'song',
        sa.Column('label', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
    )
    # Unexpected extra operation: autogenerate also wants to relax the primary key.
    op.alter_column(
        'song',
        'id',
        existing_type=sa.INTEGER(),
        nullable=True,
        autoincrement=True,
    )
    # ### end Alembic commands ###
def downgrade():
    """Revert the "label" revision: restore NOT NULL on ``id``, then drop ``label``."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.alter_column(
        'song',
        'id',
        existing_type=sa.INTEGER(),
        nullable=False,
        autoincrement=True,
    )
    op.drop_column('song', 'label')
    # ### end Alembic commands ###
Why is alembic trying to make changes to the id field?
We're trying to evaluate sqlmodel/alembic to see if it's feasible for a production workload, and having to hand-wrangle migrations to get rid of these primary key manipulations seems a bit dangerous to me. Am I doing anything wrong to make alembic want to edit my primary key field in this way?
EDIT: For disclosure, the model comes from this article/example: https://github.com/testdrivenio/fastapi-sqlmodel-alembic

Did some more research here, and looked at a few GitHub discussions in the alembic repo. What I think happens is that the id column doesn't explicitly set nullable=False, which alembic seems to require. Then during the initial "migration" it overlooks that fact (I don't know if that's a bug), which makes the model get out of sync from the get-go. So each migration tries to bring it back into sync.
In any case, the fix seems to be to always explicitly declare the nullable parameter for primary key fields:
class SongBase(SQLModel):
    """Fields common to every song model, now including ``label``."""

    name: str
    artist: str
    label: str = Field(index=False)
    year: Optional[int] = None
class Song(SongBase, table=True):
    """Table model whose primary key states its nullability explicitly."""

    # nullable=False is spelled out so the model and the migrated schema agree.
    id: int = Field(default=None, primary_key=True, nullable=False)


class SongCreate(SongBase):
    """Creation schema: exactly the ``SongBase`` fields, without ``id``."""

Related

How to implement a "Citation" table? (using SQLModel or SQLAlchemy)

I'm struggling with implementing the concept of "scientific paper citation" in SQL.
I have a table of Papers. Each Paper can cite many other Papers and, vice versa, it can be cited by many others.
Here's the code I wrote
class Paper(SQLModel, table=True):
    """A paper with two relationships to ``Citation`` (as written in the question)."""

    id: Optional[int] = Field(default=None, primary_key=True)

    # Both relationships point at Citation; neither says which foreign key it uses.
    bibliography: List["Citation"] = Relationship(back_populates="citing")
    cited_by: List["Citation"] = Relationship(back_populates="cited")
class Citation(SQLModel, table=True):
    """Link row between a citing paper and a cited paper (as written in the question)."""

    # Composite primary key; both columns are foreign keys to paper.id.
    citing_id: Optional[int] = Field(
        default=None, primary_key=True, foreign_key="paper.id"
    )
    citing: "Paper" = Relationship(back_populates="bibliography")

    cited_id: Optional[int] = Field(
        default=None, primary_key=True, foreign_key="paper.id"
    )
    cited: "Paper" = Relationship(back_populates="cited_by")
This is not working:
sqlalchemy.exc.AmbiguousForeignKeysError: Could not determine join condition between parent/child tables on relationship Paper.bibliography - there are multiple foreign key paths linking the tables. Specify the 'foreign_keys' argument, providing a list of those columns which should be counted as containing a foreign key reference to the parent table.
The problem is the fact that I wrote foreign_key="paper.id" twice, but I don't know how to fix it.
To reproduce the error:
I'm using Python 3.10.5;
the only dependency is sqlmodel.
from typing import List
from typing import Optional
from sqlmodel import create_engine
from sqlmodel import Field
from sqlmodel import Relationship
from sqlmodel import Session
from sqlmodel import SQLModel
# SQLite database file used by the reproduction script.
sqlite_file_name = "database.db"
sqlite_url = f"sqlite:///{sqlite_file_name}"
engine = create_engine(sqlite_url, echo=True)

# class Paper(SQLModel, table=True): ...
# class Citation(SQLModel, table=True): ...

if __name__ == "__main__":
    # Create the tables, then instantiate a Paper; this script reproduces the error.
    SQLModel.metadata.create_all(engine)
    Paper()
I'm using SQLModel, but an answer in SQLAlchemy would be fine as well.
Handling multiple possible JOIN conditions in SQLAlchemy is documented here. The solution is to explicitly pass the foreign_keys argument to your RelationshipProperty constructor.
In this case, you will need to specify that for all four relationships in question.
Since SQLModel currently does not allow passing all available relationship arguments directly to its constructor (though I am working on a PR for that), you need to use the sa_relationship_kwargs parameter.
Here is a working example:
from typing import Optional
from sqlmodel import Field, Relationship, SQLModel
class Paper(SQLModel, table=True):
    """A paper whose two ``Citation`` relationships each name their foreign key."""

    id: Optional[int] = Field(default=None, primary_key=True)

    # Citations where this paper is the citing side.
    bibliography: list["Citation"] = Relationship(
        back_populates="citing",
        sa_relationship_kwargs={"foreign_keys": "Citation.citing_id"},
    )
    # Citations where this paper is the cited side.
    cited_by: list["Citation"] = Relationship(
        back_populates="cited",
        sa_relationship_kwargs={"foreign_keys": "Citation.cited_id"},
    )
class Citation(SQLModel, table=True):
    """Link row between a citing paper and a cited paper.

    Both columns reference ``paper.id``, so each relationship passes
    ``foreign_keys`` to say which column it joins on.
    """

    citing_id: Optional[int] = Field(
        default=None,
        primary_key=True,
        foreign_key="paper.id",
    )
    citing: Paper = Relationship(
        back_populates="bibliography",
        sa_relationship_kwargs={"foreign_keys": "Citation.citing_id"},
    )

    cited_id: Optional[int] = Field(
        default=None,
        primary_key=True,
        foreign_key="paper.id",
    )
    cited: Paper = Relationship(
        back_populates="cited_by",
        sa_relationship_kwargs={"foreign_keys": "Citation.cited_id"},
    )
As a side note, I think in this case it might be even nicer to use an association proxy to have an additional direct link from a paper to all papers it is cited by and citing (without the additional "hop" via the Citation object), but I believe this is currently not possible with SQLModel.

SQLModel Relationship that returns count of related objects

I'm working on a FastAPI application and using SQLModel with a Postgres backend. I have Post objects, each of which can be upvoted by Users. We represent this with a PostUpvote many-to-many relation between Users and Posts. So far, so boring.
from datetime import datetime
from typing import List, Optional
from pydantic import BaseModel
from sqlmodel import Field, Relationship, SQLModel
import uuid as uuid_pkg
def uuid_hex() -> str:
    """Return a newly generated version-4 UUID as a 32-character hex string."""
    return uuid_pkg.uuid4().hex
def PkIdField():
    """Build the ``Field`` shared by the models' string primary keys.

    The field is a non-nullable, indexed primary key whose default value
    comes from ``uuid_hex``.
    """
    pk_field = Field(
        default_factory=uuid_hex,
        primary_key=True,
        index=True,
        nullable=False,
    )
    return pk_field
class PostBase(SQLModel):
    """Plain text fields of a post; no table is created for this class."""

    title: str
    description: str
class Post(PostBase, table=True):
    """Table model for a post, linked to its creator and to its upvotes."""

    # Must name the users table exactly as User.__tablename__ does ("app_users").
    creator_id: str = Field(foreign_key="app_users.id")
    id: str = PkIdField()
    created_at: datetime = Field(default_factory=datetime.utcnow, nullable=False)

    # "User" is a string forward reference because User is defined after this class.
    creator: "User" = Relationship(back_populates="posts")
    upvotes: List["PostUpvote"] = Relationship(back_populates="post")
class UserBase(SQLModel):
    """Plain fields of a user; no table is created for this class."""

    email: str
class User(UserBase, table=True):
    """Table model for a user, stored in the ``app_users`` table."""

    # "user" table is reserved by postgres
    __tablename__ = "app_users"

    id: str = PkIdField()
    posts: List["Post"] = Relationship(back_populates="creator")
class PostUpvote(SQLModel, table=True):
    """Association row recording that one user upvoted one post.

    The pair ``(post_id, user_id)`` forms the composite primary key.
    """

    post: Post = Relationship(back_populates="upvotes")
    # Post declares no __tablename__, so SQLModel names its table "post"
    # (the lower-cased class name); the foreign key must use that name.
    post_id: str = Field(foreign_key="post.id", primary_key=True)
    user_id: str = Field(foreign_key="app_users.id", primary_key=True)
As you can see, I've set up an upvotes relationship on my Post object, which will give me a list of all the upvotes for that post. But when I'm returning this to the frontend, I don't need or want a list of all the upvotes. I just need the count. Obviously, I can use len(post.upvotes) to get this, but that still requires us to fetch all the individual upvote objects for that post. So my question is: is there some way to add an upvote_count relationship to my Post object, like so:
class Post(PostBase, table=True):
    """Table model for a post, with the hypothetical ``upvote_count`` attribute."""

    # Must name the users table exactly as User.__tablename__ does ("app_users").
    creator_id: str = Field(foreign_key="app_users.id")
    id: str = PkIdField()
    created_at: datetime = Field(default_factory=datetime.utcnow, nullable=False)

    # "User" is a string forward reference because User is defined after this class.
    creator: "User" = Relationship(back_populates="posts")
    upvotes: List["PostUpvote"] = Relationship(back_populates="post")
    # Placeholder for the feature being asked about; the arguments are left open.
    upvote_count: int = Relationship(...)
Note that this is using SQLModel's Relationship feature (https://sqlmodel.tiangolo.com/tutorial/relationship-attributes/), not SQLAlchemy relations (though I am running SQLAlchemy under the hood).
If there's some way to provide a custom SQLAlchemy query to the SQLModel relationship, that would solve the problem neatly. But I've not been able to find anything in the SQLModel docs about how to do so. Is this even possible? Or should I just resign myself to doing the query manually?

SAWarning: Object of type <Child> not in session, add operation along 'Parent.children' will not proceed

I'm stuck on this issue and I don't know how to fix it. This is my models.py file:
models.py
class TripDetail(db.Model):
    """
    Abstract parent of every table that stores trip information.

    Bound to the 'instructions' bind key and provides the integer ``id``
    primary key column.
    """

    __abstract__ = True
    __bind_key__ = 'instructions'

    id = db.Column(db.Integer, primary_key=True)
    # list of fields
class Overview(TripDetail):
    """
    General information about a trip, stored in the 'overviews' table.
    """

    __tablename__ = 'overviews'
    __table_args__ = (
        db.ForeignKeyConstraint(
            ['user_id', 'calendar_id'],
            ['calendars.user_id', 'calendars.id'],
            ondelete='CASCADE',
        ),
    )  # constraints on other tables, omitted here

    user_id = db.Column(db.Integer, primary_key=True)
    calendar_id = db.Column(db.Integer, primary_key=True)
    calendar = db.relationship(
        'Calendar',
        backref=db.backref('overviews', cascade='delete'),
        passive_deletes=True,
    )
    # other fields
class Step(TripDetail):
    """A single step of a trip, stored in 'steps' and linked to one Overview."""

    __tablename__ = 'steps'

    overview_id = db.Column(
        db.Integer,
        db.ForeignKey('overviews.id', ondelete='CASCADE'),
    )
    # The backref creates Overview.steps with its cascade set to 'delete' only.
    overview = db.relationship(
        'Overview',
        backref=db.backref('steps', cascade='delete'),
        passive_deletes=True,
    )
    # also other fields
And this is how I add items to the DB (the response parameter contains a dict that matches the classes, in such a way that it can be unpacked directly):
def add_instruction(response):
    """
    Store one travel instruction in the database.

    ``response`` is a dict of ``Overview`` keyword arguments plus a 'steps'
    entry holding one keyword-argument dict per ``Step``. The 'steps' key is
    popped, so the caller's dict is modified.
    """
    step_rows = response.pop('steps')
    overview = Overview(**response)

    # Each Step is attached to the overview via the relationship;
    # only the overview itself is passed to the session.
    for step_fields in step_rows:
        Step(overview=overview, **step_fields)

    db.session.add(overview)
    db.session.commit()
    logger.info(f"Stored instruction with PK {(overview.id, overview.user_id, overview.calendar_id, overview.event_id)}")
Now, the overviews table is filled up correctly, but steps stays empty. Inspecting the logs, I receive this warning:
SAWarning: Object of type <Step> not in session, add operation along 'Overview.steps' will not proceed
(orm_util.state_class_str(state), operation, prop))
What am I doing wrong?
Normally, when add()ing objects to a session, their related objects will get auto-added like you wanted. That behavior is controlled by the relationship's cascade.
Setting cascade to 'delete' on the steps backref (Overview.steps) removes the default 'save-update', which is what turns on the auto-adding. You could just add it back with cascade='save-update, delete', but take a look at the possible traits and see what else you might need. A common set is 'all, delete-orphan'.
And remember these are strictly ORM behaviors; setting a 'delete' in your cascade won't set the column's ON [event] CASCADE.
Well, I've solved this by explicitly adding the created step to the session. I still have no idea what the warning means though, so I'll just leave this here. My fix:
for step_fields in steps:
    new_step = Step(overview=overview, **step_fields)
    db.session.add(new_step)  # explicitly add

name 'Text' is not defined in HSTORE Sqlalchemy migration

When I try to add HSTORE to my model and try to upgrade head it throws me NameError: name 'Text' is not defined.
I used PYTHONPATH=. alembic revision --autogenerate -m "Added user_id, admin_id and role to Customer" to create the revision.
Thanks in advance!
Update:
@Daniel Roseman suggested that I add sa.Text(). My question is why it was not generated automatically during the revision.
Error:
op.add_column('customers', sa.Column('user_id',
postgresql.HSTORE(text_type=Text()), nullable=True)) NameError: name
'Text' is not defined
Model:
class Customer(Base):
    """Mapped class for the ``customers`` table."""

    __tablename__ = "customers"

    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)
    img = Column(String, nullable=False)
    auth_token = Column(String, nullable=True)
    notification_config = Column(JSONB, nullable=True)
    admin_id = Column(Integer, nullable=True)
    # HSTORE column wrapped in MutableDict.as_mutable.
    user_id = Column(MutableDict.as_mutable(HSTORE))
Generated Migration revision:
"""Added user_id, admin_id and role to Customer
Revision ID: 1ebe3d18442f
Revises: 88b4dccb5c1e
Create Date: 2017-06-21 17:03:21.181933
"""
# revision identifiers, used by Alembic.
revision = '1ebe3d18442f'
down_revision = '88b4dccb5c1e'
branch_labels = None
depends_on = None
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
def upgrade():
    """Add ``admin_id``, ``auth_token`` and ``user_id`` to ``customers``."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('customers', sa.Column('admin_id', sa.Integer(), nullable=True))
    op.add_column('customers', sa.Column('auth_token', sa.String(), nullable=True))
    # NOTE: the bare name Text is not imported by this script; this is the
    # line reported in the NameError.
    op.add_column(
        'customers',
        sa.Column('user_id', postgresql.HSTORE(text_type=Text()), nullable=True),
    )
    # ### end Alembic commands ###
def downgrade():
    """Drop the three columns added by ``upgrade``, in reverse order."""
    # ### commands auto generated by Alembic - please adjust! ###
    for column_name in ('user_id', 'auth_token', 'admin_id'):
        op.drop_column('customers', column_name)
    # ### end Alembic commands ###

How do I execute inserts and updates in an Alembic upgrade script?

I need to alter data during an Alembic upgrade.
I currently have a 'players' table in a first revision:
def upgrade():
    """Create the ``players`` table (first revision)."""
    # Named 'players' to match the table name used by the rest of the migrations.
    op.create_table(
        'players',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('name', sa.Unicode(length=200), nullable=False),
        sa.Column('position', sa.Unicode(length=200), nullable=True),
        sa.Column('team', sa.Unicode(length=100), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
I want to introduce a 'teams' table. I've created a second revision:
def upgrade():
    """Create the ``teams`` table and add ``players.team_id`` (second revision)."""
    op.create_table(
        'teams',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(length=80), nullable=False),
        # Primary key, declared the same way as in the first revision, so that
        # rows inserted with only a name can get a generated id.
        sa.PrimaryKeyConstraint('id'),
    )
    # NOTE(review): adding a NOT NULL column without a default to a table that
    # already has rows is rejected by many databases - confirm for your backend.
    op.add_column('players', sa.Column('team_id', sa.Integer(), nullable=False))
I would like the second migration to also add the following data:
Populate teams table:
INSERT INTO teams (name) SELECT DISTINCT team FROM players;
Update players.team_id based on players.team name:
UPDATE players AS p JOIN teams AS t SET p.team_id = t.id WHERE p.team = t.name;
How do I execute inserts and updates inside the upgrade script?
What you are asking for is a data migration, as opposed to the schema migration that is most prevalent in the Alembic docs.
This answer assumes you are using declarative (as opposed to class-Mapper-Table or core) to define your models. It should be relatively straightforward to adapt this to the other forms.
Note that Alembic provides some basic data functions: op.bulk_insert() and op.execute(). If the operations are fairly minimal, use those. If the migration requires relationships or other complex interactions, I prefer to use the full power of models and sessions as described below.
The following is an example migration script that sets up some declarative models that will be used to manipulate data in a session. The key points are:
Define the basic models you need, with the columns you'll need. You don't need every column, just the primary key and the ones you'll be using.
Within the upgrade function, use op.get_bind() to get the current connection, and make a session with it.
Or use bind.execute() to use SQLAlchemy's lower level to write SQL queries directly. This is useful for simple migrations.
Use the models and session as you normally would in your application.
"""create teams table
Revision ID: 169ad57156f0
Revises: 29b4c2bfce6d
Create Date: 2014-06-25 09:00:06.784170
"""
revision = '169ad57156f0'
down_revision = '29b4c2bfce6d'
from alembic import op
import sqlalchemy as sa
from sqlalchemy import orm
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Player(Base):
    """Migration-local mapping of ``players``, limited to the columns used here."""

    __tablename__ = 'players'

    id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column(sa.String, nullable=False)
    # The attribute is team_name, but the underlying column is called 'team'.
    team_name = sa.Column('team', sa.String, nullable=False)
    team_id = sa.Column(sa.Integer, sa.ForeignKey('teams.id'), nullable=False)

    team = orm.relationship('Team', backref='players')
class Team(Base):
    """Migration-local mapping of the new ``teams`` table."""

    __tablename__ = 'teams'

    id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column(sa.String, nullable=False, unique=True)
def upgrade():
    """Create ``teams``, link every player to a team, then drop ``players.team``."""
    bind = op.get_bind()
    session = orm.Session(bind=bind)

    # create the teams table and the players.team_id column
    Team.__table__.create(bind)
    # Added as nullable first: existing player rows have no team_id until the
    # backfill below has run.
    op.add_column(
        'players',
        sa.Column('team_id', sa.Integer(), sa.ForeignKey('teams.id'), nullable=True),
    )

    # create teams for each team name
    # Query the mapped name column (Player.team is the relationship) and unpack
    # each one-column row so the dict is keyed by the plain name.
    teams = {
        name: Team(name=name)
        for (name,) in session.query(Player.team_name).distinct()
    }
    session.add_all(teams.values())

    # set player team based on team name
    for player in session.query(Player):
        player.team = teams[player.team_name]

    session.commit()

    # every player now has a team, so the column can be made NOT NULL
    op.alter_column('players', 'team_id', existing_type=sa.Integer(), nullable=False)

    # don't need team name now that team relationship is set
    op.drop_column('players', 'team')
def downgrade():
    """Restore ``players.team`` from the team relationship, then drop ``teams``."""
    bind = op.get_bind()
    session = orm.Session(bind=bind)

    # re-add the players.team column
    # Added as nullable first: existing rows have no value until the loop
    # below has filled it in.
    op.add_column('players', sa.Column('team', sa.String(), nullable=True))

    # set players.team based on team relationship
    for player in session.query(Player):
        player.team_name = player.team.name

    session.commit()

    # every player has a team name again, so restore NOT NULL
    op.alter_column('players', 'team', existing_type=sa.String(), nullable=False)

    op.drop_column('players', 'team_id')
    op.drop_table('teams')
The migration defines separate models because the models in your code represent the current state of the database, while the migrations represent steps along the way. Your database might be in any state along that path, so the models might not sync up with the database yet. Unless you're very careful, using the real models directly will cause problems with missing columns, invalid data, etc. It's clearer to explicitly state exactly what columns and models you will use in the migration.
You can also use direct SQL (see the Alembic Operation Reference), as in the following example:
from alembic import op
# revision identifiers, used by Alembic.
revision = '1ce7873ac4ced2'
down_revision = '1cea0ac4ced2'
branch_labels = None
depends_on = None
def upgrade():
    """Replace NULL ``IN_STOCK`` values in ``STOCK`` with -1 using raw SQL."""
    # ### commands made by andrew ###
    statement = 'UPDATE STOCK SET IN_STOCK = -1 WHERE IN_STOCK IS NULL'
    op.execute(statement)
    # ### end Alembic commands ###
def downgrade():
    """Do nothing: the data update made by ``upgrade`` is not reverted."""
    # ### commands auto generated by Alembic - please adjust! ###
    return None
    # ### end Alembic commands ###
I recommend using SQLAlchemy core statements using an ad-hoc table, as detailed in the official documentation, because it allows the use of agnostic SQL and pythonic writing and is also self-contained. SQLAlchemy Core is the best of both worlds for migration scripts.
Here is an example of the concept:
from sqlalchemy.sql import table, column
from sqlalchemy import String
from alembic import op
# Ad-hoc table: declares only the columns this migration touches.
account = table(
    'account',
    column('name', String),
)

# Rename 'account 1' to 'account 2'; inline_literal renders the values
# directly into the SQL statement.
op.execute(
    account.update()
    .where(account.c.name == op.inline_literal('account 1'))
    .values({'name': op.inline_literal('account 2')})
)
# If insert is required
from sqlalchemy.sql import insert
from sqlalchemy import orm
# Run the insert through a session bound to the migration's connection.
bind = op.get_bind()
session = orm.Session(bind=bind)

data = {"name": "John"}
ret = session.execute(insert(account).values(data))

# for use in other insert calls
account_id = ret.lastrowid

Categories