I'm trying to translate the following query to sqlAlchemy and can't seem to figure it out (I'm not even far yet):
"SELECT time, version_id FROM ( \
SELECT \
time, \
version_id, \
LAG(software_version_id) OVER (ORDER BY time) as previous_version_id \
FROM device_checkins WHERE device_id = 001 AND time BETWEEN '2020-01-01' AND '2021-04-01') tt \
WHERE previous_version_id IS NULL OR version_id != previous_version_id;"
As far as I could figure out, I need the select function that sqlAlchemy provides but I'm running into trouble.
Of course, in the python representation, we have a DeviceCheckin model with all the fields that are used here. I'd love all the help you might be able to provide.
# Model mapped to the "device_checkins" table; several definitions are elided
# with `...` in this excerpt.
class DeviceCheckin(ModelBase):
__tablename__ = "device_checkins"
# Check-in timestamp (NOT NULL).
time = db.Column(DateTimeUtc(), nullable=False)
# Identifier of the device that checked in (NOT NULL).
device_id = db.Column(sa.BigInteger, nullable=False)
device = db.relationship(...)
# NOTE(review): the SQL above also reads a `version_id` column that is not
# declared here - presumably the same value as `software_version_id`; confirm.
software_version_id = db.Column(...)
software_version = db.relationship(...)
You could draw up the subquery expression as follows:
import sqlalchemy as sa
#...
# Columns of the inner SELECT.  The DeviceCheckin model shown in the question
# only declares `software_version_id`, so it is exposed under the `version_id`
# name used by the SQL; LAG() over the time ordering yields the version of the
# previous check-in.
entities = [
    DeviceCheckin.time,
    DeviceCheckin.software_version_id.label('version_id'),
    sa.func.lag(
        DeviceCheckin.software_version_id
    ).over(
        order_by=DeviceCheckin.time
    ).label('previous_version_id'),
]
# Restrict to one device and the requested time window.
condition = (
    (DeviceCheckin.device_id == '001')
    & (DeviceCheckin.time.between('2020-01-01', '2021-04-01'))
)
# with_entities() takes the columns as positional arguments, hence the `*`.
subq = DeviceCheckin.query.with_entities(*entities).filter(condition).subquery()
Then select from it in the following manner:
# Keep a row when there is no previous check-in or when the version changed.
condition = (
    subq.c.previous_version_id.is_(None)
    | (subq.c.version_id != subq.c.previous_version_id)
)
entities = [subq.c.time, subq.c.version_id]
# subq.select() always selects the whole subquery and interprets its first
# argument as a WHERE clause, so the column list has to go through sa.select()
# (SQLAlchemy 1.4+ calling style; on 1.3 pass the list: sa.select(entities)).
query = sa.select(*entities).where(condition)
results = db.session.execute(query)
Related
I am trying something really simple, but I cannot find the proper way to do it in any of the sqlalchemy orm tutorials I can find. I want to do the equivalent of the following from Adonisjs:
Database.query('SELECT * FROM tbl WHERE user = ? AND age = ?', ['Tester', 18])
How do I do parameters in the below sqlalchemy python code? What am I doing wrong?
from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session

# Credentials and host are separated by "@"; with "#" the URL is not parsed
# as user:password@host.
engine = create_engine("postgresql+psycopg2://test:test@localhost:5432/test", echo=False, future=True)
session = Session(engine)
# Literal values in the comparisons are sent to the database as bound
# parameters, not inlined into the SQL text.
sql = select(User).where(User.first_name == 'Tester').where(User.age == 18)
user = session.execute(sql)
So instead of User.first_name == 'Tester', I'd like it to be a binding placeholder. Same goes for User.age == 18. Then in session.execute(sql) I'd like to add the bindings. Is there a way to do this, or am I approaching this the incorrect way? I want to use orm, so the syntax above. I'm trying to learn the newest sqlalchemy with orm instead of core.
As far as I know, bind parameters like the ones in your qmark style query are only available on text based queries like a TextClause.
ORM and textual queries are compatible via Select.from_statement.
import sqlalchemy as sa
from sqlalchemy import orm
Base = orm.declarative_base()
class User(Base):
    """ORM model for the ``user`` table: id, first name and age."""

    __tablename__ = "user"

    id = sa.Column(sa.Integer, primary_key=True)
    first_name = sa.Column(sa.String)
    age = sa.Column(sa.Integer)

    def __repr__(self):
        """Return a short text form showing first name and age."""
        return "User(first_name={}, age={})".format(self.first_name, self.age)
# In-memory SQLite database with statement echoing, plus the schema.
engine = sa.create_engine("sqlite:///:memory:", echo=True, future=True)
Base.metadata.create_all(engine)

# Two sample rows, flushed so the SELECTs below can see them.
u1 = User(first_name="Alice", age=21)
u2 = User(first_name="Bob", age=20)
session = orm.Session(engine)
session.add_all([u1, u2])
session.flush()

# Textual SQL with named bind parameters, loaded as User entities.
textual = sa.text("SELECT * FROM user WHERE first_name = :fn AND age = :age")
stmt = sa.select(User).from_statement(textual)
params = {"fn": "Alice", "age": 21}
session.execute(stmt, params).scalars().one()

# Expression form with literal values.
stmt = sa.select(User).where(User.first_name == "Alice", User.age == 21)
session.execute(stmt).scalars().one()

# or with variables
fn = "Alice"
age = 21
stmt = sa.select(User).where(User.first_name == fn, User.age == age)
session.execute(stmt).scalars().one()
I have a simple polling script that polls entries based on new ID's in a MSSQL table. I'm using SQLAlchemy's ORM to create a table class and then query that table. I want to be able to add more tables "dynamically" without coding it directly into the method.
My polling function:
def poll_db(model=None, interval=5):
    """Poll a table forever and load the rows added since the last poll.

    Args:
        model: Mapped class to poll; it must expose an ``ID`` column.
            Defaults to ``Transactions``, so ``poll_db()`` behaves as before.
        interval: Seconds to sleep between two polls.

    The function never returns.
    """
    if model is None:
        model = Transactions
    max_id_query = db.query(model.ID).order_by(model.ID.desc()).limit(1)
    # scalar() returns None while the table is empty; fall back to 0 so the
    # comparison below never mixes None and int (IDs are assumed positive).
    last_max_id = max_id_query.scalar() or 0
    # Continually poll for new images to classify
    while True:
        max_id = max_id_query.scalar()
        if max_id is not None and max_id > last_max_id:
            print(
                f"New row(s) found. "
                f"Processing ids {last_max_id + 1} through {max_id}"
            )
            # Insert ML model
            id_query = db.query(model).filter(model.ID > last_max_id)
            df_from_query = pd.read_sql_query(
                id_query.statement, db.bind, index_col='ID')
            print("New query was made")
            last_max_id = max_id
        time.sleep(interval)
My table model:
import sqlalchemy as db
from sqlalchemy import Boolean, Column, ForeignKey, Integer, String, Text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import defer, relationship, query
from database import SessionLocal, engine
insp = db.inspect(engine)
db_list = insp.get_schema_names()
Base = declarative_base(cls=BaseModel)
class Transactions(Base):
    """Mapped class for the ``simulation_data`` table."""

    __tablename__ = 'simulation_data'

    # Column names default to the attribute names, so they are not repeated.
    sender_account = db.Column(db.BigInteger)
    recipient_account = db.Column(db.String)
    sender_name = db.Column(db.String)
    recipient_name = db.Column(db.String)
    date = db.Column(db.DateTime)
    text = db.Column(db.String)
    amount = db.Column(db.Float)
    currency = db.Column(db.String)
    transaction_type = db.Column(db.String)
    fraud = db.Column(db.BigInteger)
    swift_bic = db.Column(db.String)
    recipient_country = db.Column(db.String)
    internal_external = db.Column(db.String)
    ID = db.Column(db.BigInteger, primary_key=True)
QUESTION
How can I pass the table class name "dynamically" in the likes of poll_db(tablename), where tablename='Transactions', and instead of writing similar queries for multiple tables, such as:
query = db.query(Transactions.ID).order_by(Transactions.ID.desc()).limit(1)
query2 = db.query(Transactions2.ID).order_by(Transactions2.ID.desc()).limit(1)
query3 = db.query(Transactions3.ID).order_by(Transactions3.ID.desc()).limit(1)
The tables will have identical structure, but different data.
I can't give you a full example right now (will edit later) but here's one hacky way to do it (the documentation will probably be a better place to check):
def dynamic_table(tablename):
    """Return the mapped class whose ``__tablename__`` equals *tablename*.

    Only classes already registered on ``Base`` (i.e. already imported) can
    be found.

    Args:
        tablename: Table name to look up.

    Returns:
        The matching class, or ``None`` when no registered class uses that
        table name.
    """
    registry = getattr(Base, "_decl_class_registry", None)
    if registry is not None:
        candidates = registry.values()
    else:
        # Declarative bases without the legacy dict (SQLAlchemy 1.4+) expose
        # their mappers through the public ``registry.mappers`` collection.
        candidates = (mapper.class_ for mapper in Base.registry.mappers)
    for cls in candidates:
        # The registry also holds bookkeeping entries that have no
        # __tablename__; skip them instead of raising AttributeError.
        if getattr(cls, "__tablename__", None) == tablename:
            return cls
    return None
Transactions2 = dynamic_table("simulation_data")
assert Transactions2 is Transactions
The returned class is the model you want. Keep in mind that Base can only access the tables that have been subclassed already so if you have them in other modules you need to import them first so they are registered as Base's subclasses.
For selecting columns, something like this should work:
def dynamic_table_with_columns(tablename, *columns):
    """Build a query on the model for *tablename*, optionally on some columns.

    Args:
        tablename: Table name passed to ``dynamic_table``.
        *columns: Attribute names to select; names the model does not have
            are skipped.

    Returns:
        ``db.query`` over the selected attributes, or over the whole model
        when no usable column name was given.
    """
    cls = dynamic_table(tablename)
    # getattr() without a default raises on an unknown name, which made the
    # original "skip missing columns" check unreachable.
    candidates = (getattr(cls, col_name, None) for col_name in columns)
    subset = [column for column in candidates if column is not None]
    # in case no columns were given
    if not subset:
        return db.query(cls)
    return db.query(*subset)
The sqlalchemy core query builder appears to unnest and relocate CTE queries to the "top" of the compiled sql.
I'm converting an existing Postgres query that selects deeply joined data as a single JSON object. The syntax is pretty contrived but it significantly reduces network overhead for large queries. The goal is to build the query dynamically using the sqlalchemy core query builder.
Here's a minimal working example of a nested CTE
with res_cte as (
select
account_0.name acct_name,
(
with offer_cte as (
select
offer_0.id
from
offer offer_0
where
offer_0.account_id = account_0.id
)
select
array_agg(offer_cte.id)
from
offer_cte
) as offer_arr
from
account account_0
)
select
acct_name::text, offer_arr::text
from res_cte
Result
acct_name, offer_arr
---------------------
oliver, null
rachel, {3}
buddy, {4,5}
(my incorrect use of) the core query builder attempts to unnest offer_cte and results in every offer.id being associated with every account_name in the result.
There's no need to re-implement this exact query in an answer, any example that results in a similarly nested CTE would be perfect.
I just implemented the nesting cte feature. It should land with 1.4.24 release.
Pull request: https://github.com/sqlalchemy/sqlalchemy/pull/6709
import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base
# Model declaration
Base = declarative_base()
class Offer(Base):
    """Row of the ``offer`` table, tied to an account through ``account_id``."""

    __tablename__ = "offer"

    id = sa.Column(sa.Integer, primary_key=True)
    account_id = sa.Column(sa.Integer, nullable=False)
class Account(Base):
    """Row of the ``account`` table: primary key and a required name."""

    __tablename__ = "account"

    id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column(sa.TEXT, nullable=False)
# Query construction
# Alias of Account that the inner query correlates to.
account_0 = sa.orm.aliased(Account)
# nesting=True keeps this CTE inside the enclosing scalar subquery instead of
# having it moved to the top-level WITH clause.
offer_cte = (
sa.select(Offer.id)
.where(Offer.account_id == account_0.id)
.select_from(Offer)
.correlate(account_0).cte("offer_cte", nesting=True)
)
# Aggregate the offer ids of the correlated account into one array.
offer_arr = sa.select(sa.func.array_agg(offer_cte.c.id).label("offer_arr"))
# Outer CTE: account name plus the per-account array as a scalar subquery.
res_cte = sa.select(
account_0.name.label("acct_name"),
offer_arr.scalar_subquery().label("offer_arr"),
).cte("res_cte")
# Final SELECT casts both columns to TEXT, as in the hand-written query.
final_query = sa.select(
sa.cast(res_cte.c.acct_name, sa.TEXT),
sa.cast(res_cte.c.offer_arr, sa.TEXT),
)
It constructs this query that returns the result you expect:
WITH res_cte AS
(
SELECT
account_1.name AS acct_name
, (
WITH offer_cte AS
(
SELECT
offer.id AS id
FROM
offer
WHERE
offer.account_id = account_1.id
)
SELECT
array_agg(offer_cte.id) AS offer_arr
FROM
offer_cte
) AS offer_arr
FROM
account AS account_1
)
SELECT
CAST(res_cte.acct_name AS TEXT) AS acct_name
, CAST(res_cte.offer_arr AS TEXT) AS offer_arr
FROM
res_cte
I'm trying to write the following sql query with sqlalchemy ORM:
SELECT * FROM
(SELECT *, row_number() OVER(w)
FROM (select distinct on (grandma_id, author_id) * from contents) as c
WINDOW w AS (PARTITION BY grandma_id ORDER BY RANDOM())) AS v1
WHERE row_number <= 4;
This is what I've done so far:
s = Session()
# One row per (grandma_id, author_id) pair, as a subquery.
unique_users_contents = (s.query(Content).distinct(Content.grandma_id,
Content.author_id)
.subquery())
# Row number inside each grandma_id partition, in random order.
windowed_contents = (s.query(Content,
func.row_number()
.over(partition_by=Content.grandma_id,
order_by=func.random()))
.select_from(unique_users_contents)).subquery()
# NOTE(review): `row_number` below is not defined anywhere in this snippet;
# that is the open point of the question. The SQL above filters with `<= 4`
# while this filter uses `>= 4` - confirm which one is intended.
contents = (s.query(Content).select_from(windowed_contents)
.filter(row_number >= 4)) ## how can I reference the row_number() value?
result = contents
# Python 2 print statement: tab-separated id, grandma_id and author_id.
for content in result:
print "%s\t%s\t%s" % (content.id, content.grandma_id,
content.author_id)
As you can see it's pretty much modeled, but I have no idea how to reference the row_number() result of the subquery from the outer query where. I tried something like windowed_contents.c.row_number and adding a label() call on the window func but it's not working, couldn't find any similar example in the official docs or in stackoverflow.
How can this be accomplished? And also, could you suggest a better way to do this query?
windowed_contents.c.row_number against a label() is how you'd do it, works for me (note the select_entity_from() method is new in SQLA 0.8.2 and will be needed here in 0.9 vs. select_from()):
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class Content(Base):
    """Row of the ``contents`` table, keyed by (grandma_id, author_id)."""

    __tablename__ = 'contents'

    grandma_id = Column(Integer, primary_key=True)
    author_id = Column(Integer, primary_key=True)
s = Session()

# DISTINCT ON (grandma_id, author_id), aliased as "c".
unique_users_contents = (
    s.query(Content)
    .distinct(Content.grandma_id, Content.author_id)
    .subquery('c')
)

# Number the rows of each grandma_id partition in random order; the label
# makes the value reachable as q.c.row_number from the outer query.
q = (
    s.query(
        Content,
        func.row_number().over(
            partition_by=Content.grandma_id,
            order_by=func.random(),
        ).label("row_number"),
    )
    .select_entity_from(unique_users_contents)
    .subquery()
)

q = s.query(Content).select_entity_from(q).filter(q.c.row_number <= 4)
# The call form of print works on both Python 2 and Python 3.
print(q)
I'm trying to adapt some part of a MySQLdb application to sqlalchemy in declarative base. I'm only beginning with sqlalchemy.
The legacy tables are defined something like:
student: id_number*, semester*, stateid, condition, ...
choice: id_number*, semester*, choice_id, school, program, ...
We have 3 tables for each of them (student_tmp, student_year, student_summer, choice_tmp, choice_year, choice_summer), so each pair (_tmp, _year, _summer) contains information for a specific moment.
select *
from `student_tmp`
inner join `choice_tmp` using (`id_number`, `semester`)
My problem is that the information that matters to me is the equivalent of the following select:
SELECT t.*
FROM (
(
SELECT st.*, ct.*
FROM `student_tmp` AS st
INNER JOIN `choice_tmp` as ct USING (`id_number`, `semester`)
WHERE (ct.`choice_id` = IF(right(ct.`semester`, 1)='1', '3', '4'))
AND (st.`condition` = 'A')
) UNION (
SELECT sy.*, cy.*
FROM `student_year` AS sy
INNER JOIN `choice_year` as cy USING (`id_number`, `semester`)
WHERE (cy.`choice_id` = 4)
AND (sy.`condition` = 'A')
) UNION (
SELECT ss.*, cs.*
FROM `student_summer` AS ss
INNER JOIN `choice_summer` as cs USING (`id_number`, `semester`)
WHERE (cs.`choice_id` = 3)
AND (ss.`condition` = 'A')
)
) as t
`*` is used to shorten the select; I'm actually only querying about 7 of the 50 available columns.
This information is used in many flavors... "Do I have new students? Do I still have all students from a given date? Which students are subscribed after the given date? etc..." The result of this select statement is to be saved in another database.
Would it be possible for me to achieve this with a single view-like class? The information is read-only so I don't need to be able to modify/create/delete. Or do I have to declare a class for each table (ending up with 6 classes) and every time I need to query, I have to remember to filter?
Thanks for pointers.
EDIT: I don't have modification access to the database (I cannot create a view). Both databases may not be on the same server (so I cannot create a view on my second DB).
My concern is to avoid the full table scan before filtering on condition and choice_id.
EDIT 2: I've set up declarative classes like this:
class BaseStudent(object):
    """Column mixin shared by the student tables; key is (id_number, semester)."""

    id_number = sqlalchemy.Column(sqlalchemy.String(7), primary_key=True)
    semester = sqlalchemy.Column(sqlalchemy.String(5), primary_key=True)

    unique_id_number = sqlalchemy.Column(sqlalchemy.String(7))
    stateid = sqlalchemy.Column(sqlalchemy.String(12))
    condition = sqlalchemy.Column(sqlalchemy.String(3))
class Student(BaseStudent, Base):
    """Mapped ``student`` table, with its choices and a ``student`` backref."""

    __tablename__ = 'student'

    choices = orm.relationship('Choice', backref='student')
#class StudentYear(BaseStudent, Base):...
#class StudentSummer(BaseStudent, Base):...
class BaseChoice(object):
    """Column mixin shared by the choice tables; key is (id_number, semester)."""

    id_number = sqlalchemy.Column(sqlalchemy.String(7), primary_key=True)
    semester = sqlalchemy.Column(sqlalchemy.String(5), primary_key=True)

    choice_id = sqlalchemy.Column(sqlalchemy.String(1))
    school = sqlalchemy.Column(sqlalchemy.String(2))
    program = sqlalchemy.Column(sqlalchemy.String(5))
class Choice(BaseChoice, Base):
    """Mapped ``choice`` table with a composite foreign key to Student."""

    __tablename__ = 'choice'

    # (id_number, semester) references the same pair of columns on Student.
    __table_args__ = (
        sqlalchemy.ForeignKeyConstraint(
            ['id_number', 'semester'],
            [Student.id_number, Student.semester],
        ),
    )
#class ChoiceYear(BaseChoice, Base): ...
#class ChoiceSummer(BaseChoice, Base): ...
Now, the query that gives me correct SQL for one set of table is:
# Query (StudentYear, ChoiceYear) pairs through a join, keeping students with
# condition 'A' whose choice_id is '4'.
q = session.query(StudentYear, ChoiceYear) \
.select_from(StudentYear) \
.join(ChoiceYear) \
.filter(StudentYear.condition=='A') \
.filter(ChoiceYear.choice_id=='4')
but it throws an exception...
"Could not locate column in row for column '%s'" % key)
sqlalchemy.exc.NoSuchColumnError: "Could not locate column in row for column '*'"
How do I use that query to create myself a class I can use?
If you can create this view on the database, then you simply map the view as if it was a table. See Reflecting Views.
# DB VIEW
CREATE VIEW my_view AS -- #todo: your select statements here
# SA
my_view = Table('my_view', metadata, autoload=True)
# define view object
class ViewObject(object):
    """Plain class whose text form shows ``id_number`` and ``semester``."""

    def __repr__(self):
        """Return ``ViewObject`` followed by the (id_number, semester) tuple."""
        key = (self.id_number, self.semester,)
        return "ViewObject %s" % str(key)
# map the view to the object
view_mapper = mapper(ViewObject, my_view)
# query the view
q = session.query(ViewObject)
for view_row in q:
    # The call form of print works on both Python 2 and Python 3.
    print(view_row)
If you cannot create a VIEW on the database level, you could create a selectable and map the ViewObject to it. The code below should give you the idea:
student_tmp = Table('student_tmp', metadata, autoload=True)
choice_tmp = Table('choice_tmp', metadata, autoload=True)
# your SELECT part with the columns you need
# (Table columns live on the `.c` collection, not on the Table itself)
qry = select([
    student_tmp.c.id_number,
    student_tmp.c.semester,
    student_tmp.c.stateid,
    choice_tmp.c.school,
])
# your INNER JOIN condition
qry = qry.where(student_tmp.c.id_number == choice_tmp.c.id_number).where(student_tmp.c.semester == choice_tmp.c.semester)
# other WHERE clauses
qry = qry.where(student_tmp.c.condition == 'A')
You can create 3 queries like this, then combine them with union_all and use the resulting query in the mapper:
view_mapper = mapper(ViewObject, my_combined_qry)
In both cases you have to ensure that a primary key is properly defined on the view; you might need to override the autoloaded view and specify the primary key explicitly (see the link above). Otherwise you will either receive an error or might not get proper results from the query.
Answer to EDIT-2:
# Same join and filters as in the question; iterating the query yields
# (StudentYear, ChoiceYear) pairs.
qry = (session.query(StudentYear, ChoiceYear).
select_from(StudentYear).
join(ChoiceYear).
filter(StudentYear.condition == 'A').
filter(ChoiceYear.choice_id == '4')
)
The result will be tuple pairs: (Student, Choice).
But if you want to create a new mapped class for the query, then you can create a selectable as the sample above:
student_tmp = StudentTmp.__table__
choice_tmp = ChoiceTmp.__table__
.... (see sample code above)
This is to show what I ended up doing, any comment welcomed.
class JoinedYear(Base):
    """Class mapped to a SELECT over the StudentYear/ChoiceYear join.

    The selectable keeps students with condition 'A' whose choice_id is '4'
    and is aliased as ``YearView``.
    """

    __table__ = (
        sqlalchemy.select(
            [
                StudentYear.id_number,
                StudentYear.semester,
                StudentYear.stateid,
                ChoiceYear.school,
                ChoiceYear.program,
            ],
            from_obj=StudentYear.__table__.join(ChoiceYear.__table__),
        )
        .where(StudentYear.condition == 'A')
        .where(ChoiceYear.choice_id == '4')
        .alias('YearView')
    )
and I will elaborate from there...
Thanks @van