SQLAlchemy Select from Join of two Subqueries - python

Need help translating this SQL query into SQLAlchemy:
select
COALESCE(DATE_1,DATE_2) as DATE_COMPLETE,
QUESTIONS_CNT,
ANSWERS_CNT
from (
(select DATE as DATE_1,
count(distinct QUESTIONS) as QUESTIONS_CNT
from GUEST_USERS
where LOCATION like '%TEXAS%'
and DATE = '2021-08-08'
group by DATE
) temp1
full join
(select DATE as DATE_2,
count(distinct ANSWERS) as ANSWERS_CNT
from USERS
where LOCATION like '%TEXAS%'
and DATE = '2021-08-08'
group by DATE
) temp2
on temp1.DATE_1=temp2.DATE_2
)
Mainly struggling with the join of the two subqueries. I've tried this (just for the join part of the SQL):
query1 = db.session.query(
GUEST_USERS.DATE_WEEK_START.label("DATE_1"),
func.count(GUEST_USERS.QUESTIONS).label("QUESTIONS_CNT")
).filter(
GUEST_USERS.LOCATION.like("%TEXAS%"),
GUEST_USERS.DATE == "2021-08-08"
).group_by(GUEST_USERS.DATE)
query2 = db_session_stg.query(
USERS.DATE.label("DATE_2"),
func.count(USERS.ANSWERS).label("ANSWERS_CNT")
).filter(
USERS.LOCATION.like("%TEXAS%"),
USERS.DATE == "2021-08-08"
).group_by(USERS.DATE)
sq2 = query2.subquery()
query1_results = query1.join(
sq2,
sq2.c.DATE_2 == GUEST_USERS.DATE)
).all()
In this output I receive only the DATE_1 and QUESTIONS_CNT columns. Any idea why the columns selected in the subquery (DATE_2 and ANSWERS_CNT) are not being returned in the result?

Not sure if this is the best solution but this is how I got it to work. Using 3 subqueries essentially.
# Translate the two grouped SELECTs into subqueries, FULL OUTER JOIN them on
# the date, then COALESCE the two date columns in an outer query.

# temp1: distinct QUESTIONS per DATE for guest users whose LOCATION matches TEXAS.
query1 = db.session.query(
    # The SQL being translated selects DATE (the GROUP BY column) as DATE_1;
    # the earlier version selected DATE_WEEK_START, which is not grouped.
    GUEST_USERS.DATE.label("DATE_1"),
    # count(distinct QUESTIONS), matching the SQL; a plain count() would
    # include duplicates.
    func.count(GUEST_USERS.QUESTIONS.distinct()).label("QUESTIONS_CNT")
).filter(
    GUEST_USERS.LOCATION.like("%TEXAS%"),
    GUEST_USERS.DATE == "2021-08-08"
).group_by(GUEST_USERS.DATE)

# temp2: distinct ANSWERS per DATE for registered users, same filters.
# NOTE(review): this query is built from db_session_stg while everything else
# uses db.session -- confirm that both sessions target the same database.
query2 = db_session_stg.query(
    USERS.DATE.label("DATE_2"),
    func.count(USERS.ANSWERS.distinct()).label("ANSWERS_CNT")
).filter(
    USERS.LOCATION.like("%TEXAS%"),
    USERS.DATE == "2021-08-08"
).group_by(USERS.DATE)

sq1 = query1.subquery()
sq2 = query2.subquery()

# Selecting both subqueries is what puts the columns of sq2 into the result.
# full=True renders FULL OUTER JOIN, so a date present on only one side is
# kept (the default join is an INNER JOIN and would drop it).
query3 = db.session.query(sq1, sq2).join(
    sq2,
    sq2.c.DATE_2 == sq1.c.DATE_1,
    full=True)
sq3 = query3.subquery()

# Outer SELECT: whichever side has the date supplies DATE_COMPLETE.
query4 = db.session.query(
    func.coalesce(
        sq3.c.DATE_1, sq3.c.DATE_2).label("DATE_COMPLETE"),
    sq3.c.QUESTIONS_CNT,
    sq3.c.ANSWERS_CNT
)
results = query4.all()

Related

SQLAlchemy subquery access outer tables

I'm having trouble converting this SQL into a valid SQLAlchemy query:
select *
from A
join B on B.Id = (
select top 1 Id
from B
where B.name = A.name
order by B.date
)
I've tried using the subquery but it fails:
query = session.query(A, B)
sub_query = session.query(B)
sub_query = sub_query.filter(B.name == A.name)
sub_query = sub_query.order_by(B.date.desc()).limit(1)
sub_query = sub_query.subquery()
query = query.join(B, B.id == sub_query.c.Id)
Because the subquery references A, SQLAlchemy adds A to the subquery's FROM clause instead of using the A from the outer query.
I've seen many SQLAlchemy subquery examples but none of them uses the outer fields.
By using correlate(A) in the subquery, we tell SQLAlchemy to reuse A from the outer query rather than adding it to the subquery's own FROM clause.
To make the join work we need to compare against the Id returned by the subquery, so the subquery should select only Id, and we use scalar_subquery() to convert it to a scalar subquery:
# Outer query: pairs of (A, B) rows.
query = session.query(A, B)

# Inner query: select only B's id so the result can be used as a single scalar
# value in the join condition below.
# NOTE(review): the original snippet mixed B.Id here with B.id in the join;
# the attribute name is case-sensitive, so both now use B.id (the spelling the
# question's own code uses) -- confirm against the model definition.
sub_query = session.query(B.id)
sub_query = sub_query.filter(B.name == A.name)
# NOTE(review): the SQL in the question orders by B.date ascending, while this
# orders descending -- confirm which row is wanted.
sub_query = sub_query.order_by(B.date.desc()).limit(1)
# correlate(A) keeps A out of the subquery's FROM clause so that A.name refers
# to the row of the enclosing query.
sub_query = sub_query.correlate(A)

# Join B on the single id produced by the correlated scalar subquery.
query = query.join(B, B.id == sub_query.scalar_subquery())

Sqlalchemy subquery

I'm new in sqlalchemy, please help. I have this SQL query:
SELECT * FROM films
WHERE (
SELECT count(film_to_genre.id) FROM film_to_genre
WHERE films.id = film_to_genre.film_id AND film_to_genre.genre_id IN (2, 14)
) = 2
And I want to write it in SqlAlchemy. This is what I've tried:
db.query(models.Film)
.filter(db.query(func.count(models.FilmToGenre.id))
.filter(and_(models.Film.id == models.FilmToGenre.film_id,
models.FilmToGenre.genre_id.in_(genre_ids)))
.subquery().count == len(genre_ids)))
It converts to this SQL (problem in clause "WHERE false", it evaluates subquery immediately):
SELECT films.id AS films_id, films.kinopoisk_id AS films_kinopoisk_id, films.title AS films_title, films.year AS films_year, films.budget AS films_budget, films.run_time AS films_run_time, films.description AS films_description
FROM films JOIN film_to_genre ON films.id = film_to_genre.film_id
WHERE false
LIMIT %(param_1)s OFFSET %(param_2)s
Add .c to access the column of the subquery:
Instead of subquery().count == len(genre_ids)) please use subquery().c.count == len(genre_ids))
I do, however, prefer the as_scalar() option, which turns the subquery into a scalar expression that can be compared directly:
genre_ids = [2, 14]

# Per-film count of film_to_genre rows whose genre is one of genre_ids.
# Passing several criteria to filter() joins them with AND.
matching_genres = session.query(func.count(FilmToGenre.id)).filter(
    FilmToGenre.film_id == Film.id,
    FilmToGenre.genre_id.in_(genre_ids),
)

# Turn the count query into a scalar expression usable inside a comparison.
sq = matching_genres.as_scalar()

# Keep only the films whose count equals the number of requested genres.
q = session.query(Film).filter(sq == len(genre_ids))

SQLAlchemy Joining with subquery issue

I am trying to translate SQL into SQLAlchemy. The SQL version of the query I want is as follows:
SELECT * from calendarEventAttendee
JOIN calendarEventAttendanceActual ON calendarEventAttendanceActual.id = calendarEventAttendee.attendanceActualId
LEFT JOIN
(SELECT bill.id, bill.personId, billToEvent.eventId FROM bill JOIN billToEvent ON bill.id = billToEvent.billId) b
ON b.eventId = calendarEventAttendee.eventId AND b.personId = calendarEventAttendee.personId
WHERE b.id is NULL
My SQLAlchemy query is as follows:
query = db.session.query(CalendarEventAttendee).join(CalendarEventAttendanceActual)
sub_query = db.session.query(Bill, BillToEvent).join(BillToEvent, BillToEvent.billId == Bill.id).subquery()
query = query.outerjoin(sub_query, and_(sub_query.Bill.personId == CalendarEventAttendee.personId, Bill.eventId == CalendarEventAttendee.eventId))
results = query.all()
I am getting an error AttributeError: 'Alias' object has no attribute 'Bill'
If I adjust the SQLAlchemy query to the following:
sub_query = db.session.query(Bill, BillToEvent).join(BillToEvent, BillToEvent.billId == Bill.id).subquery()
query = query.outerjoin(sub_query, and_(sub_query.Bill.personId == CalendarEventAttendee.personId, sub_query.BillToEvent.eventId == CalendarEventAttendee.eventId))
results = query.all()
I get an error AttributeError: Bill
Any help would be appreciated, thanks!
Once you call subquery(), there is no access to objects, but only to columns via .c.{column_name} accessor.
Do the following for sub_query instead: load only the columns you need in order to avoid any name collisions:
# Select only the three columns the outer query needs; the resulting subquery
# exposes them as sub_query.c.id, sub_query.c.personId and sub_query.c.eventId.
bill_columns = (Bill.id, Bill.personId, BillToEvent.eventId)
sub_query = (
    db.session.query(*bill_columns)
    .join(BillToEvent, BillToEvent.billId == Bill.id)
    .subquery()
)
Then in your query use column names with .c.column_name:
# LEFT JOIN the bill subquery on both person and event, addressing its columns
# through the .c accessor.
query = query.outerjoin(
    sub_query, and_(
        sub_query.c.personId == CalendarEventAttendee.personId,
        sub_query.c.eventId == CalendarEventAttendee.eventId)
).filter(
    # WHERE b.id IS NULL from the original SQL: keep only the attendees for
    # which the outer join found no matching bill.
    sub_query.c.id.is_(None)
)
results = query.all()

How can I get the youngest objects from SQLAlchemy?

Each row in my table has a date. The dates are not unique; the same date can appear more than once.
I want to get all objects with the youngest (most recent) date.
My solution works, but I am not sure whether it is an elegant SQLAlchemy way.
query = _session.query(Table._date) \
.order_by(Table._date.desc()) \
.group_by(Table._date)
# this is the younges date (type is date.datetime)
young = query.first()
query = _session.query(Table).filter(Table._date==young)
result = query.all()
Isn't there a way to put all this in one query object or something like that?
You need a HAVING clause, and you need to import func to get access to the max function.
Your query will then be:
from sqlalchemy import func
# Group the rows by date and keep the groups whose date equals MAX(_date).
# The closing parenthesis of having(...) was missing in the earlier version,
# which made the statement a syntax error.
# NOTE(review): MAX(_date) is evaluated per group here, so it equals the
# group's own date -- verify that this really restricts the result to the most
# recent date before relying on it.
stmt = _session.query(Table) \
    .group_by(Table._date) \
    .having(Table._date == func.max(Table._date))
This produces a sql statement like the following.
SELECT my_table.*
FROM my_table
GROUP BY my_table._date
HAVING my_table._date = MAX(my_table._date)
If you construct your SQL statement with select(), you can examine the SQL it produces by converting the statement to a string, as shown below. (I'm not sure whether this also works for statements built with Query.)
str(stmt)
Two ways of doing this using a sub-query:
# Two alternative ways to build a scalar subquery holding the latest date.
# Only one is needed: version-2 rebinds `subquery`, so as written the final
# query uses version-2.
# Note: the alias is not required; it is used here only to give the inner
# table an explicit `name` ("T1") in the generated SQL.
T1 = aliased(MyTable, name="T1")
# version-1: take MAX(_date) over the aliased table.
subquery = (session.query(func.max(T1._date).label("max_date"))
.as_scalar()
)
# version-2: order by _date descending and keep only the first row.
subquery = (session.query(T1._date.label("max_date"))
.order_by(T1._date.desc())
.limit(1)
.as_scalar()
)
# Select every MyTable row whose _date equals the scalar subquery's value.
qry = session.query(MyTable).filter(MyTable._date == subquery)
results = qry.all()
The output should be similar to:
# version-1
SELECT my_table.id AS my_table_id, my_table.name AS my_table_name, my_table._date AS my_table__date
FROM my_table
WHERE my_table._date = (
SELECT max("T1"._date) AS max_date
FROM my_table AS "T1")
# version-2
SELECT my_table.id AS my_table_id, my_table.name AS my_table_name, my_table._date AS my_table__date
FROM my_table
WHERE my_table._date = (
SELECT "T1"._date AS max_date
FROM my_table AS "T1"
ORDER BY "T1"._date DESC LIMIT ? OFFSET ?
)

How can I write this SQL query in SQLAlchemy?

I wrote the following SQL query. How can I do the same thing in SQLAlchemy?
SELECT
T.campaign_id,
T.spend,
T.id
FROM activity_log T
WHERE T.end_time = (
SELECT MAX( T1.end_time ) FROM activity_log T1
WHERE T1.campaign_id = T.campaign_id and cast(T1.end_time as DATE) = cast(T.end_time as DATE)
);
Below should get you started:
# Two aliases of the same mapped class: T is the row of the outer query,
# T1 is the set of rows scanned by the correlated subquery.
T = aliased(ActivityLog, name="T")
T1 = aliased(ActivityLog, name="T1")

# Correlation criteria: same campaign and same calendar day as the outer row.
same_campaign = T1.campaign_id == T.campaign_id
same_day = cast(T1.end_time, Date) == cast(T.end_time, Date)

# Latest end_time among the rows matching the outer row; correlate(T) makes
# the subquery refer to the outer T instead of adding T to its own FROM clause.
subquery = (
    session.query(func.max(T1.end_time).label("end_time"))
    .filter(same_campaign, same_day)
    .correlate(T)
    .as_scalar()
)

# Keep only the rows whose end_time is that per-campaign, per-day maximum.
qry = session.query(T.campaign_id, T.spend, T.id).filter(T.end_time == subquery)

Categories