I'm trying to update a course's points with the sum of its lessons' points.
It works perfectly if I select a particular course ID like this:
BEGIN
UPDATE course
SET points = (SELECT COALESCE(SUM("lesson"."points"), 0) AS "sum_points" FROM "course" LEFT OUTER JOIN "lesson" ON ("course"."id" = "lesson"."course_id") WHERE "course"."id" = 7)
WHERE "course"."id" = 7;
RETURN NULL;
END;
But it doesn't work with OLD, which is the row being updated. I want to update the points of whichever course is being updated.
BEGIN
UPDATE course
SET points = (SELECT COALESCE(SUM("lesson"."points"), 0) AS "sum_points" FROM "course" LEFT OUTER JOIN "lesson" ON ("course"."id" = "lesson"."course_id") WHERE "course"."id" = OLD."course_id")
WHERE "course"."id" = OLD."course_id";
RETURN NULL;
END;
I'm using django-pgtrigger: https://pypi.org/project/django-pgtrigger/
@pgtrigger.register(
pgtrigger.Trigger(
name="add_course_point",
level=pgtrigger.Statement,
when=pgtrigger.After,
operation=pgtrigger.Update,
func=f"""
UPDATE course
SET points = (SELECT COALESCE(SUM("lesson"."points"), 0) AS "sum_points" FROM "course" LEFT OUTER JOIN "lesson" ON ("course"."id" = "lesson"."course_id") WHERE "course"."id" = OLD."course_id")
WHERE "course"."id" = OLD."course_id";
RETURN NULL;
"""
)
)
OLD and NEW are always NULL in a statement-level trigger.
Replace level=pgtrigger.Statement with level=pgtrigger.Row:
@pgtrigger.register(
pgtrigger.Trigger(
name="add_course_point",
level=pgtrigger.Row,
when=pgtrigger.After,
operation=pgtrigger.Update,
func=f"""
UPDATE course
SET points = (SELECT COALESCE(SUM("lesson"."points"), 0) AS "sum_points" FROM "course" LEFT OUTER JOIN "lesson" ON ("course"."id" = "lesson"."course_id") WHERE "course"."id" = OLD."course_id")
WHERE "course"."id" = OLD."course_id";
RETURN NULL;
"""
)
)
Or add referencing=pgtrigger.Referencing(old='old_table_name') and then modify your function to read the affected rows from that transition table.
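A minimal sketch of that statement-level variant, assuming the trigger is registered on the lesson model and naming the transition table old_values (both names are illustrative, not taken from the question):

@pgtrigger.register(
    pgtrigger.Trigger(
        name="add_course_point",
        level=pgtrigger.Statement,
        when=pgtrigger.After,
        operation=pgtrigger.Update,
        referencing=pgtrigger.Referencing(old="old_values"),
        func="""
            UPDATE course
            SET points = (
                SELECT COALESCE(SUM(lesson.points), 0)
                FROM lesson
                WHERE lesson.course_id = course.id
            )
            -- only recompute courses whose lessons were touched by this statement
            WHERE course.id IN (SELECT DISTINCT course_id FROM old_values);
            RETURN NULL;
        """,
    )
)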
I have registered two triggers. The first is a before-insert trigger that handles newly inserted lessons. The second is an after trigger that handles updated and deleted lessons. Either way, the course's points end up equal to the sum of its lessons' points.
@pgtrigger.register(
pgtrigger.Trigger(
name="add_course_point",
operation=pgtrigger.Insert,
level=pgtrigger.Row,
when=pgtrigger.Before,
func=f"""
UPDATE course
SET points = (points + NEW.points)
WHERE "course"."id" = NEW.course_id;
RETURN NEW;
""",
),
pgtrigger.Trigger(
name="update_course_point",
operation=(pgtrigger.Update | pgtrigger.Delete),
level=pgtrigger.Row,
when=pgtrigger.After,
func=f"""
UPDATE course
SET points = (SELECT COALESCE(SUM("lesson"."points"), 0) AS "sum_points" FROM "course" LEFT OUTER JOIN "lesson" ON ("course"."id" = "lesson"."course_id") WHERE "course"."id" = OLD.course_id)
WHERE "course"."id" = OLD.course_id;
RETURN NULL;
""",
),
)
Related
I am trying to use Python to run a parameterized query with a list of values. Here is the code:
loan_records =['604150062','604150063','604150064','604150065','604150066','604150067','604150069','604150070']
borr_query = "select distinct a.nbr_aus, cast(a.nbr_trans_aus as varchar(50)) nbr_trans_aus, c.amt_finl_item, case when a.cd_idx in (-9999, 0) then null else a.cd_idx end as cd_idx, a.rate_curr_int, case when a.rate_gr_mrtg_mrgn = 0 then null else a.rate_gr_mrtg_mrgn end as rate_gr_mrtg_mrgn, a.rate_loln_max_cap, case when a.rate_perdc_cap = 0 then null else a.rate_perdc_cap end as rate_perdc_cap from db2mant.i_lp_trans a left join db2mant.i_lp_trans_borr b on a.nbr_aus = b.nbr_aus and a.nbr_trans_aus = b.nbr_trans_aus left join db2mant.i_lp_finl_item c on a.nbr_aus = c.nbr_aus and a.nbr_trans_aus = c.nbr_trans_aus where a.nbr_trans_aus in (?) and c.cd_finl_item = 189"
ODS.execute(borr_query, loan_records)
#PML.execute(PML_SUBMN_Query, (first_evnt, last_evnt, x))
ODS_records = ODS.fetchall()
ODS_records = pd.DataFrame(ODS_records, columns=['nbr_aus', 'nbr_trans_aus', 'amt_finl_item', 'cd_idx', 'rate_curr_int', 'rate_gr_mrtg_mrgn', 'rate_loln_max_cap', 'rate_perdc_cap'])
When I try to run this code, I get the following error message:
(error message image)
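A likely cause with this pattern is binding a list of eight values to a single "in (?)" placeholder; each value needs its own marker. A hedged sketch of the fix, assuming ODS is a DB-API cursor (e.g. pyodbc) that uses ? parameter markers:

# Build one "?" per value so the IN clause matches the number of parameters.
placeholders = ", ".join("?" * len(loan_records))
borr_query_expanded = borr_query.replace("in (?)", "in ({})".format(placeholders))
ODS.execute(borr_query_expanded, loan_records)
ODS_records = ODS.fetchall()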
As above, I'm trying to use pd.read_sql to query our MySQL database, and I'm getting an error about double/single quotes.
When I remove the % wildcards from the LIKE clauses (lines 84-87) the query runs, but they are needed. I know I need to format the strings, but I don't know how within such a big query.
Here's the query:
SELECT
s.offer_id,
s.cap_id,
vi.make,
vi.model,
vi.derivative,
i.vehicle_orders,
s.lowest_offer,
CASE
WHEN f.previous_avg = f.previous_low THEN "n/a"
ELSE FORMAT(f.previous_avg, 2)
END as previous_avg,
f.previous_low,
CASE
WHEN ( ( (s.lowest_offer - f.previous_avg) / f.previous_avg) * 100) = ( ( (s.lowest_offer - f.previous_low) / f.previous_low) * 100) THEN "n/a"
ELSE CONCAT(FORMAT( ( ( (s.lowest_offer - f.previous_avg) / f.previous_avg) * 100), 2), "%")
END as diff_avg,
CONCAT(FORMAT( ( ( (s.lowest_offer - f.previous_low) / f.previous_low) * 100), 2), "%") as diff_low,
s.broker,
CASE
WHEN s.in_stock = '1' THEN "In Stock"
ELSE "Factory Order"
END as in_stock,
CASE
WHEN s.special IS NOT NULL THEN "Already in Specials"
ELSE "n/a"
END as special
FROM
( SELECT o.id as offer_id,
o.cap_id as cap_id,
MIN(o.monthly_payment) as lowest_offer,
b.name as broker,
o.stock as in_stock,
so.id as special
FROM
offers o
INNER JOIN brands b ON ( o.brand_id = b.id )
LEFT JOIN special_offers so ON ( so.cap_id = o.cap_id )
WHERE
( o.date_modified >= DATE_ADD(NOW(), INTERVAL -1 DAY) OR o.date_created >= DATE_ADD(NOW(), INTERVAL -1 DAY) )
AND o.deposit_value = 9
AND o.term = 48
AND o.annual_mileage = 8000
AND o.finance_type = 'P'
AND o.monthly_payment > 100
GROUP BY
o.cap_id
ORDER BY
special DESC) s
INNER JOIN
( SELECT o.cap_id as cap_id,
AVG(o.monthly_payment) as previous_avg,
MIN(o.monthly_payment) as previous_low
FROM
offers o
WHERE
o.date_modified < DATE_ADD(NOW(), INTERVAL -1 DAY)
AND o.date_modified >= DATE_ADD(NOW(), INTERVAL -1 WEEK)
AND o.deposit_value = 9
AND o.term = 48
AND o.annual_mileage = 8000
AND o.finance_type = 'P'
AND o.monthly_payment > 100
GROUP BY
o.cap_id ) f ON ( s.cap_id = f.cap_id )
LEFT JOIN
( SELECT a.cap_id as cap_id,
v.manufacturer as make,
v.model as model,
v.derivative as derivative,
COUNT(*) as vehicle_orders
FROM
( SELECT o.id,
o.name as name,
o.email as email,
o.date_created as date,
SUBSTRING_INDEX(SUBSTRING(offer_serialized, LOCATE("capId", offer_serialized) +12, 10), '"', 1) as cap_id
FROM moneyshake.orders o
WHERE o.name NOT LIKE 'test%'
AND o.email NOT LIKE 'jawor%'
AND o.email NOT LIKE 'test%'
AND o.email NOT LIKE '%moneyshake%'
AND o.phone IS NOT NULL
AND o.date_created > DATE_ADD(NOW(), INTERVAL -1 MONTH)
) a JOIN moneyshake.vehicles_view v ON a.cap_id = v.id
GROUP BY
v.manufacturer,
v.model,
v.derivative,
a.cap_id) i ON ( f.cap_id = i.cap_id )
INNER JOIN
( SELECT v.id as id,
v.manufacturer as make,
v.model as model,
v.derivative as derivative
FROM moneyshake.vehicles_view v
GROUP BY v.id ) vi ON s.cap_id = vi.id
WHERE
( ( s.lowest_offer - f.previous_low ) / f.previous_low) * 100 <= -15
GROUP BY
s.cap_id
Thanks!
That error occurs when the DBAPI layer (e.g., mysqlclient) natively uses the "format" paramstyle, so the percent sign (%) is misinterpreted as a format character instead of a LIKE wildcard.
The fix is to wrap the SQL statement in a SQLAlchemy text() object. For example, this will fail:
import pandas as pd
import sqlalchemy as sa
engine = sa.create_engine("mysql+mysqldb://scott:tiger@localhost:3307/mydb")
sql = """\
SELECT * FROM million_rows
WHERE varchar_col LIKE 'record00000%'
ORDER BY id
"""
df = pd.read_sql_query(sql, engine)
but simply changing the read_sql_query() call to
df = pd.read_sql_query(sa.text(sql), engine)
will work.
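If you'd rather keep the wildcard out of the SQL text entirely, another option (a sketch against the same hypothetical million_rows table) is to bind the LIKE pattern as a parameter:

import pandas as pd
import sqlalchemy as sa

engine = sa.create_engine("mysql+mysqldb://scott:tiger@localhost:3307/mydb")
sql = sa.text("""\
SELECT * FROM million_rows
WHERE varchar_col LIKE :pattern
ORDER BY id
""")
# The % wildcard lives in the bound value, so the driver never sees it as a format character.
df = pd.read_sql_query(sql, engine, params={"pattern": "record00000%"})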
I'm trying to pass 2 variables into an SQL query like so:
query = """
Select Distinct
sp.NAME,
sp.STUDY,
sp.DISEASE_ONTOLOGY_TERM,
sv.GENE,
sv.CDS_EFFECT,
sv.PROTEIN_EFFECT,
rep.STATUS,
sv.FRACTION_READS,
sv.DEPTH,
cvmship.REMOVED
From
SPECIMEN sp
inner join CURATION_MANAGER cm on (cm.SPECIMEN_ID = sp.SPECIMEN_ID)
inner join CURATION_VERSION cv on (cv.CURATION_VERSION_ID = cm.LATEST_VERSION_ID)
inner join CURATION_VERSION_MEMBERSHIP cvmship on (cvmship.VERSION_ID = cv.CURATION_VERSION_ID)
inner join CURATION_VERSION_MEMBER cvmer on (cvmer.CURATION_VERSION_MEMBER_ID = cvmship.MEMBER_ID)
inner join REPORTABLE rep on (rep.CURATION_VERSION_MEMBER_ID = cvmer.CURATION_VERSION_MEMBER_ID)
inner join SHORT_VARIANT sv on (sv.REPORTABLE_ID = rep.CURATION_VERSION_MEMBER_ID)
inner join (
Select
sp.SPECIMEN_ID,
cqr.STATUS
From
SPECIMEN sp
inner join CURATION_MANAGER cm on (cm.SPECIMEN_ID = sp.SPECIMEN_ID)
inner join CURATION_VERSION cv on (cv.CURATION_VERSION_ID = cm.LATEST_VERSION_ID)
inner join CURATION_VERSION_MEMBERSHIP cvmship on (cvmship.VERSION_ID = cv.CURATION_VERSION_ID)
inner join CURATION_VERSION_MEMBER cvmer on (cvmer.CURATION_VERSION_MEMBER_ID = cvmship.MEMBER_ID)
inner join CURATION_QC_RESULT cqr on (cqr.CURATION_VERSION_MEMBER_ID = cvmer.CURATION_VERSION_MEMBER_ID)
) cqr on (cqr.SPECIMEN_ID = sp.SPECIMEN_ID)
Where sp.ASSIGNED_INDEX is not null
AND sp.NAME like 'TRF%'
AND LENGTH(sp.NAME) = 12
AND cv.STATUS = 'final'
AND (cqr.STATUS = 'Pass' or cqr.STATUS = 'Qualified')
AND sp.STUDY like '%CLINICAL%'
AND sv.GENE = '%s'
AND sv.PROTEIN_EFFECT = '%s'
order by sp.name desc
""" % (gene, proEff)
When I run this script I get:
File "fetchDEVDB.py", line 57, in <module>
""" % (gene, proEff)
ValueError: unsupported format character ''' (0x27) at index 1481
I was thinking maybe it's trying to interpret the % as a format character, but I tried using %% to surround the %s and I get the same error. Any ideas?
Thanks
You need to escape the % wildcards in the LIKE expressions (by doubling them), not the %s format placeholders:
...AND sp.NAME like 'TRF%%'
...
...AND sp.STUDY like '%%CLINICAL%%'
In this
AND sp.NAME like 'TRF%'
AND LENGTH(sp.NAME) = 12
AND cv.STATUS = 'final'
AND (cqr.STATUS = 'Pass' or cqr.STATUS = 'Qualified')
AND sp.STUDY like '%CLINICAL%'
AND sv.GENE = '%s'
AND sv.PROTEIN_EFFECT = '%s'
I see two literal % wildcards that Python tries to interpret as format characters:
TRF%
%CLINICAL%
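In other words, every literal % must be doubled so it survives Python's % formatting, while the %s placeholders are still substituted. A small runnable sketch, using hypothetical values in place of gene and proEff:

tail = """
AND sp.NAME like 'TRF%%'
AND sp.STUDY like '%%CLINICAL%%'
AND sv.GENE = '%s'
AND sv.PROTEIN_EFFECT = '%s'
""" % ("SOME_GENE", "SOME_EFFECT")  # hypothetical values for gene and proEff

print(tail)  # the %% collapse back to single % wildcards; the %s are filled in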
Continuing from here.
I have the following query expression in Django:
fields = {
'impressions': models.Sum('impressions'),
'clicks': models.Sum('clicks'),
}
calc_fields = {
'ctr': models.Case(models.When(impressions = 0, then = 0), default = 1.0 * models.F('clicks') / models.F('impressions'), output_field = models.FloatField()),
}
Stats.objects.values('product').annotate(**fields).annotate(**calc_fields)
This correctly calculates aggregated fields impressions, clicks and ctr grouped by product and produces the following postgres query:
SELECT
"stats"."product_id",
SUM("stats"."impressions") AS "impressions",
SUM("stats"."clicks") AS "clicks",
CASE WHEN SUM("stats"."impressions") = 0 THEN 0 ELSE ((1.0 * SUM("stats"."clicks")) / SUM("stats"."impressions")) END AS "ctr"
FROM "stats"
GROUP BY "stats"."product_id";
Now all I want is to modify this expression so that, when no grouping is provided, it just returns aggregated values across the whole table.
And it appears to be harder than I expected. The rest of the question can be skipped, I'm just listing what I've tried.
1) First attempt to just drop values():
Stats.objects.annotate(**fields).annotate(**calc_fields)
This produces an error:
The annotation 'impressions' conflicts with a field on the model.
This probably happens because it now tries to return full Stats objects with all their fields rather than only the custom fields I listed before. Maybe there is a way to disable that and just return the custom fields?
I tried renaming the fields, but that results in grouping by the primary key, so it returns every record in the table rather than a single row.
2) Maybe I need to swap annotate with aggregate.
Stats.objects.aggregate(**fields).aggregate(**calc_fields)
This produces an error:
Error: 'dict' object has no attribute 'aggregate'
OK, maybe they cannot be chained; let's merge the fields together and swap F for Sum inside the Case:
fields = {
'impressions': models.Sum('impressions'),
'clicks': models.Sum('clicks'),
'ctr': models.Case(models.When(impressions = 0, then = 0), default = 1.0 * models.Sum('clicks') / models.Sum('impressions'), output_field = models.FloatField()),
}
Stats.objects.aggregate(**fields)
This appears to work correctly. But all hell breaks loose again when I add extra fields (all values are the same, only the names differ):
fields = {
'impressions': models.Sum('impressions'),
'impressions2': models.Sum('impressions'),
'clicks': models.Sum('clicks'),
'clicks2': models.Sum('clicks'),
'ctr': models.Case(models.When(impressions = 0, then = 0), default = 1.0 * models.Sum('clicks') / models.Sum('impressions'), output_field = models.FloatField()),
'ctr2': models.Case(models.When(impressions = 0, then = 0), default = 1.0 * models.Sum('clicks') / models.Sum('impressions'), output_field = models.FloatField()),
}
It seems the problem is caused by having multiple models.When(impressions = 0) clauses; it generates the wrong SQL:
SELECT
CASE WHEN "stats"."impressions" = 0 THEN 0 ELSE ((1.0 * SUM("stats"."clicks")) / SUM("stats"."impressions")) END AS "ctr",
CASE WHEN SUM("fstats"."impressions") = 0 THEN 0 ELSE ((1.0 * SUM("stats"."clicks")) / SUM("impressions")) END AS "ctr2",
SUM("stats"."impressions") AS "impressions",
SUM("stats"."impressions") AS "impressions2",
SUM("stats"."clicks") AS "clicks",
SUM("stats"."clicks") AS "clicks2",
FROM "stats";
Notice how the CASE for ctr is incorrectly calculated as:
CASE WHEN "stats"."impressions" = 0
While for ctr2 it is correct:
CASE WHEN SUM("stats"."impressions") = 0
Which causes the database error:
column "stats.impressions" must appear in the GROUP BY clause or be used in an aggregate function
3) Renaming all aggregated fields to be different from model fields so there is no chance of confusion:
fields = {
'impressions_total': models.Sum('impressions'),
'impressions_total2': models.Sum('impressions'),
'clicks_total': models.Sum('clicks'),
'clicks_total2': models.Sum('clicks'),
'ctr': models.Case(models.When(impressions_total = 0, then = 0), default = 1.0 * models.Sum('clicks') / models.Sum('impressions'), output_field = models.FloatField()),
'ctr2': models.Case(models.When(impressions_total = 0, then = 0), default = 1.0 * models.Sum('clicks') / models.Sum('impressions'), output_field = models.FloatField()),
}
Stats.objects.aggregate(**fields)
This produces error:
Error: Cannot resolve keyword 'impressions_total' into field. Choices are: clicks, clicks_total, impressions, impressions_total2, product, product_id
If I leave a single ctr without ctr2, it works fine. Again, adding extra fields breaks it for some reason.
Any tips? I would prefer it to just return the custom fields rather than full objects.
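One workaround sketch, assuming it is acceptable to compute ctr in Python rather than in SQL: aggregate only the sums and derive the ratio afterwards, which sidesteps the Case/When resolution problem entirely:

from django.db import models

# A minimal sketch: one dict of totals for the whole table, ctr derived in Python.
totals = Stats.objects.aggregate(
    impressions_total=models.Sum('impressions'),
    clicks_total=models.Sum('clicks'),
)
impressions = totals['impressions_total'] or 0
clicks = totals['clicks_total'] or 0
totals['ctr'] = float(clicks) / impressions if impressions else 0.0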
I have a table called moviegenre which looks like:
moviegenre:
- movie (FK movie.id)
- genre (FK genre.id)
I have an ORM-generated query which returns all movie.imdb_id and genre.id pairs for movies that share genres with a given movie.imdb_id.
SELECT "movie"."imdb_id",
"moviegenre"."genre_id"
FROM "moviegenre"
INNER JOIN "movie"
ON ( "moviegenre"."movie_id" = "movie"."id" )
WHERE ( "movie"."imdb_id" IN (SELECT U0."imdb_id"
FROM "movie" U0
INNER JOIN "moviegenre" U1
ON ( U0."id" = U1."movie_id" )
WHERE ( U0."last_ingested_on" IS NOT NULL
AND NOT ( U0."imdb_id" IN
( 'tt0169547' ) )
AND NOT ( U0."imdb_id" IN
( 'tt0169547' ) )
AND U1."genre_id" IN ( 2, 10 ) ))
AND "moviegenre"."genre_id" IN ( 2, 10 ) )
The problem is that I'll get results in the format:
[
('imdbid22', 'genreid1'),
('imdbid22', 'genreid2'),
('imdbid44', 'genreid1'),
('imdbid55', 'genreid8'),
]
Is there a way, within the query itself, to group all of the genre ids into a list under each movie.imdb_id? I'd like to do the grouping in the query.
Currently I'm doing it in my web app code (Python), which is extremely slow when 50k+ rows are returned.
[
('imdbid22', ['genreid1', 'genreid2']),
('imdbid44', 'genreid1'),
('imdbid55', 'genreid8'),
]
thanks in advance!
edit:
here's the python code which runs against the current results
results_list = []
for item in movies_and_genres:
    genres_in_common = len(set([
        i['genre__id'] for i in movies_and_genres
        if i['movie__imdb_id'] == item['movie__imdb_id']
    ]))
    imdb_id = item['movie__imdb_id']
    if genres_in_common >= min_in_comon:
        result_item = {
            'movie.imdb_id': imdb_id,
            'count': genres_in_common
        }
        if result_item not in results_list:
            results_list.append(result_item)
return results_list
select m.imdb_id, array_agg(g.genre_id) as genre_id
from
moviegenre g
inner join
movie m on g.movie_id = m.id
where
m.last_ingested_on is not null
and not m.imdb_id in ('tt0169547')
and not m.imdb_id in ('tt0169547')
and g.genre_id in (2, 10)
group by m.imdb_id
array_agg will create an array of all the genre_ids of a certain imdb_id:
http://www.postgresql.org/docs/current/interactive/functions-aggregate.html#FUNCTIONS-AGGREGATE-TABLE
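If you want the same thing without dropping to raw SQL, newer Django versions expose this aggregate as ArrayAgg. A sketch assuming PostgreSQL and hypothetical Movie/MovieGenre model names mirroring the tables above:

from django.contrib.postgres.aggregates import ArrayAgg

genre_lists = (
    MovieGenre.objects
    .filter(genre_id__in=[2, 10], movie__last_ingested_on__isnull=False)
    .exclude(movie__imdb_id='tt0169547')
    .values('movie__imdb_id')
    .annotate(genre_ids=ArrayAgg('genre_id'))
)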
I hope this Python code will be fast enough:
movielist = [
    ('imdbid22', 'genreid1'),
    ('imdbid22', 'genreid2'),
    ('imdbid44', 'genreid1'),
    ('imdbid55', 'genreid8'),
]
movie_dict = {}
for imdb_id, genre_id in movielist:
    if imdb_id not in movie_dict:
        movie_dict[imdb_id] = [genre_id]
    else:
        movie_dict[imdb_id].append(genre_id)
print(movie_dict)
Output:
{'imdbid44': ['genreid1'], 'imdbid55': ['genreid8'], 'imdbid22': ['genreid1', 'genreid2']}
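A slightly more idiomatic variant of the same grouping uses collections.defaultdict:

from collections import defaultdict

grouped = defaultdict(list)
for imdb_id, genre_id in movielist:
    grouped[imdb_id].append(genre_id)
print(dict(grouped))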
If you just need the movie imdb_id and a count:
Change this in the original query and you will get the answer without any Python code:
SELECT "movie"."imdb_id", count("moviegenre"."genre_id")
group by "movie"."imdb_id"