I'm having a problem with SqlAlchemy and a group_by clause. See the SqlAlchemy query below.
I've got a SqlAlchemy query that includes a group_by clause and it's raising an exception, '(cx_Oracle.DatabaseError) ORA-00979: not a GROUP BY expression'. However, when I get the SQL generated by the SqlAlchemy query and run that manually, the query works fine.
I'm not sure how to figure out what's wrong with the group_by clause. How can I debug this problem and figure out what I can do to fix it?
# create shorthand aliases for the mapped models used by the query below
b = db.aliased(Batch)
bs = db.aliased(BatchingStatus)
bp = db.aliased(BatchPress)
bst = db.aliased(BatchState)
bit = db.aliased(BatchItem)
# NOTE(review): this name shadows the built-in bin() for the rest of the scope.
bin = db.aliased(BatchInput)
bpri = db.aliased(BatchPriority)
lcu = db.aliased(LCUser)
s = db.aliased(SubBatch)
w = db.aliased(WorkType)
ptw = db.aliased(LCProductToWorkType)
ctp = db.aliased(LCCategoryToProduct)
c = db.aliased(LCCategory)
# for correlated subquery
# Selects the product names of active category-to-product rows whose category
# path, lower-cased, equals `category` (a value defined outside this snippet).
subq = (
db.session.query(ctp.product_name)
.join(c, c.category_id == ctp.category_id)
.filter(func.lower(c.category_path) == category)
.filter(ctp.active == 1)
)
# start of problem query
# Aggregating query: every selected column that is not wrapped in an aggregate
# (min / listagg) is repeated in the group_by() list further down.
# NOTE(review): ORA-00979 on a statement that runs fine when pasted by hand often
# means literal values were sent as separate bind parameters in SELECT and in
# GROUP BY, so Oracle sees two different expressions -- presumably inside the
# lcu.coalesce_* hybrid expressions; confirm by logging the statement together
# with its bound parameters.
q = db.session.query(
b.batch_signature.label('batch_signature'),
b.batch_num,
b.created_date.label('created_date'),
bst.code.label('batch_state'),
# aggregate: earliest promise date among the joined batch inputs
func.min(bin.promise_date).label('due_out'),
bs.job_status,
bp.press_id.label('press_id'),
bp.description.label('press_description'),
bp.code.label('press_code'),
bp.active.label('press_active'),
# aggregate: LISTAGG of the item ids, ordered by item id, separated by ','
func.listagg(bin.item_id, ',').within_group(bin.item_id).label('subbatches'),
bs.item_count.label('item_count'),
bs.product.label('product'),
bpri.code.label('priority'),
ptw.display_format.label('product_display_format'),
c.display_name.label('category_display_name'),
lcu.coalesce_first_name,
lcu.coalesce_last_name,
lcu.coalesce_email,
) \
.join(bs, (bs.batch_signature == b.batch_signature) & (bs.press_id == b.press_id)) \
.join(bp, bp.press_id == b.press_id) \
.join(bst, bst.state_id == b.state_id) \
.join(bit, bit.batch_id == b.batch_id) \
.join(bin, bin.batch_input_id == bit.batch_input_id) \
.join(bpri, bpri.priority_id == bin.priority_id) \
.join(lcu, lcu.username == bs.actor) \
.join(s, s.subbatchno == func.to_char(bin.item_id)) \
.join(w, w.worktypeenum == s.worktypeenum) \
.join(ptw, ptw.worktypeenum == w.worktypeenum) \
.join(ctp, ctp.category_to_product_id == ptw.category_to_product_id) \
.join(c, c.category_id == ctp.category_id) \
.filter(bs.product.in_(subq)) \
.filter(b.state_id <= 200) \
.group_by(
b.batch_signature,
b.batch_num,
b.created_date,
bst.code,
bs.job_status,
bp.press_id,
bp.description,
bp.code,
bp.active,
bs.item_count,
bs.product,
bpri.code,
ptw.display_format,
c.display_name,
lcu.coalesce_first_name,
lcu.coalesce_last_name,
lcu.coalesce_email,
) \
.order_by('batch_signature', 'batch_num', 'created_date')
# Run the query; on failure show the database error instead of propagating it.
try:
    retval = q.all()
except Exception as e:
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the statement form `print e` is a SyntaxError on Python 3.
    print(e)
The above doesn't show the models, some of which have @hybrid_property/@<name>.expression methods, like the lcu.coalesce_first_name columns, which are an attempt to hide the func.coalesce code that I thought was causing the group_by problems.
Related
I am trying to use python to use a parametrized query through a list. This is the following code:
# Loan numbers to look up; passed as the parameter sequence to execute() below.
loan_records =['604150062','604150063','604150064','604150065','604150066','604150067','604150069','604150070']
# NOTE(review): the statement has a single `?` marker inside `in (?)`, while
# loan_records supplies eight values -- presumably the cause of the reported
# error. The usual remedy is one marker per value, e.g. building the marker
# list with ','.join('?' * len(loan_records)); confirm against the driver docs.
borr_query = "select distinct a.nbr_aus, cast(a.nbr_trans_aus as varchar(50)) nbr_trans_aus, c.amt_finl_item, case when a.cd_idx in (-9999, 0) then null else a.cd_idx end as cd_idx, a.rate_curr_int, case when a.rate_gr_mrtg_mrgn = 0 then null else a.rate_gr_mrtg_mrgn end as rate_gr_mrtg_mrgn, a.rate_loln_max_cap, case when a.rate_perdc_cap = 0 then null else a.rate_perdc_cap end as rate_perdc_cap from db2mant.i_lp_trans a left join db2mant.i_lp_trans_borr b on a.nbr_aus = b.nbr_aus and a.nbr_trans_aus = b.nbr_trans_aus left join db2mant.i_lp_finl_item c on a.nbr_aus = c.nbr_aus and a.nbr_trans_aus = c.nbr_trans_aus where a.nbr_trans_aus in (?) and c.cd_finl_item = 189"
ODS.execute(borr_query, loan_records)
#PML.execute(PML_SUBMN_Query, (first_evnt, last_evnt, x))
ODS_records = ODS.fetchall()
# Wrap the fetched rows in a DataFrame whose column names follow the select list.
ODS_records = pd.DataFrame(ODS_records, columns=['nbr_aus', 'nbr_trans_aus', 'amt_finl_item', 'cd_idx', 'rate_curr_int', 'rate_gr_mrtg_mrgn', 'rate_loln_max_cap', 'rate_perdc_cap'])
When I try to run this code, I get the following error message:
error message
I am using peewee as my ORM and my goal is to serialize the result of a complex query which also contains subqueries:
# Alias of the usage model so it can be queried separately from the model itself.
machine_usage_alias = RecordDailyMachineUsage.alias()
# Per machine: the maximum (most recent) usage date.
subquery = (
machine_usage_alias.select(
machine_usage_alias.machine_id,
fn.MAX(machine_usage_alias.date).alias('max_date'),
)
.group_by(machine_usage_alias.machine_id)
.alias('machine_usage_subquery')
)
# Usage rows joined to that per-machine maximum date, i.e. only the rows
# recorded on each machine's most recent date.
record_subquery = RecordDailyMachineUsage.select(
RecordDailyMachineUsage.machine_id, RecordDailyMachineUsage.usage
).join(
subquery,
on=(
(RecordDailyMachineUsage.machine_id == subquery.c.machine_id)
& (RecordDailyMachineUsage.date == subquery.c.max_date)
),
)
# Machines with their usage value plus the concatenated distinct tag and
# project names, grouped by machine id.
query = (
Machine.select(
Machine.id, # 0
Machine.name,
Machine.location,
Machine.arch,
Machine.platform,
Machine.machine_version,
Machine.status,
record_subquery.c.usage.alias('usage'),
fn.GROUP_CONCAT(Tag.name.distinct()).alias('tags_list'),
fn.GROUP_CONCAT(Project.full_name.distinct()).alias('projects_list'),
) # 10
.join(MachineTag)
.join(Tag)
# return the join context to Machine before joining the project tables
.switch(Machine)
.join(MachineProject)
.join(Project)
.join(
record_subquery,
JOIN.LEFT_OUTER,
on=(Machine.id == record_subquery.c.machine_id),
)
.where((Machine.id != 0) & (Machine.is_alive == 1))
.group_by(Machine.id)
)
I've tried to use the method model_to_dict:
jsonify({'rows': [model_to_dict(c) for c in query]})
But this way gives me the columns and values from the Machine model only. My aim is to include all the columns from the select query.
It turned out that I had to use the dicts method of the query and jsonify the result.
machine_usage_alias = RecordDailyMachineUsage.alias()
subquery = (
machine_usage_alias.select(
machine_usage_alias.machine_id,
fn.MAX(machine_usage_alias.date).alias('max_date'),
)
.group_by(machine_usage_alias.machine_id)
.alias('machine_usage_subquery')
)
record_subquery = RecordDailyMachineUsage.select(
RecordDailyMachineUsage.machine_id, RecordDailyMachineUsage.usage
).join(
subquery,
on=(
(RecordDailyMachineUsage.machine_id == subquery.c.machine_id)
& (RecordDailyMachineUsage.date == subquery.c.max_date)
),
)
query = (
Machine.select(
Machine.id, # 0
Machine.name,
Machine.location,
Machine.arch,
Machine.platform,
Machine.machine_version,
Machine.status,
record_subquery.c.usage.alias('usage'),
fn.GROUP_CONCAT(Tag.name.distinct()).alias('tags_list'),
fn.GROUP_CONCAT(Project.full_name.distinct()).alias('projects_list'),
) # 10
.join(MachineTag)
.join(Tag)
.switch(Machine)
.join(MachineProject)
.join(Project)
.join(
record_subquery,
JOIN.LEFT_OUTER,
on=(Machine.id == record_subquery.c.machine_id),
)
.where((Machine.id != 0) & (Machine.is_alive == 1))
.group_by(Machine.id)
).dicts()
return jsonify({'rows': [c for c in query]})
I have the next PostgreSQL query:
-- Gather the values of every value table as text so they can be UNIONed.
WITH sub_query AS (
    SELECT vi.idvalfield,
           vi.value::text
    FROM valueint_value AS vi
    UNION
    SELECT vt.idvalfield,
           vt.value::text
    FROM valuetext_value AS vt
)
SELECT
    sq.idvalfield,
    sq.value
FROM sub_query AS sq
JOIN valuefield AS vf ON vf.idvalfield = sq.idvalfield
JOIN event e on vf.idevent = e.idevent
-- Keep only rows whose event id does NOT overlap the ids aggregated below.
WHERE NOT (e.idevent || array[]::uuid[]) && (
    SELECT array_agg(e.idevent) AS id_event
    FROM sub_query AS sq
    JOIN valuefield AS vf ON vf.idvalfield = sq.idvalfield
    JOIN event e on vf.idevent = e.idevent
    -- sub_query.value is text (cast in the CTE), so compare with text literals;
    -- PostgreSQL has no implicit text = integer comparison.
    WHERE (idtable = 41 AND sq.value = '222')
       OR (idtable = 43 AND sq.value = '18')
);
I described the construction of WITH. The number of tables in UNION is dynamic:
from sqlalchemy.dialects import postgresql
from sqlalchemy import or_, and_
from sqlalchemy import cast, Table, Text
from sqlalchemy.dialects.postgresql import array_agg, array, ARRAY, UUID
# One SELECT per value table: the field id (labelled 'id_value') and the value
# cast to Text (labelled 'value'), restricted to the field ids given for that
# table. model_values is defined outside this snippet and is iterated as
# (table, field ids) pairs.
models_view = [
session.query(
model.c.idvalfield.label('id_value'),
cast(model.c.value, Text).label('value')
).filter(model.c.idvalfield.in_(id_fields))
for model, id_fields in model_values.items()
]
# Combine all per-table SELECTs with UNION ALL and expose the result as a CTE.
cte_union_view = models_view[0].union_all(*models_view[1:]).cte()
Described a subquery in WHERE:
# Subquery producing array_agg() over the ids of events that have a matching
# value: (idtable 41 with value '222') or (idtable 43 with value '18').
filtered_event = session.query(array_agg(Event.idevent))\
.select_from(cte_union_view)\
.join(Valuefield, cte_union_view.c.id_value == Valuefield.idvalfield)\
.join(Event, Event.idevent == Valuefield.idevent)\
.filter(or_(and_(Valuefield.idtable == 41, cte_union_view.c.value == '222'),
and_(Valuefield.idtable == 43, cte_union_view.c.value == '18'))).subquery()
Described the main request:
# Main query: every CTE row joined through Valuefield to its Event; no WHERE
# condition is applied yet.
event_all = session.query(cte_union_view)\
.join(Valuefield, cte_union_view.c.id_value == Valuefield.idvalfield)\
.join(Event, Event.idevent == Valuefield.idevent).all()
But I can't describe the selection condition in any way:
WHERE NOT (e.idevent || array[]::uuid[]) && (SELECT array_agg(e.idevent)
I try:
filter(cast([Event.idevent + array([])], ARRAY(UUID)).in_(filtered_event))
and
filter(cast([array([Event.idevent]) + array([])], ARRAY(UUID)).in_(filtered_event))
But I don't know how to describe it
Found a solution. Maybe it will be useful to someone.
Converting a UUID to an array of UUID's:
(e.idevent || array[]::uuid[])
you can do this using literal_column:
# Concatenate an empty uuid[] literal with the event id via the || operator,
# and bind the expression to the name that the following filter() call uses.
uuid_event_arr = literal_column('ARRAY[]::uuid[]').op('||')(Event.idevent)
And now the whole `WHERE` block can be described like this:
.filter(uuid_event_arr.notin_(filtered_event)).all()
But, in fact, it was easier to rewrite the subquery in WHERE, without the 'array_agg ()' function. This in turn makes it easier to build a query using alchemy:
# Ids of the events that have a matching value; selecting the plain column
# replaces the array_agg() form. The CTE labels its text column 'value', so
# it is addressed as cte_union_view.c.value.
# NOTE(review): only the (idtable 41, '222') condition is applied here; add the
# second branch with or_() if the (idtable 43, '18') case is needed as well.
filtered_event = session.query(Event.idevent)\
    .select_from(cte_union_view)\
    .join(Valuefield, cte_union_view.c.id_value == Valuefield.idvalfield)\
    .join(Event, Event.idevent == Valuefield.idevent)\
    .filter(and_(Valuefield.idtable == 41, cte_union_view.c.value == '222'))
# Keep the rows whose event is NOT in that set, mirroring the WHERE NOT of the
# SQL being translated.
views_value = session.query(cte_union_view)\
    .join(Valuefield, cte_union_view.c.id_value == Valuefield.idvalfield)\
    .join(Event, Event.idevent == Valuefield.idevent)\
    .filter(Event.idevent.notin_(filtered_event)).all()
You can use also: array(tuple_(Event.idevent), type_=UUID)
I have used a manual join (Query.join, not joinedload) in sqlalchemy for some reason. I have used aliases since I have multiple joins to a single table. Now I want to sort the result by one of the relations' fields. How can I use Query.order_by with an aliased name? When I do this, I get an ambiguous %(####) instead of the field name in the query.
# Apply each requested (field, direction) pair from self.order_by to _query.
# Dotted fields are joined through a fresh alias that is stored in self.aliases
# under the text before the matched dot, so the ORDER BY can use that alias.
# NOTE(review): indentation was lost in this snippet, so the nesting described
# in these comments is inferred -- confirm against the real source.
if self.order_by:
entity = self.cls
for field, order in self.order_by:
if '.' in field:
# presumably group(1) is the relation path and group(2) the column name
m = re.match(r'(.+)\.(.+)', field)
if m.group(1) not in self.aliases:
# walk the relation path one attribute at a time
for item in m.group(1).split('.'):
cls = inspect(entity)
attr = cls.attrs[item]
entity = get_type(attr)
# innerjoin relationships use join(), all others outerjoin(); both pass the
# alias to contains_eager() so the relationship is loaded from the joined alias
if attr.innerjoin:
aliased_entity = aliased(entity)
self.aliases[m.group(1)] = aliased_entity
_query = _query.join(aliased_entity, item).options(contains_eager(item,
alias=aliased_entity))
else:
aliased_entity = aliased(entity)
self.aliases[m.group(1)] = aliased_entity
_query = _query.outerjoin(aliased_entity, item).options(contains_eager(item,
alias=aliased_entity))
# "desc" sorts descending; any other value sorts ascending
if order == "desc":
_query = _query.order_by(self.get_order_by_field(field).desc())
else:
_query = _query.order_by(self.get_order_by_field(field).asc())
And then:
def get_order_by_field(self, field: str) -> Column:
    """Return the column expression to order by for *field*.

    A field without a dot becomes ``Column(field)``. A dotted field is split
    by the regex into a relation part and an attribute part: when the
    relation part has an entry in ``self.aliases`` the attribute is read from
    that aliased entity, otherwise the name stored in ``self.column_map``
    for the whole field is wrapped in a ``Column``.
    """
    if '.' not in field:
        return Column(field)

    parts = re.match(r'(.+)\.(.+)', field)
    relation = parts.group(1)
    if relation not in self.aliases:
        return Column(self.column_map[field])
    return getattr(self.aliases[relation], parts.group(2))
See example:
# Alias ModelUser so the same table can be joined a second time (previous user).
entity = sqlalchemy.aliased(ModelUser)
# Order by attributes of the alias object itself rather than by a string name,
# so each ORDER BY term refers unambiguously to one of the two joins.
session.query(ModelLog.id, ModelLog.date, ModelUser.id.label('current_user_id'),
              entity.id.label('prev_user_id')) \
    .join(ModelUser, ModelUser.id == ModelLog.id_model_user) \
    .join(entity, entity.id == ModelLog.id_prev_model_user) \
    .filter(...) \
    .order_by(entity.id.asc(), ModelUser.id.desc())
I need to filter using NOT IN, but on two fields.
# Base query: the four entities plus the result of the sbprecobruto(...)
# database function, typed as Float.
q = db_session.query(Necessidade, WFLeilao, BidHeader, BidItemPrice, func.sbprecobruto(BidItemPrice.bid_number,BidItemPrice.line_number, Necessidade.identportal, type_=Float))
q = q.join(WFLeilao, and_(Necessidade.numeroportal == WFLeilao.leilao, Necessidade.numeroitemportal == WFLeilao.itemleilao))
# NOTE(review): this join condition references BidItemPrice, which is only
# joined on the following line -- confirm the intended join order.
q = q.join(BidHeader, and_(BidHeader.bid_number == BidItemPrice.bid_number))
q = q.join(BidItemPrice, and_(BidItemPrice.auction_header_id == WFLeilao.leilao, BidItemPrice.auction_line_number == WFLeilao.itemleilao, BidItemPrice.bid_number == WFLeilao.lance, BidItemPrice.line_number == WFLeilao.itemlance))
# Two-column subquery: (num_leilao_superbuy, num_item_leilao_superbuy) pairs
# from rows matching the three filter_by conditions.
subquery = db_session.query(ItfRetornoPedido.num_leilao_superbuy, ItfRetornoPedido.num_item_leilao_superbuy).filter_by(status_comprador=1).filter_by(acao='I').filter_by(empresa='NK').subquery()
# Attempted two-column NOT IN. NOTE(review): a plain Python tuple has no .in_()
# method, and `Wfleilao` differs in case from the `WFLeilao` used above.
q = q.filter(~(WFLeilao.leilao,Wfleilao.itemleilao).in_(subquery))
In Oracle this is possible; a similar example:
Select *
from table_a
where (leilao, itemleilao) not in
(Select num_leilao_superbuy, num_item_leilao_superbuy
from table_b
where empresa = 'NK')
Is it possible?
I found a solution using tuple_
q = q.filter(~tuple_(WFLeilao.leilao, WFLeilao.itemleilao).in_(subquery))
you can chain the query:
# NOTE(review): unlike the tuple_ form, this excludes a row when EITHER column
# matches on its own, so it is not equivalent to a pairwise NOT IN. Each in_()
# also needs a subquery that selects a single column, whereas the `subquery`
# built in the question selects two -- confirm before using.
q = q.filter(~(WFLeilao.leilao.in_(subquery))) \
    .filter(~(WFLeilao.itemleilao.in_(subquery)))