How to use variable in sql query odoo - python

I want to use the branch_id variable in an SQL query. How can I do that? When I use the code below, I get psycopg2.ProgrammingError: column reference "branch_id" is ambiguous.
branch_id = self.env.user.branch_id.id
query = '''
SELECT DISTINCT l.partner_id, res_partner.name AS name, UPPER(res_partner.name) AS UPNAME, CASE WHEN prop.value_text IS NULL THEN 'normal' ELSE prop.value_text END AS trust
FROM account_move_line AS l
LEFT JOIN res_partner ON l.partner_id = res_partner.id
LEFT JOIN ir_property prop ON (prop.res_id = 'res.partner,'||res_partner.id AND prop.name='trust' AND prop.company_id=%s),
account_account, account_move am
WHERE (l.account_id = account_account.id)
AND (l.move_id = am.id)
AND (am.state IN %s)
AND (account_account.internal_type IN %s)
AND (
l.reconciled IS NOT TRUE
OR l.id IN(
SELECT credit_move_id FROM account_partial_reconcile where max_date > %s
UNION ALL
SELECT debit_move_id FROM account_partial_reconcile where max_date > %s
)
)
''' + partner_clause + '''
AND (l.date <= %s)
AND (l.branch_id = branch_id)
AND l.company_id IN %s
ORDER BY UPPER(res_partner.name)'''
arg_list = (self.env.company.id,) + arg_list
cr.execute(query, arg_list)

I would prefer named placeholders for the query parameters, which makes the query easier to understand. For example:
# Build the partner query with psycopg2 named placeholders (%(name)s) so that
# every value, including branch_id, is bound by the driver instead of being
# written into the SQL text (a bare "branch_id" in the SQL is read as a column
# name, which is what triggers the "ambiguous column" error).
branch_id = self.env.user.branch_id.id
# NOTE(review): partner_clause is spliced into the SQL text. psycopg2 cannot mix
# positional %s and named %(name)s placeholders in one query, so if
# partner_clause contains a %s it must be rewritten to a named placeholder as
# well -- confirm against the code that builds it.
query = '''
SELECT DISTINCT l.partner_id, res_partner.name AS name, UPPER(res_partner.name) AS UPNAME, CASE WHEN prop.value_text IS NULL THEN 'normal' ELSE prop.value_text END AS trust
FROM account_move_line AS l
LEFT JOIN res_partner ON l.partner_id = res_partner.id
LEFT JOIN ir_property prop ON (prop.res_id = 'res.partner,'||res_partner.id AND prop.name='trust' AND prop.company_id=%(company_id)s),
account_account, account_move am
WHERE (l.account_id = account_account.id)
AND (l.move_id = am.id)
AND (am.state IN %(state)s)
AND (account_account.internal_type IN %(internal_types)s)
AND (
l.reconciled IS NOT TRUE
OR l.id IN(
SELECT credit_move_id FROM account_partial_reconcile where max_date > %(max_date)s
UNION ALL
SELECT debit_move_id FROM account_partial_reconcile where max_date > %(max_date)s
)
)
''' + partner_clause + '''
AND (l.date <= %(date)s)
AND (l.branch_id = %(branch_id)s)
AND l.company_id IN %(company_ids)s
ORDER BY UPPER(res_partner.name)'''
# One entry per placeholder name; a name used several times in the query
# (max_date) is supplied only once. Values used with IN must be tuples.
args = {
    'company_id': self.env.company.id,  # single id, compared with "="
    'company_ids': ...,                 # tuple of company ids, used with IN
    'state': ...,                       # tuple of move states, used with IN
    'internal_types': ...,              # tuple of account types, used with IN
    'max_date': ...,
    'date': ...,
    'branch_id': branch_id,
}
cr.execute(query, args)

Related

DAG is failing now, but there was no error 2-3 days earlier

I have created a DAG and passed it a SQL file that contains many queries. It was triggered successfully for 2-3 days, but when I triggered it today it failed, even though I have not changed anything. I am unable to understand what the issue is. Can someone please help me, and also let me know how I can schedule my DAG to run at 3 AM PST daily? My SQL file is:
-- Script-level variables for the data-quality check.
-- idx / idx_row: counters of the outer and inner WHILE loops;
-- col_cnt: number of rows in the base_fields_temp table;
-- row_cnt: number of base_fields_temp rows for the column currently processed.
DECLARE idx, col_cnt, row_cnt, idx_row INT64;
-- Column currently processed and the name of its "<column>_is_invalid..." flag column.
DECLARE col_name, col_flag STRING;
-- cmp_cond: one generated CASE fragment; lookup_query: all generated flag
-- expressions concatenated; lookup_query_row: WHEN branches collected for a
-- column that has several lookup rows.
DECLARE cmp_cond,lookup_query, lookup_query_row STRING;
-- NOTE(review): col_list is not referenced in the visible part of the script.
DECLARE col_list ARRAY <STRING>;
-- Values read from the lookup row of the current column.
DECLARE is_required BOOLEAN;
DECLARE event_names_len, valid_values_len INT64;
DECLARE logic_based_fields STRING; -- this variable holds the hard-coded rules that are not in the lookup table
-- Yesterday's date rendered as a single-quoted string literal (e.g. '2022-06-16'),
-- ready to be spliced into dynamic SQL with format('%s').
DECLARE current_event_date STRING DEFAULT CONCAT("'",CAST(DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY) AS STRING),"'");
-- Re-create the temp table that pairs every "*is_invalid*" flag column of the
-- base table with its rule row(s) from the valid-values lookup table.
--
-- __row_number drives the main WHILE loop of this script, which skips ahead by
-- the number of rows a column has. That only works when all rows of one column
-- get consecutive numbers, so the numbering is explicitly ordered by column
-- name (an empty OVER() gives no ordering guarantee, and an ORDER BY inside
-- the subquery does not control the window numbering).
-- field_rank numbers the rule rows within one column (1..n).
CREATE OR REPLACE TABLE `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_fields_temp` AS
SELECT
  base.column_name,
  base.column_flag,
  base.required_field_flag,
  base.event_names,
  base.valid_values,
  base.field_name,
  base.__row_number,
  DENSE_RANK() OVER (PARTITION BY base.column_name ORDER BY base.__row_number) AS field_rank
FROM
  (
    SELECT
      bc.column_name,
      vlk.field_name,
      bc.column_flag,
      vlk.required_field_flag,
      vlk.event_names,
      vlk.valid_values,
      ROW_NUMBER() OVER (ORDER BY bc.column_name) AS __row_number
    FROM
      (
        -- Flag columns of the base table; the data column name is the flag
        -- name up to (not including) the "_is_invalid" suffix.
        SELECT
          column_name AS column_flag,
          SUBSTR(column_name, 1, INSTR(column_name, 'is_invalid') - 2) AS column_name
        FROM
          `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.INFORMATION_SCHEMA.COLUMNS`
        WHERE
          table_name = 'st_vix_ott_dev_dq_monitoring_base_test'
          AND column_name LIKE "%is_invalid%"
      ) bc
    INNER JOIN `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_valid_values_lookup_test` vlk
      ON bc.column_name = vlk.field_name
  ) base
ORDER BY base.__row_number;
--SELECT * FROM `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_fields_temp`;
-- Set control variables
-- col_cnt bounds the main loop; lookup_query accumulates one generated
-- ",<expression> AS <flag column>" fragment per flag column.
SET col_cnt = (SELECT COUNT (*)
FROM `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_fields_temp`);
SET idx = 1;
SET lookup_query = '';
--build case statements dynamically based on valid values on the lookup table
-- Each iteration handles the column found at __row_number = idx.
-- NOTE(review): the loop skips ahead by row_cnt for multi-row columns, which
-- assumes all rows of one column have consecutive __row_number values -- confirm
-- against how base_fields_temp is numbered.
WHILE idx <= col_cnt DO
SET (col_flag,col_name) = (
SELECT AS STRUCT column_flag,column_name
FROM `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_fields_temp`
WHERE __row_number = idx
);
-- Number of lookup rows that exist for this column.
SET row_cnt = (SELECT count(*)
FROM `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_fields_temp`
WHERE IFNULL(field_name,'') = col_name );
-- Single lookup row: emit one complete ",CASE ... END AS <flag>" expression.
IF row_cnt = 1 THEN
--Check if event_names & valid_values arrays are not empty
-- NOTE(review): ARRAY_LENGTH only accepts ARRAY arguments. The failure quoted
-- with this script ("No matching signature for function ARRAY_LENGTH for
-- argument types: STRING") indicates event_names/valid_values reached this
-- statement as STRING -- verify the current schema of the lookup table.
SET (event_names_len, valid_values_len, is_required) = (
SELECT AS STRUCT ARRAY_LENGTH(event_names),ARRAY_LENGTH(valid_values), required_field_flag
FROM `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_fields_temp`
WHERE field_name = col_name
);
--Check event_name + field_name is required + field name has invalid values
-- Flag is true when the event name starts with one of event_names and the
-- column is empty or not one of valid_values (compared in lower case).
IF (is_required AND event_names_len > 0 AND valid_values_len > 0) THEN
SET cmp_cond = (
SELECT CONCAT (",CASE WHEN REGEXP_CONTAINS(LOWER(event_name), '^(",LOWER(ARRAY_TO_STRING(event_names,"|")),
")') AND (IFNULL(",col_name,",'') = '' OR LOWER(CAST(", LOWER(col_name), " AS STRING)) NOT IN ('", LOWER(ARRAY_TO_STRING (valid_values, "', '")),
"')) THEN true ELSE false END AS ",col_flag)
FROM `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_fields_temp`
WHERE field_name = col_name
);
SET lookup_query = CONCAT(lookup_query, cmp_cond);
--Check event_name + field_name is required
-- Flag is true when the event name matches and the column is empty.
ELSEIF (is_required AND event_names_len > 0 AND valid_values_len = 0) THEN
SET cmp_cond = (
SELECT CONCAT (",CASE WHEN REGEXP_CONTAINS(LOWER(event_name), '^(",LOWER(ARRAY_TO_STRING(event_names,"|")),")') AND IFNULL("
,col_name,",'') = '' THEN true ELSE false END AS ",col_flag)
FROM `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_fields_temp`
WHERE field_name = col_name
);
SET lookup_query = CONCAT(lookup_query, cmp_cond);
--Check field_name is required
-- Flag is true whenever the column is empty, for any event.
ELSEIF (is_required AND event_names_len = 0 AND valid_values_len = 0) THEN
SET cmp_cond = (
SELECT CONCAT (",CASE WHEN IFNULL(", col_name, ",'') = '' THEN true ELSE false END AS ",col_flag)
FROM `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_fields_temp`
WHERE field_name = col_name
);
SET lookup_query = CONCAT(lookup_query, cmp_cond);
-- NOTE(review): no other combination (e.g. is_required false) appends anything,
-- so no expression is generated for that flag column -- confirm this is intended.
END IF;
-- field_name with multiple rows
-- Collect one WHEN branch per lookup row (addressed by field_rank) and wrap
-- them in a single CASE for the flag column.
ELSEIF row_cnt > 1 THEN
SET idx_row = 1;
SET lookup_query_row = '';
WHILE idx_row <= row_cnt DO
--Check if event_names & valid_values arrays are not empty
SET (event_names_len, valid_values_len, is_required) = (
SELECT AS STRUCT ARRAY_LENGTH(event_names),ARRAY_LENGTH(valid_values), required_field_flag
FROM `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_fields_temp`
WHERE column_name = col_name and field_rank = idx_row
);
--Check event_name + field_name is required + field name has invalid values
IF (is_required AND event_names_len > 0 AND valid_values_len > 0) THEN
SET cmp_cond = (
SELECT CONCAT ("WHEN REGEXP_CONTAINS(LOWER(event_name), '^(",LOWER(ARRAY_TO_STRING(event_names,"|")),
")') AND (IFNULL(",col_name,",'') = '' OR LOWER(CAST(", LOWER(col_name), " AS STRING)) NOT IN ('", LOWER(ARRAY_TO_STRING (valid_values, "', '")),
"')) THEN true ")
FROM `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_fields_temp`
WHERE column_name = col_name and field_rank = idx_row
);
SET lookup_query_row = CONCAT(lookup_query_row, cmp_cond);
END IF;
SET idx_row = idx_row + 1;
END WHILE;
SET lookup_query = CONCAT(lookup_query,",CASE ", lookup_query_row, "ELSE false END AS ", col_flag);
SET idx = idx + row_cnt - 1; -- increment to go to the next field
-- Neither exactly one nor several lookup rows: the flag column is emitted as NULL.
ELSE
SET cmp_cond = CONCAT(",NULL AS ", col_flag);
SET lookup_query = CONCAT(lookup_query, cmp_cond);
END IF;
SET idx = idx + 1; --counter main while loop
END WHILE;
-- This is a workaround due to BQ's dynamic SQL limitations with nested CASE statements
-- These fields aren't in the valid values lookup table
-- The literal below is spliced verbatim into the SELECT list of the dynamic
-- CREATE TABLE statement that follows; every entry starts with a comma because
-- it is appended after existing select items. It defines three hand-written
-- invalid flags plus video_event_flag (1 for events whose name contains 'video').
SET logic_based_fields = (SELECT """
,CASE
WHEN LOWER(event_name) LIKE '%video%' AND IFNULL(video_id_channel_id_sports_event_id,'') = '' THEN true
ELSE false END AS video_id_channel_id_sports_event_id_is_invalid_flag
,CASE
WHEN LOWER(event_name) LIKE '%video%'
AND ((IFNULL(navigation_section,'') ='' AND is_epg IS NOT NULL)
OR (is_epg IS NULL AND IFNULL(navigation_section,'') <>'')
OR (is_epg = TRUE AND IFNULL(epg_category,'') = '')) THEN true
ELSE false END AS client_path_sensitive_properties_is_invalid_flag
,CASE
WHEN LOWER(event_name) = 'video content playing'
AND (video_heartbeat_value IS NULL OR video_heartbeat_value > 60 OR video_heartbeat_value <= 0) THEN TRUE
ELSE FALSE END AS video_heartbeat_value_is_invalid_flag
,CASE WHEN LOWER(event_name) LIKE '%video%' THEN 1 ELSE 0 END AS video_event_flag
""");
-- Dynamic SQL to create the temp table that is used to insert into the base table and the invalid values table
-- format() fills the four %s placeholders in order:
--   1. logic_based_fields  - hand-written flag expressions
--   2. lookup_query        - flag expressions generated from the lookup table
--   3. current_event_date  - filter on DATE(_PARTITIONTIME)
--   4. current_event_date  - filter on DATE(original_timestamp)
-- current_event_date already contains its own single quotes, so it is spliced
-- in unquoted. The inner query keeps one row per event id (the row with the
-- latest loaded_at) via __row_number = 1.
EXECUTE IMMEDIATE format("""
CREATE OR REPLACE TABLE `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_temp`
AS
SELECT
event_date
,anonymous_id
,sl.context_segment_source AS platform_name
,os_version
,event_id
,event_name
,event_type
,stream_type
,session_id
,stream_id
,ip
,navigation_section
,is_epg
,epg_category
,screen_id
,screen_title
,screen_type
,video_content_vertical
,video_genres_first
,video_id_channel_id_sports_event_id
,video_id
,channel_id
,sports_event_id
,video_is_kids
,video_player_mode
,video_title
,video_type
,video_heartbeat_value
,CASE WHEN event_name = 'Video Content Started' THEN true ELSE false END AS event_is_video_start_flag
%s
%s
FROM (
SELECT
context_protocols_source_id,
DATE(original_timestamp) AS event_date,
id AS event_id,
original_event_name AS event_name,
original_event_type AS event_type,
context_ip AS ip,
anonymous_id,
user_id,
COALESCE(session_id,
context_screen_properties_session_id) AS session_id,
screen_id,
screen_title,
screen_type,
stream_id,
stream_type,
video_id,
video_type,
video_title,
video_genres_first,
video_content_vertical,
video_is_kids,
video_player_mode,
video_heartbeat_value,
channel_id,
sports_event_id,
COALESCE(COALESCE(channel_id,video_id),sports_event_id) AS video_id_channel_id_sports_event_id,
is_epg,
epg_category_id,
epg_category,
navigation_section,
context_os_version AS os_version,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY loaded_at DESC) AS __row_number
FROM
`st-vix-ott-dev.vix_collapsed_events_dev.master_event`
WHERE
DATE(_PARTITIONTIME) = %s
AND DATE(original_timestamp) = %s
) AS mev
LEFT JOIN
`st-vix-ott-dev.st_vix_ott_dev_us_data_master_dataset.st_vix_ott_dev_data_segment_lookup_table` sl
ON
mev.context_protocols_source_id = sl.context_protocols_source_id
WHERE mev.__row_number = 1
""",
logic_based_fields,
lookup_query,
current_event_date,
current_event_date
);
-- Load the day's rows from the temp table into the base table and the
-- invalid-values table. Nothing is deleted or inserted when the temp table is
-- empty, so an empty extract never wipes a previously loaded day.
IF EXISTS (
  SELECT 1
  FROM `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_temp`
) THEN
  -- Delete the current event date first so that several runs on the same day
  -- do not duplicate rows. current_event_date already carries its own single
  -- quotes, hence the unquoted %s.
  EXECUTE IMMEDIATE format("""
DELETE
FROM
`st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_test`
WHERE event_date = %s;
""",
    current_event_date
  );
  EXECUTE IMMEDIATE format("""
DELETE
FROM
`st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_invalid_values_test`
WHERE event_date = %s;
""",
    current_event_date
  );

  -- Insert into base table: one row per event with all invalid flags.
  INSERT INTO `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_test`
  (
    event_date,
    anonymous_id,
    platform_name,
    os_version,
    event_id,
    event_name,
    event_type,
    stream_type,
    session_id,
    stream_id,
    ip,
    navigation_section,
    is_epg,
    epg_category,
    screen_id,
    screen_title,
    screen_type,
    video_content_vertical,
    video_genres_first,
    video_id_channel_id_sports_event_id,
    video_id,
    channel_id,
    sports_event_id,
    video_is_kids,
    video_player_mode,
    video_type,
    anonymous_id_is_invalid_flag,
    client_path_sensitive_properties_is_invalid_flag,
    event_is_video_start_flag,
    ip_is_invalid_flag,
    screen_id_is_invalid_flag,
    screen_title_is_invalid_flag,
    screen_type_is_invalid_flag,
    session_id_is_invalid_flag,
    stream_id_is_invalid_flag,
    stream_type_is_invalid_flag,
    video_heartbeat_value,
    video_content_vertical_is_invalid_flag,
    video_genres_first_is_invalid_flag,
    video_heartbeat_value_is_invalid_flag,
    video_id_channel_id_sports_event_id_is_invalid_flag,
    video_is_kids_is_invalid_flag,
    video_player_mode_is_invalid_flag,
    video_type_is_invalid_flag,
    video_event_flag,
    created_datetime
  )
  SELECT
    event_date,
    anonymous_id,
    platform_name,
    os_version,
    event_id,
    event_name,
    event_type,
    stream_type,
    session_id,
    stream_id,
    ip,
    navigation_section,
    is_epg,
    epg_category,
    screen_id,
    screen_title,
    screen_type,
    video_content_vertical,
    video_genres_first,
    video_id_channel_id_sports_event_id,
    video_id,
    channel_id,
    sports_event_id,
    video_is_kids,
    video_player_mode,
    video_type,
    anonymous_id_is_invalid_flag,
    client_path_sensitive_properties_is_invalid_flag,
    event_is_video_start_flag,
    ip_is_invalid_flag,
    screen_id_is_invalid_flag,
    screen_title_is_invalid_flag,
    screen_type_is_invalid_flag,
    session_id_is_invalid_flag,
    stream_id_is_invalid_flag,
    stream_type_is_invalid_flag,
    video_heartbeat_value,
    video_content_vertical_is_invalid_flag,
    video_genres_first_is_invalid_flag,
    video_heartbeat_value_is_invalid_flag,
    video_id_channel_id_sports_event_id_is_invalid_flag,
    video_is_kids_is_invalid_flag,
    video_player_mode_is_invalid_flag,
    video_type_is_invalid_flag,
    video_event_flag,
    CURRENT_DATETIME()
  FROM `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_temp`;

  -- Insert into the invalid values table: per user/platform/event, the list of
  -- event ids with the (field name, offending value) pairs of each event.
  INSERT INTO `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_invalid_values_test`
  (
    event_date,
    anonymous_id,
    platform_name,
    os_version,
    event_name,
    event_type,
    event_invalid_values,
    created_datetime
  )
  WITH cte_invalid AS
  (
    -- Rows with at least one invalid flag. Each checked field keeps its value
    -- when flagged and is replaced by the marker 'valid' otherwise.
    SELECT
      event_date,
      anonymous_id,
      platform_name,
      os_version,
      event_id,
      event_name,
      event_type,
      CASE WHEN video_id_channel_id_sports_event_id_is_invalid_flag THEN video_id_channel_id_sports_event_id ELSE 'valid' END AS video_id_channel_id_sports_event_id,
      CASE WHEN video_heartbeat_value_is_invalid_flag THEN CAST(video_heartbeat_value AS STRING) ELSE 'valid' END AS video_heartbeat_value,
      CASE WHEN ip_is_invalid_flag THEN ip ELSE 'valid' END AS ip,
      CASE WHEN screen_id_is_invalid_flag THEN screen_id ELSE 'valid' END AS screen_id,
      CASE WHEN screen_title_is_invalid_flag THEN screen_title ELSE 'valid' END AS screen_title,
      CASE WHEN screen_type_is_invalid_flag THEN screen_type ELSE 'valid' END AS screen_type,
      CASE WHEN session_id_is_invalid_flag THEN session_id ELSE 'valid' END AS session_id,
      CASE WHEN stream_id_is_invalid_flag THEN stream_id ELSE 'valid' END AS stream_id,
      CASE WHEN stream_type_is_invalid_flag THEN stream_type ELSE 'valid' END AS stream_type,
      CASE WHEN video_content_vertical_is_invalid_flag THEN video_content_vertical ELSE 'valid' END AS video_content_vertical,
      CASE WHEN video_genres_first_is_invalid_flag THEN video_genres_first ELSE 'valid' END AS video_genres_first,
      CASE WHEN video_is_kids_is_invalid_flag THEN video_is_kids ELSE 'valid' END AS video_is_kids,
      CASE WHEN video_player_mode_is_invalid_flag THEN video_player_mode ELSE 'valid' END AS video_player_mode,
      CASE WHEN video_type_is_invalid_flag THEN video_type ELSE 'valid' END AS video_type,
      CASE WHEN client_path_sensitive_properties_is_invalid_flag THEN navigation_section ELSE 'valid' END AS navigation_section,
      CASE WHEN client_path_sensitive_properties_is_invalid_flag THEN CAST(is_epg AS STRING) ELSE 'valid' END AS is_epg,
      CASE WHEN client_path_sensitive_properties_is_invalid_flag THEN epg_category ELSE 'valid' END AS epg_category
    FROM
      `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_temp`
    WHERE
      video_id_channel_id_sports_event_id_is_invalid_flag
      OR client_path_sensitive_properties_is_invalid_flag
      OR video_heartbeat_value_is_invalid_flag
      OR anonymous_id_is_invalid_flag
      OR ip_is_invalid_flag
      OR screen_id_is_invalid_flag
      OR screen_title_is_invalid_flag
      OR screen_type_is_invalid_flag
      OR session_id_is_invalid_flag
      OR stream_id_is_invalid_flag
      OR stream_type_is_invalid_flag
      OR video_content_vertical_is_invalid_flag
      OR video_genres_first_is_invalid_flag
      OR video_is_kids_is_invalid_flag
      OR video_player_mode_is_invalid_flag
      OR video_type_is_invalid_flag
  ),
  cte_invalid_agg AS
  (
    -- Turn the checked fields into (field_name, value) rows, drop the 'valid'
    -- markers and collect what is left per event. INCLUDE NULLS keeps fields
    -- that are invalid because they are NULL.
    SELECT
      event_date,
      anonymous_id,
      platform_name,
      os_version,
      event_name,
      event_type,
      event_id,
      ARRAY_AGG(STRUCT(field_name, invalid_field_value)) AS invalid_field_value
    FROM
      (
        SELECT
          *
        FROM
          cte_invalid
      ) sl
    UNPIVOT INCLUDE NULLS
    (
      invalid_field_value FOR field_name IN (
        video_id_channel_id_sports_event_id,
        video_heartbeat_value,
        ip,
        screen_id,
        screen_title,
        screen_type,
        session_id,
        stream_id,
        stream_type,
        video_content_vertical,
        video_genres_first,
        video_is_kids,
        video_player_mode,
        video_type,
        navigation_section,
        is_epg,
        epg_category
      )
    )
    WHERE IFNULL(invalid_field_value, '') <> 'valid'
    GROUP BY
      event_date,
      anonymous_id,
      platform_name,
      os_version,
      event_name,
      event_type,
      event_id
  )
  SELECT
    event_date,
    anonymous_id,
    platform_name,
    os_version,
    event_name,
    event_type,
    ARRAY_AGG(STRUCT(event_id, invalid_field_value)) AS event_invalid_values,
    CURRENT_DATETIME()
  FROM cte_invalid_agg
  GROUP BY
    event_date,
    anonymous_id,
    platform_name,
    os_version,
    event_name,
    event_type;
END IF;

-- Drop temp tables. This runs whether or not rows were loaded, so an empty day
-- no longer leaves the temp tables behind; IF EXISTS keeps it safe to re-run.
DROP TABLE IF EXISTS `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_temp`;
DROP TABLE IF EXISTS `st-vix-ott-dev.st_vix_ott_dev_us_data_dq_quality_checks.st_vix_ott_dev_dq_monitoring_base_fields_temp`;
The DAG which I have created is :-
import datetime
import os
import logging
from airflow import DAG
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator
from composer_plugins import get_query_content
# Environment variables
event_collection_project_id = os.environ["EVENT_COLLECTION_PROJECT_ID"]
sql_scripts_folder = os.environ["SQL_SCRIPTS_FOLDER"]

# Path of the data-quality script, relative to the SQL scripts folder.
QA_CHECK_QUERY = "DMLs/data_qa_checks/DQ_check_base_table_new.sql"

# Midnight of the previous day.
yesterday = datetime.datetime.combine(
    datetime.datetime.today() - datetime.timedelta(1), datetime.datetime.min.time()
)

# The SQL text is read once, when this file is parsed, and passed to the
# operator as a plain string.
insert_data_from_sql_file = get_query_content(sql_scripts_folder, QA_CHECK_QUERY)
logging.info(f"query: {insert_data_from_sql_file}")

default_dag_args = {
    # Setting start date as yesterday starts the DAG immediately when it is
    # detected in the Cloud Storage bucket.
    "start_date": yesterday,
    # To email on failure or retry set 'email' arg to your email and enable
    # emailing here.
    "email_on_failure": False,
    "email_on_retry": False,
    # If a task fails, retry it once after waiting at least what's specified in retry_delay
    "retries": 1,
    "retry_delay": datetime.timedelta(seconds=10),
    "project_id": event_collection_project_id,
}

# schedule_interval=None means the DAG only runs when triggered manually.
# NOTE(review): for a daily 03:00 run, replace None with a cron expression such
# as "0 3 * * *" and use a fixed, timezone-aware start_date -- confirm against
# the Airflow time-zone documentation in which zone the cron is evaluated.
with DAG(
    dag_id="data_qa_checks",
    schedule_interval=None,
    default_args=default_dag_args,
) as dag:
    # Run the SQL file as a single BigQuery query job; the statements inside
    # the file do the actual work. The operator is attached to the DAG by the
    # enclosing "with" block, so no explicit dag= argument is needed.
    DQ_dml = BigQueryInsertJobOperator(
        task_id="DQ_dml",
        job_id="{{ ts_nodash }}-DQ_dml",
        configuration={
            "query": {
                "query": insert_data_from_sql_file,
                # Real boolean, not the string "False": the script uses
                # standard-SQL scripting and must not run as legacy SQL.
                "useLegacySql": False,
            },
        },
    )
Image which shows my DAG got triggered but now it's failing :-
Error :-
google.api_core.exceptions.BadRequest: 400 Query error: No matching signature for function ARRAY_LENGTH for argument types: STRING. Supported signature: ARRAY_LENGTH(ARRAY) at [70:30]
Location: US
Job ID: 20220617T045920-DQ_dml_6a48df549274df9c428e14be75b52d31
[2022-06-17 04:59:49,628] {taskinstance.py:1511} INFO - Marking task as FAILED. dag_id=data_qa_checks, task_id=DQ_dml, execution_date=20220617T045920, start_date=20220617T045943, end_date=20220617T045949
[2022-06-17 04:59:49,838] {local_task_job.py:151} INFO - Task exited with return code 1
[2022-06-17 04:59:49,890] {local_task_job.py:261} INFO - 0 downstream tasks scheduled from follow-on schedule check
There was no error when it was getting triggered :-
Can someone please help me for the same.

parallel sql query in bigquery (mix legacy and standard) running on metadata schema. (each row is a different table/column)

df_table contains metadata for a list of columns: table_schema, table_name and column_name.
For each column in column_name, I would like to calculate the entropy (bits), the Shannon entropy and the count of values.
The following code works well in Python, but it does not run in parallel.
Is there a more efficient way to run this?
# Legacy SQL is needed for RATIO_TO_REPORT; the UPDATE runs as standard SQL.
job_config_True = bigquery.QueryJobConfig(use_legacy_sql=True)
job_config_False = bigquery.QueryJobConfig(use_legacy_sql=False)

# Process only the columns whose Shannon entropy has not been computed yet.
for i, j in df_table[df_table['shannon_entropy'].isna()].iterrows():
    # Read the identifiers outside the try block so that a failing entropy
    # query can never leave them undefined (or stale) for the UPDATE below.
    table_schema = j['table_schema']
    table_name = j['table_name']
    column_name = j['column_name']
    try:
        q1 = f'''select -sum(p*log2(p)) as shannon_entropy from (
select RATIO_TO_REPORT(c) over() p from (
select {column_name}, count(*) c FROM {table_schema}.{table_name} group by 1))
'''
        query_job = bqclient.query(q1, job_config=job_config_True)  # Make an API request.
        shannon_entropy = query_job.result().to_dataframe()['shannon_entropy'][0]
    except Exception as exc:
        # Best effort: report the failure and store no Shannon entropy for this column.
        print(table_name, column_name, 'shannon entropy query failed:', exc)
        shannon_entropy = np.nan
    # NaN cannot be written into the statement as-is (a bare "nan" is not a
    # valid SQL value), so a missing entropy is stored as NULL.
    shannon_entropy_sql = 'NULL' if np.isnan(shannon_entropy) else shannon_entropy
    q = f'''UPDATE `myproject.info_tabels_all` t1
set t1.entropy =t2.entropy ,t1.values = t2.total , t1.unique = t2.distinct_total , t1.shannon_entropy = {shannon_entropy_sql}
from (
SELECT
LOG(2, COUNT(DISTINCT {column_name})) as entropy,
count({column_name}) as total,
COUNT(DISTINCT {column_name}) as distinct_total
FROM `datateam-248616.{table_schema}.{table_name}` ) t2
where table_schema = '{table_schema}' and table_name = '{table_name}' and column_name = '{column_name}'
'''
    print(table_name, shannon_entropy)
    query_job = bqclient.query(q, job_config=job_config_False)  # Make an API request.
I used this code in the process :
BigQuery: compute entropy of a column

Not able to fetch records from DB through python by using Parameterized Queries

The function above takes the parameters endTime, startTime, list1 and column_filter, and I am trying to run a query whose WHERE clause conditions are parameterized.
endT = endTime
startT = startTime
myList = ",".join("'" + str(i) + "'" for i in list1)
queryArgs = {'db': devDB,
'schema': dbo,
'table': table_xyz,
'columns': ','.join(column_filter)}
query = '''
WITH TIME_SERIES AS
(SELECT ROW_NUMBER() OVER (PARTITION BY LocId ORDER BY Created_Time DESC) RANK, {columns}
from {schema}.{table}
WHERE s_no in ? AND
StartTime >= ? AND
EndTime <= ? )
SELECT {columns} FROM TIME_SERIES WHERE RANK = 1
'''.format(**queryArgs)
args = (myList, startT, endT)
return self.read(query, args)
The below is my read which connects to the DB to fetch records and a condition is also added to check if its parameterized or not.
def read(self, query, parameterValues=None):
    """Run a query and return its result set as a pandas DataFrame.

    Args:
        query: SQL text, optionally containing parameter markers.
        parameterValues: values for the parameter markers; when None the
            query is executed without parameters.

    Returns:
        A DataFrame with one row per fetched record. Column names are taken
        from the cursor description when the result has at least one column.

    Raises:
        Whatever the cursor raises; the cursor is closed in that case too.
    """
    cursor = self.connect(cursor=True)
    try:
        if parameterValues is not None:
            rows = cursor.execute(query, parameterValues)
        else:
            rows = cursor.execute(query)
        df = pd.DataFrame.from_records(rows.fetchall())
        # An empty result yields a frame without columns; only rename when
        # there is something to rename.
        if len(df.columns) > 0:
            df.columns = [x[0] for x in cursor.description]
    finally:
        # Release the cursor even when execute/fetchall fails.
        cursor.close()
    return df
The query args are picked up, but the parameterized values are not. In my case, execution reaches the read method with the parameter values (myList, startT, endT) as a tuple. The WHERE clause of the query remains unchanged (the parameters do not replace the ? markers), and as a result I am not able to fetch any records. Can you point out where I might be going wrong?

How to pass list values to SQL Select query?

SQL query:
Select *
from table_name
where ID in (123)
and date in (Select max(date)
from table_name
where ID in (123))
I want to pass the list values below, one at a time, into the SQL query above and collect the results for each ID in the list.
Package: cx_Oracle
My try:
import cx_oracle
List= {123, 234,345,....}
List1 = []
query = " Select * from table_name where ID in (%s)
and date in (Select max(date) from table_name where ID in (%s))"
for j in List:
cursor1 = db_ora.cursor()
tb = cursor1.execute(query, params= List )
for i in tb:
List1.append(i)
Thank you in advance, let me know if you need more details from my side
If you want to keep it similar to your original code, you can use string formatting
Python 2
import cx_Oracle  # the module name is case-sensitive

# IDs to look up, one query per ID; extend with the remaining values.
List = [123, 234, 345]
List1 = []
# Triple quotes are required for a string literal that spans several lines.
masterQuery = """ Select * from table_name where ID in (%s)
and date in (Select max(date) from table_name where ID in (%s))"""
# One cursor is enough; it can be re-executed for every ID.
cursor1 = db_ora.cursor()
for j in List:
    # int() guarantees that only a number is ever formatted into the SQL text.
    newQuery = masterQuery % (int(j), int(j))
    tb = cursor1.execute(newQuery)
    for i in tb:
        List1.append(i)
Python 3
import cx_Oracle  # the module name is case-sensitive

# IDs to look up, one query per ID; extend with the remaining values.
List = [123, 234, 345]
List1 = []
# :id is a bind variable: the value is sent separately from the SQL text, so
# the statement stays valid for every ID and nothing is formatted into it.
# The same name appears twice and is bound once.
masterQuery = """ Select * from table_name where ID in (:id)
and date in (Select max(date) from table_name where ID in (:id))"""
# One cursor is enough; it can be re-executed for every ID.
cursor1 = db_ora.cursor()
for j in List:
    tb = cursor1.execute(masterQuery, {"id": j})
    for i in tb:
        List1.append(i)
As far as I can tell, Oracle won't accept such a list as a valid parameter. Either store the list of values in a separate table and use it as a source for your query, such as
and t.date in (select max(t1.date) from table_name t1
where t1.id in (select st.id from some_table st)
)
or, if possible, split that comma-separated-values string into rows, e.g.
and t.date in (select max(t1.date) from table_name t1
where t1.id in (select regexp_substr(%s, '[^,]+', 1, level)
from dual
connect by level <= regexp_count(%s, ',') + 1
)
)
Also, I'd suggest prefixing column names with table aliases to avoid possible confusion.

How can I write this SQL query in SQLAlchemy?

I wrote the following SQL query. How can I do the same thing in SQLAlchemy?
-- Keep the activity_log rows whose end_time is the latest one recorded for the
-- same campaign on the same calendar day.
SELECT
    T.campaign_id,
    T.spend,
    T.id
FROM activity_log T
WHERE T.end_time = (
    SELECT MAX(T1.end_time)
    FROM activity_log T1
    WHERE T1.campaign_id = T.campaign_id
      AND CAST(T1.end_time AS DATE) = CAST(T.end_time AS DATE)
);
Below should get you started:
# Two aliases of the same mapped class: T is the outer row, T1 the row scanned
# by the correlated subquery.
T = aliased(ActivityLog, name="T")
T1 = aliased(ActivityLog, name="T1")

# Conditions tying the inner row to the outer one: same campaign, same day.
same_campaign = T1.campaign_id == T.campaign_id
same_day = cast(T1.end_time, Date) == cast(T.end_time, Date)

# Scalar subquery: latest end_time for the outer row's campaign and day.
subquery = (
    session.query(func.max(T1.end_time).label("end_time"))
    .filter(same_campaign, same_day)
    .correlate(T)
    .as_scalar()
)

# Outer query: keep only the rows whose end_time equals that maximum.
qry = session.query(T.campaign_id, T.spend, T.id).filter(T.end_time == subquery)

Categories