My json looks like this -
[
    {
        "Monitor Level": "1",
        "Estimate SLA (+30)": "214",
        "New Schedule": "",
        "Job Name": "\\Job1\\jobgroup",
        "Estimated Duration": "183",
        "Actual Duration": "184"
    },
    {
        "Monitor Level": "1",
        "Estimate SLA (+30)": "179",
        "New Schedule": "8:00 PM",
        "Job Name": "\\Job2\\jobgroup",
        "Estimated Duration": "1349",
        "Actual Duration": "1349"
    }
]
I run the following code -
for o in json_object:
    # get jobid
    db = pyodbc.connect('DRIVER={SQL Server};SERVER=dvidbsql01\dev2008;DATABASE=Admiral;UID=Tidal;PWD=tidal97')
    cur = db.cursor()
    cur.execute("""select jobmst_id from jobmst where jobmst_prntname + '\\' + jobmst_name = ?""", o['Job Name'])
    r = cur.fetchall()
    print r
And r returns the value I want.
However, if I use the code I actually want to use -
sql_jobid = """
select jobmst_id 'Job ID' from jobmst where jobmst_prntname + '\\' + jobmst_name = ?
"""

## DEFINE ENVIRONMENT DATABASES
def db():
    if args.environment == 'DEV':
        return pyodbc.connect('DRIVER={SQL Server};SERVER=server\instance;DATABASE=db;UID=user;PWD=pass')

## DEFINE UPDATE
def query_db(query, args=(), one=False):
    cur = db().cursor()
    cur.execute(query, args)
    r = [dict((cur.description[i][0], value)
              for i, value in enumerate(row)) for row in cur.fetchall()]
    cur.connection.close()
    return (r[0] if r else None) if one else r

for o in json_object:
    # get jobid
    jobid = query_db(sql_jobid, (o['Job Name'][0]))
    print jobid
It is not printing the value I want even though it's doing the same thing. Even replacing o['Job Name'][0] with 'Job1\jobgroup' still doesn't return anything, so something in my more Pythonic code seems not to want to parse the Job Name.
In the following line,
jobid = query_db(sql_jobid, (o['Job Name'][0]))
(o['Job Name'][0]) is not a tuple. If you want to pass a tuple, you need to append a trailing comma.
jobid = query_db(sql_jobid, (o['Job Name'][0],))
# ^
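Note that parentheses alone do not make a tuple; the trailing comma does. A quick illustration in the Python 2 interpreter:

>>> type(('Job1'))    # parentheses only: still a string
<type 'str'>
>>> type(('Job1',))   # trailing comma: now a tuple
<type 'tuple'>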
The function below takes the parameters endTime, startTime, list1 and column_filter, and I am trying to build a query whose WHERE clause conditions are parameterized.
endT = endTime
startT = startTime
myList = ",".join("'" + str(i) + "'" for i in list1)
queryArgs = {'db': devDB,
             'schema': dbo,
             'table': table_xyz,
             'columns': ','.join(column_filter)}
query = '''
    WITH TIME_SERIES AS
        (SELECT ROW_NUMBER() OVER (PARTITION BY LocId ORDER BY Created_Time DESC) RANK, {columns}
         from {schema}.{table}
         WHERE s_no in ? AND
             StartTime >= ? AND
             EndTime <= ?)
    SELECT {columns} FROM TIME_SERIES WHERE RANK = 1
'''.format(**queryArgs)
args = (myList, startT, endT)
return self.read(query, args)
Below is my read method, which connects to the DB to fetch records; a condition also checks whether the query is parameterized or not.
def read(self, query, parameterValues=None):
    cursor = self.connect(cursor=True)
    if parameterValues is not None:
        rows = cursor.execute(query, parameterValues)
    else:
        rows = cursor.execute(query)
    df = pd.DataFrame.from_records(rows.fetchall())
    if len(df.columns) > 0:
        df.columns = [x[0] for x in cursor.description]
    cursor.close()
    return df
The query args are getting picked up, but the parameter values are not. In my case, the read method is entered with the parameter values (myList, startT, endT) as a tuple, yet the WHERE clause remains unchanged (the parameters never replace the ?), and as a result I am not able to fetch any records. Can you specify where I might be going wrong?
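For what it's worth, a single ? placeholder binds exactly one scalar value, so s_no in ? receives the whole comma-joined string as one value rather than a list of values. The usual workaround is to generate one placeholder per list element and pass the raw items; a minimal sketch under that assumption (qmark-style driver such as pyodbc; build_in_query and the table name are just illustrative):

def build_in_query(list1, startT, endT):
    # One '?' per element of list1, e.g. "?, ?, ?" for a three-item list.
    placeholders = ", ".join("?" for _ in list1)
    query = ("SELECT * FROM dbo.table_xyz "
             "WHERE s_no IN ({}) AND StartTime >= ? AND EndTime <= ?").format(placeholders)
    # Pass the raw items, not a pre-quoted, comma-joined string.
    args = tuple(list1) + (startT, endT)
    return query, args

query, args = build_in_query(['A1', 'B2'], '2020-01-01', '2020-01-02')
# self.read(query, args)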
I'm using Python 2 and have the following code:
with conn.cursor() as cursor:
    info("Updating {} records".format(len(records_to_update)))
    for record in records_to_update:
        query = "UPDATE my_table SET "
        params_setters = []
        # Process all fields except wsid when updating
        for index, header in enumerate(DB_COLUMNS_IN_ORDER[1:]):
            if record[index] is not None:
                params_setters.append("{} = '{}' ".format(header, record[index]))
        query += " , ".join(params_setters)
        query += " WHERE id = '{}'".format(record[0])
        cursor.execute(query)
How can I use query params for escaping here and not have to do it manually in places like:
params_setters.append("{} = '{}' ".format(header, record[index]))
If I understand your question, you want to use a prepared statement. If you are using a driver where %s is used to represent a query parameter (SQLite uses ?), then:
with conn.cursor() as cursor:
    info("Updating {} records".format(len(records_to_update)))
    for record in records_to_update:
        query = "UPDATE my_table SET "
        params_setters = []
        params = []  # reset per record so parameters don't accumulate across rows
        # Process all fields except wsid when updating
        for index, header in enumerate(DB_COLUMNS_IN_ORDER[1:]):
            if record[index] is not None:
                params_setters.append("{} = %s ".format(header))
                params.append(record[index])
        query += " , ".join(params_setters)
        query += " WHERE id = %s"
        params.append(record[0])
        cursor.execute(query, params)
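As a side note, the placeholder style is driver-specific, and every DB-API module exposes it as a paramstyle attribute. A quick check (sqlite3 from the standard library; pymysql shown as an assumed MySQL driver, it is not named in the question):

import sqlite3

print(sqlite3.paramstyle)    # 'qmark'  -> placeholders are written as ?

# Assuming your MySQL driver is pymysql:
# import pymysql
# print(pymysql.paramstyle)  # 'format' -> placeholders are written as %s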
I am trying to query a dataset in an S3 bucket with an Athena query, via a Python script using boto3 functions.
I am using start_query_execution() to run my query. This executes perfectly. Next, to access the result of the query within my Python script, I am using the function get_query_results().
Now if I run these two functions separately (one script after another), I get the data which is the output of the Athena query. I want them written in a single script - something like: fetch data from S3, then start manipulating the output of the query in Python code.
Since the query is async in nature, I am using the pool technique, where it waits until the Athena query is executed. But if I run the code below, the status shown for the query is still RUNNING.
I think I am making some silly mistake, because if I run them separately I get the desired output. In short, I want to query the data present in S3 using Athena, then do some processing on the fetched data in the Python script, hence this approach. Please help.
Here is the sample code:
#!/usr/bin/env python3
import boto3
import time
from functools import partial
from multiprocessing.dummy import Pool

pool = Pool(processes=1)

# def async_function(name):
#     time.sleep(1)
#     return name
#
# def callback_function(name, age):
#     print(name, age)

def run_query(query, database, s3_output):
    client = boto3.client('athena')
    response = client.start_query_execution(
        QueryString=query,
        QueryExecutionContext={
            'Database': database
        },
        ResultConfiguration={
            'OutputLocation': s3_output,
        }
    )
    print('Execution ID: ' + response['QueryExecutionId'])
    return response

def show_res(res, q):
    client = boto3.client('athena')
    print("Executing query: %s" % (q))
    print('Execution ID: ' + res['QueryExecutionId'])
    # response = client.stop_query_execution(
    #     QueryExecutionId=res['QueryExecutionId']
    # )
    response = client.get_query_results(
        # QueryExecutionId='f3642735-d9d9-4246-ade4-7453eaed0717'
        QueryExecutionId=res['QueryExecutionId']
    )
    print("Executing query: %s" % (q))
    print('Execution ID: ' + res['QueryExecutionId'])
    print('Response: ' + ', '.join(str(x) for x in response['ResultSet']['Rows']))
    return response

# for age, name in enumerate(['jack', 'jill', 'james']):
#     new_callback_function = partial(callback_function, age=age)
#     pool.apply_async(
#         async_function,
#         args=[name],
#         callback=new_callback_function
#     )

# Athena configuration
s3_input = 's3://dummy/'
s3_output = 's3://dummy/results/'
database = 'dummy'
table = 'dummy'

# Query definitions
query_1 = "SELECT * FROM %s.%s where sex = 'F';" % (database, table)
query_2 = "SELECT * FROM %s.%s where age > 30;" % (database, table)

# Execute all queries
queries = [ query_1 ]
for q in queries:
    print("Executing query: %s" % (q))
    new_callback_function = partial(show_res, q=q)
    pool.apply_async(
        run_query,
        args=[q, database, s3_output],
        callback=new_callback_function
    )

pool.close()
pool.join()
Instead of using apply_async, try:
pool = Pool(cores)
df = pd.concat(pool.map(func, [value_1,...,value_n]))
pool.close()
pool.join()
I am sharing my code, which works great, and I expect you can reuse some lines. Basically, I run multiple queries in Athena at the "same" time (I parallelized the array named endpoints), and I store each result in a row of a Pandas dataframe. You can also fetch the data for each query, and I added a status print so you can see the status of each query. Remember that Athena has a limit on the number of queries you can run concurrently.
import time
import boto3
import pandas as pd
from multiprocessing import Pool

class QueryAthena:

    def __init__(self, endpoint, init_date, end_date):
        self.s3_input = 's3://my_bucket/input'
        self.s3_output = 's3://my_bucket/output'
        self.database = 'datalake'
        self.table = 'my_table'
        self.endpoint = "'" + endpoint + "'"
        self.init_date = "'" + init_date + "'"
        self.end_date = "'" + end_date + "'"
        self.year = self.init_date[1:5]
        self.month = self.init_date[6:8]
        self.day = self.init_date[9:11]
        self.region_name = 'us-east-1'
        self.aws_access_key_id = "my_id"
        self.aws_secret_access_key = "my_key"

    def load_conf(self, q):
        self.client = boto3.client('athena',
                                   region_name=self.region_name,
                                   aws_access_key_id=self.aws_access_key_id,
                                   aws_secret_access_key=self.aws_secret_access_key)
        try:
            response = self.client.start_query_execution(
                QueryString=q,
                QueryExecutionContext={
                    'Database': self.database
                },
                ResultConfiguration={
                    'OutputLocation': self.s3_output,
                }
            )
            print('Execution ID: ' + response['QueryExecutionId'])
        except Exception as e:
            print(e)
        return response

    def query(self):
        self.query = "SELECT count(*) as total_requests, SUM(CASE WHEN count_endpoints > 1 THEN 1 ELSE 0 END) as total_repeated, AVG(CASE WHEN count_endpoints > 1 THEN count_endpoints END) as TRAFFIC_QUALITY FROM (SELECT * from (SELECT domain, size, device_id, ip, array_join(array_agg(distinct endpoint), ',') as endpoints_all, count(distinct endpoint) as count_endpoints FROM %s.%s WHERE year=%s and month=%s and day=%s and ts between timestamp %s and timestamp %s and status = '2' GROUP BY domain, size, device_id, ip) l1 where endpoints_all LIKE '%%' || %s || '%%') l2;" % (self.database, self.table, self.year, self.month, self.day, self.init_date, self.end_date, self.endpoint)

    def run_query(self):
        self.query()
        queries = [self.query]
        for q in queries:
            #print("Executing query: %s" % (q))
            res = self.load_conf(q)
            try:
                query_status = None
                while query_status == 'QUEUED' or query_status == 'RUNNING' or query_status is None:
                    query_status = self.client.get_query_execution(QueryExecutionId=res["QueryExecutionId"])['QueryExecution']['Status']['State']
                    print(query_status + " " + self.endpoint)
                    if query_status == 'FAILED' or query_status == 'CANCELLED':
                        raise Exception('Athena query with the string "{}" failed or was cancelled'.format(q))
                    time.sleep(20)
                print("Query %s finished." % (self.endpoint))
                response = self.client.get_query_results(QueryExecutionId=res['QueryExecutionId'])
                df = self.results_to_df(response)
                df = pd.DataFrame(df)
                df["endpoint"] = str(self.endpoint)
                try:
                    df["percentaje_repeated"] = str(int(df["total_repeated"].iloc[0]) * 100 / int(df["total_requests"].iloc[0]))
                except Exception as e:
                    print(self.endpoint + " here")
                df["date"] = str(self.init_date + "-" + self.end_date)
                return df
            except Exception as e:
                print(str(e) + " " + self.endpoint)
                print(df["total_repeated"].iloc[0])
                print(df["total_requests"].iloc[0])

    def results_to_df(self, results):
        columns = [
            col['Label']
            for col in results['ResultSet']['ResultSetMetadata']['ColumnInfo']
        ]
        listed_results = []
        for res in results['ResultSet']['Rows'][1:]:
            values = []
            for field in res['Data']:
                try:
                    values.append(list(field.values())[0])
                except Exception:
                    values.append(list(' '))
            listed_results.append(
                dict(zip(columns, values))
            )
        return listed_results

def func(end):
    qa = QueryAthena(end, "2018-10-09 00:00:00", "2018-10-09 05:59:59")
    result = qa.run_query()
    return result

endpoints = ["677SRI149821","V14509674","1426R"]

if __name__ == '__main__':
    pool = Pool(15)
    df = pd.concat(pool.map(func, endpoints))
    pool.close()
    pool.join()
My code:
do_nomor = request.args.get('do_nomor', '')
bjdt_no_urut = request.args.get('bjdt_no_urut', '')
try:
    SQL = """ SELECT
                  bjdt_id,
                  do_nomor,
                  to_char(do_tgl, 'DD/MM/YYYY') as do_tgl,
                  bjdt_no_urut,
                  bjqc_nomor,
                  to_char(bjqc_tgl_buat, 'DD/MM/YYYY') as bjqc_tgl_buat,
                  pelanggan_nama,
                  pry_nama,
                  to_char(tgl_rencana_test, 'DD/MM/YYYY') as tgl_rencana_test
              from v_bendauji_detil where
                  bjdt_tgl_test is null and
                  do_nomor = %s
                  and bjdt_no_urut not in (%s)
              ORDER BY bjdt_no_urut ASC limit 10;
          """
    conn_string = "dbname='api_rc_38' user='appusr' host='localhost' password='1' port='8765' "
    conn = psycopg2.connect(conn_string)
    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    data = (do_nomor, bjdt_no_urut,)
    cur.execute(SQL, data)
    rows = cur.fetchone()
    cur.close()
    conn.close()
    json_data = json.dumps(rows)
    return json_data
How can I pass a list variable to the URL as a dictionary with Flask on a GET method?
Like this example:
http://localhost:5000/bendauji?do_nomor=DOCKET/ICN/05-2017/5175&bjdt_no_urut=('5','6','7',)
You can use a URL of this format: http://localhost:5000/bendauji?do_nomor=DOCKET/ICN/05-2017/5175&bjdt_no_urut=5&bjdt_no_urut=6&bjdt_no_urut=7
Then get the list of bjdt_no_urut values with request.args.getlist('bjdt_no_urut').
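On the psycopg2 side, a Python tuple is adapted to the (v1, v2, ...) form that IN / NOT IN expects, so a single %s can carry the whole list. A minimal sketch reusing the names from the question (note the literal parentheses around %s are dropped, because psycopg2 supplies them):

bjdt_no_urut_list = request.args.getlist('bjdt_no_urut')  # e.g. ['5', '6', '7']

# In the SQL: ... and bjdt_no_urut not in %s ...
data = (do_nomor, tuple(bjdt_no_urut_list))
cur.execute(SQL, data)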
This is a part of my source code that does not return the right result:
query = ""
for i in range(len(values)):
    if type(values.values()[i]) is str:
        query += "'" + str(values.values()[i]) + "', "
    else:
        query += str(values.values()[i]) + ", "
When I use
values = {'Date': '2014-08-09 07:12:40', 'Ip': '127.0.0.1', 'MembershipID': 1}
the query is
"\\\'2014-08-09 07:12:40\\\', \\\'127.0.0.1\\\', 1, "
instead of
"'2014-08-09 07:12:40', '127.0.0.1', 1, "
How can I fix this?
Do not try to quote SQL parameters yourself. Instead, leave this to the database adapter; it can do it more efficiently, always correctly and helps make database query parsing more efficient.
For MySQL you can use named parameters in the form of %(name)s and pass in your dictionary as the second argument to cursor.execute():
query = '''\
SELECT * FROM foo
WHERE
    date < %(Date)s AND
    ip_address = %(Ip)s AND
    membership = %(MembershipID)s
'''
cursor.execute(query, values)
for row in cursor:
    # ...
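The same named-parameter style also covers the value-list building from the question (which looks like an INSERT); a sketch assuming a hypothetical memberships_log table whose columns line up with the dictionary keys:

insert_query = '''\
INSERT INTO memberships_log (created_at, ip_address, membership)
VALUES (%(Date)s, %(Ip)s, %(MembershipID)s)
'''
cursor.execute(insert_query, values)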