Snowflake Python connector: Error 604 when issuing multiple requests
I have an Azure Function that sends queries to Snowflake using the Python Snowflake connector. It opens a connection, creates a cursor, and submits the query with _no_results=True, so it does not wait to check whether the query succeeded. When I run it on its own it works fine. However, when I use it to run multiple queries at once, some queries randomly fail with status code 604: Query Execution was canceled. Is there some sort of concurrency limit that I'm hitting? I can't find any information about one in the documentation. The queries being sent are very simple (truncate table x) and are not timing out.
My code is attached below.
import logging
import json
import time
import gc
from flatten_json import flatten
import os
import snowflake.connector
import azure.functions as func


def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')

    # Deserialize request body
    req_body = req.get_json()
    logging.info('Deserialized input successfully. Request body: ' + json.dumps(req_body))

    # Create result JSON to be returned to caller
    result = {
        "TaskName": req_body['TaskName'],
        "Status": "Complete",
        "TaskKey": req_body['TaskKey'],
        "Query_ID": "",
        "Session_ID": ""
    }

    # Create the Snowflake parameters for connection
    USER = '<sfusername>'
    PASSWD = '<sfpw>'
    ACCOUNT = '<sfAcc>'
    WAREHOUSE = '<sfwh>'
    DATABASE = '<sfdb>'
    logging.info('Connection string created')

    copy_sql_statement = create_sql_statement(req_body)
    logging.info('Insert SQL Statement: ' + copy_sql_statement)

    logging.info('Attempting to Connect to Snowflake...')
    try:
        # Try to connect to Snowflake
        connection = snowflake.connector.connect(user=USER, password=PASSWD, account=ACCOUNT,
                                                 warehouse=WAREHOUSE, database=DATABASE)
        logging.info('Connection Successful')
    except Exception as e:
        raise e

    logging.info('Try block for query_snowflake started.')
    try:
        # Call function to execute copy into
        output_list = query_snowflake(req_body, connection, copy_sql_statement)  # return queryid and sessionid from Snowflake
        queryid = output_list[0]
        sessionid = output_list[1]
        result['Query_ID'] = queryid
        result['Session_ID'] = sessionid
        logging.info('Query sent to Snowflake Successfully.')
        return func.HttpResponse(json.dumps(result), status_code=200)
    except Exception as e:
        result['Status'] = 'Failed'
        result['Error_Message'] = str(e)
        logging.info('Copy Into function failed. Error: ' + str(e))
        return func.HttpResponse(json.dumps(result), status_code=400)


def create_sql_statement(req_body):
    # Replace TaskKey and CDCMin/CDCMax placeholders with values from the request
    copy_sql_statement = req_body['InsertSQL'].replace('#TaskKey', req_body['TaskKey']) \
        .replace('#CDCMinDate', req_body['CDCMinDate']) \
        .replace('#CDCMaxDate', req_body['CDCMaxDate'])
    return copy_sql_statement


def query_snowflake(req_body, connection, copy_sql_statement):
    try:
        # Execute copy into statement without waiting for the result
        cur = connection.cursor()
        sessionid = cur.execute("select current_session()").fetchone()
        cur.execute(copy_sql_statement, _no_results=True)
        # connection.execute('COMMIT;')
        return [cur.sfqid, sessionid[0]]  # return queryid and sessionid as list for result body
    except Exception as e:
        raise e
    # finally:
    #     # Close and dispose connection
    #     cur.close()
    #     connection.close()
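For reference, the JSON payload posted to the function looks something like the sketch below. Only the keys are real (they are the ones main() and create_sql_statement() read); every value here is invented for illustration.

# Hypothetical example of the request body; values are made up.
example_req_body = {
    "TaskName": "TruncateStagingTable",   # echoed back in the result body
    "TaskKey": "42",                      # substituted for #TaskKey in InsertSQL
    "InsertSQL": "truncate table stage.orders_#TaskKey",
    "CDCMinDate": "2021-01-01",           # substituted for #CDCMinDate
    "CDCMaxDate": "2021-01-02"            # substituted for #CDCMaxDate
}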
NEW CODE:
import logging
import json
import time
import gc
from flatten_json import flatten
import os
import snowflake.connector
import azure.functions as func


def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')

    # Deserialize request body
    req_body = req.get_json()
    logging.info('Deserialized input successfully. Request body: ' + json.dumps(req_body))

    # Create result JSON to be returned to caller
    result = {
        "TaskName": req_body['TaskName'],
        "Status": "Complete",
        "TaskKey": req_body['TaskKey'],
        "Query_ID": "",
        "Session_ID": "",
        "Error_Message": ""
    }

    # Create the Snowflake parameters for connection
    USER = '<sfusername>'
    PASSWD = '<sfpw>'
    ACCOUNT = '<sfAcc>'
    WAREHOUSE = '<sfwh>'
    DATABASE = '<sfdb>'
    logging.info('Connection string created')

    copy_sql_statement = create_sql_statement(req_body)
    logging.info('SQL Statement: ' + copy_sql_statement)

    logging.info('Attempting to Connect to Snowflake...')
    try:
        # Try to connect to Snowflake
        connection = snowflake.connector.connect(user=USER, password=PASSWD, account=ACCOUNT,
                                                 warehouse=WAREHOUSE, database=DATABASE)
        logging.info('Connection Successful')
    except Exception as e:
        raise e

    logging.info('Try block for send query started.')
    try:
        # Call function to execute copy into
        logging.info('Sending Query to Snowflake...')
        output_list = query_snowflake(req_body, connection, copy_sql_statement)  # return queryid and sessionid from Snowflake
        queryid = output_list[0]
        sessionid = output_list[1]
        result['Query_ID'] = queryid
        result['Session_ID'] = sessionid

        logging.info('Ensuring Query was Sent...')
        status_stmt = create_status_statement(queryid, sessionid)
        for x in range(1, 14):  # retries for roughly 3 minutes in case the query is still pending
            time.sleep(5)
            returnValues = get_query_status(status_stmt, connection)
            # Check result; error code 604 means the query was canceled.
            if returnValues[1] == '604':
                result['Status'] = 'Failed'
                result['Error_Message'] = 'SQL Execution Canceled'
                return func.HttpResponse(json.dumps(result), status_code=400)
            # If it is anything but pending, the query reached Snowflake;
            # a second function worries about the result.
            elif returnValues[0] != 'PENDING':
                result['Status'] = returnValues[0]
                logging.info('Query sent to Snowflake Successfully.')
                return func.HttpResponse(json.dumps(result), status_code=200)
            else:
                logging.info('Loop ' + str(x) + ' completed, trying again...')
                time.sleep(10)

        # If the loop completes, mark success and let the second function surface any failures.
        result['Status'] = 'Success'
        return func.HttpResponse(json.dumps(result), status_code=200)
    except Exception as e:
        result['Status'] = 'Failed'
        result['Error_Message'] = str(e)
        logging.info('Copy Into function failed. Error: ' + str(e))
        return func.HttpResponse(json.dumps(result), status_code=400)


def create_sql_statement(req_body):
    # Replace TaskKey and CDCMin/CDCMax placeholders with values from the request
    copy_sql_statement = req_body['InsertSQL'].replace('#TaskKey', req_body['TaskKey']) \
        .replace('#CDCMinDate', req_body['CDCMinDate']) \
        .replace('#CDCMaxDate', req_body['CDCMaxDate'])
    return copy_sql_statement


def query_snowflake(req_body, connection, copy_sql_statement):
    try:
        # Execute copy into statement without waiting for the result
        cur = connection.cursor()
        sessionid = cur.execute("select current_session()").fetchone()
        cur.execute(copy_sql_statement, _no_results=True)
        # Return queryid and sessionid as a list for the result body
        return [cur.sfqid, sessionid[0]]
    except Exception as e:
        raise e


def create_status_statement(queryid, sessionid):
    sql_statement = "SELECT execution_status, error_code, query_id \
        FROM TABLE(INFORMATION_SCHEMA.QUERY_HISTORY_BY_SESSION(SESSION_ID => " + sessionid + ")) \
        WHERE QUERY_ID = '" + queryid + "'"
    return sql_statement


def get_query_status(etl_sql_statement, conn):
    QueryStatus = ''
    ErrorCode = ''
    QueryID = ''
    try:
        # Execute the status lookup
        cur = conn.cursor()
        Result = cur.execute(etl_sql_statement)
        row = Result.fetchone()
        if row is None:
            # No row yet in QUERY_HISTORY_BY_SESSION: report PENDING
            # so the polling loop in main() keeps retrying.
            QueryStatus = 'PENDING'
        else:
            QueryStatus = str(row[0])
            ErrorCode = str(row[1])
            QueryID = str(row[2])
    except Exception as e:
        logging.info('Failed to get query status. Error: ' + str(e))
        raise e
    finally:
        # Close and dispose cursor
        cur.close()
    return (QueryStatus, ErrorCode, QueryID)
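As an aside, recent versions of snowflake-connector-python expose a documented asynchronous-query API that could replace both the private _no_results=True flag and the hand-rolled QUERY_HISTORY_BY_SESSION polling above. The following is a minimal sketch, not the code I am running: it assumes a connector version new enough to provide execute_async, and the connection parameters are placeholders as before.

import time
import snowflake.connector
from snowflake.connector.errors import ProgrammingError

conn = snowflake.connector.connect(user='<sfusername>', password='<sfpw>',
                                   account='<sfAcc>', warehouse='<sfwh>',
                                   database='<sfdb>')
cur = conn.cursor()

# Submit the statement without waiting for it to finish
# (the documented replacement for _no_results=True).
cur.execute_async("truncate table x")
query_id = cur.sfqid

# Let the connector poll the query status instead of querying
# INFORMATION_SCHEMA.QUERY_HISTORY_BY_SESSION by hand.
while conn.is_still_running(conn.get_query_status(query_id)):
    time.sleep(5)

try:
    # Raises ProgrammingError if the query failed (e.g. was canceled).
    conn.get_query_status_throw_if_error(query_id)
    print('Query ' + query_id + ' finished successfully')
except ProgrammingError as e:
    print('Query failed: ' + str(e))
finally:
    cur.close()
    conn.close()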