Snowflake Python connector: Error 604 when issuing multiple requests - python

I have an Azure Function that sends queries to Snowflake using the Python Snowflake connector. It opens a connection, creates a cursor, and submits the query with _no_results=True, so it does not wait to confirm that the query succeeded. When I run it on its own it works fine. However, when I use it to run multiple queries at once, some queries randomly fail with status code 604: Query Execution was canceled. Is there some sort of concurrency limit that I'm hitting? I cannot find any information in the documentation. The queries being sent are very simple (truncate table x) and are not timing out.
My code is attached below.
import logging
import json
import time
import gc
from flatten_json import flatten
import os
import snowflake.connector
import azure.functions as func

def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')

    # Deserialize request body
    req_body = req.get_json()
    logging.info('Deserialized input successfully. Request body: ' + json.dumps(req_body))

    # Create result JSON to be returned to caller
    result = {
        "TaskName": req_body['TaskName'],
        "Status": "Complete",
        "TaskKey": req_body['TaskKey'],
        "Query_ID": "",
        "Session_ID": ""
    }

    # Create the Snowflake parameters for connection
    USER = <sfusername>
    PASSWD = <sfpw>
    ACCOUNT = <sfAcc>
    WAREHOUSE = <sfwh>
    DATABASE = <sfdb>
    logging.info('Connection string created')

    copy_sql_statement = create_sql_statement(req_body)
    logging.info('Insert SQL Statement: ' + copy_sql_statement)
    logging.info('Attempting to Connect to Snowflake...')
    try:
        # Try to connect to Snowflake
        connection = snowflake.connector.connect(user=USER, password=PASSWD, account=ACCOUNT, warehouse=WAREHOUSE, database=DATABASE)
        logging.info('Connection Successful')
    except Exception as e:
        raise e

    logging.info('Try block for query_snowflake started.')
    try:
        # Call function to execute copy into
        output_list = query_snowflake(req_body, connection, copy_sql_statement)  # return queryid and sessionid from Snowflake
        queryid = output_list[0]
        sessionid = output_list[1]
        result['Query_ID'] = queryid
        result['Session_ID'] = sessionid
        logging.info('Query sent to Snowflake Successfully.')
        return func.HttpResponse(json.dumps(result), status_code=200)
    except Exception as e:
        result['Status'] = 'Failed'
        result['Error_Message'] = str(e)
        logging.info('Copy Into function failed. Error: ' + str(e))
        return func.HttpResponse(json.dumps(result), status_code=400)

def create_sql_statement(req_body):
    # Replace TaskKey and CDCMin
    copy_sql_statement = req_body['InsertSQL'].replace('#TaskKey', req_body['TaskKey']).replace('#CDCMinDate', req_body['CDCMinDate']).replace('#CDCMaxDate', req_body['CDCMaxDate'])
    return copy_sql_statement

def query_snowflake(req_body, connection, copy_sql_statement):
    try:
        # Execute copy into statement
        cur = connection.cursor()
        sessionid = cur.execute("select current_session()").fetchone()
        cur.execute(copy_sql_statement, _no_results=True)
        #connection.execute('COMMIT;')
        return [cur.sfqid, sessionid[0]]  # return queryid and sessionid as list for result body
    except Exception as e:
        raise e
    #finally:
    #    # Close and dispose connection
    #    cur.close()
    #    connection.close()
NEW CODE (same function, now with a status-polling loop so the caller learns whether the query was canceled):
import logging
import json
import time
import gc
from flatten_json import flatten
import os
import snowflake.connector
import azure.functions as func

def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')

    # Deserialize request body
    req_body = req.get_json()
    logging.info('Deserialized input successfully. Request body: ' + json.dumps(req_body))

    # Create result JSON to be returned to caller
    result = {
        "TaskName": req_body['TaskName'],
        "Status": "Complete",
        "TaskKey": req_body['TaskKey'],
        "Query_ID": "",
        "Session_ID": "",
        "Error_Message": ""
    }

    # Create the Snowflake parameters for connection
    USER = <sfusername>
    PASSWD = <sfpw>
    ACCOUNT = <sfAcc>
    WAREHOUSE = <sfwh>
    DATABASE = <sfdb>
    logging.info('Connection string created')

    copy_sql_statement = create_sql_statement(req_body)
    logging.info('SQL Statement: ' + copy_sql_statement)
    logging.info('Attempting to Connect to Snowflake...')
    try:
        # Try to connect to Snowflake
        connection = snowflake.connector.connect(user=USER, password=PASSWD, account=ACCOUNT, warehouse=WAREHOUSE, database=DATABASE)
        logging.info('Connection Successful')
    except Exception as e:
        raise e

    logging.info('Try block for send query started.')
    try:
        # Call function to execute copy into
        logging.info('Sending Query to Snowflake...')
        output_list = query_snowflake(req_body, connection, copy_sql_statement)  # return queryid and sessionid from Snowflake
        queryid = output_list[0]
        sessionid = output_list[1]
        result['Query_ID'] = queryid
        result['Session_ID'] = sessionid

        logging.info('Ensuring Query was Sent...')
        status_stmt = create_status_statement(queryid, sessionid)
        for x in range(1, 14):  # it will try for 3.5min in case query is pending
            time.sleep(5)
            returnValues = get_query_status(status_stmt, connection)
            # Check the result; if the error code is 604 we know the query was canceled.
            if returnValues[1] == '604':
                result['Status'] = 'Failed'
                result['Error_Message'] = 'SQL Execution Canceled'
                return func.HttpResponse(json.dumps(result), status_code=400)
            # If it's anything but pending, we know the query was sent to Snowflake;
            # a 2nd function worries about the result.
            elif returnValues[0] != 'PENDING':
                result['Status'] = returnValues[0]
                logging.info('Query sent to Snowflake Successfully.')
                return func.HttpResponse(json.dumps(result), status_code=200)
            else:
                logging.info('Loop ' + str(x) + ' completed, trying again...')
                time.sleep(10)

        # If it exits the for loop, mark success; the 2nd function surfaces any failures.
        result['Status'] = 'Success'
        return func.HttpResponse(json.dumps(result), status_code=200)
    except Exception as e:
        result['Status'] = 'Failed'
        result['Error_Message'] = str(e)
        logging.info('Copy Into function failed. Error: ' + str(e))
        return func.HttpResponse(json.dumps(result), status_code=400)

def create_sql_statement(req_body):
    # Replace TaskKey and CDCMin
    copy_sql_statement = req_body['InsertSQL'].replace('#TaskKey', req_body['TaskKey']).replace('#CDCMinDate', req_body['CDCMinDate']).replace('#CDCMaxDate', req_body['CDCMaxDate'])
    return copy_sql_statement

def query_snowflake(req_body, connection, copy_sql_statement):
    try:
        # Execute copy into statement
        cur = connection.cursor()
        sessionid = cur.execute("select current_session()").fetchone()
        cur.execute(copy_sql_statement, _no_results=True)
        # return queryid and sessionid as list for result body
        return [cur.sfqid, sessionid[0]]
    except Exception as e:
        raise e

def create_status_statement(queryid, sessionid):
    sql_statement = "SELECT execution_status, error_code, query_id \
        FROM TABLE(INFORMATION_SCHEMA.QUERY_HISTORY_BY_SESSION(SESSION_ID => " + sessionid + ")) \
        WHERE QUERY_ID = '" + queryid + "'"
    return sql_statement

def get_query_status(etl_sql_statement, conn):
    QueryStatus = ''
    ErrorCode = ''
    QueryID = ''
    try:
        # Execute sql statement.
        cur = conn.cursor()
        Result = cur.execute(etl_sql_statement)
        row = Result.fetchone()
        if row is None:
            ErrorCode = 'PENDING'
        else:
            QueryStatus = str(row[0])
            ErrorCode = str(row[1])
            QueryID = str(row[2])
    except Exception as e:
        logging.info('Failed to get query status. Error: ' + str(e))
        raise e
    finally:
        # Close and dispose cursor
        cur.close()
    return (QueryStatus, ErrorCode, QueryID)
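Newer versions of snowflake-connector-python also expose the status of an asynchronous query directly on the connection, which avoids hand-building the QUERY_HISTORY_BY_SESSION SQL. A minimal sketch under that assumption, polling with the queryid captured above (get_query_status and is_still_running are the connector's async-query helpers; verify your installed version has them):

import time
from snowflake.connector.constants import QueryStatus

def wait_for_query(connection, queryid, tries=13, delay=5):
    # Ask the server for the async query's status until it settles.
    for _ in range(tries):
        status = connection.get_query_status(queryid)
        if not connection.is_still_running(status):
            return status  # e.g. QueryStatus.SUCCESS or QueryStatus.FAILED_WITH_ERROR
        time.sleep(delay)
    return status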

Related

Boto3 throws exception when uploading file but the file is saved in S3

I need to upload files using boto3 with Flask. I have the following method to upload the files and I want to return the path of the file within S3.
utils.py
import io
import boto3
from app import app

s3 = boto3.client(
    "s3",
    aws_access_key_id=app.config['S3_KEY'],
    aws_secret_access_key=app.config['S3_SECRET']
)

def upload_file(file, bucket_name, acl=app.config['AWS_DEFAULT_ACL']):
    try:
        s3.upload_fileobj(
            file,
            bucket_name,
            f"Profile_Photos/{file.filename}",
            ExtraArgs={
                "ACL": 'private',
                "ContentType": file.content_type
            }
        )
        return "{}{}".format(app.config['S3_LOCATION', file.filename])
    except Exception as e:
        print("Something was wrong: ", e)  # This exception is thrown
        return e
Inside the main module I have the following:
main.py
@app.route('/registro', methods=['POST'])
def register():
    conn = None
    cursor = None
    try:
        username = request.form.get('user', None)
        password = request.form.get('password', None)
        if username and password:
            hashed_password = hashlib.md5(password.encode()).hexdigest()
            sql = "INSERT INTO Users(username, password) VALUES (%s, %s)"
            data = (username, hashed_password)
            conn = mySQL.connect()
            cursor = conn.cursor()
            cursor.execute(sql, data)
            conn.commit()
            if 'current_photo' in request.files:
                file = request.files['current_photo']
                if file.filename != '':
                    file_name = os.path.splitext(file.filename)[0]
                    extension = file.filename.split('.')[-1]
                    new_name = "{}_{}.{}".format(file_name, username, extension)
                    file.filename = new_name
                    file.filename = secure_filename(file.filename)
                    print("Before")
                    path = upload_file(file, app.config['S3_BUCKET'])  # Error occurs here
                    print("After")
            res = jsonify('User created.')
            res.status_code = 200
            return res
    except Exception as e:
        print(e)
        return 'Error'

if __name__ == '__main__':
    app.run()
The problem is that when executing the code, an exception is always thrown in the upload_file method, yet the photo is uploaded to S3. The message doesn't seem very informative:
Something was wrong ('S3_LOCATION', 'profile_user2.jpg').
What does that message mean and why is the exception being thrown?
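For what it's worth, that message is the stringified KeyError: app.config['S3_LOCATION', file.filename] indexes the config dict with the tuple ('S3_LOCATION', 'profile_user2.jpg') because the closing bracket sits after the filename, and the lookup fails only after the upload has already succeeded. A minimal sketch of the corrected return line:

    # Look up the config key alone, then append the filename
    return "{}{}".format(app.config['S3_LOCATION'], file.filename)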

Unable to use S3 trigger to transfer S3 objects to RDS

I have the Lambda function code below, which transfers objects from an S3 bucket to an AWS RDS database.
import json
import boto3
import pymysql

s3_client = boto3.client('s3')

def lambda_handler(event, context):
    bucket_name = event["bucket"]
    s3_file_name = event["object"]
    resp = s3_client.get_object(Bucket=bucket_name, Key=s3_file_name)
    data = resp['Body']

    rds_endpoint = ""
    username = ""  # username for RDS MySQL
    password = ""  # RDS MySQL password
    db_name = ""   # RDS MySQL DB name

    conn = None
    try:
        conn = pymysql.connect(host=rds_endpoint, user=username, password=password, database=db_name)
    except pymysql.MySQLError as e:
        print("ERROR: Unexpected error: Could not connect to MySQL instance.")

    try:
        cur = conn.cursor()
        cur.execute(...)  # db stuff
        conn.commit()
    except Exception as e:
        print(e)
        return 'Table not created!'

    with conn.cursor() as cur:
        try:
            cur.execute(...)  # db stuff
            conn.commit()
            output = cur.execute()
        except:
            output = ("Entry not inputted! Error!")

    print("Deleting the csv file from s3 bucket")
    return {
        'statusCode': 200,
        'body': 'Successfully uploaded!'
    }
The code above works fine with this test event:
{
    "bucket": "python-bucket",
    "object": "bobmarley.mp3"
}
However, when I try to adapt it to a real S3 trigger by changing those lines to the following, as shown in this tutorial: https://www.data-stats.com/s3-data-ingestion-to-rds-through-lambda/
bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
s3_file_name = event["Records"][0]["s3"]["object"]["key"]
I get this error:
[ERROR] TypeError: list indices must be integers or slices, not str
Traceback (most recent call last):
  File "/var/task/lambda_function.py", line 7, in lambda_handler
    bucket_name = event["Records"]["s3"]["bucket"]["name"]
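Two details stand out here. The traceback shows event["Records"]["s3"] without the [0], so the deployed code doesn't match the line quoted above: event["Records"] is a list, and indexing a list with the string "s3" is exactly what raises that TypeError. Also, the hand-written test event lacks the Records wrapper that a real S3 notification carries. A trimmed sketch of the shape the handler receives from a real S3 trigger (actual events carry many more fields):

    test_event = {
        "Records": [
            {
                "s3": {
                    "bucket": {"name": "python-bucket"},
                    "object": {"key": "bobmarley.mp3"}
                }
            }
        ]
    }
    bucket_name = test_event["Records"][0]["s3"]["bucket"]["name"]  # note the [0]
    s3_file_name = test_event["Records"][0]["s3"]["object"]["key"]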

Execute a large SQL file with SQLite

I need to execute a 178 MB SQL file (the Rich_Epinions_Dataset_anonym database dump).
I get this error message: MemoryError.
I use SQLite and Python. Here is my code:
import sqlite3
import ctypes

def import_sql():  # "import" is a reserved keyword and cannot be a function name
    print("Opened database ...")
    conn = sqlite3.connect('F:\\PROJECT\\testDict.db')
    print("Opened database successfully")
    databaseFile = 'F:\\PROJECT\\epinions_anonym.sql'
    qry = open(databaseFile, 'r').read()
    sqlite3.complete_statement(qry)
    cursor = conn.cursor()
    try:
        cursor.executescript(qry)
    except Exception as e:
        MessageBoxW = ctypes.windll.user32.MessageBoxW
        errorMessage = databaseFile + ': ' + str(e)
        MessageBoxW(None, errorMessage, 'Error', 0)
        cursor.close()
        raise

import_sql()
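The MemoryError comes from loading the whole 178 MB file with .read() and handing it to executescript() in one shot. A minimal sketch of an incremental variant, assuming at most one statement ends per line: it streams the file, buffers lines until sqlite3.complete_statement() reports a full ;-terminated statement, executes it, and clears the buffer:

    import sqlite3

    def import_sql_incremental(db_path, sql_path):
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()
        buffer = ""
        with open(sql_path, 'r') as f:
            for line in f:  # stream instead of read()-ing the whole file
                buffer += line
                if sqlite3.complete_statement(buffer):
                    cursor.execute(buffer)
                    buffer = ""
        conn.commit()
        conn.close()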

AWS Lambda RDS MySQL DB Connection InterfaceError

When I try to connect to AWS RDS (MySQL), most of the time I receive an InterfaceError. When I edit the Lambda code and re-run, it will work fine the first time, but then the same error occurs.
My code:
import sys
import logging
import pymysql
import json
import traceback

rds_host = "*****.rds.amazonaws.com"
name = "*****"
password = "****"
db_name = "myDB"

logger = logging.getLogger()
logger.setLevel(logging.INFO)

try:
    conn = pymysql.connect(rds_host, user=name, passwd=password, db=db_name, connect_timeout=5)
except:
    logger.error("ERROR: Unexpected error: Could not connect to MySql instance.")
    sys.exit()

logger.info("SUCCESS: Connection to RDS mysql instance succeeded")

def handler(event, context):
    sub = event['sub']
    username = event['username']
    givenname = event['givenname']
    isAdmin = event['isAdmin']
    print(sub)
    print(username)
    print(givenname)
    print(isAdmin)
    data = {}
    cur = conn.cursor()
    try:
        cmd = "SELECT AuthState FROM UserInfo WHERE UserName=" + "\'" + username + "\'"
        rowCnt = cur.execute(cmd)
        print(cmd)
    except:
        print("ERROR: DB Query Execution failed.")
        traceback.print_exc()
        data['errorMessage'] = 'Internal server error'
        response = {}
        response['statusCode'] = 500
        response['body'] = data
        return response
    if rowCnt <= 0:
        print(username)
        data['errorMessage'] = 'No User Name Found'
        response = {}
        response['statusCode'] = 400
        response['body'] = data
        conn.close()
        return response
    for row in cur:
        print(row[0])
        if int(row[0]) == 0:    # NOT_AUTHORIZED
            ret = "NOT_AUTHORIZED"
        elif int(row[0]) == 1:  # PENDING
            ret = "PENDING"
        elif int(row[0]) == 2:  # AUTHORIZED
            ret = "AUTHORIZED"
        else:                   # BLOCKED
            ret = "BLOCKED"
    data['state'] = ret
    response = {}
    response['statusCode'] = 200
    response['body'] = data
    conn.close()
    return response
The stacktrace:
Traceback (most recent call last):
  File "/var/task/app.py", line 37, in handler
  File "/var/task/pymysql/connections.py", line 851, in query
    self._execute_command(COMMAND.COM_QUERY, sql)
  File "/var/task/pymysql/connections.py", line 1067, in _execute_command
    raise err.InterfaceError("(0, '')")
InterfaceError: (0, '')
Read Understanding Container Reuse in Lambda.
It was written about Node but is just as accurate for Python.
Your code doesn't run from the top with each invocation. Sometimes it starts with the handler.
Why? It's faster.
How do you know when this will happen? You don't... except for each time you redeploy the function, of course, you'll always get a fresh container on the first invocation, because the old containers would have been abandoned by the redeploy.
If you're going to do your DB connection outside the handler, don't call conn.close(), because on the next invocation of the function, you might find your container is still alive, and the handler is invoked with an already-closed database handle.
You have to write Lambda functions so that they neither fail if a container is reused, nor fail if a container is not reused.
The simpler solution is to open the DB connection inside the handler. The more complex but faster solution is to never close it, so that it can potentially be reused across invocations.
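A minimal sketch of the "never close it" variant, reusing the rds_host / name / password / db_name values from the question; conn.ping(reconnect=True) is pymysql's way to revive a connection that went stale while the container sat idle:

    import pymysql

    conn = None  # module level, so it survives container reuse

    def get_connection():
        global conn
        if conn is None:
            conn = pymysql.connect(host=rds_host, user=name, passwd=password,
                                   db=db_name, connect_timeout=5)
        else:
            conn.ping(reconnect=True)  # reopen the socket if it has gone stale
        return conn

    def handler(event, context):
        cur = get_connection().cursor()
        # ... run queries as before, but never call conn.close() here ...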

Passing variables in python to another web platform

I have code that needs to pass the latency, upspeed, and dlspeed values to another website for display. Right now the code is as below:
import datetime
import os
import sys
import shutil
import webbrowser
import tempfile
import subprocess
import json
import urllib.request
import statistics
import pymysql
import pymysql.cursors

IPERF3_WIN_PATH = "data/iperf3.exe"
HTML_TEMPLATE_PATH = "data/template.html"
IPERF3_HOST = "127.0.0.1"
RESULT_UPLOAD_URL = "UPLOAD URL"
RESULT_VIEW_URL = "VIEW URL"

def resource_path(relative_path):
    """ Get absolute path to resource, works for dev and for PyInstaller
    This is to get a path which will work with pyinstaller
    """
    try:
        # PyInstaller creates a temp folder and stores path in _MEIPASS
        base_path = sys._MEIPASS
    except Exception:
        base_path = os.path.abspath(".")
    return os.path.join(base_path, relative_path)

def ping(ip, tries):
    """ Ping "ip" using the Windows ping command
    Return the average ping as an int
    """
    res = 0
    try:
        output = subprocess.check_output(
            ["ping", "-n", str(tries), ip]).decode("utf-8")
        res = int(output.split(" = ")[-1].split("ms")[0])
    except subprocess.CalledProcessError:
        input("Press Enter to Continue...")
        sys.exit("Error while trying to ping the server, exiting")
    else:
        return res

def copyIperf3Exec():
    """ On OSX:
    Copy the iperf3 binary to a tmp file,
    make it executable and return its path
    This is to avoid many bundle related problems
    On Windows, just return the package path """
    return resource_path(IPERF3_WIN_PATH)

def get_iperf3_download():
    """ Return the output of the iperf3 cli as a python dict """
    ipf3_tmp = copyIperf3Exec()
    try:
        output = subprocess.check_output([ipf3_tmp,
                                          "-c", IPERF3_HOST,
                                          "-J",
                                          "-P", "16",
                                          "-w", "710000",
                                          "-R"])
        res_string = output.decode("utf-8")
    except subprocess.CalledProcessError:
        input("Press Enter to Continue...")
        sys.exit("Problem while doing the test, please try again later")
    else:
        return json.loads(res_string)

def get_iperf3_upload():
    """ Return the output of the iperf3 cli as a python dict """
    ipf3_tmp = copyIperf3Exec()
    try:
        output = subprocess.check_output([ipf3_tmp,
                                          "-c", IPERF3_HOST,
                                          "-J",
                                          "-P", "10",
                                          "-w", "710000"])
        res_string = output.decode("utf-8")
    except subprocess.CalledProcessError:
        input("Press Enter to Continue...")
        sys.exit("Error while doing the upload test, please try again later")
    else:
        return json.loads(res_string)

def get_userinfos():
    """ Get the 3 pieces of information to be presented to the user
    (ip, upload speed, download speed)
    Return a Dictionary
    """
    show_start_msg(0)  # 0% Progress bar
    avg_latency = ping(IPERF3_HOST, 5)
    u_json = get_iperf3_upload()
    show_start_msg(1)  # 40%
    d_json = get_iperf3_download()
    show_start_msg(2)  # 80%
    ip = getip_apify()
    u_bits_per_second = u_json['end']['sum_received']['bits_per_second']
    d_bits_per_second = d_json['end']['sum_received']['bits_per_second']
    u_testtime = u_json['end']['sum_received']['seconds']
    d_testtime = d_json['end']['sum_received']['seconds']
    u_testdate = u_json["start"]["timestamp"]["timesecs"]
    d_testdate = d_json["start"]["timestamp"]["timesecs"]
    res = {
        'ip': ip,
        'latency': avg_latency,
        'upspeed': u_bits_per_second,
        'dlspeed': d_bits_per_second,
        'upspeedtime': u_testtime,
        'dlspeedtime': d_testtime,
        'upspeeddate': u_testdate,
        'dlspeeddate': d_testdate
    }
    return res

def sendToDB(infos):
    # Connect to the database
    connection = pymysql.connect(host='127.0.0.1',
                                 user='testclient',
                                 password='password',
                                 db='speed',
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)
    try:
        with connection.cursor() as cursor:
            # Create a new record
            def stp_date(stp):
                return datetime.datetime.fromtimestamp(stp).strftime(
                    '%Y-%m-%d %H:%M:%S')
            sql = ("INSERT INTO `speedlog`"
                   "(`externalIP`, `uploadspeed`, `uploadspeedtime`,"
                   "`uploadspeeddate`, `downloadspeed`, `downloadspeedtime`,"
                   "`downloadspeeddate`, `latency`)"
                   "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)")
            cursor.execute(sql,
                           (infos["ip"],
                            str(int(infos["upspeed"])),
                            str("{0:.2f}".format(infos["upspeedtime"])),
                            stp_date(infos["upspeeddate"]),
                            str(int(infos["dlspeed"])),
                            str("{0:.2f}".format(infos["dlspeedtime"])),
                            stp_date(infos["dlspeeddate"]),
                            str(int(infos["latency"]))))
        # connection is not autocommit by default,
        # so you must commit to save your changes.
        connection.commit()
    finally:
        connection.close()
    return

def getip_apify():
    res = urllib.request.urlopen("http://api.ipify.org")
    raw_ip = res.read()
    return raw_ip.decode('utf-8')

def prepare_template(templatePath, infos):
    """ Load an html located at templatePath and replace the necessary text
    with the associated values from the iPerf3 infos
    Return a string
    """
    f_template = open(templatePath)
    s_template = f_template.read()
    f_template.close()
    mod_template = s_template.replace("avglatency", str(int(infos['latency'])))
    mod_template = mod_template.replace(
        "upspeed", str("{0:.3f}".format(infos['upspeed']/(1000*1000*1000))))
    mod_template = mod_template.replace(
        "dlspeed", str("{0:.3f}".format(infos['dlspeed']/(1000*1000*1000))))
    return mod_template

def str_to_tempHtml(str):
    """ Write "str" in an .html temporary file
    and return its path
    """
    data = bytes(str, "utf-8")
    tmp = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
    tmp.write(data)
    tmp.flush()
    return tmp.name

def show_start_msg(progress):
    if sys.platform.startswith('darwin'):
        unused = os.system('clear')
    elif sys.platform.startswith('win32'):
        unused = os.system('cls')
    print("="*70)
    print("Speed Testing for 10G Network \n")
    print("Powered by iPerf3")
    print("="*70)
    if progress == -1:
        input("Press Enter to Continue...\n")
        return
    else:
        print("Press Enter to Continue...\n")
    print("Testing in progress")
    if progress == 0:
        print("[" + " "*68 + "]" + " 0%")
    elif progress == 1:
        print("[" + "#" * 27 + " " * 41 + "]" + " 40%")
    elif progress == 2:
        print("[" + "#" * 54 + " " * 14 + "]" + " 80%")
    elif progress == 3:
        print("[" + "#"*68 + "]" + " 100%")
        print("Completed")

if __name__ == '__main__':
    show_start_msg(-1)
    infos = get_userinfos()
    sendToDB(infos)
    show_start_msg(3)  # 100% Complete
    data = {"key": "Jasdkjfhsda349*lio34sdfFdslaPisdf",
            "download": "2048000",
            "upload": "2048000",
            "latency": "10"}
    req = urllib.request.Request(RESULT_UPLOAD_URL, json.dumps(data).encode('ascii'))
    req.add_header('Content-Type', 'application/json')
    resp = urllib.request.urlopen(req).read().decode('ascii')
    resp = resp.replace('\'', '"')
    webbrowser.open(RESULT_VIEW_URL.format(json.loads(resp)['test_id']))
    input("Press Enter to Continue...")
My latency, upspeed, and dlspeed variables are stored in infos and later sent to the DB for recording via sendToDB(infos).
The next part is to also pass these variables to another website via REST. In the data payload, the first attribute "key" is the REST key for authentication, followed by the rest of the values: latency, download speed, and upload speed. However, as you can see, all three values in data are hard-coded instead of being taken from the test results, i.e. latency, upspeed, and dlspeed.
How can I modify the code to send those attributes instead of the hard-coded ones?
You have a method that returns this dictionary...
res = {
    'ip': ip,
    'latency': avg_latency,
    'upspeed': u_bits_per_second,
    'dlspeed': d_bits_per_second,
    'upspeedtime': u_testtime,
    'dlspeedtime': d_testtime,
    'upspeeddate': u_testdate,
    'dlspeeddate': d_testdate
}
And it is called infos, so use it:
data = {"key": "xxxxxxxx",
        "download": infos['dlspeed'],
        "upload": infos['upspeed'],
        "latency": infos['latency']}
