Return SQL result from function as array - python

Since the query returns more than one result, how could the Get_results class return data_out as an array (list) so that I can iterate over the results of the query?
import psycopg2
import sys

class Get_results():
    def db_call(self, query, dbHost, dbName, dbUser, dbPass):
        try:
            con = None
            con = psycopg2.connect(host=dbHost, database=dbName,
                                   user=dbUser, password=dbPass)
            cur = con.cursor()
            cur.execute(query)
            data = cur.fetchall()
            for data_out in data:
                return data_out
        except psycopg2.DatabaseError, e:
            print 'Error %s' % e
            sys.exit(1)
        finally:
            if con:
                con.close()

sql = " some sql "
w = Get_results()
for i in w.db_call(sql, dbHost, dbName, dbUser, dbPass):
    print "The result is : " + i
For additional info: if I add print data right after data = cur.fetchall(), the output is:
[('The_Galaxy', 'The_Galaxy:star'),
('The_Galaxy', 'The_Galaxy:planet')]

The immediate answer is to change:

for data_out in data:
    return data_out

to:

for data_out in data:
    yield data_out

But you should look at using a with statement (if the DB API supports it) and at simplifying the code - this could just be done with a plain generator function (a class is OTT for this).
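For example, a minimal sketch of that generator approach, assuming psycopg2 (whose connections and cursors support the with statement as of psycopg2 2.5; the connection parameters are placeholders):

import psycopg2

def db_call(query, dbHost, dbName, dbUser, dbPass):
    # the with block commits on success and rolls back on error,
    # but it does not close the connection itself
    con = psycopg2.connect(host=dbHost, database=dbName,
                           user=dbUser, password=dbPass)
    try:
        with con:
            with con.cursor() as cur:
                cur.execute(query)
                for row in cur:
                    yield row
    finally:
        con.close()

Because this is a generator, the caller iterates over it exactly as in the original for i in w.db_call(...) loop, and rows are produced one at a time instead of being collected into a list first. Alternatively, keeping the original class but returning a list: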

import psycopg2
import sys

class Get_results():
    def db_call(self, query, dbHost, dbName, dbUser, dbPass):
        try:
            con = None
            con = psycopg2.connect(host=dbHost, database=dbName,
                                   user=dbUser, password=dbPass)
            cur = con.cursor()
            cur.execute(query)
            data = cur.fetchall()
            resultList = []
            # collect the second column of each returned row
            for data_out in data:
                resultList.append(data_out[1])
            return resultList
        except psycopg2.DatabaseError, e:
            print 'Error %s' % e
            sys.exit(1)
        finally:
            if con:
                con.close()

sql = " some sql "
w = Get_results()
for i in w.db_call(sql, dbHost, dbName, dbUser, dbPass):
    print "The result is : " + i

Related

psycopg2.connect issue after DatabaseError

After the DatabaseError is caught (try-except block), the previous data in the table is deleted. I cannot figure out this strange behaviour.
This is the code I used:
from io import StringIO

import psycopg2

def copy_from_stringio(df, table):
    conn = None
    try:
        # read database configuration (config() is the poster's own helper)
        params = config()
        # connect to the PostgreSQL database
        conn = psycopg2.connect(**params)
        # create a new cursor
        cur = conn.cursor()
        # save dataframe to an in-memory buffer
        buffer = StringIO()
        df.to_csv(buffer, index=False, header=False, sep=';')
        buffer.seek(0)
        cur.copy_from(buffer, table, sep=";")
        conn.commit()
        print("copy_from_stringio() done")
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()

Python calling one function from another

I have written a little Python script to get the files in a directory, compute a hash for each, and then write them to a table.
The first part, getting the files and calculating the hash, was easy. But now I have added a function (write_record) to store the filename, log date and hash in a database, and I am struggling with how to call it from the get_files function so that a record is written for each file in the directory.
from datetime import datetime
from os import scandir
import os
import hashlib
import psycopg2

BLOCKSIZE = 65536
hasher = hashlib.sha256()
basepath = '.'

def convert_date(timestamp):
    d = datetime.utcfromtimestamp(timestamp)
    formated_date = d.strftime('%d%m%Y%H%M%S')
    return formated_date

def get_hash(entry):
    with open(entry, 'rb') as afile:
        buf = afile.read(BLOCKSIZE)
        while len(buf) > 0:
            hasher.update(buf)
            buf = afile.read(BLOCKSIZE)
        # print(hasher.hexdigest())

def get_files():
    dir_entries = scandir('.')
    for entry in dir_entries:
        if entry.is_file():
            info = entry.stat()
            print(' %s %s %s' % (entry.name, convert_date(info.st_mtime), hasher.hexdigest()))
            log_filename = entry.name
            log_hashvalue = hasher.hexdigest()
            log_date = convert_date(info.st_mtime)
            return log_filename, log_hashvalue, log_date
            # write_record()

def write_record():
    log_filename, log_hashvalue, log_date = get_files()
    try:
        print(log_filename, log_hashvalue, log_date)
        connection = psycopg2.connect(user="postgres", password="xxxxxxxx",
                                      host="xxx.xxx.xxx.xxx", port="5432",
                                      database="evidence_logging")
        cursor = connection.cursor()
        postgres_insert_query = """ INSERT INTO logfiles (log_name,log_date,log_hashvalue) VALUES (%s,%s,%s)"""
        record_to_insert = (log_filename, log_date, log_hashvalue)
        print(postgres_insert_query, record_to_insert)
        cursor.execute(postgres_insert_query, record_to_insert)
        connection.commit()
        count = cursor.rowcount
        print(count, "Record inserted successfully into logfiles table")
    except (Exception, psycopg2.Error) as error:
        if connection:
            print("Failed to insert record into logfiles table", error)
    finally:
        # closing database connection.
        if connection:
            cursor.close()
            connection.close()
            print("PostgreSQL connection is closed")

write_record()
Thanks in advance
Regards
Georg
In your code you are calling write_record(), which will insert only one file, because get_files() returns after the first file rather than iterating over all of them.
Instead, call get_files() first, and rather than returning from it, have it call write_record() with the values it was returning.
And do not close the connection after inserting every record; close it once, after all the records have been inserted.
Try this:
from datetime import datetime
from os import scandir
import os
import hashlib
import psycopg2

BLOCKSIZE = 65536
hasher = hashlib.sha256()
basepath = '.'
connection = None

def convert_date(timestamp):
    d = datetime.utcfromtimestamp(timestamp)
    formated_date = d.strftime('%d%m%Y%H%M%S')
    return formated_date

def get_hash(entry):
    with open(entry, 'rb') as afile:
        buf = afile.read(BLOCKSIZE)
        while len(buf) > 0:
            hasher.update(buf)
            buf = afile.read(BLOCKSIZE)
        # print(hasher.hexdigest())

def get_files():
    dir_entries = scandir('.')
    for entry in dir_entries:
        if entry.is_file():
            info = entry.stat()
            print(' %s %s %s' % (entry.name, convert_date(info.st_mtime), hasher.hexdigest()))
            log_filename = entry.name
            log_hashvalue = hasher.hexdigest()
            log_date = convert_date(info.st_mtime)
            write_record(log_filename, log_hashvalue, log_date)
    # close the connection after writing all records
    close_connection()

def write_record(log_filename, log_hashvalue, log_date):
    global connection  # reuse the single module-level connection
    try:
        print(log_filename, log_hashvalue, log_date)
        if connection is None:
            connection = psycopg2.connect(user="postgres", password="xxxxxxxx",
                                          host="xxx.xxx.xxx.xxx", port="5432",
                                          database="evidence_logging")
        cursor = connection.cursor()
        postgres_insert_query = """ INSERT INTO logfiles (log_name,log_date,log_hashvalue) VALUES (%s,%s,%s)"""
        record_to_insert = (log_filename, log_date, log_hashvalue)
        print(postgres_insert_query, record_to_insert)
        cursor.execute(postgres_insert_query, record_to_insert)
        connection.commit()
        count = cursor.rowcount
        print(count, "Record inserted successfully into logfiles table")
    except (Exception, psycopg2.Error) as error:
        if connection:
            print("Failed to insert record into logfiles table", error)
    finally:
        cursor.close()

def close_connection():
    if connection:
        connection.close()
        print("PostgreSQL connection is closed")

get_files()
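One caveat worth flagging (it applies to both the question's and this answer's code): get_hash() is never actually called, and the single module-level hasher is shared across files, so hexdigest() does not reflect the contents of the current file. A hedged per-file variant (a hypothetical rewrite, not part of the original answer):

import hashlib

BLOCKSIZE = 65536

def get_hash(path):
    # a fresh hasher per file, so each digest covers exactly one file
    hasher = hashlib.sha256()
    with open(path, 'rb') as afile:
        buf = afile.read(BLOCKSIZE)
        while buf:
            hasher.update(buf)
            buf = afile.read(BLOCKSIZE)
    return hasher.hexdigest()

Inside get_files(), log_hashvalue = get_hash(entry.path) would then record a real per-file digest (os.DirEntry objects expose the full path as entry.path).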

How to read a CSV file from s3 and write the content in RDS database table using python lambda function?

I have a CSV file, Employee.csv, in an S3 bucket with info about employees: name, age, salary, designation.
I have to write a Python Lambda function that reads this file and writes it to an RDS database, creating a table Employee with columns name, age, salary and designation, whose rows hold the data.
Employee.csv is just an example; it can actually be any CSV file with any number of columns.
from __future__ import print_function
import boto3
import logging
import os
import sys
import uuid
import pymysql
import csv
import rds_config

rds_host = rds_config.rds_host
name = rds_config.db_username
password = rds_config.db_password
db_name = rds_config.db_name

logger = logging.getLogger()
logger.setLevel(logging.INFO)

try:
    conn = pymysql.connect(rds_host, user=name, passwd=password, db=db_name, connect_timeout=5)
except Exception as e:
    logger.error("ERROR: Unexpected error: Could not connect to MySql instance.")
    logger.error(e)
    sys.exit()

logger.info("SUCCESS: Connection to RDS mysql instance succeeded")
s3_client = boto3.client('s3')

def handler(event, context):
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    download_path = '/tmp/{}{}'.format(uuid.uuid4(), key)
    s3_client.download_file(bucket, key, download_path)
    csv_data = csv.reader(open(download_path))
    with conn.cursor() as cur:
        for idx, row in enumerate(csv_data):
            logger.info(row)
            try:
                # placeholders must not be quoted: pymysql quotes and
                # escapes the values itself
                cur.execute('INSERT INTO target_table(name, age, salary, designation)'
                            ' VALUES(%s, %s, %s, %s)',
                            row)
            except Exception as e:
                logger.error(e)
            if idx % 100 == 0:
                conn.commit()
        conn.commit()
    return 'File loaded into RDS:' + str(download_path)
Here is the code which is working for me now:
s3 = boto3.resource('s3')
file_object = event['Records'][0]
key = str(file_object['s3']['object']['key'])
obj = s3.Object(bucket, key)
content_lines = obj.get()['Body'].read().decode('utf-8').splitlines(True)
tableName = key.strip('folder/').strip('.csv')
with conn.cursor() as cur:
    try:
        cur.execute('TRUNCATE TABLE ' + tableName)
    except Exception as e:
        print("ERROR: Unexpected error: table does not exist.")
        sys.exit()
    header = True
    for row in csv.reader(content_lines):
        if header:
            # first row holds the column names
            numberOfColumns = len(row)
            columnNames = str(row).replace('[', '').replace(']', '').replace("'", '')
            print("columnNames:" + columnNames)
            values = '%s'
            numberOfValues = 1
            while numberOfValues < numberOfColumns:
                values = values + ",%s"
                numberOfValues += 1
            print("INSERT into " + tableName + "(" + columnNames + ") VALUES(" + values + ")")
            header = False
        else:
            try:
                cur.execute('INSERT into ' + tableName + '(' + columnNames + ') VALUES(' + values + ')', row)
            except Exception as e:
                raise e
    conn.commit()
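One caveat about deriving the table name: str.strip() removes any of the given characters from both ends rather than a literal prefix or suffix, so key.strip('folder/').strip('.csv') can mangle names; for example, 'folder/docs.csv' comes out as an empty string. A safer sketch (assuming the 'folder/file.csv' key layout from the post):

import os

def table_name_from_key(key):
    # take the file-name part of the S3 key and drop the extension:
    # 'folder/Employee.csv' -> 'Employee'
    base = os.path.basename(key)          # 'Employee.csv'
    name, _ext = os.path.splitext(base)   # ('Employee', '.csv')
    return name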

execute a large sql file sqlite

I need to execute a 178 MB SQL file (the BDD RED: Rich_Epinions_Dataset_anonym dump).
I get this error message: MemoryError.
I use sqlite and Python; here is my code:
import ctypes
import sqlite3

def import_sql():  # 'import' is a reserved word in Python and cannot be a function name
    print("Opened database ...")
    conn = sqlite3.connect('F:\\PROJECT\\testDict.db')
    print("Opened database successfully")
    qry = open('F:\\PROJECT\\epinions_anonym.sql', 'r').read()
    sqlite3.complete_statement(qry)
    cursor = conn.cursor()
    try:
        cursor.executescript(qry)
    except Exception as e:
        MessageBoxW = ctypes.windll.user32.MessageBoxW
        errorMessage = databaseFile + ': ' + str(e)  # databaseFile is defined elsewhere in the poster's script
        MessageBoxW(None, errorMessage, 'Error', 0)
        cursor.close()
        raise

import_sql()
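A hedged sketch of one way around the MemoryError: instead of read()-ing the whole 178 MB file at once, accumulate the dump line by line and execute each statement as soon as sqlite3.complete_statement() reports it is syntactically complete (file paths as in the question):

import sqlite3

def import_sql_streaming():
    conn = sqlite3.connect('F:\\PROJECT\\testDict.db')
    cursor = conn.cursor()
    statement = ''
    with open('F:\\PROJECT\\epinions_anonym.sql', 'r') as f:
        for line in f:
            statement += line
            # run each statement as soon as it is complete, so only one
            # statement is ever held in memory at a time
            if sqlite3.complete_statement(statement):
                cursor.execute(statement)
                statement = ''
    conn.commit()
    conn.close()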

Storing and retrieving zip files in SQLite gives "Could not decode to UTF-8"

I have the following table in SQLite 3, which I plan to use to store a variety of files: txt, pdf, images and zip files.
CREATE TABLE zip (filename TEXT PRIMARY KEY NOT NULL, zipfile BLOB NOT NULL);
To store and retrieve files I am experimenting with the following Python code:
#!env/bin/python
import sqlite3 as lite
import os
import sys

def insertfile(_filename):
    try:
        con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
        con.row_factory = lite.Row
        cur = con.cursor()
        cur.execute('PRAGMA foreign_keys=ON;')
        _f = open(_filename, 'rb')
        _split = os.path.split(_filename)
        _file = _split[1]
        _blob = _f.read()
        cur.execute('INSERT INTO zip (filename,zipfile) VALUES (?,?)', (_file, lite.Binary(_blob)))
        _f.close()
        con.commit()
        cur.close()
        con.close()
    except Exception as ex:
        print ex

def getfile(_filename):
    try:
        con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
        con.row_factory = lite.Row
        cur = con.cursor()
        cur.execute('PRAGMA foreign_keys=ON;')
        cur.execute('SELECT zipfile from zip where filename = ?', (_filename,))
        _files = cur.fetchall()
        if len(_files) > 0:
            _file = open('Test/' + _filename, 'wb')
            _file.write(_files[0]['zipfile'])
            _file.close()
        cur.close()
        con.close()
    except Exception as ex:
        print ex

if __name__ == '__main__':
    print 'works'
    insertfile(sys.argv[1])
    getfile(os.path.split(sys.argv[1])[1])
When I test this on files like .txt, .py, .pdf etc., it works fine.
With zip files there is no error while storing into the table, but there is an error while retrieving the file:
Could not decode to UTF-8 column 'zipfile' with text 'PK '
('PK' is the magic-number prefix of a zip archive, so the stored bytes are evidently being read back as text.) There seems to be some encoding or decoding issue.
I basically used the code from the question Insert binary file in SQLite database with Python.
It worked originally for the pdf, png and jpg files, but I was still getting the error for zip files. When I commented out the insertion and just ran the retrieval code, it worked. Now the code below works.
def insertfile(_filename):
    try:
        con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
        con.row_factory = lite.Row
        cur = con.cursor()
        cur.execute('PRAGMA foreign_keys=ON;')
        _f = open(_filename, 'rb')
        _split = os.path.split(_filename)
        _file = _split[1]
        _blob = _f.read()
        cur.execute('INSERT INTO zip (filename,zipfile) VALUES (?,?)', (_file, lite.Binary(_blob)))
        _f.close()
        con.commit()
        cur.close()
        con.close()
    except Exception as ex:
        print ex

def getfile(_filename):
    try:
        con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
        con.row_factory = lite.Row
        cur = con.cursor()
        cur.execute('PRAGMA foreign_keys=ON;')
        cur.execute('SELECT zipfile from zip where filename = ?', (_filename,))
        _files = cur.fetchall()
        if len(_files) > 0:
            _file = open('Downloads/' + _filename, 'wb')
            _file.write(_files[0]['zipfile'])
            _file.close()
        cur.close()
        con.close()
    except Exception as ex:
        print ex

if __name__ == '__main__':
    print 'works'
    insertfile(sys.argv[1])
    getfile(os.path.split(sys.argv[1])[1])
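If the error ever reappears, a quick hedged diagnostic (not part of the original post) is to ask SQLite what storage class the column actually holds via its built-in typeof() function; 'blob' means the bytes went in unmodified, while 'text' would explain the UTF-8 decode failure:

import sqlite3 as lite

con = lite.connect('histogram.db')
cur = con.cursor()
# typeof() reports the storage class of each stored value
cur.execute('SELECT filename, typeof(zipfile) FROM zip')
for filename, coltype in cur.fetchall():
    print(filename, coltype)
con.close()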
