Python calling one function form another - python

I have written a little python script to get files in a directory, get a hash and then write them to a table.
The first part, getting the files and calculating the hash was easy. But now I added the function (write_record) to store the filename, log date and hash to a database. But I am struggling how to call it form the get_files function an write a record for each file in the directory
from datetime import datetime
from os import scandir
import os
import hashlib
import psycopg2
BLOCKSIZE = 65536
hasher = hashlib.sha256()
basepath = '.'
def convert_date(timestamp):
d = datetime.utcfromtimestamp(timestamp)
formated_date = d.strftime('%d%m%Y%H%M%S')
return formated_date
def get_hash(entry):
with open(entry, 'rb') as afile:
buf = afile.read(BLOCKSIZE)
while len(buf) > 0:
hasher.update(buf)
buf = afile.read(BLOCKSIZE)
# print(hasher.hexdigest())
def get_files():
dir_entries = scandir('.')
for entry in dir_entries:
if entry.is_file():
info = entry.stat()
print(' %s %s %s' % (entry.name, convert_date(info.st_mtime),hasher.hexdigest()))
log_filename = entry.name
log_hashvalue = hasher.hexdigest()
log_date = convert_date(info.st_mtime)
return log_filename,log_hashvalue,log_date
# write_record()
def write_record():
log_filename,log_hashvalue,log_date = get_files()
try:
print(log_filename,log_hashvalue,log_date)
connection = psycopg2.connect(user="postgres",password="xxxxxxxx",host="xxx.xxx.xxx.xxx",port="5432",database="evidence_logging")
cursor = connection.cursor()
postgres_insert_query = """ INSERT INTO logfiles (log_name,log_date,log_hashvalue) VALUES (%s,%s,%s)"""
record_to_insert = (log_filename,log_date,log_hashvalue)
print(postgres_insert_query, record_to_insert)
cursor.execute(postgres_insert_query, record_to_insert)
connection.commit()
count = cursor.rowcount
print (count, "Record inserted successfully into logfiles table")
except (Exception, psycopg2.Error) as error :
if(connection):
print("Failed to insert record into logfiles table", error)
finally:
#closing database connection.
if(connection):
cursor.close()
connection.close()
print("PostgreSQL connection is closed")
write_record()
Thanks in advance
Regards
Georg

In your code you are calling write_record() method this will insert only one file beacause get_files() method will return the first file not all the files.
first you need to call get_files() method instead of returning in this method you should call write_record() method with the values you are returning from get_files().
And do not close the connection after insertion of every record close the connection after insertion of all the records.
try this
from datetime import datetime
from os import scandir
import os
import hashlib
import psycopg2
BLOCKSIZE = 65536
hasher = hashlib.sha256()
basepath = '.'
connection = None
def convert_date(timestamp):
d = datetime.utcfromtimestamp(timestamp)
formated_date = d.strftime('%d%m%Y%H%M%S')
return formated_date
def get_hash(entry):
with open(entry, 'rb') as afile:
buf = afile.read(BLOCKSIZE)
while len(buf) > 0:
hasher.update(buf)
buf = afile.read(BLOCKSIZE)
# print(hasher.hexdigest())
def get_files():
dir_entries = scandir('.')
for entry in dir_entries:
if entry.is_file():
info = entry.stat()
print(' %s %s %s' % (entry.name, convert_date(info.st_mtime),hasher.hexdigest()))
log_filename = entry.name
log_hashvalue = hasher.hexdigest()
log_date = convert_date(info.st_mtime)
write_record(log_filename,log_hashvalue,log_date)
#close the connection after writing all records
close_connection()
def write_record(log_filename,log_hashvalue,log_date):
try:
print(log_filename,log_hashvalue,log_date)
connection = psycopg2.connect(user="postgres",password="xxxxxxxx",host="xxx.xxx.xxx.xxx",port="5432",database="evidence_logging")
cursor = connection.cursor()
postgres_insert_query = """ INSERT INTO logfiles (log_name,log_date,log_hashvalue) VALUES (%s,%s,%s)"""
record_to_insert = (log_filename,log_date,log_hashvalue)
print(postgres_insert_query, record_to_insert)
cursor.execute(postgres_insert_query, record_to_insert)
connection.commit()
count = cursor.rowcount
print (count, "Record inserted successfully into logfiles table")
except (Exception, psycopg2.Error) as error :
if(connection):
print("Failed to insert record into logfiles table", error)
finally:
cursor.close()
def close_connection():
if(connection):
connection.close()
print("PostgreSQL connection is closed")
get_files()

Related

Mysql / Maridb Python Connector is not loading data into table

# Module Imports
import mariadb
import sys
import csv
from pathlib import Path
def connect_to_mariaDB(databse, user, passwd):
# Connect to MariaDB Platform
try: conn = mariadb.connect(
user=user,
password=passwd,
host="localhost",
port=3306,
database=databse
)
except mariadb.Error as e:
print(f"Error connecting to MariaDB Platform: {e}")
sys.exit(1)
return conn
def check_if_table_exists_and_overwrite(conn, tableName, database, overwrite):
cur = conn.cursor()
cur.execute(f"SELECT table_name FROM information_schema.tables WHERE table_schema = '{database}';")
for(table_name) in cur:
if table_name[0] == tableName:
if overwrite == "YES":
print("table exists - DROP TABLE")
cur.execute(f"DROP TABLE {tableName}")
return True
else:
return False
return True
def import_file_into_db_table_(
filename, database, user, passwd, tableName,
create_table_statement = "", overwrite = False):
conn = connect_to_mariaDB(database, user, passwd)
cur = conn.cursor()
if conn != None:
print(f"Connection successful to database {database}")
if check_if_table_exists_and_overwrite(conn, tableName, database, overwrite):
cur.execute(create_table_statement)
print("table is created")
path = f"{Path().absolute()}\\{filename}".replace("\\","/")
print(path)
load_data_statement = f"""LOAD DATA INFILE '{path}'
INTO TABLE {tableName}
FIELDS TERMINATED BY ';'
OPTIONALLY ENCLOSED BY '\"'
LINES TERMINATED BY '\\n'
IGNORE 1 LINES
"""
print(load_data_statement)
cur.execute(load_data_statement)
print("load data into table - successful")
else:
print("table exists - no permission to overwrite")
cur.execute("SELECT * FROM student_mat;")
for da in cur:
print(da)
# variables
filename = "student-mat.csv"
database = "dbs2021"
tableName = "student_mat"
# load the create_table_statement
create_table_statement = ""
path = f"{Path().absolute()}\\create_table_statement.txt"
with open(path, newline='') as file:
spamreader = csv.reader(file, delimiter='\n', quotechar='|')
for row in spamreader:
create_table_statement += row[0]
parameters_length = len(sys.argv)
if parameters_length == 3:
user, passwd = sys.argv[1], sys.argv[2]
import_file_into_db_table_(filename, database, user, passwd, tableName, create_table_statement, "YES")
elif parameters_length == 4:
user, passwd, overwrite = sys.argv[1], sys.argv[2], sys.argv[3]
import_file_into_db_table_(filename, database, user, passwd, tableName, create_table_statement, overwrite)
else:
print("wrong parameters\nTry -user -passwd or additional -overwrite")
The code checks if there is a table with the same name in the db and then potentially drops it, creates a new table and loads the data of the csv file into the table.
When executing the code it seems like everything is working but when going in the mariadb command prompt the created table is empty even though when outputting the table in the code it is filled.
By default MariaDB Connector/Python doesn't use autocommit mode.
You need either set autocommit=True when establishing the connection or you have to commit your changes with conn.commit().

How to read a CSV file from s3 and write the content in RDS database table using python lambda function?

I have a CSV file Employee.csv in the S3 bucket with all info about employee: name, age, salary, designation.
I have to write a python lambda function to read this file and write in RDS db such as it should create a table as Employee, with columns name, age, salary, designation and rows will have the data.
The Employee.csv is just for example, actually it can be any csv file with any number of columns in it.
from __future__ import print_function
import boto3
import logging
import os
import sys
import uuid
import pymysql
import csv
import rds_config
rds_host = rds_config.rds_host
name = rds_config.db_username
password = rds_config.db_password
db_name = rds_config.db_name
logger = logging.getLogger()
logger.setLevel(logging.INFO)
try:
conn = pymysql.connect(rds_host, user=name, passwd=password, db=db_name, connect_timeout=5)
except Exception as e:
logger.error("ERROR: Unexpected error: Could not connect to MySql instance.")
logger.error(e)
sys.exit()
logger.info("SUCCESS: Connection to RDS mysql instance succeeded")
s3_client = boto3.client('s3')
def handler(event, context):
bucket = event['Records'][0]['s3']['bucket']['name']
key = event['Records'][0]['s3']['object']['key']
download_path = '/tmp/{}{}'.format(uuid.uuid4(), key)
s3_client.download_file(bucket, key,download_path)
csv_data = csv.reader(file( download_path))
with conn.cursor() as cur:
for idx, row in enumerate(csv_data):
logger.info(row)
try:
cur.execute('INSERT INTO target_table(name, age, salary, designation)' \
'VALUES("%s", "%s", "%s", "%s")'
, row)
except Exception as e:
logger.error(e)
if idx % 100 == 0:
conn.commit()
conn.commit()
return 'File loaded into RDS:' + str(download_path)
Here is the code which is working for me now:
s3 = boto3.resource('s3')
file_object=event['Records'][0]
key=str(file_object['s3']['object']['key'])
obj = s3.Object(bucket, key)
content_lines=obj.get()['Body'].read().decode('utf-8').splitlines(True)
tableName= key.strip('folder/').strip('.csv')
with conn.cursor() as cur:
try:
cur.execute('TRUNCATE TABLE '+tableName)
except Exception as e:
print("ERROR: Unexpected error:Table does not exit.")
sys.exit()
header=True
for row in csv.reader(content_lines):
if(header):
numberOfColumns=len(row)
columnNames= str(row).replace('[','').replace(']','').replace("'",'')
print("columnNames:"+columnNames)
values='%s'
numberOfValues=len(values)
numberOfValues=1
while numberOfValues< numberOfColumns:
values=values+",%s"
numberOfValues+=1
print("INSERT into "+tableName+"("+columnNames+") VALUES("+values+")")
header=False
else:
try:
cur.execute('INSERT into '+tableName+'('+columnNames+') VALUES('+values+')', row)
except Exception as e:
raise e
conn.commit()

how to copy specific columns through csv file to postgresql

import psycopg2
import time
def read_database():
conn = None
try:
conn = psycopg2.connect(database="capitadb", user="capita_user", password="capita_user",
host = "127.0.0.1", port = "5432")
cur = conn.cursor()
start_time = time.time()
cur.execute("COPY stagging(Activity_ID,F_Qtr,Fiscal_Week_Num,Manager,MBadge) FROM '/home/vivek/Downloads/dell_data.csv' DELIMITER',' CSV;;")
print("--- %s seconds ---" % (time.time() - start_time))
print("Operation done successfully")
conn.commit()
except Exception as e:
print("Error: %s" % e)
finally:
conn.close()
if __name__ == '__main__':
read_database()
Here we have 15 columns in csv file, but we want to copy only 4 columns. How will we achive that without extracting data in any file?
You will need to use COPY FROM STDIN functionality - http://initd.org/psycopg/docs/cursor.html#cursor.copy_from. You will be able to provide file-like object to that function. You can use itertools module for that
from itertools import chain, islice
class some_magic_adaptor(object):
def __init__(self, src):
self.src = chain.from_iterable(src)
def read(self, n):
return "".join(islice(self.src, None, n))
def read_csv():
for line in open(csv_filename):
yield transform_line(line)
file_like_object_for_postgresql = some_magic_adaptor(read_csv())

Storing and retrieving zip files in SQLite gives "Could not decode to UTF-8"

I have a table in SQLite 3 as follows and I am planning on using it to store a variety of files: txt, pdf, images and zip files.
CREATE TABLE zip (filename TEXT PRIMARYKEY NOT NULL, zipfile BLOB NOT NULL);
To store and retrieve I am experimenting with the following python code
#!env/bin/python
import sqlite3 as lite
import os
import sys
def insertfile(_filename):
try:
con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
con.row_factory = lite.Row
cur = con.cursor()
cur.execute('PRAGMA foreign_keys=ON;')
_f = open(_filename,'rb')
_split = os.path.split(_filename)
_file = _split[1]
_blob = _f.read()
cur.execute('INSERT INTO zip (filename,zipfile) VALUES (?,?)', (_file,lite.Binary(_blob)))
_f.close()
con.commit()
cur.close()
con.close()
except Exception as ex:
print ex
def getfile(_filename):
try:
con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
con.row_factory = lite.Row
cur = con.cursor()
cur.execute('PRAGMA foreign_keys=ON;')
cur.execute('SELECT zipfile from zip where filename = ?', (_filename,))
_files = cur.fetchall()
if len(_files) > 0:
_file = open('Test/'+ _filename,'wb')
_file.write(_files[0]['zipfile'])
_file.close()
cur.close()
con.close()
except Exception as ex:
print ex
if __name__ == '__main__':
print 'works'
insertfile(sys.argv[1])
getfile(os.path.split(sys.argv[1])[1])
When I test this on files like .txt, .py, .pdf etc., it works fine.
With Zip files, there is no error while storing into the table but an error while retrieving the file:
Could not decode to UTF-8 column 'zipfile' with text 'PK '
There seems to be some encoding or decoding issue.
I basically tried using the code from one of the questions
Insert binary file in SQLite database with Python
.
It worked originally for the pdf, png, jpg files. But I was still getting the error for Zip files. When I commented out the insertion and just ran the retrieval code it worked. Now the code below works.
def insertfile(_filename):
try:
con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
con.row_factory = lite.Row
cur = con.cursor()
cur.execute('PRAGMA foreign_keys=ON;')
_f = open(_filename,'rb')
_split = os.path.split(_filename)
_file = _split[1]
_blob = _f.read()
cur.execute('INSERT INTO zip (filename,zipfile) VALUES (?,?)', (_file,lite.Binary(_blob)))
_f.close()
con.commit()
cur.close()
con.close()
except Exception as ex:
print ex
def getfile(_filename):
try:
con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
con.row_factory = lite.Row
cur = con.cursor()
cur.execute('PRAGMA foreign_keys=ON;')
cur.execute('SELECT zipfile from zip where filename = ?', (_filename,))
_files = cur.fetchall()
if len(_files) > 0:
_file = open('Downloads/'+ _filename,'wb')
_file.write(_files[0]['zipfile'])
_file.close()
cur.close()
con.close()
except Exception as ex:
print ex
if __name__ == '__main__':
print 'works'
insertfile(sys.argv[1])
getfile(os.path.split(sys.argv[1])[1])

Return SQL result from function as array

Since the query returns more than 1 result, at the Get_results class how could i return the data_out as an array in order to iterate on the results of the query?
import psycopg2
import sys
class Get_results():
def db_call(self,query,dbHost,dbName,dbUser,dbPass):
try:
con = None
con = psycopg2.connect(host=dbHost, database=dbName,
user=dbUser, password=dbPass)
cur = con.cursor()
cur.execute(query)
data = cur.fetchall()
for data_out in data:
return data_out
except psycopg2.DatabaseError, e:
print 'Error %s' % e
sys.exit(1)
finally:
if con:
con.close()
sql = " some sql "
w = Get_results()
for i in w.db_call(sql, dbHost, dbName, dbUser, dbPass):
print "The result is : " + i
For aditional info, when if i add print data right after data = cur.fetchall() i have the result:
[('The_Galaxy', 'The_Galaxy:star'),
('The_Galaxy', 'The_Galaxy:planet')]
The immediate answer is to change:
for data_out in data:
data_out result
to:
for data_out in data:
yield data_out
But you should look at using a with statement (if the DB API supports it), and simplifying the code - this could just be done by making a generator function (a class is OTT for this)
import psycopg2
import sys
class Get_results():
def db_call(self,query,dbHost,dbName,dbUser,dbPass):
try:
con = None
con = psycopg2.connect(host=dbHost, database=dbName,
user=dbUser, password=dbPass)
cur = con.cursor()
cur.execute(query)
data = cur.fetchall()
resultList = []
for data_out in data:
resultList.append(data_out[1])
return resultList
except psycopg2.DatabaseError, e:
print 'Error %s' % e
sys.exit(1)
finally:
if con:
con.close()
sql = " some sql "
w = Get_results()
for i in w.db_call(sql, dbHost, dbName, dbUser, dbPass):
print "The result is : " + i

Categories