The code below extracts Oracle data into xlsx format using Python.
The code runs successfully, but when I open the xlsx file I get the error:
"file format or file extension is not valid. Verify that the file has not been corrupted and that the file extension matches the format of the file."
What should I change in the code below to get the data without Excel rejecting the file?
import xlsxwriter
from xlsxwriter import Workbook
import cx_Oracle
import datetime
from datetime import date
dsn_tns = cx_Oracle.makedsn('HOST', 'port', sid='sid')
conn = cx_Oracle.connect(user=r'username', password='password', dsn=dsn_tns)
cursor = conn.cursor()
xlsx_file = open("path.xlsx", "w")
writer = xlsxwriter.Workbook("path.xlsx")
worksheet = writer.add_worksheet()
sql ='''
SELECT *
FROM ( SELECT STRGUID,ACTIVITYUSERID,ACTIVITYSESSIONID,ACTIVITYCODE,SERVERNAME,APPNAME,STARTTIME,ENDTIME,STRDESCRIPTION,
(To_Date('12/30/1899', 'MM/DD/YYYY HH24:MI:SS')+ STARTTIME)Decoded_Date
FROM tablename
)SUB
WHERE SUB.Decoded_Date between to_date('26-APR-2020', 'DD-MON-YYYY')
and to_date('26-JUN-2020', 'DD-MON-YYYY')
'''
cursor.execute(sql)
for r, row in enumerate(cursor.fetchall()):
    for c, col in enumerate(row):
        worksheet.write(r, c, col)
cursor.close()
conn.close()
xlsx_file.close()
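For reference, the corruption comes from two things in the snippet above: open("path.xlsx", "w") opens and truncates the very file xlsxwriter is writing to, and the workbook is never closed (xlsx_file.close() closes the plain text handle, not the workbook), so the xlsx container is never finalized. A minimal sketch of the fix, keeping the placeholder connection details:

workbook = xlsxwriter.Workbook("path.xlsx")  # no separate open() call
worksheet = workbook.add_worksheet()
cursor.execute(sql)
for r, row in enumerate(cursor.fetchall()):
    for c, col in enumerate(row):
        worksheet.write(r, c, col)
workbook.close()  # finalizes the xlsx container; without it Excel sees a corrupt file
cursor.close()
conn.close()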
I tried the code below and it works successfully for exporting the data from Oracle into xlsx format using Python:
import xlsxwriter
from xlsxwriter import Workbook
import cx_Oracle
import datetime
from datetime import date
import os
import logging
import sys
import getopt
import traceback
## Define Function
def writeToExcel(cur_sor):
    workbook = xlsxwriter.Workbook('path.xlsx')
    worksheet = workbook.add_worksheet("DATA")  # add a new worksheet named DATA
    for row, row1 in enumerate(cur_sor.fetchall()):
        for col, col1 in enumerate(row1):
            worksheet.write(row, col, col1)
    workbook.close()
def setSqlCommand():
    sqlCommand = '''
    SELECT *
    FROM
    ( SELECT LROWNUM, DTIMESTAMP, LSCENARIO, LYEAR, LPERIOD,
             LENTITY, LPARENT, LVALUE, LACCOUNT, LICP, LCUSTOM1,
             LCUSTOM2, STRUSERNAME, STRSERVERNAME,
             LACTIVITY, DDATAVALUE, BNODATA,
             To_Date('12/30/1899', 'MM/DD/YYYY') +
             DTIMESTAMP as Decoded_Date
      FROM tablename
    ) SUB
    WHERE SUB.Decoded_Date
          <= to_date('28-JUN-2020', 'DD-MON-YYYY')
    '''
    return sqlCommand
# Function to execute SQL commands over TNS
def runSqlTNS(sqlCommand, username, password, hostName, portNumber, sID):
    # use the parameters that were passed in rather than hardcoded values
    dsn_tns = cx_Oracle.makedsn(hostName, portNumber, sid=sID)
    db = cx_Oracle.connect(user=username, password=password, dsn=dsn_tns)
    cursor = db.cursor()
    cursor.execute(sqlCommand)
    return cursor
def main(argv=None):
    # Setup logging information
    logging.basicConfig(filename='PATH/myapp.log', level=logging.INFO, format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
    logging.info('Started')
    username = 'USERNAME'
    password = 'PASSWORD'
    # Define dsn entries to create a tns connection
    hostName = 'HOST'
    portNumber = 'PORT'
    sid = 'SERVICEID'
    try:
        sqlCommand = setSqlCommand()
    except Exception:
        logging.info('Function - sqlCommand - In Exception')
        logging.info(traceback.format_exc())  # format_exc() returns the traceback as a string
    try:
        c = runSqlTNS(sqlCommand, username, password, hostName, portNumber, sid)
    except Exception:
        logging.info('Function - runSql In Exception')
        logging.info(traceback.format_exc())
    try:
        writeToExcel(c)  # send the cursor to the writeToExcel function
        c.close()
    except Exception:
        logging.info('Function - writeToExcel In Exception')
        logging.info(traceback.format_exc())

if __name__ == "__main__":
    main(sys.argv)
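A small extension of writeToExcel (a sketch, assuming the same cx_Oracle cursor as above and the same placeholder path) also writes the column names from cursor.description as a header row, which makes the exported sheet easier to read:

def writeToExcelWithHeader(cur_sor):
    workbook = xlsxwriter.Workbook('path.xlsx')
    worksheet = workbook.add_worksheet("DATA")
    for col, meta in enumerate(cur_sor.description):
        worksheet.write(0, col, meta[0])  # meta[0] is the column name
    for row, row1 in enumerate(cur_sor.fetchall(), start=1):
        for col, col1 in enumerate(row1):
            worksheet.write(row, col, col1)
    workbook.close()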
# Module Imports
import mariadb
import sys
import csv
from pathlib import Path

def connect_to_mariaDB(database, user, passwd):
    # Connect to MariaDB Platform
    try:
        conn = mariadb.connect(
            user=user,
            password=passwd,
            host="localhost",
            port=3306,
            database=database
        )
    except mariadb.Error as e:
        print(f"Error connecting to MariaDB Platform: {e}")
        sys.exit(1)
    return conn
def check_if_table_exists_and_overwrite(conn, tableName, database, overwrite):
    cur = conn.cursor()
    cur.execute(f"SELECT table_name FROM information_schema.tables WHERE table_schema = '{database}';")
    for table_name in cur:
        if table_name[0] == tableName:
            if overwrite == "YES":
                print("table exists - DROP TABLE")
                cur.execute(f"DROP TABLE {tableName}")
                return True
            else:
                return False
    return True
def import_file_into_db_table_(
        filename, database, user, passwd, tableName,
        create_table_statement="", overwrite=False):
    conn = connect_to_mariaDB(database, user, passwd)
    cur = conn.cursor()
    if conn is not None:
        print(f"Connection successful to database {database}")
    if check_if_table_exists_and_overwrite(conn, tableName, database, overwrite):
        cur.execute(create_table_statement)
        print("table is created")
        path = f"{Path().absolute()}\\{filename}".replace("\\", "/")
        print(path)
        load_data_statement = f"""LOAD DATA INFILE '{path}'
            INTO TABLE {tableName}
            FIELDS TERMINATED BY ';'
            OPTIONALLY ENCLOSED BY '\"'
            LINES TERMINATED BY '\\n'
            IGNORE 1 LINES
        """
        print(load_data_statement)
        cur.execute(load_data_statement)
        print("load data into table - successful")
    else:
        print("table exists - no permission to overwrite")
    cur.execute("SELECT * FROM student_mat;")
    for da in cur:
        print(da)
# variables
filename = "student-mat.csv"
database = "dbs2021"
tableName = "student_mat"

# load the create_table_statement
create_table_statement = ""
path = f"{Path().absolute()}\\create_table_statement.txt"
with open(path, newline='') as file:
    spamreader = csv.reader(file, delimiter='\n', quotechar='|')
    for row in spamreader:
        create_table_statement += row[0]

parameters_length = len(sys.argv)
if parameters_length == 3:
    user, passwd = sys.argv[1], sys.argv[2]
    import_file_into_db_table_(filename, database, user, passwd, tableName, create_table_statement, "YES")
elif parameters_length == 4:
    user, passwd, overwrite = sys.argv[1], sys.argv[2], sys.argv[3]
    import_file_into_db_table_(filename, database, user, passwd, tableName, create_table_statement, overwrite)
else:
    print("wrong parameters\nTry -user -passwd or additional -overwrite")
The code checks whether a table with the same name exists in the database and then potentially drops it, creates a new table, and loads the data from the CSV file into the table.
When executing the code everything seems to work, but when I go into the MariaDB command prompt the created table is empty, even though the table shows as filled when it is printed from within the code.
By default, MariaDB Connector/Python doesn't use autocommit mode.
You either need to set autocommit=True when establishing the connection, or you have to commit your changes with conn.commit(), as sketched below.
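A minimal sketch of both options, reusing the placeholder connection parameters from the script above:

# Option 1: enable autocommit when connecting
conn = mariadb.connect(
    user=user,
    password=passwd,
    host="localhost",
    port=3306,
    database=database,
    autocommit=True,
)

# Option 2: keep autocommit off and commit explicitly after the load
cur = conn.cursor()
cur.execute(load_data_statement)
conn.commit()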
I'm new to Python. I'm trying to request a URL (using an id stored in a PostgreSQL database) which sends me zip folders with several files inside.
import psycopg2
import requests
url = "https://myurl/"
conn = psycopg2.connect(user="XXX", password="XXX", database="XXX", host="localhost", port="5432")
print("Successfully connected!")
cur = conn.cursor()
sql ="select id from public.base"
cur.execute(sql)
row = [item[0] for item in cur.fetchall()]
for d in row:
requests.post(url+d)
The requests.post(url+d) call is working; I get a 200 response.
But I don't know how to do the following steps, that is to say, how to save these zip folders into my workspace...
You could use the zipfile and io libraries for that, specifying your download location in extractall(), like so :)
from psycopg2 import (
    connect,
    OperationalError,
)
from zipfile import (
    BadZipFile,
    ZipFile,
)
from io import BytesIO
import requests

def download_zip(url):
    response = requests.get(url)
    if response.ok:
        try:
            z = ZipFile(BytesIO(response.content))
            z.extractall("/path/to/destination_directory")
        except BadZipFile as ex:
            print('Error: {}'.format(ex))
        else:
            print('Download succeeded: {}'.format(url))
    else:
        print('Connection failed: {}'.format(url))

def main():
    conn = connect(
        user='XXX',
        password='XXX',
        database='XXX',
        host='localhost',
        port='5432',
    )
    try:
        cur = conn.cursor()
        cur.execute('select id from public.base')
    except OperationalError:
        exit(0)
    row = [item[0] for item in cur.fetchall()]
    for id in row:
        download_zip('https://myurl/{}'.format(id))
    print('Download completed')

if __name__ == '__main__':
    main()
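One practical refinement (a sketch; the destination root is an assumption, and it reuses the imports from the listing above): since every id delivers its own archive, extracting each one into its own subdirectory keeps files from different zips from overwriting each other:

import os

def download_zip_into(url, dest_root):
    response = requests.get(url)
    if not response.ok:
        print('Connection failed: {}'.format(url))
        return
    # name the folder after the last URL segment, i.e. the id
    dest_dir = os.path.join(dest_root, url.rstrip('/').split('/')[-1])
    os.makedirs(dest_dir, exist_ok=True)
    try:
        ZipFile(BytesIO(response.content)).extractall(dest_dir)
    except BadZipFile as ex:
        print('Error: {}'.format(ex))
    else:
        print('Download succeeded: {}'.format(url))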
I have written a little Python script to get the files in a directory, compute a hash for each, and then write them to a table.
The first part, getting the files and calculating the hash, was easy. But now I have added a function (write_record) to store the filename, log date, and hash in a database, and I am struggling with how to call it from the get_files function so that a record is written for each file in the directory.
from datetime import datetime
from os import scandir
import os
import hashlib
import psycopg2

BLOCKSIZE = 65536
hasher = hashlib.sha256()
basepath = '.'

def convert_date(timestamp):
    d = datetime.utcfromtimestamp(timestamp)
    formated_date = d.strftime('%d%m%Y%H%M%S')
    return formated_date

def get_hash(entry):
    with open(entry, 'rb') as afile:
        buf = afile.read(BLOCKSIZE)
        while len(buf) > 0:
            hasher.update(buf)
            buf = afile.read(BLOCKSIZE)
    # print(hasher.hexdigest())

def get_files():
    dir_entries = scandir('.')
    for entry in dir_entries:
        if entry.is_file():
            info = entry.stat()
            print(' %s %s %s' % (entry.name, convert_date(info.st_mtime), hasher.hexdigest()))
            log_filename = entry.name
            log_hashvalue = hasher.hexdigest()
            log_date = convert_date(info.st_mtime)
            return log_filename, log_hashvalue, log_date
            # write_record()

def write_record():
    log_filename, log_hashvalue, log_date = get_files()
    try:
        print(log_filename, log_hashvalue, log_date)
        connection = psycopg2.connect(user="postgres", password="xxxxxxxx", host="xxx.xxx.xxx.xxx", port="5432", database="evidence_logging")
        cursor = connection.cursor()
        postgres_insert_query = """INSERT INTO logfiles (log_name,log_date,log_hashvalue) VALUES (%s,%s,%s)"""
        record_to_insert = (log_filename, log_date, log_hashvalue)
        print(postgres_insert_query, record_to_insert)
        cursor.execute(postgres_insert_query, record_to_insert)
        connection.commit()
        count = cursor.rowcount
        print(count, "Record inserted successfully into logfiles table")
    except (Exception, psycopg2.Error) as error:
        if connection:
            print("Failed to insert record into logfiles table", error)
    finally:
        # closing database connection.
        if connection:
            cursor.close()
            connection.close()
            print("PostgreSQL connection is closed")

write_record()
Thanks in advance
Regards
Georg
In your code you are calling the write_record() method; this will insert only one file, because get_files() returns after the first file instead of iterating over all of them.
First, you need to call the get_files() method, and instead of returning from it you should call write_record() with the values you were returning from get_files().
Also, do not close the connection after inserting each record; close it once after all records have been inserted.
Try this:
from datetime import datetime
from os import scandir
import os
import hashlib
import psycopg2

BLOCKSIZE = 65536
hasher = hashlib.sha256()
basepath = '.'
connection = None

def convert_date(timestamp):
    d = datetime.utcfromtimestamp(timestamp)
    formated_date = d.strftime('%d%m%Y%H%M%S')
    return formated_date

def get_hash(entry):
    with open(entry, 'rb') as afile:
        buf = afile.read(BLOCKSIZE)
        while len(buf) > 0:
            hasher.update(buf)
            buf = afile.read(BLOCKSIZE)
    # print(hasher.hexdigest())

def get_files():
    dir_entries = scandir('.')
    for entry in dir_entries:
        if entry.is_file():
            info = entry.stat()
            print(' %s %s %s' % (entry.name, convert_date(info.st_mtime), hasher.hexdigest()))
            log_filename = entry.name
            log_hashvalue = hasher.hexdigest()
            log_date = convert_date(info.st_mtime)
            write_record(log_filename, log_hashvalue, log_date)
    # close the connection after writing all records
    close_connection()

def write_record(log_filename, log_hashvalue, log_date):
    global connection
    cursor = None
    try:
        print(log_filename, log_hashvalue, log_date)
        if connection is None:
            # open the connection once and reuse it for every record
            connection = psycopg2.connect(user="postgres", password="xxxxxxxx", host="xxx.xxx.xxx.xxx", port="5432", database="evidence_logging")
        cursor = connection.cursor()
        postgres_insert_query = """INSERT INTO logfiles (log_name,log_date,log_hashvalue) VALUES (%s,%s,%s)"""
        record_to_insert = (log_filename, log_date, log_hashvalue)
        print(postgres_insert_query, record_to_insert)
        cursor.execute(postgres_insert_query, record_to_insert)
        connection.commit()
        count = cursor.rowcount
        print(count, "Record inserted successfully into logfiles table")
    except (Exception, psycopg2.Error) as error:
        if connection:
            print("Failed to insert record into logfiles table", error)
    finally:
        if cursor is not None:
            cursor.close()

def close_connection():
    if connection:
        connection.close()
        print("PostgreSQL connection is closed")

get_files()
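Note that in both listings the global hasher is never fed any file data (get_hash() is never called), so every row stores the digest of an empty SHA-256. A per-file sketch that actually hashes each file's contents could replace it:

def file_sha256(path, blocksize=65536):
    h = hashlib.sha256()  # fresh hasher per file, so digests don't accumulate across files
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(blocksize), b''):
            h.update(chunk)
    return h.hexdigest()

# inside get_files(): log_hashvalue = file_sha256(entry.path)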
I have a CSV file, Employee.csv, in an S3 bucket with all the info about employees: name, age, salary, designation.
I have to write a Python Lambda function to read this file and write it into an RDS database, such that it creates a table Employee with the columns name, age, salary, and designation, and the rows hold the data.
The Employee.csv is just an example; in practice it can be any CSV file with any number of columns in it.
from __future__ import print_function
import boto3
import logging
import os
import sys
import uuid
import pymysql
import csv
import rds_config

rds_host = rds_config.rds_host
name = rds_config.db_username
password = rds_config.db_password
db_name = rds_config.db_name

logger = logging.getLogger()
logger.setLevel(logging.INFO)

try:
    conn = pymysql.connect(rds_host, user=name, passwd=password, db=db_name, connect_timeout=5)
except Exception as e:
    logger.error("ERROR: Unexpected error: Could not connect to MySql instance.")
    logger.error(e)
    sys.exit()

logger.info("SUCCESS: Connection to RDS mysql instance succeeded")
s3_client = boto3.client('s3')

def handler(event, context):
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    download_path = '/tmp/{}{}'.format(uuid.uuid4(), key)
    s3_client.download_file(bucket, key, download_path)
    csv_data = csv.reader(open(download_path))  # open() instead of the Python 2 file() builtin
    with conn.cursor() as cur:
        for idx, row in enumerate(csv_data):
            logger.info(row)
            try:
                # unquoted %s placeholders let pymysql do the quoting and escaping
                cur.execute('INSERT INTO target_table(name, age, salary, designation) '
                            'VALUES(%s, %s, %s, %s)', row)
            except Exception as e:
                logger.error(e)
            if idx % 100 == 0:
                conn.commit()
        conn.commit()
    return 'File loaded into RDS: ' + str(download_path)
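For local testing, the handler only reads the bucket name and object key out of the event, so a minimal hand-built S3 put event is enough (a sketch; bucket and key are placeholders):

test_event = {
    'Records': [
        {'s3': {'bucket': {'name': 'my-bucket'},
                'object': {'key': 'Employee.csv'}}}
    ]
}
handler(test_event, None)  # context is unused by this handler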
Here is the code which is working for me now:
s3 = boto3.resource('s3')
file_object = event['Records'][0]
key = str(file_object['s3']['object']['key'])
obj = s3.Object(bucket, key)
content_lines = obj.get()['Body'].read().decode('utf-8').splitlines(True)
# str.strip() removes characters, not substrings, so derive the table
# name from the object key with os.path instead
tableName = os.path.splitext(os.path.basename(key))[0]
with conn.cursor() as cur:
    try:
        cur.execute('TRUNCATE TABLE ' + tableName)
    except Exception as e:
        print("ERROR: Unexpected error: Table does not exist.")
        sys.exit()
    header = True
    for row in csv.reader(content_lines):
        if header:
            numberOfColumns = len(row)
            columnNames = ', '.join(row)
            print("columnNames: " + columnNames)
            values = ', '.join(['%s'] * numberOfColumns)
            print("INSERT into " + tableName + "(" + columnNames + ") VALUES(" + values + ")")
            header = False
        else:
            try:
                cur.execute('INSERT into ' + tableName + '(' + columnNames + ') VALUES(' + values + ')', row)
            except Exception as e:
                raise e
conn.commit()
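Since the question asks for the table to be created rather than truncated, the header row could drive a CREATE TABLE instead. A sketch, under the assumption that every column can be stored as TEXT; headerRow is a hypothetical name standing for the first row returned by csv.reader:

# hypothetical replacement for the TRUNCATE step
create_stmt = 'CREATE TABLE IF NOT EXISTS {} ({})'.format(
    tableName,
    ', '.join('`{}` TEXT'.format(col.strip()) for col in headerRow)
)
cur.execute(create_stmt)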
I have a table in SQLite 3 as follows, and I am planning on using it to store a variety of files: txt, pdf, images, and zip files.
CREATE TABLE zip (filename TEXT PRIMARY KEY NOT NULL, zipfile BLOB NOT NULL);
To store and retrieve the files, I am experimenting with the following Python code:
#!env/bin/python
import sqlite3 as lite
import os
import sys

def insertfile(_filename):
    try:
        con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
        con.row_factory = lite.Row
        cur = con.cursor()
        cur.execute('PRAGMA foreign_keys=ON;')
        _f = open(_filename, 'rb')
        _split = os.path.split(_filename)
        _file = _split[1]
        _blob = _f.read()
        cur.execute('INSERT INTO zip (filename,zipfile) VALUES (?,?)', (_file, lite.Binary(_blob)))
        _f.close()
        con.commit()
        cur.close()
        con.close()
    except Exception as ex:
        print(ex)

def getfile(_filename):
    try:
        con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
        con.row_factory = lite.Row
        cur = con.cursor()
        cur.execute('PRAGMA foreign_keys=ON;')
        cur.execute('SELECT zipfile from zip where filename = ?', (_filename,))
        _files = cur.fetchall()
        if len(_files) > 0:
            _file = open('Test/' + _filename, 'wb')
            _file.write(_files[0]['zipfile'])
            _file.close()
        cur.close()
        con.close()
    except Exception as ex:
        print(ex)

if __name__ == '__main__':
    print('works')
    insertfile(sys.argv[1])
    getfile(os.path.split(sys.argv[1])[1])
When I test this on files like .txt, .py, .pdf, etc., it works fine.
With zip files there is no error while storing into the table, but there is an error while retrieving the file:
Could not decode to UTF-8 column 'zipfile' with text 'PK '
There seems to be some encoding or decoding issue.
I basically tried using the code from one of the questions, Insert binary file in SQLite database with Python.
It worked originally for the pdf, png, and jpg files, but I was still getting the error for zip files. When I commented out the insertion and just ran the retrieval code, it worked. Now the code below works.
def insertfile(_filename):
    try:
        con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
        con.row_factory = lite.Row
        cur = con.cursor()
        cur.execute('PRAGMA foreign_keys=ON;')
        _f = open(_filename, 'rb')
        _split = os.path.split(_filename)
        _file = _split[1]
        _blob = _f.read()
        cur.execute('INSERT INTO zip (filename,zipfile) VALUES (?,?)', (_file, lite.Binary(_blob)))
        _f.close()
        con.commit()
        cur.close()
        con.close()
    except Exception as ex:
        print(ex)

def getfile(_filename):
    try:
        con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
        con.row_factory = lite.Row
        cur = con.cursor()
        cur.execute('PRAGMA foreign_keys=ON;')
        cur.execute('SELECT zipfile from zip where filename = ?', (_filename,))
        _files = cur.fetchall()
        if len(_files) > 0:
            _file = open('Downloads/' + _filename, 'wb')
            _file.write(_files[0]['zipfile'])
            _file.close()
        cur.close()
        con.close()
    except Exception as ex:
        print(ex)

if __name__ == '__main__':
    print('works')
    insertfile(sys.argv[1])
    getfile(os.path.split(sys.argv[1])[1])
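As background on the original error: rows inserted as TEXT (rather than as BLOB via sqlite3.Binary) are decoded as UTF-8 when read back, which is exactly what fails on zip data ('PK' is the zip magic number). For reading such legacy rows, one workaround (a sketch using the standard sqlite3 text_factory hook) is to ask the connection for raw bytes:

con = lite.connect('histogram.db')
con.text_factory = bytes  # return TEXT columns as raw bytes instead of decoding them as UTF-8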