i need to execute 178 mg sql file (bdd RED: Rich_Epinions_Dataset_anonym )
i have this error message :MemoryError
i use sqlite and python here my code :
import sqlite3
def import():
print("Opened database ...")
conn = sqlite3.connect('F:\\PROJECT\\testDict.db')
print("Opened database successfully")
qry = open('F:\\PROJECT\\epinions_anonym.sql', 'r').read()
sqlite3.complete_statement(qry)
cursor = conn.cursor()
try:
cursor.executescript(qry)
except Exception as e:
MessageBoxW = ctypes.windll.user32.MessageBoxW
errorMessage = databaseFile + ': ' + str(e)
MessageBoxW(None, errorMessage, 'Error', 0)
cursor.close()
raise
import()
Related
After the DatabaseError is catched (try-except block) the previous data in the table is deleted. I cannot figure out this strange behaviour.
this is the code I used
def copy_from_stringio(df, table):
conn = None
try:
# read database configuration
params = config()
# connect to the PostgreSQL database
conn = psycopg2.connect(**params)
# create a new cursor
cur = conn.cursor()
# save dataframe to an in memory buffer
buffer = StringIO()
df.to_csv(buffer, index=False, header=False,sep=';')
buffer.seek(0)
#cursor = conn.cursor()
cur.copy_from(buffer, table, sep=";")
conn.commit()
print("copy_from_stringio() done")
cur.close()
except (Exception, psycopg2.DatabaseError) as error:
print(error)
finally:
if conn is not None:
conn.close()
# Module Imports
import mariadb
import sys
import csv
from pathlib import Path
def connect_to_mariaDB(databse, user, passwd):
# Connect to MariaDB Platform
try: conn = mariadb.connect(
user=user,
password=passwd,
host="localhost",
port=3306,
database=databse
)
except mariadb.Error as e:
print(f"Error connecting to MariaDB Platform: {e}")
sys.exit(1)
return conn
def check_if_table_exists_and_overwrite(conn, tableName, database, overwrite):
cur = conn.cursor()
cur.execute(f"SELECT table_name FROM information_schema.tables WHERE table_schema = '{database}';")
for(table_name) in cur:
if table_name[0] == tableName:
if overwrite == "YES":
print("table exists - DROP TABLE")
cur.execute(f"DROP TABLE {tableName}")
return True
else:
return False
return True
def import_file_into_db_table_(
filename, database, user, passwd, tableName,
create_table_statement = "", overwrite = False):
conn = connect_to_mariaDB(database, user, passwd)
cur = conn.cursor()
if conn != None:
print(f"Connection successful to database {database}")
if check_if_table_exists_and_overwrite(conn, tableName, database, overwrite):
cur.execute(create_table_statement)
print("table is created")
path = f"{Path().absolute()}\\{filename}".replace("\\","/")
print(path)
load_data_statement = f"""LOAD DATA INFILE '{path}'
INTO TABLE {tableName}
FIELDS TERMINATED BY ';'
OPTIONALLY ENCLOSED BY '\"'
LINES TERMINATED BY '\\n'
IGNORE 1 LINES
"""
print(load_data_statement)
cur.execute(load_data_statement)
print("load data into table - successful")
else:
print("table exists - no permission to overwrite")
cur.execute("SELECT * FROM student_mat;")
for da in cur:
print(da)
# variables
filename = "student-mat.csv"
database = "dbs2021"
tableName = "student_mat"
# load the create_table_statement
create_table_statement = ""
path = f"{Path().absolute()}\\create_table_statement.txt"
with open(path, newline='') as file:
spamreader = csv.reader(file, delimiter='\n', quotechar='|')
for row in spamreader:
create_table_statement += row[0]
parameters_length = len(sys.argv)
if parameters_length == 3:
user, passwd = sys.argv[1], sys.argv[2]
import_file_into_db_table_(filename, database, user, passwd, tableName, create_table_statement, "YES")
elif parameters_length == 4:
user, passwd, overwrite = sys.argv[1], sys.argv[2], sys.argv[3]
import_file_into_db_table_(filename, database, user, passwd, tableName, create_table_statement, overwrite)
else:
print("wrong parameters\nTry -user -passwd or additional -overwrite")
The code checks if there is a table with the same name in the db and then potentially drops it, creates a new table and loads the data of the csv file into the table.
When executing the code it seems like everything is working but when going in the mariadb command prompt the created table is empty even though when outputting the table in the code it is filled.
By default MariaDB Connector/Python doesn't use autocommit mode.
You need either set autocommit=True when establishing the connection or you have to commit your changes with conn.commit().
I have a CSV file Employee.csv in the S3 bucket with all info about employee: name, age, salary, designation.
I have to write a python lambda function to read this file and write in RDS db such as it should create a table as Employee, with columns name, age, salary, designation and rows will have the data.
The Employee.csv is just for example, actually it can be any csv file with any number of columns in it.
from __future__ import print_function
import boto3
import logging
import os
import sys
import uuid
import pymysql
import csv
import rds_config
rds_host = rds_config.rds_host
name = rds_config.db_username
password = rds_config.db_password
db_name = rds_config.db_name
logger = logging.getLogger()
logger.setLevel(logging.INFO)
try:
conn = pymysql.connect(rds_host, user=name, passwd=password, db=db_name, connect_timeout=5)
except Exception as e:
logger.error("ERROR: Unexpected error: Could not connect to MySql instance.")
logger.error(e)
sys.exit()
logger.info("SUCCESS: Connection to RDS mysql instance succeeded")
s3_client = boto3.client('s3')
def handler(event, context):
bucket = event['Records'][0]['s3']['bucket']['name']
key = event['Records'][0]['s3']['object']['key']
download_path = '/tmp/{}{}'.format(uuid.uuid4(), key)
s3_client.download_file(bucket, key,download_path)
csv_data = csv.reader(file( download_path))
with conn.cursor() as cur:
for idx, row in enumerate(csv_data):
logger.info(row)
try:
cur.execute('INSERT INTO target_table(name, age, salary, designation)' \
'VALUES("%s", "%s", "%s", "%s")'
, row)
except Exception as e:
logger.error(e)
if idx % 100 == 0:
conn.commit()
conn.commit()
return 'File loaded into RDS:' + str(download_path)
Here is the code which is working for me now:
s3 = boto3.resource('s3')
file_object=event['Records'][0]
key=str(file_object['s3']['object']['key'])
obj = s3.Object(bucket, key)
content_lines=obj.get()['Body'].read().decode('utf-8').splitlines(True)
tableName= key.strip('folder/').strip('.csv')
with conn.cursor() as cur:
try:
cur.execute('TRUNCATE TABLE '+tableName)
except Exception as e:
print("ERROR: Unexpected error:Table does not exit.")
sys.exit()
header=True
for row in csv.reader(content_lines):
if(header):
numberOfColumns=len(row)
columnNames= str(row).replace('[','').replace(']','').replace("'",'')
print("columnNames:"+columnNames)
values='%s'
numberOfValues=len(values)
numberOfValues=1
while numberOfValues< numberOfColumns:
values=values+",%s"
numberOfValues+=1
print("INSERT into "+tableName+"("+columnNames+") VALUES("+values+")")
header=False
else:
try:
cur.execute('INSERT into '+tableName+'('+columnNames+') VALUES('+values+')', row)
except Exception as e:
raise e
conn.commit()
I have a table in SQLite 3 as follows and I am planning on using it to store a variety of files: txt, pdf, images and zip files.
CREATE TABLE zip (filename TEXT PRIMARYKEY NOT NULL, zipfile BLOB NOT NULL);
To store and retrieve I am experimenting with the following python code
#!env/bin/python
import sqlite3 as lite
import os
import sys
def insertfile(_filename):
try:
con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
con.row_factory = lite.Row
cur = con.cursor()
cur.execute('PRAGMA foreign_keys=ON;')
_f = open(_filename,'rb')
_split = os.path.split(_filename)
_file = _split[1]
_blob = _f.read()
cur.execute('INSERT INTO zip (filename,zipfile) VALUES (?,?)', (_file,lite.Binary(_blob)))
_f.close()
con.commit()
cur.close()
con.close()
except Exception as ex:
print ex
def getfile(_filename):
try:
con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
con.row_factory = lite.Row
cur = con.cursor()
cur.execute('PRAGMA foreign_keys=ON;')
cur.execute('SELECT zipfile from zip where filename = ?', (_filename,))
_files = cur.fetchall()
if len(_files) > 0:
_file = open('Test/'+ _filename,'wb')
_file.write(_files[0]['zipfile'])
_file.close()
cur.close()
con.close()
except Exception as ex:
print ex
if __name__ == '__main__':
print 'works'
insertfile(sys.argv[1])
getfile(os.path.split(sys.argv[1])[1])
When I test this on files like .txt, .py, .pdf etc., it works fine.
With Zip files, there is no error while storing into the table but an error while retrieving the file:
Could not decode to UTF-8 column 'zipfile' with text 'PK '
There seems to be some encoding or decoding issue.
I basically tried using the code from one of the questions
Insert binary file in SQLite database with Python
.
It worked originally for the pdf, png, jpg files. But I was still getting the error for Zip files. When I commented out the insertion and just ran the retrieval code it worked. Now the code below works.
def insertfile(_filename):
try:
con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
con.row_factory = lite.Row
cur = con.cursor()
cur.execute('PRAGMA foreign_keys=ON;')
_f = open(_filename,'rb')
_split = os.path.split(_filename)
_file = _split[1]
_blob = _f.read()
cur.execute('INSERT INTO zip (filename,zipfile) VALUES (?,?)', (_file,lite.Binary(_blob)))
_f.close()
con.commit()
cur.close()
con.close()
except Exception as ex:
print ex
def getfile(_filename):
try:
con = lite.connect('histogram.db', detect_types=lite.PARSE_DECLTYPES)
con.row_factory = lite.Row
cur = con.cursor()
cur.execute('PRAGMA foreign_keys=ON;')
cur.execute('SELECT zipfile from zip where filename = ?', (_filename,))
_files = cur.fetchall()
if len(_files) > 0:
_file = open('Downloads/'+ _filename,'wb')
_file.write(_files[0]['zipfile'])
_file.close()
cur.close()
con.close()
except Exception as ex:
print ex
if __name__ == '__main__':
print 'works'
insertfile(sys.argv[1])
getfile(os.path.split(sys.argv[1])[1])
Since the query returns more than 1 result, at the Get_results class how could i return the data_out as an array in order to iterate on the results of the query?
import psycopg2
import sys
class Get_results():
def db_call(self,query,dbHost,dbName,dbUser,dbPass):
try:
con = None
con = psycopg2.connect(host=dbHost, database=dbName,
user=dbUser, password=dbPass)
cur = con.cursor()
cur.execute(query)
data = cur.fetchall()
for data_out in data:
return data_out
except psycopg2.DatabaseError, e:
print 'Error %s' % e
sys.exit(1)
finally:
if con:
con.close()
sql = " some sql "
w = Get_results()
for i in w.db_call(sql, dbHost, dbName, dbUser, dbPass):
print "The result is : " + i
For aditional info, when if i add print data right after data = cur.fetchall() i have the result:
[('The_Galaxy', 'The_Galaxy:star'),
('The_Galaxy', 'The_Galaxy:planet')]
The immediate answer is to change:
for data_out in data:
data_out result
to:
for data_out in data:
yield data_out
But you should look at using a with statement (if the DB API supports it), and simplifying the code - this could just be done by making a generator function (a class is OTT for this)
import psycopg2
import sys
class Get_results():
def db_call(self,query,dbHost,dbName,dbUser,dbPass):
try:
con = None
con = psycopg2.connect(host=dbHost, database=dbName,
user=dbUser, password=dbPass)
cur = con.cursor()
cur.execute(query)
data = cur.fetchall()
resultList = []
for data_out in data:
resultList.append(data_out[1])
return resultList
except psycopg2.DatabaseError, e:
print 'Error %s' % e
sys.exit(1)
finally:
if con:
con.close()
sql = " some sql "
w = Get_results()
for i in w.db_call(sql, dbHost, dbName, dbUser, dbPass):
print "The result is : " + i