I'm trying to insert some data from a text file (fields terminated by ";") to a table in MySQL using Python. Inserting one by one takes too long, so I decided to write a loop to insert a large block of rows at once, but not the whole file (no memory available for that). The table has 21 columns.
import datetime
import mysql.connector
print(datetime.datetime.now())
with open("/backup/backup/backupDB/csv/file.txt", "r", encoding = "latin-1") as data:
dbconn = mysql.connector.connect(
host = "server", user = "user", password = "password", port = 3306
)
cur = dbconn.cursor(prepared = True)
cur.execute("SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema='schema' AND table_name='table'")
iterColumn = cur.fetchall()
columns = str(iterColumn).replace("(","").replace(",)","").replace("[","(").replace("]",")").replace("'","")
next(data)
cur = dbconn.cursor()
block = 5000
y = []
try:
while True:
for x in data:
x = x.split(";")
y.append(tuple(x))
if len(y) == block:
break
cur.executemany("insert ignore into schema.table " + columns + " values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", y)
dbconn.commit()
y=[]
except:
print(datetime.datetime.now())
The code above works, but it caps the insert to 60000 rows, exactly, although I have 210000+ in my file.
Where am I doing wrong?
Related
# Module Imports
import mariadb
import sys
import csv
from pathlib import Path
def connect_to_mariaDB(databse, user, passwd):
# Connect to MariaDB Platform
try: conn = mariadb.connect(
user=user,
password=passwd,
host="localhost",
port=3306,
database=databse
)
except mariadb.Error as e:
print(f"Error connecting to MariaDB Platform: {e}")
sys.exit(1)
return conn
def check_if_table_exists_and_overwrite(conn, tableName, database, overwrite):
cur = conn.cursor()
cur.execute(f"SELECT table_name FROM information_schema.tables WHERE table_schema = '{database}';")
for(table_name) in cur:
if table_name[0] == tableName:
if overwrite == "YES":
print("table exists - DROP TABLE")
cur.execute(f"DROP TABLE {tableName}")
return True
else:
return False
return True
def import_file_into_db_table_(
filename, database, user, passwd, tableName,
create_table_statement = "", overwrite = False):
conn = connect_to_mariaDB(database, user, passwd)
cur = conn.cursor()
if conn != None:
print(f"Connection successful to database {database}")
if check_if_table_exists_and_overwrite(conn, tableName, database, overwrite):
cur.execute(create_table_statement)
print("table is created")
path = f"{Path().absolute()}\\{filename}".replace("\\","/")
print(path)
load_data_statement = f"""LOAD DATA INFILE '{path}'
INTO TABLE {tableName}
FIELDS TERMINATED BY ';'
OPTIONALLY ENCLOSED BY '\"'
LINES TERMINATED BY '\\n'
IGNORE 1 LINES
"""
print(load_data_statement)
cur.execute(load_data_statement)
print("load data into table - successful")
else:
print("table exists - no permission to overwrite")
cur.execute("SELECT * FROM student_mat;")
for da in cur:
print(da)
# variables
filename = "student-mat.csv"
database = "dbs2021"
tableName = "student_mat"
# load the create_table_statement
create_table_statement = ""
path = f"{Path().absolute()}\\create_table_statement.txt"
with open(path, newline='') as file:
spamreader = csv.reader(file, delimiter='\n', quotechar='|')
for row in spamreader:
create_table_statement += row[0]
parameters_length = len(sys.argv)
if parameters_length == 3:
user, passwd = sys.argv[1], sys.argv[2]
import_file_into_db_table_(filename, database, user, passwd, tableName, create_table_statement, "YES")
elif parameters_length == 4:
user, passwd, overwrite = sys.argv[1], sys.argv[2], sys.argv[3]
import_file_into_db_table_(filename, database, user, passwd, tableName, create_table_statement, overwrite)
else:
print("wrong parameters\nTry -user -passwd or additional -overwrite")
The code checks if there is a table with the same name in the db and then potentially drops it, creates a new table and loads the data of the csv file into the table.
When executing the code it seems like everything is working but when going in the mariadb command prompt the created table is empty even though when outputting the table in the code it is filled.
By default MariaDB Connector/Python doesn't use autocommit mode.
You need either set autocommit=True when establishing the connection or you have to commit your changes with conn.commit().
I'm trying to put the data file of schools.data which is just a file listing many universities. It says 'type' object is not subscriptable in terminal. Here is the code
import urllib
import sqlite3
import json
import time
import ssl
conn = sqlite3.connect('universityrawdata.sqlite')
cur = conn.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS Universitylocations (address TEXT, geodata TEXT)''')
fh = open("schools.data")
count = 0
for line in fh:
if count > 200:
print ('Retrieved 200 locations, restart to retrieve more')
break
address = line.strip()
print('')
cur.execute("SELECT geodata FROM Universitylocations WHERE address= ?",(bytes[address]))
print("Resolving", data)
url = fh + urllib.urlencode({"sensor":"false", "address": address})
print("Retrieving", url)
uh = urllib.urlopen(url, context=scontext)
data = uh.read()
print('Retrieved',len(data),'characters',data[:20].replace('\n',''))
count = count + 1
try:
js = json.loads(str(data))
except:
continue
if 'status' not in js or (js['status'] != 'OK' and js['status'] != 'ZERO_RESULTS') :
print('==== Failed to Retrieve ====')
print (data)
continue
cur.execute('''INSERT INTO Universitylocations (address, geodata) VALUES (?, ?)''', (bytes[address],bytes[data]))
conn.commit()
if count % 10 == 0 :
print('Pausing for a bit...')
time.sleep(5)
print("Run process.py to read the data on a database")
Can anyone help? I've been having this issue for a while.
This line is culprit:
cur.execute("SELECT geodata FROM Universitylocations WHERE address= ?",(bytes[address]))
change bytes[address] with (address,). Means:
cur.execute('''SELECT geodata FROM Universitylocations WHERE address= ?''',(address,))
Check what your data type is the database.
I am a tester. As part of my tests, I have to extract data from two DB2 databases. One resides on z/OS, and the other resides on AS400
When I call the functions sequentially, the second one fails with TypeError: "Class com.ibm.db2.jcc.DB2Driver" or "com.ibm.as400.access.AS400JDBCDriver" regardless of the order in which function is called.
Could you please help me understand why the jaydebeapi.connect statement in the second statement fails with Class Not Found? Did I forget to close something (I used context manager for the connection)?
Individually, the functions work fine. But when I call one after the other, the second one fails. Please help.
Python Version: 3.7.0
JayDeBeApi==1.2.3
JPype1==1.1.2
Here's the function I wrote to extract data from DB2 database on zOS
def query_zos_db2_database(hostname, port, database, username, password, sql_stmt, output_file_path, delimiter="|", header_flag=True):
"""
Function to connect to a DB2 database hosted on z/OS Mainframe
:param hostname:
:param port:
:param database:
:param username:
:param password:
:param sql_stmt:
:param output_file_path:
:param delimiter:
:param header_flag:
:return:
"""
my_name = "query_zos_db2_database()"
st = timeit.default_timer()
print("Entered :{}".format(my_name))
curr_dir = os.getcwd()
jar_file_path = "{}/../drivers/db2jcc/db2jcc4.jar".format(curr_dir)
jar_file_path = os.path.abspath(jar_file_path)
print(jar_file_path)
if not os.path.exists(jar_file_path):
logger.error("File not found: {}".format(jar_file_path))
exit(1)
list_credentials = list()
list_credentials.append(username)
list_credentials.append(password)
jdbc_conn_str = "jdbc:db2://{0}:{1}/{2}".format(hostname, port, database)
print("Before connection isJVMStarted: {}".format(jpype.isJVMStarted()))
with jaydebeapi.connect('com.ibm.db2.jcc.DB2Driver', jdbc_conn_str, list_credentials, jar_file_path) as conn:
# print(conn)
cursor = conn.cursor()
cursor.execute(sql_stmt)
print("After connection isJVMStarted: {}".format(jpype.isJVMStarted()))
if os.path.exists(output_file_path):
os.remove(output_file_path)
with codecs.open(output_file_path, 'wb', encoding='utf-8') as fout:
csvwriter = csv.writer(fout, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL)
if header_flag is True or header_flag == 'True':
if sys.version_info.major == 2:
column_names = [item[0].encode('utf-8') for item in cursor.description]
print(column_names)
else:
column_names = [item[0] for item in cursor.description]
csvwriter.writerow(column_names)
for row in cursor.fetchall():
try:
if sys.version_info.major == 2:
columns = [unicode(column or '') for column in row]
else:
columns = [str(column or '') for column in row]
out_rec = delimiter.join(columns)
fout.write(u"{}\n".format(out_rec))
except UnicodeEncodeError:
print("UnicodeEncodeError- Skipping this record {}".format(row))
et = timeit.default_timer() - st
print("Exited: {}; Elapsed Time: {} s".format(my_name, et))
Here's the function I write to extract data from DB2 database on AS400:
def query_as400_db2_database(hostname, port, database, username, password, sql_stmt, output_file_path, delimiter="|", header_flag=True):
"""
Function to connect to a DB2 database hosted on AS400 Mainframe
:param hostname:
:param port:
:param database:
:param username:
:param password:
:param sql_stmt:
:param output_file_path:
:param delimiter:
:param header_flag:
:return:
"""
my_name = "query_as400_db2_database()"
st = timeit.default_timer()
print("Entered: {}".format(my_name))
curr_dir = os.getcwd()
jar_file_path = "{}/../drivers/jt400/jt400.jar".format(curr_dir)
jar_file_path = os.path.abspath(jar_file_path)
print(jar_file_path)
if not os.path.exists(jar_file_path):
logger.error("File not found: {}".format(jar_file_path))
exit(1)
list_credentials = list()
list_credentials.append(username)
list_credentials.append(password)
jdbc_conn_str = "jdbc:as400://{};prompt=false;translate binary=true;naming=system".format(hostname)
print("Before connection isJVMStarted: {}".format(jpype.isJVMStarted()))
with jaydebeapi.connect('com.ibm.as400.access.AS400JDBCDriver', jdbc_conn_str, list_credentials, jar_file_path) as conn:
# print(conn)
print("After connection isJVMStarted: {}".format(jpype.isJVMStarted()))
cursor = conn.cursor()
cursor.execute(sql_stmt)
if os.path.exists(output_file_path):
os.remove(output_file_path)
with codecs.open(output_file_path, 'wb', encoding='utf-8') as fout:
csvwriter = csv.writer(fout, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL)
if header_flag is True or header_flag == 'True':
if sys.version_info.major == 2:
column_names = [item[0].encode('utf-8') for item in cursor.description]
print(column_names)
else:
column_names = [item[0] for item in cursor.description]
csvwriter.writerow(column_names)
for row in cursor.fetchall():
try:
if sys.version_info.major == 2:
columns = [unicode(column or '') for column in row]
else:
columns = [str(column or '') for column in row]
out_rec = delimiter.join(columns)
fout.write(u"{}\n".format(out_rec))
except UnicodeEncodeError:
print("UnicodeEncodeError- Skipping this record {}".format(row))
et = timeit.default_timer() - st
print("Exited: {}; Elapsed Time: {} s".format(my_name, et))
For that particular error case there are two potential error causes.
1- Usually in the Java world the java.lang.RuntimeException: Class driverClassName not found exception is raised when we hadn't add the jar file's path to the CLASSPATH environment variable and that's what was exactly attempted to be explained here
2- (the most likely one) It seems like Jaydebeapi doesn't support the establishment of multiple connections for different data sources at the same time.
I'm using an open source piece of python code that basically pulls in a location of an entity and saves these details to a DB in real time. lets call it scanner the scanner program. DB file it saves it to is a sqlite file: db.sqlite.
As this is happening my piece of code in question is searching the db file every 45 seconds performing a select statement to find a certain value. This will work a couple of times but after running for a couple of minutes concurrently with the scanner program they run into a DB lock error:
sqlite3.OperationalError: database is locked
So what can I do to my code to ensure this lock does not happen. I cannot change how the scanner program accesses the DB. Only my program.
Any help here would be great. I've seen timeouts mentioned along with threading but I am not sure on either.
from datetime import datetime
import sqlite3
import time
import json
import tweepy
def get_api(cfg):
auth = tweepy.OAuthHandler(cfg['consumer_key'], cfg['consumer_secret'])
auth.set_access_token(cfg['access_token'], cfg['access_token_secret'])
return tweepy.API(auth)
# Fill in the values noted in previous step here
cfg = {
"consumer_key" : "X",
"consumer_secret" : "X",
"access_token" : "X",
"access_token_secret" : "X"
}
with open('locales/pokemon.en.json') as f:
pokemon_names = json.load(f)
currentid = 1
pokemonid = 96 #test
while 1==1:
conn = sqlite3.connect('db.sqlite')
print "Opened database successfully";
print "Scanning DB....";
time.sleep(1)
cur = conn.execute("SELECT * FROM sightings WHERE pokemon_id = ? and id > ?", (pokemonid, currentid))
row = cur.fetchone()
if row is None:
print "No Pokemon Found \n "
time.sleep(1)
while row is not None:
#get pokemon name
name = pokemon_names[str(pokemonid)]
#create expiry time
datestr = datetime.fromtimestamp(row[3])
dateoutput = datestr.strftime("%H:%M:%S")
#create location
location = "https://www.google.com/maps/place/%s,%s" % (row[5], row[6])
#inform user
print "%s found! - Building tweet! \n" % (name)
time.sleep(1)
#create tweet
buildtweet = "a wild %s spawned in #Dublin - It will expire at %s. %s #PokemonGo \n "%(name, dateoutput, location)
#print tweet
#log
print buildtweet
currentid = row[0]
time.sleep(1)
#send tweet
api = get_api(cfg)
tweet = buildtweet
try:
status = api.update_status(status=tweet)
print "sent!"
except:
pass
print "this tweet failed \n"
time.sleep(30)
row = cur.fetchone()
cur.close()
conn.close()
print "Waiting..... \n "
time.sleep(45)
conn.close()
i got this 2 python script
import serial
serial = serial.Serial("/dev/ttyUSB0", baudrate=9600)
code = ''
while True:
data = serial.read()
if data == '\r':
print(code)
code = ''
else:
code = code + data
and
import time
import datetime
import MySQLdb
localtime = time.localtime(time.time())
day = localtime.tm_wday
time = localtime.tm_hour
print day
print time
data = 'DOSEN1'
db = MySQLdb.connect("localhost", "root", "", "skripsi")
cur = db.cursor()
cond1 = "SELECT nama_dosen FROM dosen WHERE kode_dosen = '%s'" %data
cur.execute(cond1)
hitung = cur.rowcount
res1 = cur.fetchall()
for row in res1:
nama_dosen = row[0]
if hitung == 1:
res1 = nama_dosen
elif hitung != 1:
print "Dosen tidak Terdaftar"
how can i join this 2 script so that the data = 'DOSEN1' can be replaced with the RFID tag number?
i really new to this programming languange, really need help. thanks
Assuming that print(code) gives you the value for data in the second script, something like this should work:
import serial
import time
import datetime
import MySQLdb
serial = serial.Serial("/dev/ttyUSB0", baudrate=9600)
db = MySQLdb.connect("localhost", "root", "", "skripsi")
cur = db.cursor()
code = ''
while True:
data = serial.read()
if data == '\r':
print(code)
localtime = time.localtime(time.time())
day = localtime.tm_wday
time = localtime.tm_hour
print day
print time
cond1 = "SELECT nama_dosen FROM dosen WHERE kode_dosen = '%s'" %code
cur.execute(cond1)
hitung = cur.rowcount
res1 = cur.fetchall()
for row in res1:
nama_dosen = row[0]
if hitung == 1:
res1 = nama_dosen
elif hitung != 1:
print "Dosen tidak Terdaftar"
code = ''
else:
code = code + data