Error using utf-8 filenames in python script - python

I have a seemingly impossible conundrum and hope that you can help point me in the right direction. I have been coming back to and putting aside this project for weeks now, and I think it is about time I solved it, hopefully with your help.
I am making a script which is supposed to read a bunch of .xls Excel files from a directory structure, parse their contents and load them into a MySQL database. In the main function, a list of (Croatian) file names gets passed to xlrd, and that is where the problem lies.
The environment is an up-to-date FreeBSD 9.1.
I get the following error when executing the script:
mars:~/20130829> python megascript.py
Python version: 2.7.5
Filesystem encoding is: UTF-8
Removing error.log if it exists...
It doesn't.
Done!
Connecting to database...
Done!
MySQL database version: 5.6.13
Loading pilots...
Done!
Loading technicians...
Done!
Loading aircraft registrations...
Done!
Loading file list...
Done!
Processing files...
/2006/1_siječanj.xls
Traceback (most recent call last):
File "megascript.py", line 540, in <module>
main()
File "megascript.py", line 491, in main
data = readxlsfile(files, 'UPIS', piloti, tehnicari, helikopteri)
File "megascript.py", line 129, in readxlsfile
workbook = open_workbook(f)
File "/usr/local/lib/python2.7/site-packages/xlrd-0.9.2-py2.7.egg/xlrd/__init__.py", line 394, in open_workbook
f = open(filename, "rb")
IOError: [Errno 2] No such file or directory: u'/2006/1_sije\u010danj.xls'
I have included the complete output to make the code flow easier to follow.
I suppose that the problem is in xlrd not accepting a UTF-8 file list. I'm not sure how to get around that without messing around with the xlrd code, though. Any ideas?
Here goes the code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os, sys, getopt, codecs, csv, MySQLdb, platform
from mmap import mmap,ACCESS_READ
from xlrd import open_workbook, xldate_as_tuple
# Define constants
NALET_OUT = ''
PUTNICI_OUT = ''
DB_HOST = 'localhost'
DB_USER = 'user'
DB_PASS = 'pass'
DB_DATABASE = 'eth'
START_DIR = u'./'
ERROR_FILE = START_DIR + 'mega_error.log'
# Functions
def isNumber(s):
# Check if a string could be a number
try:
float(s)
return True
except ValueError:
return False
def getMonth(f):
# Extract the month from a file name in the format "1_sijecanj.xls"
temp = os.path.basename(f)
temp = temp.split('_')
mjesec = int(temp[0])
return mjesec
def getYear(f):
# Extract the year from the path
f = f.split('/')
godina = f[-2]
return godina
def databaseVersion(cur):
# Print Mysql database version
try:
cur.execute("SELECT VERSION()")
result = cur.fetchone()
except MySQLdb.Error, e:
try:
print "MySQL Error [%d]: %s]" % (e.args[0], e.args[1])
except IndexError:
print "MySQL Error: %s" % (e.args[0], e.args[1])
print "MySQL database version: %s" % result
def getQuery(cur, sql_query):
# Perform passed query on passed database
try:
cur.execute(sql_query)
result = cur.fetchall()
except MySQLdb.Error, e:
try:
print "MySQL Error [%d]: %s]" % (e.args[0], e.args[1])
except IndexError:
print "MySQL Error: %s" % (e.args[0], e.args[1])
return result
def getFiles():
files = []
# Find subdirectories
for i in [x[0] for x in os.walk(START_DIR)]:
if (i != '.' and isNumber(os.path.basename(i))):
# Find files in subdirectories
for j in [y[2] for y in os.walk(i)]:
# For every file in file list
for y in j:
fn, fe = os.path.splitext(y)
is_mj = fn.split("_")
if(fe == '.xls' and y.find('_') != -1 and isNumber(is_mj[0])):
mj = fn.split('_')
files.append(i.lstrip('.') + "/" + y)
# Sort list chronologically
files.sort(key=lambda x: getMonth(x))
files.sort(key=lambda x: getYear(x))
return files
def errhandle(f, datum, var, vrijednost, ispravka = "NULL"):
# Get error information, print it on screen and write to error.log
f = unicode(str(f), 'utf-8')
datum = unicode(str(datum), 'utf-8')
var = unicode(str(var), 'utf-8')
try:
vrijednost = unicode(str(vrijednost.decode('utf-8')), 'utf-8')
except UnicodeEncodeError:
vrijednost = vrijednost
ispravka = unicode(str(ispravka), 'utf-8')
err_f = codecs.open(ERROR_FILE, 'a+', 'utf-8')
line = f + ": " + datum + " " + var + "='" + vrijednost\
+ "' Ispravka='" + ispravka + "'"
#print "%s" % line
err_f.write(line)
err_f.close()
def readxlsfile(files, sheet, piloti, tehnicari, helikopteri):
# Read xls file and return a list of rows
data = []
nalet = []
putn = []
id_index = 0
# For every file in list
for f in files:
print "%s" % f
temp = f.split('/')
godina = str(temp[-2])
temp = os.path.basename(f).split('_')
mjesec = str(temp[0])
workbook = open_workbook(f)
sheet = workbook.sheet_by_name('UPIS')
# For every row that doesn't contain '' or 'POSADA' or 'dan' etc...
for ri in range(sheet.nrows):
if sheet.cell(ri,1).value!=''\
and sheet.cell(ri,2).value!='POSADA'\
and sheet.cell(ri,1).value!='dan'\
and (sheet.cell(ri,2).value!=''):
temp = sheet.cell(ri, 1).value
temp = temp.split('.')
dan = temp[0]
# Datum
datum = "'" + godina + "-" + mjesec + "-" + dan + "'"
# Kapetan
kapetan = ''
kapi=''
if sheet.cell(ri, 2).value == "":
kapetan = "NULL"
else:
kapetan = sheet.cell(ri, 2).value
if kapetan[-1:] == " ":
errhandle(f, datum, 'kapetan', kapetan, kapetan[-1:])
kapetan = kapetan[:-1]
if(kapetan):
try:
kapi = [x[0] for x in piloti if x[2].lower() == kapetan]
kapi = kapi[0]
except ValueError:
errhandle(f, datum, 'kapetan', kapetan, '')
kapetan = ''
except IndexError:
errhandle(f, datum, 'kapetan', kapetan, '')
kapi = 'NULL'
else:
kapi="NULL"
# Kopilot
kopilot = ''
kopi = ''
if sheet.cell(ri, 3).value == "":
kopi = "NULL"
else:
kopilot = sheet.cell(ri, 3).value
if kopilot[-1:] == " ":
errhandle(f, datum,'kopilot', kopilot,\
kopilot[:-1])
if(kopilot):
try:
kopi = [x[0] for x in piloti if x[2].lower() == kopilot]
kopi = kopi[0]
except ValueError:
errhandle(f, datum,'kopilot', kopilot, '')
except IndexError:
errhandle(f, datum, 'kopilot', kopilot, '')
kopi = 'NULL'
else:
kopi="NULL"
# Teh 1
teh1 = ''
t1i = ''
if sheet.cell(ri, 4).value=='':
t1i = 'NULL'
else:
teh1 = sheet.cell(ri, 4).value
if teh1[-1:] == " ":
errhandle(f, datum,'teh1', teh1, teh1[:-1])
teh1 = 'NULL'
if(teh1):
try:
t1i = [x[0] for x in tehnicari if x[2].lower() == teh1]
t1i = t1i[0]
except ValueError:
errhandle(f, datum,'teh1', teh1, '')
except IndexError:
errhandle(f, datum, 'teh1', teh1, '')
t1i = 'NULL'
else:
t1i="NULL"
# Teh 2
teh2=''
t2i=''
if sheet.cell(ri, 5).value=='':
t2i = "NULL"
else:
teh2 = sheet.cell(ri, 5).value
if teh2[-1:] == " ":
errhandle(f, datum,'teh2', teh2, teh2[-1:])
teh2 = ''
if(teh2):
try:
t2i = [x[0] for x in tehnicari if x[2].lower() == teh2]
t2i = t2i[0]
except ValueError:
errhandle(f, datum,'teh2', teh2, 'NULL')
t2i = 'NULL'
except IndexError:
errhandle(f, datum,'teh2', teh2, 'NULL')
t2i = 'NULL'
else:
t2i="NULL"
# Oznaka
oznaka = ''
heli = ''
if sheet.cell(ri, 6).value=="":
oznaka = errhandle(f, datum, "helikopter", oznaka, "")
else:
oznaka = str(int(sheet.cell(ri, 6).value))
try:
heli = [x[0] for x in helikopteri if x[0] == oznaka]
except ValueError:
errhandle(f, datum, 'helikopter', oznaka, '')
except IndexError:
errhandle(f, datum, 'helikopter', oznaka, '')
heli = ''
# Uvjeti
uvjeti = sheet.cell(ri, 9).value
# Letova
letova_dan = 0
letova_noc = 0
letova_ifr = 0
letova_sim = 0
letova = '0' # default so the error handler and the row below always have a value
if sheet.cell(ri, 7).value == "":
errhandle(f, datum, 'letova', letova, '')
else:
letova = str(int(sheet.cell(ri, 7).value))
if uvjeti=="vfr":
letova_dan = letova
elif uvjeti=="ifr":
letova_ifr = letova
elif uvjeti=="sim":
letova_sim = letova
else:
letova_noc = letova
#Block time
bt_dan = "'00:00:00'"
bt_noc = "'00:00:00'"
bt_ifr = "'00:00:00'"
bt_sim = "'00:00:00'"
try:
bt_tpl = xldate_as_tuple(sheet.cell(ri, 8).value, workbook.datemode)
bt_m = bt_tpl[4]
bt_h = bt_tpl[3]
bt = "'" + str(bt_h).zfill(2)+":"+str(bt_m)+":00'"
except (ValueError, IndexError):
errhandle(f, datum, 'bt', sheet.cell(ri,8).value, '')
if uvjeti[:3]=="vfr":
bt_dan = bt
elif uvjeti[:3]=="ifr":
bt_ifr = bt
elif uvjeti[:3]=="sim":
bt_sim = bt
elif uvjeti[:2] == "no":
bt_noc = bt
else:
errhandle(f, datum, 'uvjeti', uvjeti, '')
# Vrsta leta
vrsta = "'" + sheet.cell(ri, 10).value + "'"
# Vjezba
vjezba = 'NULL';
try:
vjezba = sheet.cell(ri, 11).value
if vjezba == '':
# Too many results
#errhandle(f, datum, 'vjezba', vjezba, '')
vjezba = 'NULL'
if vjezba == "?":
errhandle(f, datum, 'vjezba', str(vjezba), '')
vjezba = 'NULL'
if str(vjezba) == 'i':
errhandle(f, datum, 'vjezba', str(vjezba), '')
vjezba = 'NULL'
if str(vjezba)[-1:] == 'i':
errhandle(f, datum, 'vjezba', str(vjezba),\
str(vjezba).rstrip('i'))
vjezba = str(vjezba).rstrip('i')
if str(vjezba).find(' i ') != -1:
errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split(' i ')[0])
vjezba = str(vjezba).split(' i ')
vjezba = vjezba[0]
if str(vjezba)[-1:] == 'm':
errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).rstrip('m'))
vjezba = str(vjezba).rstrip('m')
if str(vjezba).find(';') != -1:
errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split(';')[0])
temp = str(vjezba).split(';')
vjezba = temp[0]
if str(vjezba).find('/') != -1:
errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split('/')[0])
temp = str(vjezba).split('/')
vjezba = temp[0]
if str(vjezba).find('-') != -1:
errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split('-')[0])
temp = str(vjezba).split('-')
vjezba = temp[0]
if str(vjezba).find(',') != -1:
errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split(',')[0])
temp = str(vjezba).split(',')
vjezba = temp[0]
if str(vjezba).find('_') != -1:
errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split('_')[0])
temp = str(vjezba).split('_')
vjezba = temp[0]
if str(vjezba) == 'bo':
errhandle(f, datum, 'vjezba', str(vjezba), '')
vjezba = 'NULL'
if str(vjezba).find(' ') != -1:
if str(vjezba) == 'pp 300':
errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split(' ')[1])
temp = str(vjezba).split(' ')
vjezba = temp[1]
else:
errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split(' ')[0])
temp = str(vjezba).split(' ')
vjezba = temp[0]
if str(vjezba) == 'pp':
errhandle(f, datum, 'vjezba', str(vjezba), '')
vjezba = ''
except UnicodeEncodeError:
errhandle(f, datum, 'Unicode error! vjezba', vjezba, '')
if vjezba != 'NULL':
vjezba = int(float(vjezba))
# Visinska slijetanja
# Putnici
vp1 = str(sheet.cell(ri, 12).value)
bp1 = str(sheet.cell(ri, 13).value)
vp2 = str(sheet.cell(ri, 14).value)
bp2 = str(sheet.cell(ri, 15).value)
# Teret
teret = ''
teret = str(sheet.cell(ri, 16).value)
if teret == '':
teret = 0
# Baja
baja = ''
if sheet.cell(ri, 17).value == '':
baja = 0
else:
baja = int(sheet.cell(ri, 17).value) / 2 # added /2 to get tonnes
# Column order for the csv
id_index = id_index + 1
row = [id_index, datum, kapi, kopi, t1i, t2i, oznaka,\
letova, letova_dan, letova_noc, letova_ifr,\
letova_sim, bt, bt_dan, bt_noc, bt_ifr,\
bt_sim, vrsta, vjezba, teret, baja]
row = [str(i) for i in row]
nalet.append(row)
putn = []
if bp1 != '':
put = [id_index, vp1, bp1]
putn.append(put)
if bp2 != '':
put = [id_index, vp2, bp2]
putn.append(put)
data.append(nalet)
data.append(putn)
return data
def main():
# Python version
print "\nPython version: %s \n" % platform.python_version()
# Print filesystem encoding
print "Filesstem encoding is: %s" % sys.getfilesystemencoding()
# Remove error file if exists
print "Removing error.log if it exists..."
try:
os.remove(ERROR_FILE)
print "It did."
except OSError:
print "It doesn't."
pass
print "Done!"
# Connect to database
print "Connecting to database..."
db = MySQLdb.connect(DB_HOST, DB_USER, DB_PASS, DB_DATABASE,\
use_unicode=True, charset='utf8')
cur=db.cursor()
print "Done!"
# Database version
databaseVersion(cur)
# Load pilots, technicians and helicopters from db
print "Loading pilots..."
sql_query = "SELECT eth_osobnici.id, eth_osobnici.ime,\
eth_osobnici.prezime FROM eth_osobnici RIGHT JOIN \
eth_letacka_osposobljenja ON eth_osobnici.id=\
eth_letacka_osposobljenja.id_osobnik WHERE \
eth_letacka_osposobljenja.vrsta_osposobljenja='kapetan' \
OR eth_letacka_osposobljenja.vrsta_osposobljenja='kopilot'"
#piloti = []
#piloti = getQuery(cur, sql_query)
piloti=[]
temp = []
temp = getQuery(cur, sql_query)
for row in temp:
piloti.append(row)
print "Done!"
print "Loading tehnicians..."
sql_query = "SELECT eth_osobnici.id, eth_osobnici.ime,\
eth_osobnici.prezime FROM eth_osobnici RIGHT JOIN \
eth_letacka_osposobljenja ON eth_osobnici.id=\
eth_letacka_osposobljenja.id_osobnik WHERE \
eth_letacka_osposobljenja.vrsta_osposobljenja='tehničar 1' \
OR eth_letacka_osposobljenja.vrsta_osposobljenja='tehničar 2'"
tehnicari=[]
temp = []
temp = getQuery(cur, sql_query)
for row in temp:
tehnicari.append(row)
print "Done!"
print "Loading aircraft registrations..."
sql_query = "SELECT id FROM eth_helikopteri"
helikopteri=[]
temp = []
temp = getQuery(cur, sql_query)
for row in temp:
helikopteri.append(row)
print "Done!"
# Get file names to process
print "Loading file list..."
files = getFiles()
print "Done!"
# Process all files from array
print "Processing files..."
data = readxlsfile(files, 'UPIS', piloti, tehnicari, helikopteri)
print "Done!"
# Enter new information in database
result = 0
print "Reseting database..."
sql_query = "DELETE FROM eth_nalet"
cur.execute(sql_query)
db.commit()
sql_query = "ALTER TABLE eth_nalet AUTO_INCREMENT=0"
cur.execute(sql_query)
db.commit()
print "Done!"
print "Loading data in 'eth_nalet'..."
for row in data[0]:
sql_query = """INSERT INTO eth_nalet (id, datum, kapetan,
kopilot, teh1, teh2, registracija, letova_uk, letova_dan,
letova_noc, letova_ifr, letova_sim, block_time, block_time_dan,
block_time_noc, block_time_ifr, block_time_sim, vrsta_leta,
vjezba, teret, baja) VALUES (%s)""" % (", ".join(row))
cur.execute(sql_query)
db.commit()
print "Done!"
print "Loading data in 'eth_putnici'..."
for row in data[1]:
sql_query = """INSERT INTO eth_putnici (id_leta,
vrsta_putnika, broj_putnika) VALUES (%s)""" % (", ".join(row))
cur.execute(sql_query)
db.commit()
print "Done!"
# Close the database connection
print "Closing database connection..."
if cur:
cur.close()
if db:
db.close()
print "Database closed!"
if __name__ == '__main__':
main()
I apologize for not translating all of the comments and names in the code; it was an old project of mine and I tend to write comments in English now. If something needs explanation, please fire away.
The funny thing is that if I print the file list to the screen, the names display just fine. But when they get passed to xlrd, they don't seem to be in the right format.
Respectfully,
me

I finally managed to find the error! It wasn't an encoding error after all. It was a logic error.
In the function getFiles() I stripped the leading "." from the file list, but I should have stripped "./". So, naturally, the file names were "/2006/1_siječanj.xls" instead of "2006/1_siječanj.xls" as they should have been. That is why it was an IOError and not a UnicodeEncodeError: as a result of my oversight, the script tried to open an absolute path instead of a relative one.
Well, this was embarrassing. Thank you, guys; I hope this post helps someone else pay more attention to the error types Python throws at us.
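For the record, here is a quick sketch of the difference (using an ASCII-only name just to keep the example simple):
import os

subdir = './2006'                      # what os.walk(START_DIR) yields
name = '1_sijecanj.xls'

print subdir.lstrip('.') + "/" + name  # '/2006/1_sijecanj.xls' -> treated as absolute, IOError
print os.path.join(subdir, name)       # './2006/1_sijecanj.xls' -> relative, opens fine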

It looks like xlrd isn't converting the Unicode string to a locally encoded byte string before trying to open the file. Python has guessed that the filesystem name encoding is UTF-8 and has correctly decoded the č to the right Unicode code point.
There are two ways to fix this:
1) Try encoding the Unicode filename before asking xlrd to open it:
workbook = open_workbook(f.encode(sys.getfilesystemencoding()))
2) Use raw 8-bit filenames and don't convert them to Unicode:
START_DIR = './'
IMHO, option 2 is probably safer, in case some filenames weren't written with UTF-8 encoding.
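For example, a name created under a different encoding can't even be decoded as UTF-8, while the raw 8-bit string can still be handed to open() untouched (hypothetical Latin-1 filename):
>>> name = '\xe9.xls'      # é written as Latin-1: not valid UTF-8
>>> name.decode('UTF-8')   # raises UnicodeDecodeError
>>> # the raw 8-bit bytes still match the name on disk, so open(name, 'rb') works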
UPD
Note: os.walk returns Unicode strings when the given path is a Unicode string; a normal (byte) string path returns byte strings. This is the same behaviour as os.listdir (http://docs.python.org/2/library/os.html#os.listdir).
Example:
$ ls
€.txt
$ python
>>> import os
>>> os.listdir(".")
['\xe2\x82\xac.txt']
>>> os.listdir(u".")
[u'\u20ac.txt']
(\xe2\x82\xac is the UTF-8 encoding of €)
Remember: in Unix, unlike Windows, filenames do not contain encoding hints. Filenames are simply 8-bit byte strings. You need to know what encoding they were created with if you want to convert them to a different encoding.
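To make the conversion explicit, here is a small sketch assuming a UTF-8 filesystem (which is what your script reports):
import sys

fs_enc = sys.getfilesystemencoding()    # 'UTF-8' here
uname = u'/2006/1_sije\u010danj.xls'    # Unicode path, as produced by os.walk(u'./')
bname = uname.encode(fs_enc)            # 8-bit string that open()/xlrd can use directly
print repr(bname)                       # '/2006/1_sije\xc4\x8danj.xls'
print repr(bname.decode(fs_enc))        # u'/2006/1_sije\u010danj.xls'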

Related

How to run an .sql file full of commands as a command line argument to a python program?

I have a .sql file full of commands that I want to run through my Python program. When I enter each line into the terminal individually, my program works as expected, as it has methods of parsing the individual line entry to do what it needs to do. However, when I run the program (pa_2.py) by typing the following into the terminal:
python3 pa_2.py < PA2_test.sql
the program doesn't read each line correctly and malfunctions; my guess is that it is unable to parse the .sql file correctly. The expected output is:
-- Database CS457_PA2 created.
-- Using database CS457_PA2.
-- Table Product created.
-- 1 new record inserted.
-- 1 new record inserted.
-- 1 new record inserted.
-- 1 new record inserted.
-- 1 new record inserted.
-- pid int|name varchar(20)|price float
-- 1|Gizmo|19.99
-- 2|PowerGizmo|29.99
-- 3|SingleTouch|149.99
-- 4|MultiTouch|199.99
-- 5|SuperGizmo|49.99
-- 1 record modified.
-- 2 records modified.
-- pid int|name varchar(20)|price float
-- 1|Gizmo|14.99
-- 2|PowerGizmo|29.99
-- 3|SingleTouch|149.99
-- 4|MultiTouch|199.99
-- 5|Gizmo|14.99
-- 2 records deleted.
-- 1 record deleted.
-- pid int|name varchar(20)|price float
-- 2|PowerGizmo|29.99
-- 3|SingleTouch|149.99
-- name varchar(20)|price float
-- SingleTouch|149.99
When I type each command in as an individual line after starting the program with:
python3 pa_2.py
I get the expected output. However, when I run the script in the command line as:
python3 pa_2.py < PA2_test.sql
the output I get is:
created.CS457_PA2
because it does not exist.7_PA2
Created table Product.
1 new record inserted.
1 new record inserted.
1 new record inserted.
1 new record inserted.
1 new record inserted.
because it does not exist.uct
0 records modified.
0 records modified.
because it does not exist.uct
0 records modified.
Traceback (most recent call last):
File "/Users/isaac_reilly/Desktop/College/UNR/Classes/Year 3 2022-2023/Semester 1 2022/CS 457 Database Managemant Systems/Project 3/pa_2.py", line 79, in <module>
tablefunctions.deleteData(user_input, currentdb)
File "/Users/isaac_reilly/Desktop/College/UNR/Classes/Year 3 2022-2023/Semester 1 2022/CS 457 Database Managemant Systems/Project 3/tablefunctions.py", line 114, in deleteData
if float(splitter[4]) > float(searchText):
ValueError: could not convert string to float: '19.99)'
I want the program to treat the semicolon ";" as the end of each command. How would I use PA2_test.sql as a command-line argument and run each line as expected? Below is the .sql file, as well as the rest of my program.
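(For reference, here is a minimal, hypothetical sketch of splitting a .sql file given as a command-line argument into semicolon-terminated statements; none of these names come from my program:)
import sys

with open(sys.argv[1]) as f:
    script = f.read()

for statement in script.split(';'):
    statement = statement.strip()
    if statement and statement != '.EXIT':
        print(statement + ';')   # each complete command would be dispatched here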
PA2_test.sql:
CREATE DATABASE CS457_PA2;
USE CS457_PA2;
CREATE TABLE Product (pid int, name varchar(20), price float);
INSERT INTO Product values(1, 'Gizmo', 19.99);
INSERT INTO Product values(2, 'PowerGizmo', 29.99);
INSERT INTO Product values(3, 'SingleTouch', 149.99);
INSERT INTO Product values(4, 'MultiTouch', 199.99);
INSERT INTO Product values(5, 'SuperGizmo', 49.99);
SELECT * FROM Product;
UPDATE Product set name = 'Gizmo' where name = 'SuperGizmo';
UPDATE Product set price = 14.99 where name = 'Gizmo';
SELECT * FROM Product;
DELETE FROM Product where name = 'Gizmo';
DELETE FROM Product where price > 150;
SELECT * FROM Product;
SELECT name, price FROM Product where pid != 2;
.EXIT
pa_2.py:
import fileinput
import sys
import dbfunctions
import tablefunctions
import selections
currentdb = None
user_input = None
TableList = [None]
#Loop continously prompts the terminal for an input from the user and then decides what to do based on input.
while (user_input != ".EXIT"):
user_input = input()
#print(user_input)
#States that all commands must end with a ';' if user types invalid command
if ";" not in user_input and user_input != ".EXIT":
print("Invalid command, all commands must end with ';'")
#Creates a database
if "CREATE DATABASE" in user_input:
dbName = dbfunctions.inputCleaner("CREATE DATABASE ", user_input)
dbfunctions.create_db(dbName)
#Deletes a database
if "DROP DATABASE" in user_input:
dbName = dbfunctions.inputCleaner("DROP DATABASE ", user_input)
dbfunctions.remove_db(dbName)
#Creates a table using attributes inputted by user
if "CREATE TABLE" in user_input:
tInput = dbfunctions.inputCleaner("CREATE TABLE ", user_input)
tableName = tInput.split()[0]
tablefunctions.createTable(tInput, tableName, currentdb)
#Deletes a table
if "DROP TABLE" in user_input:
tableName = dbfunctions.inputCleaner("DROP TABLE ", user_input)
tablefunctions.dropTable(tableName, currentdb)
#Modifies a table using attributes inputted by the user
if "ALTER TABLE" in user_input:
rawInput = dbfunctions.inputCleaner("ALTER TABLE ", user_input)
tablefunctions.alterTable(rawInput, currentdb)
#Sets current working database
if "USE" in user_input:
dbName = dbfunctions.inputCleaner("USE ", user_input)
#print(dbName)
currentdb = dbName
dbfunctions.finddb(currentdb)
#print("Using database " + currentdb)
#elif dbfunctions.finddb(currentdb) == 0:
#print("Unable to use database " + dbName + " because it does not exist.")
#Selects data from a user specified table and prints contents to terminal
if "SELECT" in user_input:
selections.selectSpecified(user_input, currentdb)
#Inserts given data into a specified table
if "INSERT INTO" in user_input:
dataInput = dbfunctions.inputCleaner("INSERT INTO ", user_input)
tableName = dataInput.split()[0]
tablefunctions.insertData(dataInput, tableName, currentdb)
#Changes data in table as specified
if "UPDATE" in user_input:
tablefunctions.updateData(user_input, currentdb)
#Deletes data from table as specified
if "DELETE FROM" in user_input:
tablefunctions.deleteData(user_input, currentdb)
dbfunctions.py:
import os
import subprocess
import shlex
import shutil
#Removes semicolon and given phrase from input
def inputCleaner(removePhrase, input):
cleaned = input.replace(";", "")
return cleaned.replace(removePhrase, "")
#Function used to create specified database (local directory)
def create_db(dbName):
try:
#Tries making directory
os.makedirs(dbName)
print("Database " + dbName + " created.")
except FileExistsError:
#Checks to see if directory already exists, throws exception if it does
print("!Failed to create database " + dbName + " because it already exists.")
#Function used to remove specified database (local directory)
def remove_db(dbName):
#Checks to see if specified directory exists and deletes if it does
if os.path.exists(dbName):
shutil.rmtree(dbName)
print("Database " + dbName + " deleted.")
#If selected directory does not exists, prints an error message to the screen
else:
print("!Failed to delete " + dbName + " because it does not exist.")
#Checks to make sure that specified database exists
def finddb(dbName):
if dbName in subprocess.run(['ls', '|', 'grep', dbName], capture_output = True, text = True).stdout:
print("Using database " + dbName)
else:
print("Unable to use database", dbName,"because it does not exist.")
def getOperand(op):
operand = None
if (op == '='):
operand = 0
elif (op == '!='):
operand = -3
elif (op == '<'):
operand = -1
elif (op == '>'):
operand = 1
return operand
tablefunctions.py:
import subprocess
import os
#Checks to make sure that specified table exists
def findtable(tableName, currentdb):
if tableName in subprocess.run(['ls', currentdb, '|', 'grep', tableName], capture_output = True, text = True).stdout:
return 1
else:
return 0
#Creates table with specified headers
def createTable(dataInput, tableName, currentdb):
unformattedAttributes = dataInput.replace(tableName, "")
tableAttributes1 = unformattedAttributes[2:]
tableAttributes2 = tableAttributes1[:-1]
formattedAttributes = tableAttributes2.split(",")
if (currentdb != None):
if findtable(tableName, currentdb) == 0:
os.system(f'touch {currentdb}/{tableName}.txt')
filename = currentdb + '/' + tableName + '.txt'
fedit = open(filename, 'w')
fedit.write(" |".join(formattedAttributes))
fedit.close()
print(f"Created table {tableName}.")
else:
print("!Failed to create table " + tableName + " because it already exists.")
else:
print("Please specify which database to use.")
#Deletes specified table
def dropTable(tableName, currentdb):
if (currentdb != None):
if findtable(tableName, currentdb) != 0:
os.system(f'rm {currentdb}/{tableName}.txt')
print("Table " + tableName + " deleted.")
else:
print("!Failed to delete " + tableName + " because it does not exist.")
else:
print("No specified database, enter 'USE <database_name>;'")
#Inserts data into specified table
def insertData(dataInput, tableName, currentdb):
unformattedInput = dataInput.replace(tableName, "")
cleanedInput1 = unformattedInput.replace("'", "")
cleanedInput2 = cleanedInput1.replace(" ", "")
unformattedAttributes = cleanedInput2[7:-1]
formattedAttributes = unformattedAttributes.split(",")
if (currentdb != None):
if findtable(tableName, currentdb):
fedit = open(f'{currentdb}/{tableName}.txt', 'a')
fedit.write("\n" + " | ".join(formattedAttributes))
fedit.close()
print("1 new record inserted.")
else:
print("!Failed to insert data into " + tableName + " because it does not exist.")
else:
print("No specified database, enter 'USE <database_name>;'")
#Modifies a table using attributes inputted by the user
def alterTable(rawInput, currentdb):
tableName = rawInput.split()[0]
alterCmd = rawInput.split()[1]
alterAttribute1 = rawInput.replace(tableName, "")
alterAttribute2 = alterAttribute1.replace(alterCmd, "")
newAttr = alterAttribute2[2:]
if (currentdb != None):
if findtable(tableName, currentdb):
fedit = open(f'{currentdb}/{tableName}.txt', 'a')
fedit.write(f" | {newAttr}")
fedit.close()
print("Table " + tableName + " modified.")
else:
print("!Failed to modify " + tableName + " because it does not exist.")
else:
print("No specified database, enter 'USE <database_name>;'")
#Removes data from specified table
def deleteData(user_input, currentdb):
if (currentdb != None):
cleanedInput1 = user_input[12:-1]
cleanedInput2 = cleanedInput1.replace("'", "")#Cleans input
tableName = cleanedInput2.split()[0]
if findtable(tableName, currentdb) != 0:
replaceText = ""
searchText = cleanedInput2.split()[4]
searchCategory = cleanedInput2.split()[2]
with open(f'{currentdb}/{tableName}.txt', 'r') as file:
count = 0
replacement = ""
if cleanedInput2.split()[3] == "=":
#Loops line by line for keywords
for line in file:
line = line.strip()
splitter = line.split()#Puts line into list elements
if searchText == splitter[2]: #If elements matches search text
updatedLine = "" #delete it
count += 1 #Keeps track of number of edits
else:
updatedLine = line + "\n" #Reads line unchanged if specified data is not present
replacement = replacement + updatedLine
if cleanedInput2.split()[3] == ">":
lineCount = 0
for line in file:
line = line.strip()
splitter = line.split()
if lineCount == 0:
lineCount += 1
updatedLine = line + "\n"
else:
if float(splitter[4]) > float(searchText):
updatedLine = ""
count += 1
else:
updatedLine = line + "\n"
replacement = replacement + updatedLine
if cleanedInput2.split()[3] == "<":
lineCount = 0
for line in file:
line = line.strip()
splitter = line.split()
if lineCount == 0:
lineCount += 1
updatedLine = line + "\n"
else:
if float(splitter[4]) < float(searchText):
updatedLine = ""
count += 1
else:
updatedLine = line + "\n"
replacement = replacement + updatedLine
file.close()
with open(f'{currentdb}/{tableName}.txt', 'w') as file:
file.write(replacement)
file.close()
if count == 1:
print(str(count) + " record modified.")
else:
print(str(count) + " records modified.")
else:
print("!Failed to update " + tableName + " table because it does not exist.")
else:
print("No specified database, enter 'USE <database_name>;'")
def updateData(user_input,currentdb):
if (currentdb != None):
cleanedInput1 = user_input[7:-1]
cleanedInput2 = cleanedInput1.replace("'", "")
tableName = cleanedInput2.split()[0]
if findtable(tableName, currentdb) != 0:
replaceText = cleanedInput2.split()[4]
searchText = cleanedInput2.split()[8]
replaceCategory = cleanedInput2.split()[2]
searchCategory = cleanedInput2.split()[6]
with open(f'{currentdb}/{tableName}.txt', 'r') as file:
count = 0
replacement = ""
if (replaceCategory == searchCategory):#if both columns being referenced are the same
for line in file:
line = line.strip()
if searchText in line:
updatedLine = line.replace(searchText, replaceText)
count += 1
else:
updatedLine = line
replacement = replacement + updatedLine + "\n"
else:
for line in file:
splitter = line.split()
splitter[4] = replaceText
line = line.strip()
if searchText == splitter[2]:
updatedLine = " ".join(splitter)
count += 1
else:
updatedLine = line
replacement = replacement + updatedLine + "\n"
file.close()
with open(f'{currentdb}/{tableName}.txt', 'w') as file:
file.write(replacement)
file.close()
if count == 1:
print(str(count) + " record modified.")
else:
print(str(count) + " records modified.")
else:
print("!Failed to update " + tableName + " table because it does not exist.")
else:
print("No specified database, enter 'USE <database_name>;'")
selections.py:
import tablefunctions
import dbfunctions
def selectAll(tableName, currentdb):
if currentdb == None:
print("No specified database, enter 'USE <database_name>;'")
else:
if tablefunctions.findtable(tableName, currentdb):
fedit = open(f'{currentdb}/{tableName}.txt', 'r')
print(fedit.read())
fedit.close()
else:
print("!Failed to query table " + tableName + " because it does not exist.")
def selectSpecified(user_input, currentdb):
if "SELECT * FROM" in user_input:
tableName = dbfunctions.inputCleaner("SELECT * FROM ", user_input)
selectAll(tableName, currentdb)
else:
if "SELECT" in user_input:
selLower = user_input[7:-1]
selection = user_input[7:-1]
elif "select" in user_input:
selection = user_input[7:-1]
# Gathering list of variables
selectColumns = selection.replace(",", "").split()
selectColumns = selectColumns[:selectColumns.index("FROM")]
# Table name
tableName = selection.split()[len(selectColumns)+1]
# Gathering what to filter by
whereColumn = selection.split()[len(selectColumns)+3]
whereRecord = selection.split()[len(selectColumns)+5]
operand = dbfunctions.getOperand(selection.split()[len(selectColumns)+4])
if currentdb != None:
if tablefunctions.findtable(tableName, currentdb):
f = open(f'{currentdb}/{tableName}.txt', 'r')
file = f.readlines()
f.close()
selectColumnNums = []
columnNameString = ""
listToReturn = []
count = 0
for line in file:
if (count == 0): # Headers
# Finding the indexes of select and where columns
columnList = line.split()
columnListWithTypes = columnList.copy()
del columnListWithTypes[2::3]
del columnList[1::3]
columnCount = 0
# If variable is found in table, record its index
for word in columnList:
if word in selectColumns:
selectColumnNums.append(columnCount)
if (word == whereColumn):
whereColumnNum = columnCount
columnCount += 1
# Creating a custom table header for the selected columns
for index in selectColumnNums:
columnNameString += f"{columnListWithTypes[index]} {columnListWithTypes[index+1]} | "
queryHeader = columnNameString[:-3]
listToReturn.append(queryHeader)
if (count > 0): # Values
tupleDetails = line.split()
# Determines what to do with each row
def querySpecificHelper():
# Creates the row output
def queryStringMaker():
queryString = ""
for index in selectColumnNums:
queryString += f"{tupleDetails[index]} | "
queryResult = queryString[:-3]
listToReturn.append(queryResult)
if (operand == 0): # Equality
# The type checking here handles strings and numbers separately
# Ex. 150 or 150.00 would not find 150.00 or 150, respectively
if (type(tupleDetails[whereColumnNum]) is str):
if (tupleDetails[whereColumnNum] == whereRecord):
queryStringMaker()
elif (type(tupleDetails[whereColumnNum]) is not str):
if (float(tupleDetails[whereColumnNum]) == float(whereRecord)):
queryStringMaker()
elif (operand == 1): # Greater than
if (float(tupleDetails[whereColumnNum]) > float(whereRecord)):
queryStringMaker()
elif (operand == -1): # Less than
if (float(tupleDetails[whereColumnNum]) < float(whereRecord)):
queryStringMaker()
elif (operand == -3): # Inequality
if (type(tupleDetails[whereColumnNum]) is str):
if (tupleDetails[whereColumnNum] != whereRecord):
queryStringMaker()
elif (type(tupleDetails[whereColumnNum]) is not str):
if (float(tupleDetails[whereColumnNum]) != float(whereRecord)):
queryStringMaker()
querySpecificHelper()
count += 1
for line in listToReturn: # Prints table
print(line)
else:
print(f"Could not query table {tableName} because it does not exist.")
else:
print("Please specify which database to use.")

FileNotFoundError: [Errno 2] No such file or directory: 'o'

I'm getting this error message when using yield.
When I remove the yield results and yield timeout lines, the code works fine without the error message.
I don't know what directory or file 'o' is, since I'm not using it anywhere in the code.
Here is my full code:
import gradio as gr
import ipaddress
import requests
from requests.auth import HTTPBasicAuth
import os
import string
from datetime import date, datetime
####SETTING UP DATE AND TIME WITH ISRAELI FORMAT###
current_date = date.today()
current_month = current_date.strftime('%B')
current_year = current_date.strftime('%Y')
date_reformat = current_date.strftime('%d/%m/%y')
current_day = current_date.strftime('%d')
###SWITCH###
def switch_ver(ip):
with open('switches_successful_results.txt','w') as switches_successful, open('switches_failed_results.txt', 'w') as switches_failed:
ip_addr = ip.split()
for i in ip_addr:
ip_addr = list(ipaddress.ip_network(i))
try:
basic=HTTPBasicAuth('some','password')
login = requests.post('http://'+i+':80/rest/v7/login-sessions', auth=basic)
cookie = login.cookies
get_ver = requests.get('http://'+i+':80/rest/v7/system/status', cookies=cookie)
get_ver = get_ver.json()
get_ver = get_ver['firmware_version']
get_ver = get_ver
with open('switches_successful_results.txt', 'a+') as sw:
results = 'Switch version for {} is: {} \n'.format(i, get_ver)
sw.write(results)
yield results
except requests.exceptions.ConnectTimeout:
timeout = 'Could not connect to switch: '+i+' REQUEST TIMED OUT\n'
with open('switches_failed_results.txt', 'a+') as sw:
sw.write(timeout)
yield timeout
with open('switches_successful_results.txt','r') as switches_successful, open('switches_failed_results.txt', 'r') as switches_failed:
summary = switches_failed.read() + switches_successful.read()
return (summary),['switches_successful_results.txt', 'switches_failed_results.txt']
###IPBlockerK###
def block_ip(ip):
duplicate_ips = []
blocked_ips = []
invalid_ips = []
with open('fortigate_ips.txt','r+') as f, open('fortigate_urls.txt', 'r+') as u:
fortigate_ips = f.read()
fortigate_urls = u.read()
ip_addr = ip.split()
for i in ip_addr:
try:
list(ipaddress.ip_network(i))
if i in fortigate_ips:
duplicate_ips.append(i)
elif ipaddress.ip_address(i).is_private:
invalid_ips.append(i)
else:
blocked_ips.append(i)
f.write(i + '\n')
except ValueError:
if i in fortigate_ips or i in fortigate_urls:
duplicate_ips.append(i)
elif i[0] in string.ascii_letters or i[0] == '*':
blocked_ips.append(i)
u.write(i + '\n')
else:
invalid_ips.append(i)
current_time = datetime.now()
current_time = current_time.strftime('%H:%M:%S')
if os.path.exists(current_year) == False:
os.makedirs(current_year + '\\'+ current_month + '\\' + current_day)
os.chdir(current_year+ '\\' + current_month +'\\'+ current_day)
with open('Blocked_IPs.txt', 'a+') as Blocked_IPs:
to_file = ('###############{}###############\n'.format(current_time)+'\n'.join(blocked_ips))+'\n'
Blocked_IPs.write(to_file)
os.chdir('D:\\programs\\Python310\\Projects\\net_sec')
elif os.path.exists(current_year) == True and os.path.exists(current_year + '\\'+ current_month) == False:
os.chdir(current_year)
os.makedirs(current_month + '\\' + current_day)
os.chdir(current_month +'\\'+ current_day)
with open('Blocked_IPs.txt', 'a+') as Blocked_IPs:
to_file = ('###############{}###############\n'.format(current_time)+'\n'.join(blocked_ips))+'\n'
Blocked_IPs.write(to_file)
os.chdir('D:\\programs\\Python310\\Projects\\net_sec')
elif os.path.exists(current_year) == True and os.path.exists(current_year + '\\'+ current_month) == True and os.path.exists(current_year + '\\'+ current_month + '\\' + current_day) == False:
os.chdir(current_year + '\\'+ current_month)
os.mkdir(current_day)
os.chdir(current_day)
with open('Blocked_IPs.txt', 'a+') as Blocked_IPs:
to_file = ('###############{}###############\n'.format(current_time)+'\n'.join(blocked_ips))+'\n'
Blocked_IPs.write(to_file)
os.chdir('D:\\programs\\Python310\\Projects\\net_sec')
else:
os.chdir(current_year + '\\' + current_month + '\\' + current_day)
with open('Blocked_IPs.txt', 'a+') as Blocked_IPs:
to_file = ('###############{}###############\n'.format(current_time)+'\n'.join(blocked_ips))+'\n'
Blocked_IPs.write(to_file)
os.chdir('D:\\programs\\Python310\\Projects\\net_sec')
blocked_ips_result = 'Following IP\s or URLs were Blocked!: \n'+'\n'.join(blocked_ips) +'\n'
duplicate_ips_result = 'Skipped!...Found duplicates IP\s for: \n'+'\n'.join(duplicate_ips) +'\n'
invalid_ips_result = 'Skipped!..Invalid IP\s for \n'+'\n'.join(invalid_ips) +'\n'
with open('fortigate_ips.txt', 'r') as f, open('fortigate_urls.txt', 'r') as u:
current_commit_stats = len(blocked_ips)
ips_stats = len(f.readlines())
urls_stats = len(u.readlines())
total_stats = ips_stats + urls_stats
if bool(duplicate_ips) == True and bool(blocked_ips) == False:
print(1)
return duplicate_ips_result, current_commit_stats, ips_stats, urls_stats, total_stats
elif bool(duplicate_ips) == True and bool(blocked_ips) == True and bool(invalid_ips) == True:
print(2)
return invalid_ips_result + duplicate_ips_result + blocked_ips_result, current_commit_stats, ips_stats, urls_stats, total_stats
elif bool(invalid_ips) == True and bool(blocked_ips) == True:
print(3)
return invalid_ips_result + blocked_ips_result, current_commit_stats, ips_stats, urls_stats, total_stats
elif bool(invalid_ips) == True and bool(blocked_ips) == True:
print(4)
return invalid_ips_result + blocked_ips_result, current_commit_stats, ips_stats, urls_stats, total_stats
else:
print(5)
return (blocked_ips_result), current_commit_stats, ips_stats, urls_stats, total_stats
###GRADIO GUI###
#f = open('fortigate_ips.txt', 'r')
#fortigate = (f.read().split())
#f.close()
with gr.Blocks(title = 'Switcher') as switches_ver:
gr.Markdown('Welcome to IPBlocker')
with gr.Tab(label = 'IPBlocker'):
with gr.Row():
with gr.Column():
ips_to_block = gr.Textbox(label = "IPs", lines = 10, placeholder=('Please fill Ips to block'))
block_btn = gr.Button('Block')
#ip_lookup = gr.Dropdown(fortigate)
with gr.Column():
output_textbox = gr.Textbox(label = "Results", lines=10)
with gr.Row():
current_commit_stats = gr.Textbox(label = 'Current IP\s or URLs added to block:')
forti_ips_stats = gr.Textbox(label = 'Total blocked IP\s on Fortigate: ')
forti_urls_stats = gr.Textbox(label = 'Total URLs blocked on Fortigate')
forti_total_stats = gr.Textbox(label = 'Total blocked IP\s and URLs on Fortigate')
block_btn.click(fn=block_ip, inputs = ips_to_block, outputs = [output_textbox, current_commit_stats, forti_ips_stats, forti_urls_stats, forti_total_stats])
with gr.Tab(label = 'Switcher'):
with gr.Row():
with gr.Column():
switch_box = gr.Textbox(label = 'Switches', lines = 10, placeholder='Please fill switches IPs...')
show_ver = gr.Button('Show current switches version')
upgrade_ver = gr.Button('Upgrade selected switches')
with gr.Column():
output_textbox = gr.Textbox(label='Results',lines = 10)
output_file = gr.File(['switches_successful_results.txt', 'switches_failed_results.txt'])
show_ver.click(fn=switch_ver, inputs = switch_box, outputs = [output_textbox, output_file])
upgrade_ver.click(fn=block_ip, inputs = ips_to_block, outputs=[output_textbox, output_file])
switches_ver.queue(concurrency_count=20, max_size=20).launch()
full error traceback:
Traceback (most recent call last):
File "D:\programs\Python310\lib\site-packages\gradio\routes.py", line 273, in run_predict
output = await app.blocks.process_api(
File "D:\programs\Python310\lib\site-packages\gradio\blocks.py", line 757, in process_api
predictions = self.postprocess_data(fn_index, result["prediction"], state)
File "D:\programs\Python310\lib\site-packages\gradio\blocks.py", line 721, in postprocess_data
block.postprocess(prediction_value)
File "D:\programs\Python310\lib\site-packages\gradio\components.py", line 2147, in postprocess
"name": processing_utils.create_tmp_copy_of_file(
File "D:\programs\Python310\lib\site-packages\gradio\processing_utils.py", line 323, in create_tmp_copy_of_file
shutil.copy2(file_path, file_obj.name)
File "D:\programs\Python310\lib\shutil.py", line 434, in copy2
copyfile(src, dst, follow_symlinks=follow_symlinks)
File "D:\programs\Python310\lib\shutil.py", line 254, in copyfile
with open(src, 'rb') as fsrc:
FileNotFoundError: [Errno 2] No such file or directory: 'o'
The 'o' came from the timeout text "Could not connect..."
From what I understand about gradio, the result of both yield and return gets mapped onto the outputs, which are output_textbox and output_file.
As the yield result is the timeout string (the same goes for the results yield case):
output_textbox = timeout[0] = 'C'
output_file = timeout[1] = 'o'
If you want to remove the error, you should change the yield result to be compatible with the outputs.
For example:
yield timeout, ['switches_successful_results.txt', 'switches_failed_results.txt']
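A tiny self-contained illustration of why the indexing happens (hypothetical function names):
def bad():
    yield 'Could not connect'                       # a plain string...

def good():
    yield 'Could not connect', ['a.txt', 'b.txt']   # ...versus a (textbox, file) pair

val = next(bad())
print(val[0], val[1])        # C o  -- the single string gets indexed per output
text, files = next(good())
print(text, files)           # each output component gets its intended value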
If you are using yield, you can iterate over the generator only once; it doesn't keep the data in memory the whole time. Check this out: https://stackoverflow.com/a/231855/17318894
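A quick way to see that:
def gen():
    yield 1
    yield 2

g = gen()
print(list(g))    # [1, 2]
print(list(g))    # []  -- the generator is exhausted after the first pass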

Python SQLite3 - cursor.execute - no error

This is a piece of code which needs to perform the following functionality:
Dump all table names in a database
From each table search for a column with either Latitude or Longitude in
Store these co-ords as a json file
The code was tested and working on a single database. However, once it was put into another piece of code which calls it with different databases, it no longer enters line 49. There is no error either, so I am struggling to see what the issue is, as I have not changed anything.
Code snippet (line 48 is the bottom line):
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
print (cursor)
for tablerow in cursor.fetchall():
I am running this in the /tmp/ dir due to an earlier error with sqlite not working outside of /tmp.
Any questions please ask them.
Thanks!!
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sqlite3
import os
import sys
filename = sys.argv[1]
def validateFile(filename):
filename, fileExt = os.path.splitext(filename)
print ("[Jconsole] Python: Filename being tested - " + filename)
if fileExt == '.db':
databases(filename)
elif fileExt == '.json':
jsons(fileExt)
elif fileExt == '':
blank()
else:
print ('Unsupported format')
print (fileExt)
def validate(number):
try:
number = float(number)
if -90 <= number <= 180:
return True
else:
return False
except ValueError:
pass
def databases(filename):
dbName = sys.argv[2]
print (dbName)
idCounter = 0
mainList = []
lat = 0
lon = 0
with sqlite3.connect(filename) as conn:
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
print (cursor)
for tablerow in cursor.fetchall():
print ("YAY1")
table = tablerow[0]
cursor.execute('SELECT * FROM {t}'.format(t=table))
for row in cursor:
print(row)
print ("YAY")
tempList = []
for field in row.keys():
tempList.append(str(field))
tempList.append(str(row[field]))
for i in tempList:
if i in ('latitude', 'Latitude'):
index = tempList.index(i)
if validate(tempList[index + 1]):
idCounter += 1
tempList.append(idCounter)
(current_item, next_item) = \
(tempList[index], tempList[index + 1])
lat = next_item
if i in ('longitude', 'Longitude'):
index = tempList.index(i)
if validate(tempList[index + 1]):
(current_item, next_item) = \
(tempList[index], tempList[index + 1])
lon = next_item
result = '{ "id": ' + str(idCounter) \
+ ', "content": "' + dbName + '", "title": "' \
+ str(lat) + '", "className": "' + str(lon) \
+ '", "type": "box"},'
mainList.append(result)
file = open('appData.json', 'a')
for item in mainList:
file.write('%s\n' % item)
file.close()
# {
# ...."id": 1,
# ...."content": "<a class='thumbnail' href='./img/thumbs/thumb_IMG_20161102_151122.jpg'>IMG_20161102_151122.jpg</><span><img src='./img/thumbs/thumb_IMG_20161102_151122.jpg' border='0' /></span></a>",
# ...."title": "50.7700721944444",
# ...."className": "-0.8727045",
# ...."start": "2016-11-02 15:11:22",
# ...."type": "box"
# },
def jsons(filename):
print ('JSON')
def blank():
print ('blank')
validateFile(filename)
Fixed. The issue was up here:
filename, fileExt = os.path.splitext(filename)
The filename variable was being overwritten with the name minus the file extension, so when SQLite went looking for the file it didn't find it (and silently created a new, empty database instead).
Strangely, no error appeared, but it is fixed now by changing the filename variable to filename1.
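A quick sketch of what was happening (file name made up):
import os

filename = 'example.db'
filename, fileExt = os.path.splitext(filename)
print(filename)   # 'example' -- the extension is gone, so sqlite3.connect('example')
                  # quietly creates a new empty database with no tables in it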

How to retrieve column values by column name in python with cx_Oracle

I'm programming a script that connects to an Oracle database and writes the results into a log file. I want to get output like this:
FEC_INCLUSION = 2005-08-31 11:43:48,DEBITO_PENDIENTE = None,CAN_CUOTAS = 1.75e-05,COD_CUENTA = 67084,INT_TOTAL = None,CAN_CUOTAS_ANTERIOR = None,COD_INVERSION = 1,FEC_MODIFICACION = 10/04/2012 09:45:22,SAL_TOT_ANTERIOR = None,CUOTA_COMISION = None,FEC_ULT_CALCULO = None,MODIFICADO_POR = CTAPELA,SAL_TOTAL = 0.15,COD_TIPSALDO = 1,MONTO_COMISION = None,COD_EMPRESA = 1,SAL_INFORMATIVO = None,COD_OBJETIVO = 5,SAL_RESERVA = None,INCLUIDO_POR = PVOROPE,APORTE_PROM = 0.0,COSTO_PROM = None,CREDITO_PENDIENTE = None,SAL_PROM = 0.0,
FEC_INCLUSION = 2005-08-31 11:43:49,DEBITO_PENDIENTE = None,CAN_CUOTAS = 0.0,COD_CUENTA = 67086,INT_TOTAL = None,CAN_CUOTAS_ANTERIOR = None,COD_INVERSION = 9,FEC_MODIFICACION = 25/02/2011 04:38:52,SAL_TOT_ANTERIOR = None,CUOTA_COMISION = None,FEC_ULT_CALCULO = None,MODIFICADO_POR = OPEJAMO,SAL_TOTAL = 0.0,COD_TIPSALDO = 1,MONTO_COMISION = None,COD_EMPRESA = 1,SAL_INFORMATIVO = None,COD_OBJETIVO = 5,SAL_RESERVA = None,INCLUIDO_POR = PVOROPE,APORTE_PROM = 0.0,COSTO_PROM = None,CREDITO_PENDIENTE = None,SAL_PROM = 0.0,
I created a function that turns the query results into dictionaries:
def DictFactory(description,data):
column_names = [col[0] for col in description]
results = []
for row in data:
results.append(dict(zip(column_names,row)))
return results
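For example, with a made-up description and two rows, the DictFactory above gives one dict per row:
description = [('COD_CUENTA', None), ('SAL_TOTAL', None)]   # cx_Oracle describes each column with a tuple; only col[0] is used here
data = [(67084, 0.15), (67086, 0.0)]
print DictFactory(description, data)
# [{'COD_CUENTA': 67084, 'SAL_TOTAL': 0.15}, {'COD_CUENTA': 67086, 'SAL_TOTAL': 0.0}]  (key order may vary)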
Then I created this function, which finally saves the results into my log:
def WriteLog(log_file,header,data):
file_exist = os.path.isfile(log_file)
log = open(log_file,'a')
if not file_exist:
print "File does not exist, writing new log file"
open(log_file,'w').close()
mydata = DictFactory(header,data)
checkpoint_name = ReadCheckpointName()
string = ''
for m in mydata:
for k,v in m.items():
string = string + k + ' = ' + str(v) + ','
if k == checkpoint_name:
#print "KEY FOUND"
cur_checkpoint = v
cur_checkpoint = str(cur_checkpoint)
#print string
string = string + '\n'
print cur_checkpoint
log.write(string + '\n')
WriteCheckpoint(cur_checkpoint,checkpoint_file)
log.close()
This is the main function:
def GetInfo():
mypool = PoolToDB()
con = mypool.acquire()
cursor = con.cursor()
GetLastCheckpoint()
sql = ReadQuery()
#print sql
cursor.execute(sql)
data = cursor.fetchall()
WriteLog(log_file,cursor.description,data)
#WriteCsvLog(log_file,cursor.description,data)
cursor.close()
But I realized that while it works for a query that fetches a few records, if I try to fetch many records my script never ends.
This is my output when I executed a query with 5000 records. As you can see, it takes too long.
jballesteros#SplunkPorvenir FO_TIPSALDOS_X_CUENTA]$ python db_execution.py
Starting connection: 5636
GetLastCheckpoint function took 0.073 ms
GetLastCheckpoint function took 0.025 ms
ReadQuery function took 0.084 ms
File does not exist, writing new log file
DictFactory function took 23.050 ms
ReadCheckpointName function took 0.079 ms
WriteCheckpoint function took 0.204 ms
WriteLog function took 45112.133 ms
GetInfo function took 46193.033 ms
I'm pretty sure you know a much better way to do what I am trying to do.
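I suspect part of the slowdown is that string keeps growing across all the rows inside WriteLog (it is never reset in the loop), so every iteration rewrites everything before it. A rough sketch of the per-row pattern I mean, building each line separately and joining at the end (sample data and file name made up):
rows = [{'COD_CUENTA': 67084, 'SAL_TOTAL': 0.15},
        {'COD_CUENTA': 67086, 'SAL_TOTAL': 0.0}]

lines = []
for m in rows:
    parts = ['%s = %s' % (k, v) for k, v in m.items()]
    lines.append(','.join(parts))

log = open('example.log', 'a')       # made-up file name
log.write('\n'.join(lines) + '\n')
log.close()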
This is the complete code:
#!/usr/bin/env python
# encoding: utf-8
import re
import sys
try:
import cx_Oracle
except:
print "Error: Oracle module required to run this plugin."
sys.exit(0)
import datetime
import re
import commands
import os
from optparse import OptionParser
import csv
import time
#################################
#### Database Variables ####
#################################
Config = {
"host" : "",
"user" : "",
"password" : "",
"instance" : "",
"port" : "",
}
Query = {
"sql" : "",
"checkpoint_datetype" : "",
"checkpoint_name" : "",
}
dir = '/home/jballesteros/PENS2000/FO_TIPSALDOS_X_CUENTA/'
connection_dir = '/home/jballesteros/PENS2000/Connection'
checkpoint_file = dir + 'checkpoint.conf'
log_file = '/var/log/Pens2000/FO_TIPSALDOS_X_CUENTA.csv'
internal_log = '/var/log/Pens2000/internal.log'
query = dir + 'query'
sys.path.append(os.path.abspath(connection_dir))
from db_connect_pool import *
def Timing(f):
def wrap(*args):
time1 = time.time()
ret = f(*args)
time2 = time.time()
print "%s function took %0.3f ms" % (f.func_name,(time2- time1)*1000.0)
return ret
return wrap
#Timing
def InternalLogWriter(message):
now = datetime.datetime.now()
log = open(internal_log, 'a')
log.write("%s ==> %s" % (now.strftime("%Y-%m-%d %H:%M:%S"),message))
log.close()
return
#Timing
def GetLastCheckpoint():
global cur_checkpoint
conf = open(checkpoint_file, 'r')
cur_checkpoint = conf.readline()
cur_checkpoint = cur_checkpoint.rstrip('\n')
cur_checkpoint = cur_checkpoint.rstrip('\r')
conf.close()
#Timing
def ReadQuery():
global cur_checkpoint
GetLastCheckpoint()
qr = open(query, 'r')
line = qr.readline()
line = line.rstrip('\n')
line = line.rstrip('\r')
Query["sql"], Query["checkpoint_datetype"],Query["checkpoint_name"] = line.split(";")
sql = Query["sql"]
checkpoint_datetype = Query["checkpoint_datetype"]
checkpoint_name = Query["checkpoint_name"]
if (checkpoint_datetype == "DATETIME"):
sql = sql + " AND " + checkpoint_name + " >= " + "TO_DATE('%s','YYYY-MM-DD HH24:MI:SS') ORDER BY %s" % (cur_checkpoint,checkpoint_name)
if (checkpoint_datetype == "NUMBER"):
sql = sql + " AND " + checkpoint_name + " > " + "%s ORDER BY %s" % (cur_checkpoint,checkpoint_name)
qr.close()
return str(sql)
#Timing
def ReadCheckpointName():
qr = open(query, 'r')
line = qr.readline()
line = line.rstrip('\n')
line = line.rstrip('\r')
Query["sql"], Query["checkpoint_datetype"],Query["checkpoint_name"] = line.split(";")
checkpoint_name = Query["checkpoint_name"]
return str(checkpoint_name)
#Timing
def LocateCheckPoint(description):
description
checkpoint_name = ReadCheckpointName()
#print checkpoint_name
#print description
startcounter = 0
finalcounter = 0
flag = 0
for d in description:
prog = re.compile(checkpoint_name)
result = prog.match(d[0])
startcounter = startcounter + 1
if result:
finalcounter = startcounter - 1
counterstr = str(finalcounter)
print "Checkpoint found in the array position number: " + counterstr
flag = 1
if (flag == 0):
print "Checkpoint did not found"
return finalcounter
#Timing
def DictFactory(description,data):
column_names = [col[0] for col in description]
results = []
for row in data:
results.append(dict(zip(column_names,row)))
return results
#Timing
def WriteCsvLog(log_file,header,data):
checkpoint_index = LocateCheckPoint(header)
file_exists = os.path.isfile(log_file)
with open(log_file,'ab') as csv_file:
headers = [i[0] for i in header]
csv_writer = csv.writer(csv_file,delimiter='|')
if not file_exists:
print "File does not exist, writing new CSV file"
csv_writer.writerow(headers) # Writing headers once
for d in data:
csv_writer.writerow(d)
cur_checkpoint = d[checkpoint_index]
cur_checkpoint = str(cur_checkpoint)
WriteCheckpoint(cur_checkpoint,checkpoint_file)
csv_file.close()
#Timing
def WriteLog(log_file,header,data):
file_exist = os.path.isfile(log_file)
log = open(log_file,'a')
if not file_exist:
print "File does not exist, writing new log file"
open(log_file,'w').close()
mydata = DictFactory(header,data)
checkpoint_name = ReadCheckpointName()
string = ''
for m in mydata:
for k,v in m.items():
string = string + k + ' = ' + str(v) + ','
if k == checkpoint_name:
#print "KEY FOUND"
cur_checkpoint = v
cur_checkpoint = str(cur_checkpoint)
#print string
string = string + '\n'
print cur_checkpoint
log.write(string + '\n')
WriteCheckpoint(cur_checkpoint,checkpoint_file)
log.close()
#Timing
def WriteCheckpoint(cur_checkpoint,conf_file):
conf = open(conf_file,'w')
conf.write(cur_checkpoint)
conf.close()
#Timing
def GetInfo():
mypool = PoolToDB()
con = mypool.acquire()
cursor = con.cursor()
GetLastCheckpoint()
sql = ReadQuery()
#print sql
cursor.execute(sql)
#data = cursor.fetchall()
#WriteLog(log_file,cursor.description,data)
#WriteCsvLog(log_file,cursor.description,data)
cursor.close()
def __main__():
parser = OptionParser()
parser.add_option("-c","--change-password",dest="pass_to_change",help="Change the password for database connection",metavar="1")
(options, args) = parser.parse_args()
if (options.pass_to_change):
UpdatePassword()
else:
GetInfo()
__main__()
This is a query sample:
SELECT COD_EMPRESA, COD_TIPSALDO, COD_INVERSION, COD_CUENTA, COD_OBJETIVO, CAN_CUOTAS, SAL_TOTAL, INT_TOTAL, SAL_RESERVA, APORTE_PROM, SAL_PROM, COSTO_PROM, SAL_TOT_ANTERIOR, FEC_ULT_CALCULO, INCLUIDO_POR, FEC_INCLUSION, MODIFICADO_POR, TO_CHAR(FEC_MODIFICACION,'DD/MM/YYYY HH24:MI:SS') AS FEC_MODIFICACION, CUOTA_COMISION, MONTO_COMISION, SAL_INFORMATIVO, CREDITO_PENDIENTE, DEBITO_PENDIENTE, CAN_CUOTAS_ANTERIOR FROM FO.FO_TIPSALDOS_X_CUENTA WHERE ROWNUM <=100000 AND FEC_INCLUSION >= TO_DATE('2005-08-31 11:43:49','YYYY-MM-DD HH24:MI:SS') ORDER BY FEC_INCLUSION
PS: I've really been searching on Google and this forum for an answer to my question, but I haven't found anything similar.

Python xlrd- Unicode error

I'm reading my data from an Excel file and then writing it into the DB in Django. I'm using the Python xlrd module.
I'm getting the following error:
'ascii' codec can't encode character u'\xc1' in position 6: ordinal not in range(128)
I've tried all the usual solutions:
1) I was using str(variable). Removed it; now I store the value as it is in the DB.
2) Tried wb = open_workbook('static/'+filename, encoding_override="utf_16_le")
3) Tried .encode(errors='replace')
But nothing worked. How am I supposed to get rid of this error?
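(From what I can tell, the error itself is just Python 2 implicitly converting a non-ASCII unicode value to ASCII somewhere, e.g.:)
>>> str(u'\xc1')   # raises: 'ascii' codec can't encode character u'\xc1' in position 0: ordinal not in range(128)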
Here is my code
def __init__(self, arm_id, dsp_name, DSP, hubcode, Pincode, pptl,state):
self.arm_id = arm_id
self.dsp_name = dsp_name
self.DSP = DSP.zfill(2)
self.hubcode = hubcode
self.Pincode = Pincode
self.pptl = pptl
self.state = state
wb = open_workbook('static/'+filename, encoding_override="utf_16_le")
for sheet in wb.sheets():
number_of_rows = sheet.nrows
number_of_columns = sheet.ncols
items = []
arm_list = []
pptl_list = []
pptlcode_list = []
count = 1
status = 0
for row in range(1, number_of_rows):
values = []
for col in range(number_of_columns):
value = (sheet.cell(row,col).value)
try: value = str(int(value))
except ValueError: pass
finally: values.append(value)
item = Excel(*values)
count +=1
arm_id = item.arm_id
if arm_id not in arm_list:
description = 'Arm'+arm_id
arm_obj = Arm(arm_id = arm_id, description = description)
arm_obj.save()
arm_list.append(arm_id)
pptl_id = (item.pptl)
if pptl_id not in pptl_list:
try :
pptl_obj = PPTLconfig.objects.get(pptl_id = pptl_id)
pptl_obj.arm_id = arm_obj
pptl_obj.hubcode = hubcode
except :
description = 'PPTL'+pptl_id
pptl_obj = PPTLconfig(pptl_id = pptl_id, description = description , arm_id = arm_obj, hubcode = (item.hubcode))
finally :
pptl_obj.save()
pptl_list.append(pptl_id)
code = []
for factors in SORTATION_FACTORS:
if factors == 'DSP': code.append((item.DSP))
elif factors == 'Pincode': code.append((item.Pincode))
elif factors == 'DG': code.append((item.state).zfill(4))
code = ','.join(code)
if code not in pptlcode_list :
try :
code_obj = PPTLcode.objects.get(code = code)
code_obj.pconf_id = pptl_obj
except : code_obj = PPTLcode(code=code, pconf_id=pptl_obj)
finally :
code_obj.save()
pptlcode_list.append(code)
else :
error = "Duplicate PPTLcode " + code + " at Row " + str(count)
status = 1
delete_data(1)
return (status,error)
###############Add ArmPrinterMapping ######################
arm_obj_list = Arm.objects.all()
for arm_obj in arm_obj_list:
printer_name = 'Arm'+str(arm_obj.arm_id)
ap_mapping = ArmPrinterMapping(arm_id = arm_obj, printer_name = printer_name)
ap_mapping.save()
return (0,0)
Set the default encoding to utf8; it should work then:
import sys
reload(sys)
sys.setdefaultencoding('utf8')
