Python sqlite3 passing BLOB to user-defined function gives None

I am trying to pass the data from a BLOB column into a user-defined function, but it shows up in that function as None. Is this just a quirk, or am I doing something wrong?
The BLOB data are JPEGs I put into an sqlite3 database using Python 2.7.12. The schema for the table is: CREATE TABLE data (md5sum TEXT PRIMARY KEY, data BLOB);
import sqlite3
import hashlib

def p_data(x):
    print [x]
    return x

def p_md(x):
    print [x]
    return x

db = sqlite3.connect('tagged.db')
db.text_factory = str
db.create_function('P_DATA', 1, p_data)
db.create_function('P_MD', 1, p_md)

r = db.execute('SELECT P_MD(md5sum),P_DATA(data),data FROM data LIMIT 1')
for i in r:
    print
    # Don't want to print the thousands of bytes in i[-1]
    print i[:1]
    print hashlib.md5(i[-1]).hexdigest()
Output of python test.py:
[u'3040158ef2c323aaa63da499fc821d77']
[None]
('3040158ef2c323aaa63da499fc821d77', None)
3040158ef2c323aaa63da499fc821d77
Edit
I boiled down my main program into the scripts below to make it easier to share an exact equivalent of what I am running. The test binary data (image) is my avatar image.
Easy initializer for tagged.db
PRAGMA foreign_keys=OFF;
BEGIN TRANSACTION;
PRAGMA writable_schema=ON;
INSERT INTO sqlite_master(type,name,tbl_name,rootpage,sql)VALUES('table','files','files',0,'CREATE VIRTUAL TABLE "files" USING fts3(fname TEXT, orig_name TEXT, tags TEXT, md5sum TEXT)');
CREATE TABLE 'files_content'(docid INTEGER PRIMARY KEY, 'c0fname', 'c1orig_name', 'c2tags', 'c3md5sum');
CREATE TABLE 'files_segments'(blockid INTEGER PRIMARY KEY, block BLOB);
CREATE TABLE 'files_segdir'(level INTEGER,idx INTEGER,start_block INTEGER,leaves_end_block INTEGER,end_block INTEGER,root BLOB,PRIMARY KEY(level, idx));
CREATE TABLE data (md5sum TEXT PRIMARY KEY, data BLOB);
CREATE INDEX idx1 on data(md5sum);
PRAGMA writable_schema=OFF;
COMMIT;
Contents of test.py
import sqlite3
import hashlib

def p_data(x):
    print 'inside p_data'
    print '\t', [x]
    return x

def p_md(x):
    print 'inside p_md'
    print '\t', [x]
    return x

db = sqlite3.connect('tagged.db')
db.text_factory = str
db.create_function('P_DATA', 1, p_data)
db.create_function('P_MD', 1, p_md)

fname = '4f880f29b27d5b2c14399512b7155f96.png'
with open(fname, 'rb') as f:
    data = f.read()
md = hashlib.md5(data).hexdigest()

db.execute('INSERT INTO files VALUES (?,?,?,?)', ('testname.png', fname, 'testtag', md))
db.execute('INSERT INTO data VALUES (?,?)', (md, data))
# Don't commit so tagged.db will be empty each time test.py is started

r = db.execute('SELECT P_MD(md5sum),P_DATA(data),data FROM data LIMIT 1')
for i in r:
    print
    print i[:1]
    print hashlib.md5(i[-1]).hexdigest()

print
print 'Testing "SELECT typeof(data) FROM data LIMIT 1"'
r = db.execute('SELECT typeof(data) FROM data LIMIT 1')
print r.fetchone()

print
print 'Testing "SELECT p_data("Have some text")'
r = db.execute("SELECT p_data('Have some text')")
v = r.fetchone()
print v
print [str(v[0])]

print
print 'Testing "SELECT p_data(x\'112233\')"'
r = db.execute("SELECT p_data(x'112233')")
v = r.fetchone()
print v
Run python test.py
inside p_md
[u'c337ae2a8ebd84b7e50240d875b2729e']
inside p_data
[None]
('c337ae2a8ebd84b7e50240d875b2729e',)
c337ae2a8ebd84b7e50240d875b2729e
Testing "SELECT typeof(data) FROM data LIMIT 1"
('text',)
Testing "SELECT p_data("Have some text")
inside p_data
[u'Have some text']
('Have some text',)
['Have some text']
Testing "SELECT p_data(x'112233')"
inside p_data
[<read-write buffer ptr 0x7f31080cad18, size 3 at 0x7f31080cacd8>]
(<read-write buffer ptr 0x7f31080cad18, size 3 at 0x7f31080cacd8>,)
['\x11"3']

In test.py you'll find that type(data) is str, so it gets bound to the query as TEXT rather than BLOB (your typeof(data) test confirms this: it prints ('text',)), and the encoding likely gets confused when you read it back through the user-defined function.
The solution is to insert an sqlite3.Binary instead:
db.execute('INSERT INTO data VALUES (?,?)', (md, sqlite3.Binary(data)))
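A quick way to confirm the fix is to check typeof() after inserting through sqlite3.Binary. A minimal sketch, using an in-memory database and stand-in bytes rather than a real image:
import sqlite3
import hashlib

db = sqlite3.connect(':memory:')
db.execute('CREATE TABLE data (md5sum TEXT PRIMARY KEY, data BLOB)')

payload = b'\x89PNG\r\n\x1a\n' + b'\x00' * 16  # stand-in for real image bytes
md = hashlib.md5(payload).hexdigest()

# sqlite3.Binary makes the driver bind the value as BLOB instead of TEXT
db.execute('INSERT INTO data VALUES (?, ?)', (md, sqlite3.Binary(payload)))
print(db.execute('SELECT typeof(data) FROM data').fetchone())  # ('blob',)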

Related

How to ingest a table specification file in .txt form and create a table in sqlite?

I tried a few different ways (below), but I'm having trouble a) removing the width column and b) replacing the \n with a comma. I have a txt file like the one below, and I want to take that information and create a table in sqlite (all using Python).
"field",width, type
name, 15, string
revenue, 10, decimal
invoice_date, 10, string
amount, 2, integer
Current Python code - trying to read in the file and get the values to pass into the SQL statement below:
import os
import pandas as pd
dir_path = os.path.dirname(os.path.realpath(__file__))
file = open(str(dir_path) + '/revenue/revenue_table_specifications.txt','r')
lines = file.readlines()
table = lines[2::]
s = ''.join(str(table).split(','))
x = s.replace("\n", ",").strip()
print(x)
SQL I want to pass in:
c = sqlite3.connect('rev.db')  # connect to DB
try:
    c.execute('''CREATE TABLE
        revenue_table (information from txt file,
                       information from txt file,
                       ....)''')
except sqlite3.OperationalError:  # i.e. table exists already
    pass
This produces something that will work.
def makesql(filename):
    s = []
    for row in open(filename):
        if row[0] == '"':
            continue
        parts = row.strip().split(", ")
        s.append(f"{parts[0]} {parts[2]}")
    return "CREATE TABLE revenue_table (\n" + ",\n".join(s) + ");"

sql = makesql('x.csv')
print(sql)
c.execute(sql)
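With the sample spec file from the question saved as x.csv, the printed statement is:
CREATE TABLE revenue_table (
name string,
revenue decimal,
invoice_date string,
amount integer);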

How can I (safely) convert a dictionary into an UPDATE statement for sqlite in Python?

Say for example, I have a table of students, and I have a Python dictionary
mydict = {"fname" : "samwise", "lname" : "gamgee", "age" : 13}
How can I safely generate a Python function that can UPDATE this into my student table? (In my use-case I can safely assume that the student already exists in the table, AND I KNOW the id already)
I have created a function that achieves this, but I can't help thinking it's a bit crude and perhaps open to SQL injection attacks.
def sqlite_update(conn, table, data, pkeyname, pkeyval):
    set_lines = ""
    for k, v in data.items():
        set_lines += "{} = '{}', ".format(k, v)
    set_lines = set_lines[:-2]  # remove trailing comma and space
    sql = "UPDATE {0} SET {1} WHERE {2} = '{3}'"
    statement = sql.format(table, set_lines, pkeyname, pkeyval)
    conn.execute(statement)
    conn.commit()
And to update I just call
sqlite_update(conn, "student", mydict, "id", 1)
I assume you are using SQLAlchemy. In that case, you can use the sqlalchemy.sql.text function, which handles escaping through bound parameters.
You can adjust your function as below.
from sqlalchemy.sql import text

def sqlite_update(conn, table, data, pkeyname, pkeyval):
    set_lines = ",".join([f"{k}=:{k}" for k in data.keys()])
    statement = text(f"UPDATE {table} SET {set_lines} WHERE {pkeyname} = :pkeyval")
    args = dict(data)
    args["pkeyval"] = pkeyval
    conn.execute(statement, args)
    conn.commit()
For more details, refer to the official SQLAlchemy documentation on the text function.
EDIT
As for a plain sqlite3 connection, you can do basically the same thing as above, with slight changes.
def sqlite_update(conn, table, data, pkeyname, pkeyval):
    set_lines = ",".join([f"{k}=:{k}" for k in data.keys()])
    statement = f"UPDATE {table} SET {set_lines} WHERE {pkeyname} = :pkeyval"
    args = dict(data)
    args["pkeyval"] = pkeyval
    conn.execute(statement, args)
    conn.commit()
Refer to the sqlite3 execute documentation.
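For the example dictionary and the call sqlite_update(conn, "student", mydict, "id", 1), this builds the statement (key order may vary on older Pythons):
UPDATE student SET fname=:fname,lname=:lname,age=:age WHERE id = :pkeyval
with the argument mapping {'fname': 'samwise', 'lname': 'gamgee', 'age': 13, 'pkeyval': 1}.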
This is indeed wide open to SQL injection, because you build the query as a string including its values, instead of using a parameterized query.
Building a parameterized query with Python is easy:
def sqlite_update(conn, table, data, pkeyname, pkeyval):
    query = f"UPDATE {table} SET " + ', '.join(
        "{}=?".format(k) for k in data.keys()) + f" WHERE {pkeyname}=?"
    # uncomment next line for debugging
    # print(query, list(data.values()) + [pkeyval])
    conn.execute(query, list(data.values()) + [pkeyval])
With your example, the query displayed by the debug print line is:
UPDATE student SET fname=?, lname=?, age=? WHERE id=?
with the following values list: ['samwise', 'gamgee', 13, 1]
But beware: to be fully protected from SQL injection, you should also sanitize the table and field names to ensure they contain no dangerous characters like ;.
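One way to do that, sketched below with a hypothetical safe_ident helper (not part of sqlite3), is to whitelist identifier characters before interpolating them into the query:
import re

def safe_ident(name):
    # Hypothetical helper: accept only plain identifiers, reject everything else
    if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name):
        raise ValueError("unsafe SQL identifier: {!r}".format(name))
    return name

# usage inside sqlite_update, e.g.: query = f"UPDATE {safe_ident(table)} SET ..."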

Why pymysql not insert record into table?

I am pretty new to Python development. I have a long Python script that "clones" a database and adds additional stored functions and procedures. Clone means copying only the schema of the DB. These steps work fine.
My question is about the pymysql insert execution:
I have to copy some table contents into the new DB. I don't get any SQL error. If I debug or print the created INSERT INTO command, it is correct (I've tested it in an SQL editor). The insert execution seems correct because the result contains the exact row count... but all rows are missing from the destination table in the destination DB...
(Of course the DB_* variables have been defined!)
import pymysql
import datetime

liveDbConn = pymysql.connect(DB_HOST, DB_USER, DB_PWD, LIVE_DB_NAME)
testDbConn = pymysql.connect(DB_HOST, DB_USER, DB_PWD, TEST_DB_NAME)
tablesForCopy = ['role', 'permission']
for table in tablesForCopy:
    with liveDbConn.cursor() as liveCursor:
        # Get name of columns
        liveCursor.execute("DESCRIBE `%s`;" % (table))
        columns = ''
        for column in liveCursor.fetchall():
            columns += '`' + column[0] + '`,'
        columns = columns.strip(',')

        # Get and convert values
        values = ''
        liveCursor.execute("SELECT * FROM `%s`;" % (table))
        for result in liveCursor.fetchall():
            data = []
            for item in result:
                if type(item) == type(None):
                    data.append('NULL')
                elif type(item) == type('str'):
                    data.append("'" + item + "'")
                elif type(item) == type(datetime.datetime.now()):
                    data.append("'" + str(item) + "'")
                else:  # for numeric values
                    data.append(str(item))
            v = '(' + ', '.join(data) + ')'
            values += v + ', '
        values = values.strip(', ')

        print("### table: %s" % (table))
        testDbCursor = testDbConn.cursor()
        testDbCursor.execute("INSERT INTO `" + TEST_DB_NAME + "`.`" + table
                             + "` (" + columns + ") VALUES " + values + ";")
        print("Result: {}".format(testDbCursor._result.message))

liveDbConn.close()
testDbConn.close()
Result is:
### table: role
Result: b"'Records: 16 Duplicates: 0 Warnings: 0"
### table: permission
Result: b'(Records: 222 Duplicates: 0 Warnings: 0'
What am I doing wrong? Thanks!
You have 2 main issues here:
You don't use conn.commit() (which would be either liveDbConn.commit() or testDbConn.commit() here). Changes to the database will not be reflected without committing those changes. Note that all changes need committing, but SELECT, for example, does not.
Your query is open to SQL injection. This is a serious problem.
Table names cannot be parameterized, so there's not much we can do about that, but you'll want to parameterize your values. I've made multiple corrections to the code in relation to type checking as well as parameterization.
for table in tablesForCopy:
    with liveDbConn.cursor() as liveCursor:
        liveCursor.execute("SELECT * FROM `%s`;" % (table))
        name_of_columns = [item[0] for item in liveCursor.description]
        insert_list = []
        for result in liveCursor.fetchall():
            data = []
            for item in result:
                if item is None:  # test identity against the None singleton
                    data.append(None)  # the driver renders None as SQL NULL
                elif isinstance(item, str):  # use isinstance to check type
                    data.append(item)
                elif isinstance(item, datetime.datetime):
                    data.append(item.strftime('%Y-%m-%d %H:%M:%S'))
                else:  # for numeric values
                    data.append(str(item))
            insert_list.append(data)
    columns = ', '.join('`{}`'.format(name) for name in name_of_columns)
    placeholders = ', '.join(['%s'] * len(name_of_columns))
    testDbCursor = testDbConn.cursor()
    testDbCursor.executemany(
        "INSERT INTO `{}`.`{}` ({}) VALUES ({})".format(
            TEST_DB_NAME, table, columns, placeholders),
        insert_list)
    testDbConn.commit()
From this GitHub thread, I notice that executemany does not work as expected in psycopg2; it instead sends each entry as a single query. You'll need to use execute_batch:
from psycopg2.extras import execute_batch

execute_batch(testDbCursor,
              "INSERT INTO `{}`.`{}` ({}) VALUES ({})".format(
                  TEST_DB_NAME, table, columns, placeholders),
              insert_list)
testDbConn.commit()
How to insert data into a table using Python pymysql
Find my solution below
import pymysql
import datetime

# Create a connection object
dbServerName = "127.0.0.1"
port = 8889
dbUser = "root"
dbPassword = ""
dbName = "blog_flask"
# charSet = "utf8mb4"
conn = pymysql.connect(host=dbServerName, user=dbUser, password=dbPassword,
                       db=dbName, port=port)

try:
    # Create a cursor object
    cursor = conn.cursor()
    # Insert rows into the MySQL table
    now = datetime.datetime.utcnow()
    my_datetime = now.strftime('%Y-%m-%d %H:%M:%S')
    cursor.execute('INSERT INTO posts (post_id, post_title, post_content, '
                   'filename, post_time) VALUES (%s,%s,%s,%s,%s)',
                   (5, 'title2', 'description2', 'filename2', my_datetime))
    conn.commit()
except Exception as e:
    print("Exception occurred: {}".format(e))
finally:
    conn.close()
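To verify that the row landed, a quick follow-up check (a sketch; run it inside the try block, before the finally clause closes the connection) could be:
cursor.execute('SELECT post_title, post_time FROM posts WHERE post_id = %s', (5,))
print(cursor.fetchone())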

How to read from a CSV file and store data in sqlite3 using Python

I have a Python class readCSVintoDB that reads from a CSV file and stores the data into an sqlite3 database.
Note: the CSV file includes many fields, so I only need 3 of them.
So far I am able to read the CSV file and store it into a DataFrame using pandas, but how do I store the DataFrame into the database?
Error displayed:
File "C:\Users\test\Documents\Python_Projects\readCSV_DB.py", line 15, in __init__
    self.importCSVintoDB()
File "C:\Users\test\Documents\Python_Projects\readCSV_DB.py", line 60, in importCSVintoDB
    INSERT INTO rduWeather VALUES (?,?,?,?)''', i)
sqlite3.IntegrityError: datatype mismatch
When I try to print i in the for loop, it displays the header name date.
readCSV_DB.py:
import sqlite3
import pandas as pd
import os

class readCSVintoDB():
    def __init__(self):
        '''
        self.csvobj = csvOBJ
        self.dbobj = dbOBJ
        '''
        self.importCSVintoDB()

    def importCSVintoDB(self):
        userInput = input("enter the path of the csv file: ")
        csvfile = userInput
        df = pd.read_csv(csvfile, sep=';')
        #print("dataFrame Headers is {0}".format(df.columns))  # display the headers
        dp = (df[['date','temperaturemin','temperaturemax']])
        print(dp)
        '''
        check if DB file exists;
        if not, create an empty db file
        '''
        if not (os.path.exists('./rduDB.db')):
            open('./rduDB.db', 'w').close()
        '''
        connect to the DB and get a connection cursor
        '''
        myConn = sqlite3.connect('./rduDB.db')
        dbCursor = myConn.cursor()
        '''
        Assuming I need to create a table of (Name, FamilyName, age, work)
        '''
        dbCreateTable = '''CREATE TABLE IF NOT EXISTS rduWeather
                           (id INTEGER PRIMARY KEY,
                            Date varchar(256),
                            TemperatureMin FLOAT,
                            TemperatureMax FLOAT)'''
        dbCursor.execute(dbCreateTable)
        myConn.commit()
        '''
        insert data into the database
        '''
        for i in dp:
            print(i)
            dbCursor.execute('''
                INSERT INTO rduWeather VALUES (?,?,?,?)''', i)
        #myInsert = dbCursor.execute('''insert into Info ('Name','FA','age','work')
        #                            VALUES('georges','hateh',23,'None')''')
        myConn.commit()
        mySelect = dbCursor.execute('''SELECT * from rduWeather WHERE (id = 10)''')
        print(list(mySelect))
        myConn.close()

test1 = readCSVintoDB()
If you want to write a single row (e.g. reg = (...)), try this function:
def write_datarow(conn, cols, reg):
    '''Create a new entry (reg) in the rduWeather table.
    input: conn (sqlite3 connection)
    input: cols (list)
        Table column names
    input: reg (tuple)
        Data to be written as a row
    '''
    sql = 'INSERT INTO rduWeather({}) VALUES({})'.format(
        ', '.join(cols), '?, ' * (len(cols) - 1) + '?')
    cur = conn.cursor()
    # Execute the SQL query
    cur.execute(sql, reg)
    # Confirm
    conn.commit()
    return
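For example, with the rduWeather table from the question (id is assigned automatically; the sample values here are made up):
cols = ['Date', 'TemperatureMin', 'TemperatureMax']
write_datarow(myConn, cols, ('2017-09-01', 55.4, 71.6))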
But if you have multiple rows, reg = [(...), ..., (...)], then use:
def write_datarow(conn, cols, reg):
    '''Create new entries (reg) in the rduWeather table.
    input: conn (sqlite3 connection)
    input: cols (list)
        Table column names
    input: reg (list of tuples)
        List of rows to be written
    '''
    sql = 'INSERT INTO rduWeather({}) VALUES({})'.format(
        ', '.join(cols), '?, ' * (len(cols) - 1) + '?')
    cur = conn.cursor()
    # Execute the SQL query
    cur.executemany(sql, reg)
    # Confirm
    conn.commit()
    return
After your edit I now see the problem: you commit the SQL query outside the for loop.
Code this:
for i in dp:
    dbCursor.execute('''INSERT INTO rduWeather VALUES (?,?,?,?)''', i)
    myConn.commit()
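Note also that for i in dp iterates over the DataFrame's column labels, which is why print(i) showed the header name date and why the insert raised a datatype mismatch. A minimal sketch of iterating rows instead, reusing the question's dp and cursor (assuming the lowercase column names from the CSV):
for row in dp.itertuples(index=False):
    # row is a namedtuple of one CSV record; cast numpy scalars for sqlite3
    dbCursor.execute(
        'INSERT INTO rduWeather (Date, TemperatureMin, TemperatureMax) VALUES (?,?,?)',
        (str(row.date), float(row.temperaturemin), float(row.temperaturemax)))
myConn.commit()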

Decode a string: Python

I built a string with a tuple like this:
t = tuple(data)
querystring = "INSERT INTO %s VALUES %s " % (table, t)
When I print the string the result is:
INSERT INTO AGENT VALUES ('Bock', 'Fran\\xc3\\xa7ois Bock', 'Individual', 'fb****#mail.com')
But I want something like this:
INSERT INTO AGENT VALUES ('Bock', 'François Bock', 'Individual', 'fb****#mail.com')
Is it possible to decode the string?
I'm using Python 2.x, but I can use Python 3.x.
I tried this:
import ftfy
querystring = u"INSERT INTO %s VALUES %s " % (table, t)
print(ftfy.fix_text(querystring))
But it does not work.
I think your issue is superficial and related to how print displays lists versus individual list items. The printed output of a list shows each item's ASCII repr, even if the items within the list are correctly encoded in UTF-8. First, using the chardet library:
from chardet.universaldetector import UniversalDetector

a = ['Bock', 'François Bock']

detector = UniversalDetector()
detector.feed(str(a))
detector.close()
print "Encoding for str(list): ", detector.result

detector = UniversalDetector()
detector.feed(a[1])
detector.close()
print "Encoding for list[1]: ", detector.result

print "The whole list: ", a
print "Item in list: ", a[1]
Aside from the off-putting printouts, it's still possible to write to the database with the correct encoding using a parameterized query. The last part of the code below writes to a file to confirm that the data encoding is preserved:
import sqlite3

conn = sqlite3.connect(":memory:")
conn.text_factory = str
c = conn.cursor()
c.execute("CREATE TABLE IF NOT EXISTS testing(test1 TEXT, test2 TEXT)")
conn.commit()

my_tuple = 'Bock', 'François Bock'
table = 'testing'
placeholders = ', '.join('?' for item in my_tuple)
query = "INSERT INTO {} VALUES ({})".format(table, placeholders)
c.execute(query, my_tuple)

c.execute("SELECT * FROM testing")
all_data = c.fetchone()

# Check the printouts
print all_data
print all_data[1]

# For good measure, write them to a file
with open('check_output.txt', 'w') as outfile:
    outfile.write(', '.join(item for item in all_data))
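Since the asker can use Python 3, it's worth noting that the problem largely disappears there: str is already Unicode, so a parameterized insert round-trips cleanly. A minimal sketch:
import sqlite3

conn = sqlite3.connect(":memory:")
c = conn.cursor()
c.execute("CREATE TABLE testing(test1 TEXT, test2 TEXT)")
c.execute("INSERT INTO testing VALUES (?, ?)", ('Bock', 'François Bock'))
print(c.execute("SELECT * FROM testing").fetchone()[1])  # François Bock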
