Store Gtk.TextBuffer in SQL database. Encoding troubles - Python

I'm working on a note-taking app using Python 2/GTK 3/Glade.
The notes are stored in a MySQL database and displayed in a TextView widget.
I can load, store, and display plain text fine. However, I want the ability to add images to the note page and store them in the database, so the data has to be serialized, and I'm having some trouble figuring out how to encode/decode the serialized data going in and out of the database. I'm getting unicode start-byte errors. If I were working with files I could just open the file in binary mode, but I'm storing the data as a string in a database. I've tried encoding/decoding as UTF-8 and ASCII using bytes() and string.encode() [see the sample code below] and a few other ways, but none work.
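(As I understand it, the serialized buffer is arbitrary binary data rather than UTF-8 text, so any stray byte such as 0xb4 makes the decode blow up. A minimal illustration with made-up bytes, not my actual buffer:)
data = b'plain text then image bytes \xb4'  # stand-in for serialize() output
data.decode('utf-8')  # raises UnicodeDecodeError: ... invalid start byte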
I am using this function to add the image to the textview buffer:
def _AddImagetoNode(self, oWidget):
    filenm = None
    seliter = self.GetTreeSelection(self.treeview)
    filenm = self.FileOpenDiag("Select an Image To Insert.", "Image", "*.png,*.jpg,*.bmp")
    if filenm == None:
        return()
    #filenm = "/home/drift/Pictures/a.png"
    buf = self.dataview.get_buffer()
    pixbuf = GdkPixbuf.Pixbuf.new_from_file(filenm)
    #pixbuf.scale_simple(dest_width, dest_height, gtk.gdk.INTERP_BILINEAR)
    buf.insert_pixbuf(buf.get_end_iter(), pixbuf)
    self.dataview.set_buffer(buf)
    self.dataview.show()
This is the function that stores the textview buffer:
def SaveDataView(self):
    global DataViewNode
    global DataViewIsImage
    if len(self.GetProjectName()) == 0:
        return()
    buf = self.dataview.get_buffer()
    format = buf.register_serialize_tagset()
    data2 = buf.serialize(buf, format, buf.get_start_iter(), buf.get_end_iter())
    #convert bytes(data) to string
    data = data2.decode(encoding='UTF-8')  #<< I think my problem is here
    print("save b4 decode >>>>>>:%s" % data2)
    sql = "UPDATE " + self.GetProjectName() + " SET tDataPath=%s WHERE tNodeID=%s"
    val = (data, DataViewNode)
    self.cursor.execute(sql, val)
    self.mariadb_connection.commit()
This is the function that loads the Buffer:
def UpdateDataView(self, nodeid):
    global DataViewNode
    #global DataViewIsFile
    DataViewNode = nodeid
    if self.GetProjectName() != None and DataViewNode != None:
        self.dataview.set_sensitive(True)
    else:
        self.dataview.set_sensitive(False)
        self.dataview.show()
        return()
    buf = self.dataview.get_buffer()
    buf.set_text('')
    enc = self.DbGetNodeData(nodeid)
    #convert string(enc) to bytes
    data = enc.encode(encoding='UTF-8')  #<<< I think my problem is here
    print("update after decode >>>>>>>>>: %s" % data)
    ########### load
    format = buf.register_deserialize_tagset()
    buf.deserialize(buf, format, buf.get_end_iter(), data)
    #buf.set_text(enc)
    self.dataview.set_buffer(buf)
    self.dataview.show()
I'm using mysql.connector to connect to a MariaDB database.
This is the connection string:
self.mariadb_connection = mariadb.connect(user='box', password='box', host='localhost', database='Boxer',charset='utf8')
This is the error I'm getting:
Traceback (most recent call last):
  File "Boxer.py", line 402, in _TreeSelectionChanged
    self.SaveDataView()
  File "Boxer.py", line 334, in SaveDataView
    data = data2.decode(encoding='UTF-8')
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb4 in position 174: invalid start byte

Traceback (most recent call last):
  File "Boxer.py", line 398, in _DataViewLostFocus
    self.SaveDataView()
  File "Boxer.py", line 334, in SaveDataView
    data = data2.decode(encoding='UTF-8')
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb4 in position 174: invalid start byte
With this code I can add/edit plain text in the TextView and successfully save/load it, but as soon as I add an image I get the encoding errors. Any help would be appreciated.

Here is a more complete example:
def example(self):
    #retrieve info from first textview
    buf = self.builder.get_object('textbuffer1')
    format = buf.register_serialize_tagset()
    data = buf.serialize(buf, format, buf.get_start_iter(), buf.get_end_iter())
    #run db update to prove it can be inserted into a database
    db = psycopg2.connect(database='silrep_restore3', host='192.168.0.101',
                          user='postgres', password='true',
                          port='5432')
    c = db.cursor()
    c.execute("UPDATE products SET byt = %s WHERE id = 1", (psycopg2.Binary(data),))
    #append info to second textview as a proof of concept
    c.execute("SELECT byt FROM products WHERE id = 1")
    data = c.fetchone()[0]
    buf = self.builder.get_object('textbuffer2')
    format = buf.register_deserialize_tagset()
    buf.deserialize(buf, format, buf.get_end_iter(), data)
Since you are using MySQL, I recommend reading this article about inserting and retrieving data the way you are.
For my example I used a bytea column. In MySQL this may be a BLOB or BINARY type.
P.S. Sorry for not having a complete MySQL example in my answer. I would have posted a comment, but comments are pathetic for proper formatting.
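That said, here is a rough sketch of what the same round trip might look like with mysql.connector and a LONGBLOB column; the table and column names (notes, body, id) are made up for illustration, not taken from the question:
import mysql.connector

# Assumed schema: CREATE TABLE notes (id INT PRIMARY KEY, body LONGBLOB)
conn = mysql.connector.connect(user='box', password='box',
                               host='localhost', database='Boxer')
cur = conn.cursor()

def save_buffer(buf, note_id):
    fmt = buf.register_serialize_tagset()
    data = buf.serialize(buf, fmt, buf.get_start_iter(), buf.get_end_iter())
    # data is raw bytes; pass it through as a query parameter so the driver
    # ships it as binary instead of trying to decode it as text
    cur.execute("UPDATE notes SET body=%s WHERE id=%s", (data, note_id))
    conn.commit()

def load_buffer(buf, note_id):
    cur.execute("SELECT body FROM notes WHERE id=%s", (note_id,))
    blob = cur.fetchone()[0]
    fmt = buf.register_deserialize_tagset()
    buf.deserialize(buf, fmt, buf.get_end_iter(), bytes(blob))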

Got it working; thanks to theGtknerd, your answer was the key. For anyone else having trouble with this: I ended up using a BLOB type for the MySQL column I'm working with. I tried BINARY (it returned malformed serialized data) and VARBINARY (it wouldn't even allow me to create the table), so I ended up using the LONGBLOB type.
Here is the working code for anyone who needs it.
def UpdateDataView(self, nodeid):
    global DataViewNode
    #global DataViewIsFile
    DataViewNode = nodeid
    if self.GetProjectName() != None and DataViewNode != None:
        self.dataview.set_sensitive(True)
    else:
        self.dataview.set_sensitive(False)
        self.dataview.show()
        return()
    buf = self.dataview.get_buffer()
    buf.set_text('')
    data = self.DbGetNodeData(nodeid)
    if data == '':
        return()
    format = buf.register_deserialize_tagset()
    buf.deserialize(buf, format, buf.get_end_iter(), data)
    self.dataview.set_buffer(buf)
    self.dataview.show()
def SaveDataView(self):
    global DataViewNode
    global DataViewIsImage
    if len(self.GetProjectName()) == 0:
        return()
    buf = self.dataview.get_buffer()
    enc = buf.get_text(buf.get_start_iter(), buf.get_end_iter(), False)
    self.AddData2Db(DataViewNode, enc)
    format = buf.register_serialize_tagset()
    data = buf.serialize(buf, format, buf.get_start_iter(), buf.get_end_iter())
    sql = "UPDATE " + self.GetProjectName() + " SET tDataPath=%s WHERE tNodeID=%s"
    val = (data, DataViewNode)
    self.cursor.execute(sql, val)
    self.mariadb_connection.commit()
And I'm using this to create the table:
sql = "CREATE TABLE %s (tParentNodeID TEXT,tNodeTxt TEXT,tNodeID TEXT,tDataPath LONGBLOB)" %pName
self.cursor.execute(sql)
self.mariadb_connection.commit()
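A quick way to sanity-check the round trip without the database (a standalone sketch, assuming the usual gi/Gtk imports): serialize a buffer and immediately deserialize into a fresh one; if the bytes survive intact, the DB layer is the only remaining suspect.
fmt_out = buf.register_serialize_tagset()
data = buf.serialize(buf, fmt_out, buf.get_start_iter(), buf.get_end_iter())
buf2 = Gtk.TextBuffer()
fmt_in = buf2.register_deserialize_tagset()
buf2.deserialize(buf2, fmt_in, buf2.get_end_iter(), data)  # no decode/encode step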

Related

Data not decrypting correctly - data from csv

My input ciphertext from my CSV doesn't seem to be decrypting properly. It decrypts as a random string of bytes. I've checked my key and IV and they are exactly the same as at encryption; it's just the decryption that doesn't seem to work properly.
I wondered if the way I have put my encrypted data into my CSV, or retrieved it, is the issue? Maybe it alters the bytes? If not, I'm stumped. I've been on this issue for days, help!
The program works like this:
User inputs credentials -- encrypt -- generate unique ID and hash values and store in db -- store ciphertext in CSV // user inputs ID -- match in db and fetch the encryption key stored with the ID -- fetch the matching ID's ciphertext from the CSV, put it into a pandas dataframe and decrypt it with the key
def decoder():
    from Crypto.Cipher import AES
    import hashlib
    from secrets import token_bytes
    cursor.execute(
        '''
        Select enc_key FROM Login where ID = (?);
        ''',
        (L_ID_entry.get(), ))
    row = cursor.fetchone()
    if row is not None:
        keys = row[0]

        #design padding function for encryption
        def padded_text(data_in):
            while len(data_in) % 16 != 0:
                data_in = data_in + b"0"
            return data_in

        #calling stored key from main file and reverting back to bytes
        key_original = keys
        print(key_original)
        print("Key original above")
        mode = AES.MODE_CBC
        #model
        cipher = AES.new(key_original, mode, IV2.encode('utf8'))
        print(IV2)
        print("IV2 above")
        #padding data
        p4 = padded_text(df1.tobytes())
        p5 = padded_text(df2.tobytes())
        p6 = padded_text(df3.tobytes())
        #decrypting data
        d_fname = cipher.decrypt(p4)
        d_sname = cipher.decrypt(p5)
        d_email = cipher.decrypt(p6)
        print(d_fname)
        print(d_sname)
        print(d_email)

#connecting to db
try:
    conn = sqlite3.connect('login_details.db')
    cursor = conn.cursor()
    print("Connected to SQLite")
except sqlite3.Error as error:
    print("Failure, error: ", error)
finally:
    #downloading txt from dropbox and converting to dataframe to operate on
    import New_user
    import ast
    _, res = client.files_download("/user_details/enc_logins.csv")
    with io.BytesIO(res.content) as csvfile:
        with open("enc_logins.csv", 'rb'):
            df = pd.read_csv(csvfile, names=['ID', 'Fname', 'Sname', 'Email'], encoding='utf-8')
            newdf = df[df['ID'] == L_ID_entry.get()]
            print(newdf)
            df1 = newdf['Fname'].values
            df2 = newdf['Sname'].values
            df3 = newdf['Email'].values
            print(df1)
            print(df2)
            print(df3)
            decoder()
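One thing worth checking (a guess, not a verified diagnosis of the code above): raw AES ciphertext is arbitrary bytes, and writing it through csv/pandas as text can silently mangle it. A minimal sketch of a text-safe round trip using base64, assuming PyCryptodome:
import base64
from Crypto.Cipher import AES
from Crypto.Random import get_random_bytes

key = get_random_bytes(16)
iv = get_random_bytes(16)
ct = AES.new(key, AES.MODE_CBC, iv).encrypt(b"sixteen byte msg")

cell = base64.b64encode(ct).decode('ascii')  # safe to store in a CSV cell
ct_back = base64.b64decode(cell)             # identical bytes after reading
assert ct_back == ct
print(AES.new(key, AES.MODE_CBC, iv).decrypt(ct_back))  # b'sixteen byte msg'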

How to display a BLOB object (image) from sqlite3 with Python

I saved an image as a BLOB in a sqlite3 database column profile. I call the function insertBLOB with the relevant info:
sqliteConnection = sqlite3.connect('image_try.db')
cursor = sqliteConnection.cursor()
cursor.execute("""CREATE TABLE IF NOT EXISTS images (
    id INTEGER PRIMARY KEY,
    fullname TEXT,
    username TEXT,
    profile BLOB)""")

def convertToBinaryData(filename):
    with open(filename, 'rb') as file:
        blobData = file.read()
    return blobData

def insertBLOB(name, username, photo):
    sqliteConnection = sqlite3.connect('image_try.db')
    sqliteConnection.text_factory = str
    cursor = sqliteConnection.cursor()
    sqlite_insert_blob_query = """ INSERT INTO images
        (fullname, username, profile) VALUES (?, ?, ?)"""
    empPhoto = convertToBinaryData(photo)
    data_tuple = (name, username, empPhoto)
    cursor.execute(sqlite_insert_blob_query, data_tuple)
    sqliteConnection.commit()
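Called, for example, like this (the name, username and file path are made up):
insertBLOB('Jane Doe', 'jdoe', 'photo.png')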
I tried to access the image file (so I could display it in a Label) like this, by calling the function readBlobData:
def writeTofile(data):
    # Convert binary data to proper format and write it on Hard Disk
    this = open(data, 'rb')
    this.open(io.BytesIO(base64.b64decode(data)))
    return this

def readBlobData(empId):
    try:
        sqliteConnection = sqlite3.connect('image_try.db')
        sqliteConnection.text_factory = str
        cursor = sqliteConnection.cursor()
        sql_fetch_blob_query = """SELECT * from images where id = ?"""
        cursor.execute(sql_fetch_blob_query, (empId,))
        record = cursor.fetchall()
        profile = record[0][3]  #Blob object
        profile = writeTofile(profile)
        image = ImageTk.PhotoImage(profile)
        image_label = Label(root, image=image)
        image_label.photo = image
        image_label.pack()
        cursor.close()
When I call the function readBlobData I get this error:
Traceback (most recent call last):
  File "C:/Users/hilab/PycharmProjects/dafyProject/addimage.py", line 90, in <module>
    readBlobData(1)
  File "C:/Users/hilab/PycharmProjects/dafyProject/addimage.py", line 67, in readBlobData
    profile = writeTofile(profile)
  File "C:/Users/hilab/PycharmProjects/dafyProject/addimage.py", line 51, in writeTofile
    this = open(data, 'rb')
TypeError: file() argument 1 must be encoded string without NULL bytes, not str
Do you have any idea what the problem is, and how I can fix it? How can I access the BLOB object from the SQLite database and display it?
The traceback is telling us that something is going wrong in the writeTofile function, specifically when we try to open a file:

  File "C:/Users/hilab/PycharmProjects/dafyProject/addimage.py", line 67, in readBlobData
    profile = writeTofile(profile)
  File "C:/Users/hilab/PycharmProjects/dafyProject/addimage.py", line 51, in writeTofile
    this = open(data, 'rb')
TypeError: file() argument 1 must be encoded string without NULL bytes, not str
The value that we are passing to the function is the binary image data read from the database:

profile = record[0][3]

In the function, we are trying to use this binary data as the name of the file to read from, which is what triggers the TypeError:

def writeTofile(data):
    # Convert binary data to proper format and write it on Hard Disk
    this = open(data, 'rb')
    this.open(io.BytesIO(base64.b64decode(data)))
    return this
tkinter.PhotoImage expects the path to a file, according to its documentation, so we have to create a file from the image bytes:

def writeTofile(data):
    # Write the image bytes to the hard disk
    # (ideally with a suitable name and extension)
    filename = 'myfile.img'
    with open(filename, 'wb') as f:
        f.write(data)
    return filename
And in readBlobData:
image = ImageTk.PhotoImage(file=profile)
And then all should be well.
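As an aside, if Pillow is available, the temporary file can be skipped entirely. A sketch (this assumes ImageTk is Pillow's, which the question doesn't state):
import io
from PIL import Image, ImageTk

def photo_from_blob(blob_bytes):
    # decode the PNG/JPEG bytes straight from memory, no file on disk
    pil_img = Image.open(io.BytesIO(blob_bytes))
    return ImageTk.PhotoImage(pil_img)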

psycopg2.DataError: invalid byte sequence

Here's what I am trying to do: import a .csv file into a Postgres database. I am using psycopg2 and cur.copy_expert to do this. However, I'm hit with the error below. What should I do to overcome it?
Thanks in advance.
Error:
cur.copy_expert(sql=copy_sql, file=myfile)
psycopg2.DataError: invalid byte sequence for encoding "UTF8": 0xdf 0x65
CONTEXT: COPY agents, line 1117
My code is below:
# open file from Amazon S3 bucket
opener = urllib.URLopener()
myurl = ("Amazon S3 bucket URL" + srcbucketid + "/" + file_name)
myfile = opener.open(myurl)
copy_sql = """ COPY agents (
    UniqueId,
    Code,
    CountryCode,
    DefaultCommissionRate,
    ReportingName)
    FROM stdin WITH CSV HEADER DELIMITER as ',' QUOTE '\b' NULL AS ''"""
cur.copy_expert(sql=copy_sql, file=myfile)
My database encoding is "UTF8". I cannot change it, as it's a production database for now.
copy_source = {'Bucket': srcbucketid, 'Key': file_name}
client.copy(copy_source, srcbucketid, 'tmp/{}'.format(file_name))
key = ('s3://' + srcbucketid + 'tmp/' + file_name)
print(key)
BLOCKSIZE = 1024 * 1024
with s3.open('s3://' + srcbucketid + '/' + file_name, 'rb') as inf:
    with s3.open('s3://' + srcbucketid + '/tmp/' + file_name, 'wb') as ouf:
        while True:
            data = inf.read(BLOCKSIZE)
            if not data:
                break
            converted = data.decode('latin1').encode('utf-8')
            ouf.write(converted)
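The same trick works on a plain local file if S3 isn't involved; a sketch with made-up file names: re-encode the CSV from latin1 to clean UTF-8 before handing it to copy_expert.
with open('agents.csv', 'rb') as inf, open('agents_utf8.csv', 'wb') as ouf:
    for chunk in iter(lambda: inf.read(1024 * 1024), b''):
        # latin1 maps every byte to a code point, so decoding never fails;
        # the re-encoded output is valid UTF-8 that Postgres will accept
        ouf.write(chunk.decode('latin1').encode('utf-8'))

with open('agents_utf8.csv') as f:
    cur.copy_expert(sql=copy_sql, file=f)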

Getting strings from database with encoding problems in outfile

I'm trying to get some tweet data from a MySQL database.
I got tons of encoding errors while I was developing this code. The loop below is the only way I got the code running, and it produces an outfile full of \uxx characters all around, as you can see here:
[{..., "lang_tweet": "es", "text_tweet": "Recuerdo un d\u00eda de, *llamada a la 1:45*, \"Micho, me va a dar algo, estoy temblando, me tome un moster y un balium... Que me muero.!!\",...},...]
I've been around and around trying different solutions, but the thing is that I got really confused by the abstraction of coding and encoding.
What can I do to fix this?
Or maybe it would be easier to just grab the dirty JSON and 'parse' it, decoding those characters manually.
If you want, take a look at the code I'm using to query the db:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pymysql
import collections
import json

conn = pymysql.connect(host='localhost', user='sut', passwd='r', db='tweetsjun2016')
cur = conn.cursor()
cur.execute("""
    SELECT * FROM 20160607_tweets
    WHERE 20160607_tweets.creation_date >= '2016-06-07 10:51'
    AND 20160607_tweets.creation_date <= '2016-06-07 11:51'
    AND 20160607_tweets.lang_tweet = "es"
    AND 20160607_tweets.has_keyword = 1
    AND 20160607_tweets.rt = 0
    LIMIT 20
    """)
objects_list = []
for row in cur:
    d = collections.OrderedDict()
    d['download_date'] = row[1]
    d['creation_date'] = row[2]
    d['id_user'] = row[5]
    d['favorited'] = row[7]
    d['lang_tweet'] = row[10]
    d['text_tweet'] = row[11].decode('latin1')
    d['rt'] = row[12]
    d['rt_count'] = row[13]
    d['has_keyword'] = row[19]
    objects_list.append(d)
    # print(row[11].decode('latin1')) <- looks perfect, it prints with accents and fine
j = json.dumps(objects_list, default=date_handler, encoding='latin1')
objects_file = "test23" + "_dicts"
f = open(objects_file, 'w')
print >> f, j
cur.close()
conn.close()
If I delete the .decode('latin1') calls everywhere they appear, I get this error:
Traceback (most recent call last):
  File "test.py", line 51, in <module>
    j = json.dumps(objects_list, default=date_handler)
  File "C:\Users\Vichoko\Anaconda2\lib\json\__init__.py", line 251, in dumps
    sort_keys=sort_keys, **kw).encode(obj)
  File "C:\Users\Vichoko\Anaconda2\lib\json\encoder.py", line 207, in encode
    chunks = self.iterencode(o, _one_shot=True)
  File "C:\Users\Vichoko\Anaconda2\lib\json\encoder.py", line 270, in iterencode
    return _iterencode(o, 0)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xed in position 13: invalid continuation byte
I really can't figure out in what form the string is coming from the db to my script.
Thanks for reading; any idea would be appreciated.
Edit 1:
Here you can see how the JSON files are being exported with the encoding error in the text_tweet key-value:
https://github.com/Vichoko/real-time-twit/blob/master/auto_labeling/json/tweets_sismos/tweetsago20160.json
Try passing the charset keyword argument to connect as shown in the example on pymysql's github.
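Something along these lines (the charset value is an assumption; utf8mb4 is MySQL's spelling of full UTF-8):
conn = pymysql.connect(host='localhost', user='sut', passwd='r',
                       db='tweetsjun2016', charset='utf8mb4')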
When using json_encode (this is PHP, but the same idea applies), add this extra parameter:
$t = json_encode($s, JSON_UNESCAPED_UNICODE);
That will give you í instead of \u00ed.
(Don't use regexps, don't use decode functions, etc., they will only dig your hole deeper.)
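For the Python code in the question, the analogous switch is ensure_ascii=False. A sketch (in Python 2 the result can be a unicode object, hence the codecs file):
import codecs
import json

with codecs.open('test23_dicts', 'w', encoding='utf-8') as f:
    json.dump(objects_list, f, default=date_handler, ensure_ascii=False)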

Upload and parse csv file with "universal newline" in python on Google App Engine

I'm uploading a csv/tsv file from a form in GAE, and I try to parse the file with the Python csv module.
As described here, uploaded files in GAE are strings.
So I treat my uploaded string as a file-like object:
file = self.request.get('catalog')
catalog = csv.reader(StringIO.StringIO(file), dialect=csv.excel_tab)
But the newlines in my files are not necessarily '\n' (thanks to Excel...), which generates an error:
Error: new-line character seen in unquoted field - do you need to open the file in universal-newline mode?
Does anyone know how to use StringIO.StringIO to treat strings like files opened in universal-newline mode?
How about:
file = self.request.get('catalog')
file = '\n'.join(file.splitlines())
catalog = csv.reader(StringIO.StringIO(file), dialect=csv.excel_tab)
or, as pointed out in the comments, csv.reader() supports input from a list, so:
file = self.request.get('catalog')
catalog = csv.reader(file.splitlines(), dialect=csv.excel_tab)
or, if in the future request.get supports read modes:
file = self.request.get('catalog', 'rU')
catalog = csv.reader(StringIO.StringIO(file), dialect=csv.excel_tab)
The solution described here should work. By defining an iterator class as follows, which loads the blob 1MB at a time, splits the lines using .splitlines() and then feeds lines to the CSV reader one at a time, the newlines can be handled without having to load the whole file into memory.
class BlobIterator:
    """Because the python csv module doesn't like strange newline chars and
    the google blob reader cannot be told to open in universal mode, then
    we need to read blocks of the blob and 'fix' the newlines as we go"""

    def __init__(self, blob_reader):
        self.blob_reader = blob_reader
        self.last_line = ""
        self.line_num = 0
        self.lines = []
        self.buffer = None

    def __iter__(self):
        return self

    def next(self):
        if not self.buffer or len(self.lines) == self.line_num + 1:
            self.buffer = self.blob_reader.read(1048576)  # 1MB buffer
            self.lines = self.buffer.splitlines()
            self.line_num = 0
            # Handle special case where our block just happens to end on a new line
            if self.buffer[-1:] == "\n" or self.buffer[-1:] == "\r":
                self.lines.append("")
        if not self.buffer:
            raise StopIteration
        if self.line_num == 0 and len(self.last_line) > 0:
            result = self.last_line + self.lines[self.line_num] + "\n"
        else:
            result = self.lines[self.line_num] + "\n"
        self.last_line = self.lines[self.line_num + 1]
        self.line_num += 1
        return result
Then call this like so:
blob_reader = blobstore.BlobReader(blob_key)
blob_iterator = BlobIterator(blob_reader)
reader = csv.reader(blob_iterator)
