Why is Pickle not serializing my array of classes?

I'm trying to serialize a large number of custom classes to disk using python's pickle. However, the classes aren't serializing properly.
Having looked at the docs, my understanding is that what I'm trying to do ought to work, but perhaps it isn't simply because I'm misunderstanding something.
My classes are defined at the top level of the module, and no PicklingError exception is raised when trying to pickle.
Here is my sample code. Uncomment Save() to serialize; uncomment Load() to load. When loading, the Synonyms array of Term isn't being populated, but the Main object of Term is being deserialized. You can see this by inspecting the "loadedTerms" object being returned from the Load() function.
What am I doing wrong? Thanks.
import pickle

class Entry:
    Text = ""

    def __init__(self, text):
        self.Text = text

class Term:
    Main = None
    Synonyms = []

def Save():
    term = Term()
    term.Main = Entry("Dog")
    term.Synonyms.append(Entry("Canine"))
    term.Synonyms.append(Entry("Pursue"))
    term.Synonyms.append(Entry("Follow"))
    term.Synonyms.append(Entry("Plague"))

    terms = []
    terms.append(term)

    with open('output.pickle', 'wb') as p:
        pickle.dump(terms, p)

def Load():
    loadedTerms = []
    with open('output.pickle', 'rb') as p:
        loadedTerms = pickle.load(p)
    return loadedTerms

#Save()
#terms = Load()

Pickle only saves the instance attributes of an object, but Synonyms is a list defined at class level. You should create the list in an __init__ method:
import pickle

class Entry:
    def __init__(self, text):
        self.Text = text

class Term:
    def __init__(self):
        self.Main = None
        self.Synonyms = []

def Save():
    term = Term()
    term.Main = Entry("Dog")
    term.Synonyms.append(Entry("Canine"))
    term.Synonyms.append(Entry("Pursue"))
    term.Synonyms.append(Entry("Follow"))
    term.Synonyms.append(Entry("Plague"))

    terms = []
    terms.append(term)

    with open('output.pickle', 'wb') as p:
        pickle.dump(terms, p)

def Load():
    with open('output.pickle', 'rb') as p:
        loadedTerms = pickle.load(p)
    return loadedTerms
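
A quick way to see why the original version loses the synonyms is to inspect the instance's __dict__, which is what pickle serializes by default: with the class-level Synonyms list, the appended entries never end up there. A minimal check against the corrected classes above (my addition, not part of the original answer):

term = Term()
term.Main = Entry("Dog")
term.Synonyms.append(Entry("Canine"))

# Only instance attributes live in __dict__, and that is what pickle stores by default.
print(term.__dict__)            # {'Main': <Entry object ...>, 'Synonyms': [<Entry object ...>]}

# Round-trip through pickle in memory to confirm the synonyms survive.
restored = pickle.loads(pickle.dumps([term]))[0]
print(len(restored.Synonyms))   # 1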

Related

access class object data sent from client to server over socket [duplicate]


Python class recording attributes without specifying self?

I have a question regarding a Python class I use in Blender. Basically, I wonder how the class works because some attributes are recorded without me specifically writing self.value = something. Here's the code:
class DialogOperator(bpy.types.Operator):
    bl_idname = "object.dialog_operator"
    bl_label = "Save/Load animation"

    saving = bpy.props.BoolProperty(name="Save ? Else load.")
    path_to_anim = bpy.props.StringProperty(name="Path to folder")
    anim_name = bpy.props.StringProperty(name="Animation name:")
    # path_to_anim += "/home/mehdi/Blender/Scripts/"

    def execute(self, context):
        # print('This is execute with: Saving: {} Name:{}'.format(self.saving, self.path_to_anim))
        if self.saving:
            self.launch_save()
            message = 'Animation {} saved at {}'.format(self.anim_name, self.path_to_anim)
        else:
            self.launch_load()
            message = 'Animation {} loaded'.format(self.anim_name)
        self.report({'INFO'}, message)
        return {'FINISHED'}

    def invoke(self, context, event):
        wm = context.window_manager
        return wm.invoke_props_dialog(self)

    def launch_load(self):
        full_path = self.path_to_anim + self.anim_name
        target_armature = Humanoid(bpy.data.objects['Armature'])
        load_all(full_path, target_armature, 'LastLoaded')

    def launch_save(self):
        full_path = self.path_to_anim + self.anim_name
        source_armature = Humanoid(bpy.data.objects['Armature'])
        curves = source_armature.get_curves()
        save_all(curves, source_armature, full_path)
Now, how come saving, path_to_anim and anim_name are considered attributes (I'm able to access them in execute() and launch()) even though I did not write self.saving = saving?
Thanks!
This is because saving, path_to_anim and anim_name are class attributes. They are defined on the class rather than on a particular instance, so they are shared among the instances. Here is a link for further explanation: class-instance-attributes-python
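
A plain-Python illustration of the distinction the answer is drawing, independent of Blender (the toy class name is mine):

class Config:
    retries = 3                 # class attribute: stored on the class object itself

    def __init__(self):
        self.timeout = 10       # instance attribute: stored on each instance

a = Config()
b = Config()
print(a.retries, b.retries)     # 3 3  -- both instances read the shared class attribute
Config.retries = 5
print(a.retries, b.retries)     # 5 5  -- a change on the class is visible through every instance

a.retries = 99                  # assigning through an instance creates an instance attribute that shadows the class one
print(a.retries, b.retries)     # 99 5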

Class and another argument passed as parameters to a method that is defined to take one parameter

I have the code below:
from dejavu import Dejavu
from dejavu.recognize import FileRecognizer, MicrophoneRecognizer

djv = Dejavu(config)

# Recognize audio from a file
song = djv.recognize(FileRecognizer, "mp3/Sean-Fournier--Falling-For-You.mp3")
Below is from dejavu/recognize.py:
class BaseRecognizer(object):
    def __init__(self, dejavu):
        self.dejavu = dejavu
        self.Fs = fingerprint.DEFAULT_FS

    def _recognize(self, *data):
        matches = []
        for d in data:
            matches.extend(self.dejavu.find_matches(d, Fs=self.Fs))
        return self.dejavu.align_matches(matches)

    def recognize(self):
        pass  # base class does nothing

class FileRecognizer(BaseRecognizer):
    def __init__(self, dejavu):
        super(FileRecognizer, self).__init__(dejavu)

    def recognize_file(self, filename):
        frames, self.Fs, file_hash = decoder.read(filename, self.dejavu.limit)
        t = time.time()
        match = self._recognize(*frames)
        t = time.time() - t
        if match:
            match['match_time'] = t
        return match

    def recognize(self, filename):
        return self.recognize_file(filename)
I don't understand how recognize can take the FileRecognizer class and also a filename, when recognize is defined as

def recognize(self, filename):

and takes only a filename as a parameter. Can someone explain how this works and what it actually does?
Thanks to @daniel, here is my follow-up question:
def recognize(self, recognizer, *options, **kwoptions):
    r = recognizer(self)
    return r.recognize(*options, **kwoptions)
This is recognize under the Dejavu class, and I am thinking the "self" in the line "r = recognizer(self)" is what makes recognize (under the Dejavu class) able to receive the FileRecognizer class as a parameter, right?
This is from dejavu (audio fingerprinting software); the GitHub link is below:
https://github.com/worldveil/dejavu
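
For what it's worth, the pattern in question is simply "pass a class as an argument, instantiate it, then forward the remaining arguments to the instance's own recognize method". A stripped-down sketch of the same idea with made-up class names (not dejavu's actual code):

class Engine(object):
    def recognize(self, recognizer_cls, *options, **kwoptions):
        # recognizer_cls is a class object; calling it builds an instance tied to this engine
        r = recognizer_cls(self)
        # the remaining positional/keyword arguments are forwarded to that instance's recognize()
        return r.recognize(*options, **kwoptions)

class ShoutRecognizer(object):
    def __init__(self, engine):
        self.engine = engine

    def recognize(self, text):
        return text.upper()

engine = Engine()
print(engine.recognize(ShoutRecognizer, "hello"))   # HELLO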

How to avoid repetitive code using OOP?

I'm writing a program to extract some data from txt files with regular expressions.
I'm new to OOP and want to avoid repetitive code. I want to retrieve about 15 pieces of data from each txt file, so I wrote a class definition for each one. The patterns to match can come in several formats, so I'll need to try several regex patterns. For now, I only implement one regex pattern per piece of data, but in the future I'll need to try more in order to match the specific format used in each txt file; I plan to use a list of patterns for each piece of data.
I've only written 3 classes so far, but I've realized that I'm repeating too much code, so I believe I'm doing something wrong.
import re
import os
import glob
import csv

class PropertyNumber(object):
    pattern_text = "(?<=FINCA Nº: )\w{3,6}"
    regex_pattern = re.compile(pattern_text)

    def __init__(self, str):
        self.text_to_search = str
        self.text_found = ""

    def search_p_number(self):
        matched_p_number = PropertyNumber.regex_pattern.search(self.text_to_search)
        print(matched_p_number)
        self.text_found = matched_p_number.group()
        return self.text_found

class PropertyCoefficient(object):
    pattern_text = "(?<=Participación: )[0-9,]{1,8}"
    regex_pattern = re.compile(pattern_text)

    def __init__(self, str):
        self.text_to_search = str
        self.text_found = ""

    def search_p_coefficient(self):
        matched_p_coefficient = PropertyCoefficient.regex_pattern.search(self.text_to_search)
        print(matched_p_coefficient)
        self.text_found = matched_p_coefficient.group()
        return self.text_found

class PropertyTaxIDNumber(object):
    pattern_text = "(?<=Referencia Catastral: )\d{7}[A-Z]{2}\d{4}[A-Z]\d{4}[A-Z]{2}"
    regex_pattern = re.compile(pattern_text)

    def __init__(self, str):
        self.text_to_search = str
        self.text_found = ""

    def search_tax_id(self):
        matched_p_taxidnumber = PropertyTaxIDNumber.regex_pattern.search(self.text_to_search)
        print(matched_p_taxidnumber)
        self.text_found = matched_p_taxidnumber.group()
        return self.text_found

def scan_txt_report(fli):
    data_retrieved = []
    file_input = open(fli, mode='r', encoding='utf-8')
    property_report = file_input.read()
    property_number = PropertyNumber(property_report)
    data_retrieved.append(property_number.search_p_number())
    property_coefficient = PropertyCoefficient(property_report)
    data_retrieved.append(property_coefficient.search_p_coefficient())
    property_tax_id_number = PropertyTaxIDNumber(property_report)
    data_retrieved.append(property_tax_id_number.search_tax_id())
    return data_retrieved

def main():
    if os.path.exists("./notas_simples/ns_txt"):
        os.chdir("./notas_simples/ns_txt")
    list_txt_files = glob.glob("*.txt")
    print(list_txt_files)
    with open("..\..\listado_de_fincas.csv", mode='w', newline='') as fiout:
        file_writer = csv.writer(fiout, delimiter=';')
        for file_name_input in list_txt_files:
            data_line = scan_txt_report(file_name_input)
            file_writer.writerow(data_line)

if __name__ == '__main__':
    main()

# TODO Idufir: "(?<=IDUFIR: )\d{14}"
# TODO calle: "(?<=Calle ).*" Break down in street name and number of address
# TODO piso: "(?<=piso ).*," Break down in floor number and door number (or letter), without boundaries
# TODO titularidad: "(?<=TITULARIDAD\n\n).*" Break down in owner name, VAT number, % and domain type.
As you can see above, the 3 classes I've already written, PropertyNumber(object), PropertyCoefficient(object) and PropertyTaxIDNumber(object), have a lot of repeated code. It will only get worse once I add more regex patterns to each class.
Yes, you are repeating much of your code, and yes, it is a sign of a weak design. I'll treat this as an OOP exercise, because the separate classes are overkill here.
First, we can see that the only difference between the classes is their name and their regex pattern. So we can have a base class that handles all the repetitive code, and each subclass simply supplies its own pattern:
class BaseProperty(object):
    def __init__(self, search_str, pattern):
        self.text_to_search = search_str
        self.text_found = ""
        self.regex_pattern = re.compile(pattern)

    def search_property(self):
        matched_property = self.regex_pattern.search(self.text_to_search)
        print(matched_property)
        self.text_found = matched_property.group()
        return self.text_found

class PropertyNumber(BaseProperty):
    def __init__(self, search_str):
        super(PropertyNumber, self).__init__(search_str, "(?<=FINCA Nº: )\w{3,6}")

class PropertyCoefficient(BaseProperty):
    def __init__(self, search_str):
        super(PropertyCoefficient, self).__init__(search_str, "(?<=Participación: )[0-9,]{1,8}")
Second, it doesn't appear that you're actually using the self.text_found field, so why store it? Now you can init all the properties in a single place, and make your scan_txt_report much simpler.
class BaseProperty(object):
    def __init__(self, pattern):
        self.regex_pattern = re.compile(pattern)

    def search_property(self, search_str):
        matched_property = self.regex_pattern.search(search_str)
        print(matched_property)
        return matched_property.group()

...

class PropertyCoefficient(BaseProperty):
    def __init__(self):
        super(PropertyCoefficient, self).__init__("(?<=Participación: )[0-9,]{1,8}")

properties = [PropertyNumber(), PropertyCoefficient(), ...]

def scan_txt_report(fli):
    file_input = open(fli, mode='r', encoding='utf-8')
    property_report = file_input.read()
    data_retrieved = [prop.search_property(property_report) for prop in properties]
    return data_retrieved
And unless you add some specific functionality for each subclass, you can even let go of the specific properties classes, and just do like this:
properties = [BaseProperty("(?<=FINCA Nº: )\w{3,6}"), BaseProperty("(?<=Participación: )[0-9,]{1,8}")]
And one last thing - please see the comment by @JonClements - it's a bad idea to shadow built-in names (such as str) with your own variable names.
There is no need for so many classes. It can be done with two classes:

class Property(object):
    # def __init__(self, regex): ...
    # def prepare(self): ...   (this method will prepare and return the compiled form of the regex)
    ...

class Search(object):
    # def __init__(self, compiled_regex): ...
    # def search(self, text): ...   (same function as now)
    ...

def scan_txt_report(fli):
    data_retrieved = []
    file_input = open(fli, mode='r', encoding='utf-8')
    # take a csv containing all the regexes.
    # for each regex, call the Property and Search classes; keep appending results.
    return data_retrieved
This way the only thing we need to change is the csv; the program itself remains intact and tested. To add new regexes, only the csv needs to be updated.
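
A rough sketch of that CSV-driven idea (the file name, column layout and use of the csv module here are my assumptions, not something the answer specifies):

import csv
import re

def load_patterns(csv_path):
    # Assumed CSV layout: one row per field, "field_name;regex", with ';' as the delimiter.
    patterns = []
    with open(csv_path, mode='r', encoding='utf-8', newline='') as fh:
        for name, regex in csv.reader(fh, delimiter=';'):
            patterns.append((name, re.compile(regex)))
    return patterns

def scan_txt_report(fli, patterns):
    with open(fli, mode='r', encoding='utf-8') as file_input:
        property_report = file_input.read()
    data_retrieved = []
    for name, compiled in patterns:
        match = compiled.search(property_report)
        data_retrieved.append(match.group() if match else '')
    return data_retrieved

# patterns = load_patterns('regex_patterns.csv')          # hypothetical pattern file
# data_line = scan_txt_report('some_report.txt', patterns)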

Class variable dictionary not saving with pickle.dump in python 2.7

I am using pickle to save an object graph by dumping the root. When I load the root it has all the instance variables and connected object nodes. However I am saving all the nodes in a class variable of type dictionary. The class variable is full before being saved but after I unpickle the data it is empty.
Here is the class I am using:
class Page():
    __crawled = {}

    def __init__(self, title = '', link = '', relatedURLs = []):
        self.__title = title
        self.__link = link
        self.__relatedURLs = relatedURLs
        self.__related = []

    @property
    def relatedURLs(self):
        return self.__relatedURLs

    @property
    def title(self):
        return self.__title

    @property
    def related(self):
        return self.__related

    @property
    def crawled(self):
        return self.__crawled

    def crawl(self, url):
        if url not in self.__crawled:
            webpage = urlopen(url).read()
            patFinderTitle = re.compile('<title>(.*)</title>')
            patFinderLink = re.compile('<link rel="canonical" href="([^"]*)" />')
            patFinderRelated = re.compile('<li><a href="([^"]*)"')
            findPatTitle = re.findall(patFinderTitle, webpage)
            findPatLink = re.findall(patFinderLink, webpage)
            findPatRelated = re.findall(patFinderRelated, webpage)
            newPage = Page(findPatTitle, findPatLink, findPatRelated)
            self.__related.append(newPage)
            self.__crawled[url] = newPage
        else:
            self.__related.append(self.__crawled[url])

    def crawlRelated(self):
        for link in self.__relatedURLs:
            self.crawl(link)
I save it like this:

with open('medTwiceGraph.dat','w') as outf:
    pickle.dump(root,outf)

and I load it like this:

def loadGraph(filename): #returns root
    with open(filename,'r') as inf:
        return pickle.load(inf)

root = loadGraph('medTwiceGraph.dat')
All the data loads except for the class variable __crawled.
What am I doing wrong?
Python doesn't really pickle class objects. It simply saves their names and where to find them. From the documentation of pickle:
Similarly, classes are pickled by named reference, so the same
restrictions in the unpickling environment apply. Note that none of
the class’s code or data is pickled, so in the following example the
class attribute attr is not restored in the unpickling environment:
class Foo:
    attr = 'a class attr'

picklestring = pickle.dumps(Foo)
These restrictions are why picklable functions and classes must be
defined in the top level of a module.
Similarly, when class instances are pickled, their class’s code and
data are not pickled along with them. Only the instance data are
pickled. This is done on purpose, so you can fix bugs in a class or
add methods to the class and still load objects that were created with
an earlier version of the class. If you plan to have long-lived
objects that will see many versions of a class, it may be worthwhile
to put a version number in the objects so that suitable conversions
can be made by the class’s __setstate__() method.
In your example, you could fix the problem by changing __crawled to be an instance attribute or a global variable.
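
For example, the instance-attribute version would look like this (a sketch that keeps the rest of Page from the question unchanged; note that each Page instance then carries its own cache instead of one shared across all pages):

class Page():
    def __init__(self, title = '', link = '', relatedURLs = []):
        self.__title = title
        self.__link = link
        self.__relatedURLs = relatedURLs
        self.__related = []
        self.__crawled = {}   # now an instance attribute: it lives in self.__dict__, so pickle saves it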
By default pickle will only use the contents of self.__dict__ and not use self.__class__.__dict__, which is what you think you want.
I say "what you think you want" because unpickling an instance should not mutate class-level state.
If you want to change this behavior, then look at __getstate__ and __setstate__ in the docs.
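
As a rough illustration of that approach (my sketch, not the poster's code; and note that repopulating the cache in __setstate__ is exactly the kind of class-level mutation the answer warns about):

import pickle

class Page(object):
    _crawled = {}                      # class-level cache, normally skipped by pickle

    def __init__(self, title=''):
        self.title = title

    def __getstate__(self):
        # Bundle the instance dict together with a snapshot of the class-level cache.
        return {'instance': self.__dict__, 'crawled': dict(Page._crawled)}

    def __setstate__(self, state):
        self.__dict__.update(state['instance'])
        Page._crawled.update(state['crawled'])   # restores (mutates) class-level state

root = Page('root')
Page._crawled['http://example.com'] = 'Example Domain'

data = pickle.dumps(root)
Page._crawled.clear()                  # simulate a fresh interpreter with an empty cache
restored = pickle.loads(data)
print(Page._crawled)                   # {'http://example.com': 'Example Domain'}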
For anyone interested, what I did was make a superclass Graph which contained an instance variable __crawled and moved my crawling functions into Graph. Page now only contains attributes describing the page and its related pages. I pickle my instance of Graph which contains all my instances of Page. Here is my code.
from urllib import urlopen
#from bs4 import BeautifulSoup
import re
import pickle

###################CLASS GRAPH####################
class Graph(object):
    def __init__(self, roots = [], crawled = {}):
        self.__roots = roots
        self.__crawled = crawled

    @property
    def roots(self):
        return self.__roots

    @property
    def crawled(self):
        return self.__crawled

    def crawl(self, page, url):
        if url not in self.__crawled:
            webpage = urlopen(url).read()
            patFinderTitle = re.compile('<title>(.*)</title>')
            patFinderLink = re.compile('<link rel="canonical" href="([^"]*)" />')
            patFinderRelated = re.compile('<li><a href="([^"]*)"')
            findPatTitle = re.findall(patFinderTitle, webpage)
            findPatLink = re.findall(patFinderLink, webpage)
            findPatRelated = re.findall(patFinderRelated, webpage)
            newPage = Page(findPatTitle, findPatLink, findPatRelated)
            page.related.append(newPage)
            self.__crawled[url] = newPage
        else:
            page.related.append(self.__crawled[url])

    def crawlRelated(self, page):
        for link in page.relatedURLs:
            self.crawl(page, link)

    def crawlAll(self, obj, limit = 2, i = 0):
        print 'number of crawled pages:', len(self.crawled)
        i += 1
        if i > limit:
            return
        else:
            for rel in obj.related:
                print 'crawling', rel.title
                self.crawlRelated(rel)
            for rel2 in obj.related:
                self.crawlAll(rel2, limit, i)

    def loadGraph(self, filename):
        with open(filename, 'r') as inf:
            return pickle.load(inf)

    def saveGraph(self, obj, filename):
        with open(filename, 'w') as outf:
            pickle.dump(obj, outf)

###################CLASS PAGE#####################
class Page(Graph):
    def __init__(self, title = '', link = '', relatedURLs = []):
        self.__title = title
        self.__link = link
        self.__relatedURLs = relatedURLs
        self.__related = []

    @property
    def relatedURLs(self):
        return self.__relatedURLs

    @property
    def title(self):
        return self.__title

    @property
    def related(self):
        return self.__related

####################### MAIN ######################
def main(seed):
    print 'doing some work...'
    webpage = urlopen(seed).read()
    patFinderTitle = re.compile('<title>(.*)</title>')
    patFinderLink = re.compile('<link rel="canonical" href="([^"]*)" />')
    patFinderRelated = re.compile('<li><a href="([^"]*)"')
    findPatTitle = re.findall(patFinderTitle, webpage)
    findPatLink = re.findall(patFinderLink, webpage)
    findPatRelated = re.findall(patFinderRelated, webpage)
    print 'found the webpage', findPatTitle
    #root = Page(findPatTitle,findPatLink,findPatRelated)
    G = Graph([Page(findPatTitle, findPatLink, findPatRelated)])
    print 'crawling related...'
    G.crawlRelated(G.roots[0])
    G.crawlAll(G.roots[0])
    print 'now saving...'
    G.saveGraph(G, 'medTwiceGraph.dat')
    print 'done'
    return G
#####################END MAIN######################

#'http://medtwice.com/am-i-pregnant/'
#'medTwiceGraph.dat'
#G = main('http://medtwice.com/menopause-overview/')
#print G.crawled

def loadGraph(filename):
    with open(filename, 'r') as inf:
        return pickle.load(inf)

G = loadGraph('MedTwiceGraph.dat')
print G.roots[0].title
print G.roots[0].related
print G.crawled

for key in G.crawled:
    print G.crawled[key].title
Using dill can solve this problem.
dill package: https://pypi.python.org/pypi/dill
reference: https://stackoverflow.com/a/28543378/6301132
Applied to the asker's code, it becomes this:
# notice: the file must be opened in binary mode
import dill

# save
with open('medTwiceGraph.dat','wb') as outf:
    dill.dump(root,outf)

# load
def loadGraph(filename): #returns root
    with open(filename,'rb') as inf:
        return dill.load(inf)

root = loadGraph('medTwiceGraph.dat')
I wrote another example:
# Another example (with Python 3.x)
import dill
import os

class Employee:
    def __init__(self, name='', contact={}):
        self.name = name
        self.contact = contact

    def print_self(self):
        print(self.name, self.contact)

# save
def save_employees():
    global emp
    with open('employees.dat','wb') as fh:
        dill.dump(emp, fh)

# load
def load_employees():
    global emp
    if os.path.exists('employees.dat'):
        with open('employees.dat','rb') as fh:
            emp = dill.load(fh)

#---
emp = []
load_employees()
print('loaded:')
for tmpe in emp:
    tmpe.print_self()

e = Employee()  # new employee
if len(emp) == 0:
    e.name = 'Jack'
    e.contact = {'phone': '+086-12345678'}
elif len(emp) == 1:
    e.name = 'Jane'
    e.contact = {'phone': '+01-15555555', 'email': 'a@b.com'}
else:
    e.name = 'sb.'
    e.contact = {'telegram': 'x'}
emp.append(e)
save_employees()
