python unpickling EOFError when Reading file - python

I've written a basic storage class that holds data in a list of lists writes it to a file and then allows it to be read back from the file for persistence. It was working on the system it was originally written (windows 7) I moved it to a second system (windows 8) and now it seems to be malfunctioning.
The error I'm receiving is:
Traceback (most recent call last):
File "D:\Code Vault\pydb.py", line 69, in <module>
print Users.ReadData()
File "D:\Code Vault\pydb.py", line 23, in ReadData
self.data = cPickle.load(self.Inputstream)
EOFError
The full code for the class is as follows
class table():
#TODO Insert Key Feild to facilitate ID Search
def __init__(self,*args):
self.tablename = args[0]
self.data = []
self.colnames = {}
self.filename = str(self.tablename)+ ".p"
for e in args[1:]:
self.colnames.update({e:len(self.colnames)})
self.Filestorage = open(self.filename, "wb" )
self.Filestorage.close()
def ReadData(self):
self.Inputstream = open(self.filename, "rb")
self.Inputstream.seek(0)
self.data = cPickle.load(self.Inputstream)
self.Inputstream.close()
return self.data
def AddData (self, *args):
self.tempdata = []
for e in args:
self.tempdata.append(e)
if len(args) == len(self.colnames):
self.data.append(self.tempdata)
else:
return "Incorrect argument quantities"
return self.tempdata
def Commit(self):
self.Outputstream = open(self.filename, "ab" )
cPickle.dump(self.data, self.Outputstream)
self.Outputstream.close()
return self.ReadData()
def Search(self,query):
for e in self.ReadData():
if query in e:
return self.ReadData().index(e)
else:
return "Value " + str(query) + " does not exist"
def Update(self,query, edit):
for e in self.data:
if query in e:
e[e.index(query)] = edit
return self.data
def DumpTable(self):
self.data = []
self.Outputstream = open(self.filename+".p", "wb" )
cPickle.dump(self.data, self.Outputstream)
self.Outputstream.close()
self.ReadData()
Users = table("Users","Username","Password")
Users.AddData("bob","123456")
# Users.Commit()
# print Users.data
# print Users.filename
# print Users.data
print Users.ReadData()
It's probably a case of code blindness as it was working previously. Any Ideas would be appreciated

The amended code that answers the question.
import pickle
class table():
#TODO Insert Key Feild to facilitate ID Search
def __init__(self,*args):
self.tablename = args[0]
self.data = []
self.colnames = {}
self.filename = str(self.tablename)+ ".p"
for e in args[1:]:
self.colnames.update({e:len(self.colnames)})
try: #creating this conditional prevented the contents being erased upon instantiating the class
f = open(self.filename,"rb")
except IOError:
self.createfile = open(self.filename, "wb" )
self.createfile.close()
def ReadData(self):
self.Inputstream = open(self.filename, "rb")
self.data = pickle.load(self.Inputstream)
self.Inputstream.close()
return self.data
def AddData (self, *args):
self.tempdata = []
for e in args:
self.tempdata.append(e)
if len(args) == len(self.colnames):
self.data.append(self.tempdata)
else:
return "Incorrect argument quantities"
return self.tempdata
def Commit(self):
self.Outputstream = open(self.filename, "wb" )
pickle.dump(self.data, self.Outputstream)
self.Outputstream.close()
def createfile(self):
self.createfile = open(self.filename, "wb" )
self.createfile.close()
def Search(self,query):
for e in self.ReadData():
if query in e:
return self.ReadData().index(e)
else:
return "Value " + str(query) + " does not exist"
def Update(self,query, edit):
for e in self.data:
if query in e:
e[e.index(query)] = edit
return self.data
def DumpTable(self):
self.data = []
self.Outputstream = open(self.filename+".p", "wb" )
pickle.dump(self.data, self.Outputstream)
self.Outputstream.close()
self.ReadData()

The solution seems to be here pickle.load() raising EOFError in Windows
i.e. file needs to be open for read on binary mode with "rb" flag.

Related

ValueError: filedescriptor out of range in select() while using thrift?

I'm tring to provide a remote server by thrift, and below is my server-side trial:
from test_thrift.test_server.TestService import Iface,Processor
from thrift.Thrift import TType,TException
from thrift.Thrift import TProcessor
from thrift.transport import TSocket
from thrift.protocol import TBinaryProtocol
from thrift.server import TNonblockingServer
port = int(config["server"]["port"])
handler = SmbService()
processor = Processor(handler)
socket = TSocket.TServerSocket(port=port)
server = TNonblockingServer.TNonblockingServer(processor, socket)
server.setNumThreads(100)
server.serve()
At my client-side, for every request I will create a new connect and close it after got response from server.
from thrift.transport.TSocket import TTransportException
from thrift.Thrift import TException
from thrift.protocol import TBinaryProtocol, TCompactProtocol
from thrift.transport import TTransport
from thrift.transport import TSocket
from thrift import Thrift
from test_thrift.test_server import TestService
class SmbClient(object):
def __init__(self):
try:
self.tsocket = TSocket.TSocket(settings.THRIFT_HOST, settings.THRIFT_PORT_PRODUCT)
self.transportFactory = TTransport.TFramedTransportFactory()
self.transport = self.transportFactory.getTransport(self.tsocket)
self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
self.client = TestService.Client(self.protocol)
self.transport.open()
except Thrift.TException as e:
self.logger.info(e)
class BaseService(object):
def __init__(self):
self.commonLogger = logging.getLogger('ads')
self.header = "header111"
def search_ads_list(self, request, account_id):
self.smbClient.client.getFBAdsByAccount(param1, param2)
def __enter__(self):
self.smbClient = SmbClient()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.smbClient.transport.close()
ech request call is like this:
with BaseService() as BaseSingleService:
status, data, info = BaseSingleService.search_ads_list(
request, account_id)
The actual amount of requests from client are not big enough , but after period of time. I got error like :
Traceback (most recent call last):
File "/home/xxx/xxx/src/smb_thrift_server.py", line 2200, in <module>
server.serve()
File "/home/xxx/xxx/venv3/lib/python3.5/site-packages/thrift/server/TNonblockingServer.py", line 350, in serve
self.handle()
File "/home/xxx/xxxx/venv3/lib/python3.5/site-packages/thrift/server/TNonblockingServer.py", line 310, in handle
rset, wset, xset = self._select()
File "/home/luban/smb_thrift_server/venv3/lib/python3.5/site-packages/thrift/server/TNonblockingServer.py", line 302, in _select
return select.select(readable, writable, readable)
ValueError: filedescriptor out of range in select()
Why error happended as I've closed the client connect each time after receiving the response?
update TestService codes below:
#
# Autogenerated by Thrift Compiler (0.9.1)
#
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
#
# options string: py:new_style
#
from thrift.Thrift import TType, TMessageType, TException, TApplicationException
from .ttypes import *
from thrift.Thrift import TProcessor
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol, TProtocol
try:
from thrift.protocol import fastbinary
except:
fastbinary = None
class Iface(object):
"""..."""
def getFBAdsByAccount(self, header, account_id, effective_status, access_token):
"""
Parameters:
- header
- account_id
- effective_status
- access_token
"""
pass
"""..."""
class Client(Iface):
def __init__(self, iprot, oprot=None):
self._iprot = self._oprot = iprot
if oprot is not None:
self._oprot = oprot
self._seqid = 0
def getFBAdsByAccount(self, header, account_id, effective_status, access_token):
"""
Parameters:
- header
- account_id
- effective_status
- access_token
"""
self.send_getFBAdsByAccount(header, account_id, effective_status, access_token)
return self.recv_getFBAdsByAccount()
def send_getFBAdsByAccount(self, header, account_id, effective_status, access_token):
self._oprot.writeMessageBegin('getFBAdsByAccount', TMessageType.CALL, self._seqid)
args = getFBAdsByAccount_args()
args.header = header
args.account_id = account_id
args.effective_status = effective_status
args.access_token = access_token
args.write(self._oprot)
self._oprot.writeMessageEnd()
self._oprot.trans.flush()
def recv_getFBAdsByAccount(self):
(fname, mtype, rseqid) = self._iprot.readMessageBegin()
if mtype == TMessageType.EXCEPTION:
x = TApplicationException()
x.read(self._iprot)
self._iprot.readMessageEnd()
raise x
result = getFBAdsByAccount_result()
result.read(self._iprot)
self._iprot.readMessageEnd()
if result.success is not None:
return result.success
if result.e is not None:
raise result.e
raise TApplicationException(TApplicationException.MISSING_RESULT, "getFBAdsByAccount failed: unknown result");
class Processor(Iface, TProcessor):
def __init__(self, handler):
self._handler = handler
self._processMap = {}
self._processMap["getFBAdsByAccount"] = Processor.process_getFBAdsByAccount
def process(self, iprot, oprot):
(name, type, seqid) = iprot.readMessageBegin()
if name not in self._processMap:
iprot.skip(TType.STRUCT)
iprot.readMessageEnd()
x = TApplicationException(TApplicationException.UNKNOWN_METHOD, 'Unknown function %s' % (name))
oprot.writeMessageBegin(name, TMessageType.EXCEPTION, seqid)
x.write(oprot)
oprot.writeMessageEnd()
oprot.trans.flush()
return
else:
self._processMap[name](self, seqid, iprot, oprot)
return True
def process_getFBAdsByAccount(self, seqid, iprot, oprot):
args = getFBAdsByAccount_args()
args.read(iprot)
iprot.readMessageEnd()
result = getFBAdsByAccount_result()
try:
result.success = self._handler.getFBAdsByAccount(args.header, args.account_id, args.effective_status, args.access_token)
except test_thrift.common.ttypes.ServerException as e:
result.e = e
oprot.writeMessageBegin("getFBAdsByAccount", TMessageType.REPLY, seqid)
result.write(oprot)
oprot.writeMessageEnd()
oprot.trans.flush()
class getFBAdsByAccount_args(object):
"""
Attributes:
- header
- account_id
- effective_status
- access_token
"""
thrift_spec = (
None, # 0
(1, TType.STRUCT, 'header', (test_thrift.common.ttypes.RequestHeader, test_thrift.common.ttypes.RequestHeader.thrift_spec), None, ), # 1
(2, TType.STRING, 'account_id', None, None, ), # 2
(3, TType.LIST, 'effective_status', (TType.STRING,None), None, ), # 3
(4, TType.STRING, 'access_token', None, None, ), # 4
)
def __init__(self, header=None, account_id=None, effective_status=None, access_token=None,):
self.header = header
self.account_id = account_id
self.effective_status = effective_status
self.access_token = access_token
def read(self, iprot):
if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
fastbinary.decode_binary(self, iprot.trans, (self.__class__, self.thrift_spec))
return
iprot.readStructBegin()
while True:
(fname, ftype, fid) = iprot.readFieldBegin()
if ftype == TType.STOP:
break
if fid == 1:
if ftype == TType.STRUCT:
self.header = test_thrift.common.ttypes.RequestHeader()
self.header.read(iprot)
else:
iprot.skip(ftype)
elif fid == 2:
if ftype == TType.STRING:
self.account_id = iprot.readString();
else:
iprot.skip(ftype)
elif fid == 3:
if ftype == TType.LIST:
self.effective_status = []
(_etype24, _size21) = iprot.readListBegin()
for _i25 in range(_size21):
_elem26 = iprot.readString();
self.effective_status.append(_elem26)
iprot.readListEnd()
else:
iprot.skip(ftype)
elif fid == 4:
if ftype == TType.STRING:
self.access_token = iprot.readString();
else:
iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
iprot.readStructEnd()
def write(self, oprot):
if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and self.thrift_spec is not None and fastbinary is not None:
oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, self.thrift_spec)))
return
oprot.writeStructBegin('getFBAdsByAccount_args')
if self.header is not None:
oprot.writeFieldBegin('header', TType.STRUCT, 1)
self.header.write(oprot)
oprot.writeFieldEnd()
if self.account_id is not None:
oprot.writeFieldBegin('account_id', TType.STRING, 2)
oprot.writeString(self.account_id)
oprot.writeFieldEnd()
if self.effective_status is not None:
oprot.writeFieldBegin('effective_status', TType.LIST, 3)
oprot.writeListBegin(TType.STRING, len(self.effective_status))
for iter27 in self.effective_status:
oprot.writeString(iter27)
oprot.writeListEnd()
oprot.writeFieldEnd()
if self.access_token is not None:
oprot.writeFieldBegin('access_token', TType.STRING, 4)
oprot.writeString(self.access_token)
oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()
def validate(self):
return
def __repr__(self):
L = ['%s=%r' % (key, value)
for key, value in self.__dict__.items()]
return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
def __eq__(self, other):
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
def __ne__(self, other):
return not (self == other)
class getFBAdsByAccount_result(object):
"""
Attributes:
- success
- e
"""
thrift_spec = (
(0, TType.STRUCT, 'success', (test_thrift.common.ttypes.ResponseListMap, test_thrift.common.ttypes.ResponseListMap.thrift_spec), None, ), # 0
(1, TType.STRUCT, 'e', (test_thrift.common.ttypes.ServerException, test_thrift.common.ttypes.ServerException.thrift_spec), None, ), # 1
)
def __init__(self, success=None, e=None,):
self.success = success
self.e = e
def read(self, iprot):
if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
fastbinary.decode_binary(self, iprot.trans, (self.__class__, self.thrift_spec))
return
iprot.readStructBegin()
while True:
(fname, ftype, fid) = iprot.readFieldBegin()
if ftype == TType.STOP:
break
if fid == 0:
if ftype == TType.STRUCT:
self.success = test_thrift.common.ttypes.ResponseListMap()
self.success.read(iprot)
else:
iprot.skip(ftype)
elif fid == 1:
if ftype == TType.STRUCT:
self.e = test_thrift.common.ttypes.ServerException()
self.e.read(iprot)
else:
iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
iprot.readStructEnd()
def write(self, oprot):
if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and self.thrift_spec is not None and fastbinary is not None:
oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, self.thrift_spec)))
return
oprot.writeStructBegin('getFBAdsByAccount_result')
if self.success is not None:
oprot.writeFieldBegin('success', TType.STRUCT, 0)
self.success.write(oprot)
oprot.writeFieldEnd()
if self.e is not None:
oprot.writeFieldBegin('e', TType.STRUCT, 1)
self.e.write(oprot)
oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()
def validate(self):
return
def __repr__(self):
L = ['%s=%r' % (key, value)
for key, value in self.__dict__.items()]
return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
def __eq__(self, other):
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
def __ne__(self, other):
return not (self == other)

Class inheritance type checking after pickling in Python

Is there a sure-fire way to check that the class of an object is a sub-class of the desired super?
For Example, in a migration script that I'm writing, I have to convert objects of a given type to dictionaries in a given manner to ensure two-way compatability of the data.
This is best summed up like so:
Serializable
User
Status
Issue
Test
Set
Step
Cycle
However, when I'm recursively checking objects after depickling, I receive a Test object that yields the following results:
Testing data object type:
type(data)
{type}< class'__main.Test' >
Testing Class type:
type(Test())
{type}< class'__main.Test' >
Testing object type against class type:
type(Test()) == type(data)
{bool}False
Testing if object isinstance() of Class:
isinstance(data, Test)
{bool}False
Testing if Class isinstance() of Super Class:
isinstance(Test(), Serializable)
{bool}True
Testing isinstance() of Super Class::
isinstance(data, Serializable)
{bool}False
Interestingly, it doesn't appear to have any such problem prior to pickling as it handles the creation of dictionary and integrity hash just fine.
This only crops up with depickled objects in both Pickle and Dill.
For Context, here's the code in it's native environment - the DataCache object that is pickled:
class DataCache(object):
_hash=""
_data = None
#staticmethod
def genHash(data):
dataDict = DataCache.dictify(data)
datahash = json.dumps(dataDict, sort_keys=True)
return hashlib.sha256(datahash).digest()
#staticmethod
def dictify(data):
if isinstance(data,list):
datahash = []
for item in data:
datahash.append(DataCache.dictify(item))
elif isinstance(data,(dict, collections.OrderedDict)):
datahash = collections.OrderedDict()
for key,value in datahash.iteritems():
datahash[key]= DataCache.dictify(value)
elif isinstance(data, Serializable):
datahash = data.toDict()
else:
datahash = data
return datahash
def __init__(self, restoreDict = {}):
if restoreDict:
self.__dict__.update(restoreDict)
def __getinitargs__(self):
return (self.__dict__)
def set(self, data):
self._hash = DataCache.genHash(data)
self._data = data
def verify(self):
dataHash = DataCache.genHash(self._data)
return (self._hash == dataHash)
def get(self):
return self._data
Finally, I know there's arguments for using JSON for readability in storage, I needed Pickle's ability to convert straight to and from Objects without specifying the object type myself. (thanks to the nesting, it's not really feasible)
Am I going mad here or does pickling do something to the class definitions?
EDIT:
Minimal Implementation:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import requests
from aenum import Enum
import json # _tricks
import base64
import argparse
import os
import sys
import datetime
import dill
import hashlib
import collections
class Serializable(object):
def __init__(self, initDict={}):
if initDict:
self.__dict__.update(initDict)
def __str__(self):
return str(self.sortSelf())
def sortSelf(self):
return collections.OrderedDict(sorted(self.__dict__.items()))
def toDict(self):
return self.__dict__
def fromDict(self, dict):
# Not using __dict__.update(...) to avoid polluting objects with the excess data
varMap = self.__dict__
if dict and varMap:
for key in varMap:
if (key in dict):
varMap[key] = dict[key]
self.__dict__.update(varMap)
return self
return None
class Issue(Serializable):
def __init__(self, initDict={}):
self.id = 0
self.key = ""
self.fields = {}
if initDict:
self.__dict__.update(initDict)
Serializable.__init__(self)
def fieldToDict(self, obj, key, type):
if key in obj:
result = obj[key]
else:
return None
if result is None:
return None
if isinstance(result, type):
return result.toDict()
return result
def fromDict(self, jsonDict):
super(Issue, self).fromDict(jsonDict)
self.fields["issuetype"] = IssueType().fromDict(self.fields["issuetype"])
self.fields["assignee"] = User().fromDict(self.fields["assignee"])
self.fields["creator"] = User().fromDict(self.fields["creator"])
self.fields["reporter"] = User().fromDict(self.fields["reporter"])
return self
def toDict(self):
result = super(Issue, self).toDict()
blankKeys = []
for fieldName, fieldValue in self.fields.iteritems():
if fieldValue is None:
blankKeys.append(fieldName)
if blankKeys:
for key in blankKeys:
self.fields.pop(key, None)
result["fields"]["issuetype"] = self.fieldToDict(result["fields"], "issuetype", IssueType)
result["fields"]["creator"] = self.fieldToDict(result["fields"], "creator", User)
result["fields"]["reporter"] = self.fieldToDict(result["fields"], "reporter", User)
result["fields"]["assignee"] = self.fieldToDict(result["fields"], "assignee", User)
return result
class IssueType(Serializable):
def __init__(self):
self.id = 0
self.name = ""
def toDict(self):
return {"id": str(self.id)}
class Project(Serializable):
def __init__(self):
Serializable.__init__(self)
self.id = 0
self.name = ""
self.key = ""
class Cycle(Serializable):
def __init__(self):
self.id = 0
self.name = ""
self.totalExecutions = 0
self.endDate = ""
self.description = ""
self.totalExecuted = 0
self.started = ""
self.versionName = ""
self.projectKey = ""
self.versionId = 0
self.environment = ""
self.totalCycleExecutions = 0
self.build = ""
self.ended = ""
self.name = ""
self.modifiedBy = ""
self.projectId = 0
self.startDate = ""
self.executionSummaries = {'executionSummary': []}
class Step(Serializable):
def __init__(self):
self.id = ""
self.orderId = 0
self.step = ""
self.data = ""
self.result = ""
self.attachmentsMap = {}
def toDict(self):
dict = {}
dict["step"] = self.step
dict["data"] = self.data
dict["result"] = self.result
dict["attachments"] = []
return dict
class Status(Serializable):
def __init__(self):
self.id = 0
self.name = ""
self.description = ""
self.isFinal = True
self.color = ""
self.isNative = True
self.statusCount = 0
self.statusPercent = 0.0
class User(Serializable):
def __init__(self):
self.displayName = ""
self.name = ""
self.emailAddress = ""
self.key = ""
self.active = False
self.timeZone = ""
class Execution(Serializable):
def __init__(self):
self.id = 0
self.orderId = 0
self.cycleId = -1
self.cycleName = ""
self.issueId = 0
self.issueKey = 0
self.projectKey = ""
self.comment = ""
self.versionId = 0,
self.versionName = "",
self.executedOn = ""
self.creationDate = ""
self.executedByUserName = ""
self.assigneeUserName = ""
self.status = {}
self.executionStatus = ""
def fromDict(self, jsonDict):
super(Execution, self).fromDict(jsonDict)
self.status = Status().fromDict(self.status)
# This is already listed as Execution Status, need to associate and convert!
return self
def toDict(self):
result = super(Execution, self).toDict()
result['status'] = result['status'].toDict()
return result
class ExecutionContainer(Serializable):
def __init__(self):
self.executions = []
def fromDict(self, jsonDict):
super(ExecutionContainer, self).fromDict(jsonDict)
self.executions = []
for executionDict in jsonDict["executions"]:
self.executions.append(Execution().fromDict(executionDict))
return self
class Test(Issue):
def __init__(self, initDict={}):
if initDict:
self.__dict__.update(initDict)
Issue.__init__(self)
def toDict(self):
result = super(Test, self).toDict()
stepField = "CustomField_0001"
if result["fields"][stepField]:
steps = []
for step in result["fields"][stepField]["steps"]:
steps.append(step.toDict())
result["fields"][stepField] = steps
return result
def fromDict(self, jsonDict):
super(Test, self).fromDict(jsonDict)
stepField = "CustomField_0001"
steps = []
if stepField in self.fields:
for step in self.fields[stepField]["steps"]:
steps.append(Step().fromDict(step))
self.fields[stepField] = {"steps": steps}
return self
class Set(Issue):
def __init__(self, initDict={}):
self.__dict__.update(initDict)
Issue.__init__(self)
class DataCache(object):
_hash = ""
_data = None
#staticmethod
def genHash(data):
dataDict = DataCache.dictify(data)
datahash = json.dumps(dataDict, sort_keys=True)
return hashlib.sha256(datahash).digest()
#staticmethod
def dictify(data):
if isinstance(data, list):
datahash = []
for item in data:
datahash.append(DataCache.dictify(item))
elif isinstance(data, (dict, collections.OrderedDict)):
datahash = collections.OrderedDict()
for key, value in datahash.iteritems():
datahash[key] = DataCache.dictify(value)
elif isinstance(data, Serializable):
datahash = data.toDict()
else:
datahash = data
return datahash
def __init__(self, restoreDict={}):
if restoreDict:
self.__dict__.update(restoreDict)
def __getinitargs__(self):
return (self.__dict__)
def set(self, data):
self._hash = DataCache.genHash(data)
self._data = data
def verify(self):
dataHash = DataCache.genHash(self._data)
return (self._hash == dataHash)
def get(self):
return self._data
def saveCache(name, projectKey, object):
filePath = "migration_caches/{projectKey}".format(projectKey=projectKey)
if not os.path.exists(path=filePath):
os.makedirs(filePath)
cache = DataCache()
cache.set(object)
targetFile = open("{path}/{name}".format(name=name, path=filePath), 'wb')
dill.dump(obj=cache, file=targetFile)
targetFile.close()
def loadCache(name, projectKey):
filePath = "migration_caches/{projectKey}/{name}".format(name=name, projectKey=projectKey)
result = False
try:
targetFile = open(filePath, 'rb')
try:
cache = dill.load(targetFile)
if isinstance(cache, DataCache):
if cache.verify():
result = cache.get()
except EOFError:
# except BaseException:
print ("Failed to load cache from file: {filePath}\n".format(filePath=filePath))
except IOError:
("Failed to load cache file at: {filePath}\n".format(filePath=filePath))
targetFile.close()
return result
testIssue = Test().fromDict({"id": 1000,
"key": "TEST",
"fields": {
"issuetype": {
"id": 1,
"name": "TestIssue"
},
"assignee": "Minothor",
"reporter": "Minothor",
"creator": "Minothor",
}
})
saveCache("Test", "TestProj", testIssue)
result = loadCache("Test", "TestProj")
EDIT 2
The script in it's current form, now seems to work correctly with vanilla Pickle, (initially switched to Dill due to a similar issue, which was solved by the switch).
However, if you are here with this issue and require Dill's features, then as Mike noted in the comments - it's possible to change the settings in dill.settings to have Dill behave pickle referenced items only with joblib mode, effectively mirroring pickle's standard pickling behaviour.

How to parallelize a class method or share class object between processes in python?

I have created a class containing all its instances and need to parallelize the process of instantiation, but cannot solve the problem of sharing the class as a class object. Is it possible in python 2.7 using multiprocessing?
OUTPUT_HEADINGS = []
class MyContainer(object):
"""
"""
instances = []
children = []
#classmethod
def export_to_csv(cls):
with open(args.output, "w") as output_file:
f_csv = csv.DictWriter(output_file, fieldnames=OUTPUT_HEADINGS)
f_csv.writeheader()
for instance in cls.instances:
f_csv.writerow(instance.to_dict())
def __new__(cls, dat_file):
try:
tree = ElementTree.parse(dat_file)
cls.children = tree.findall("parent_element/child_element")
except ElementTree.ParseError as err:
logging.exception(err)
if not cls.children:
msg = ("{}: No \"parent_element/child_element\""
" element found".format(os.path.basename(dat_file)))
logging.warning(msg)
cls.children = []
return False
else:
instance = super(MyContainer, cls).__new__(cls, dat_file)
instance.__init__(dat_file)
cls.instances.append(instance)
cls.children = []
return True
def __init__(self, dat_file):
self._name = os.path.basename(dat_file)
self.attr_value_sum = defaultdict(list)
var1 = MyContainer.children[0].find("var1")
var2 = MyContainer.children[0].get("var2")
cat_name = "{}.{}".format(var1, var2)
if cat_name not in OUTPUT_HEADINGS:
OUTPUT_HEADINGS.append(cat_name)
# processing and summarizing of xml data
def to_dict(self):
return output_dict
def main():
i = 0
try:
for f in FILE_LIST:
i += 1
print "{}/{}: {} in progress...".format(i, len(FILE_LIST), f)
print "{}".format("...DONE" if MyContainer(f) else "...SKIPPED")
except Exception as err:
logging.exception(err)
finally:
MyContainer.export_to_csv()
if __name__ == '__main__':
FILE_LIST = []
for d in args.dirs:
FILE_LIST.extend(get_name_defined_files(dir_path=d,
pattern=args.filename,
recursive=args.recursive))
main()
I tried to use multiprocessing.managers.BaseManager, to create a proxy for MyContainer class, but it can only create an instance object this way. I want actually to parallelize the MyContainer(dat_file) call.

Preset Variable with Pickle

import time
from random import randint
import pickle
MaTC = 1
MaTC = pickle.dump(MaTC, open("MaTCFile.txt", "wb"))
AnTC = 2
AnTC = pickle.dump(AnTC, open("AnTCFile.txt", "wb"))
AuTC = 3
AuTC = pickle.dump(AuTC, open("AuTCFile.txt", "wb"))
AlTC = 3
AlTC = pickle.dump(AlTC, open("AlTCFile.txt", "wb"))
JaTC = 3
JaTC = pickle.dump(JaTC, open("JaTCFile.txt", "wb"))
print("Hello Jacob")
time.sleep(1)
print("Choose an option!")
print("(1) Selecting Cleaners")
print("(2) Edit Cleaning Information")
print("(3) Reserved")
MenuChoice = input(": ")
if MenuChoice == "1":
print("Selecting Cleaner Function")
elif MenuChoice == "2":
print("(1) Check cleaning info")
print("(2) Enter New Cleaning info")
InfoSelect = input(": ")
if InfoSelect == "2":
Mare = { "Mares Times Cleaned": MaTC}
Andre = { "Andres Times Cleaned": AnTC}
Al = { "Als Times Cleaned": AlTC}
Austin = { "Austins Times Cleaned": AuTC}
Jacob = { "Jacobs Times Cleaned": JaTC}
pickle.dump( Mare, open ("MaresFile.txt", "wb"))
pickle.dump( Jacob, open ("JacobsFile.txt", "wb"))
pickle.dump( Andre, open ("AndresFile.txt", "wb"))
pickle.dump( Austin,open ("AustinsFile.txt", "wb"))
pickle.dump( Al, open ("AlsFile.txt", "wb"))
print(Mare)
print(Austin)
print(Jacob)
print(Andre)
print(Al)
Okay so basically what I am trying to achieve is have the MaTC, AnTC, AuTC, AlTC, and JaTC Variables be preset as 1, 2, 3, 3, and 3 at the first time running the program. But when I want to add 2 to say MaTC making it 3, When I start the program again I want it to = 3 on startup. I just started python a few days ago, I would love the feedback!
Use exceptions to handle the case where the file does not exist (i.e. first run of the program)
try:
f = open("MaTCFile.dat", "rb") # Pickle is not a .txt format
MaTC = pickle.load(f)
except IOError:
# Initialise default value
MaTC = 1
I would also be inclined to store all the variables in a single file using a dict:
default_data = {
"MaTC": 1, "AnTC": 2, # ... etc.
as this would make the program more manageable.
Here is a refactored version of your program. It uses the pickle module and demonstrates usage of pickletools, zlib, and a few other module. Hopefully, the code will help you further along in writing your program. One subject that you probably will want to research in the future is that of databases.
import os
import pickle
import pickletools
import sys
import zlib
SETTINGS_FILE = 'settings.sav'
def main():
"""Present the user with a cleaning application."""
settings = load_settings(SETTINGS_FILE)
print('Hello, Jacob!')
while True:
if show_menu(settings,
select_cleaners,
edit_cleaning_information,
reserved,
close_program):
break
settings.save(SETTINGS_FILE)
def load_settings(path):
"""Create the settings schema and load them from file is possible."""
settings = Namespace(mares=Parameter(1, is_positive_int),
andres=Parameter(2, is_positive_int),
austin=Parameter(3, is_positive_int),
al=Parameter(3, is_positive_int),
jacob=Parameter(3, is_positive_int))
if os.path.isfile(path):
settings.load(path)
return settings
def is_positive_int(value):
"""Ensure that the value is valid for the settings."""
return isinstance(value, int) and value >= 0
def show_menu(context, *args):
"""Help display a menu to the user."""
for option, function in enumerate(args, 1):
print(option, '-', function.__doc__)
while True:
number = get_int('Please enter an option: ') - 1
if 0 <= number < len(args):
return args[number](context)
else:
print('Your number was out of range.')
def get_int(prompt):
"""Get a valid number from the user."""
while True:
try:
text = input(prompt)
except EOFError:
sys.exit()
else:
try:
return int(text)
except ValueError:
print('You did not enter an integer.')
def select_cleaners(context):
"""Choose this to select your cleaner."""
print('Selecting Cleaning Function')
def edit_cleaning_information(context):
"""You can edit the cleaning information."""
show_menu(context, check_cleaning_info, enter_new_cleaning_info)
def reserved(context):
"""This is reserved for future use."""
print('NOT AVAILABLE')
def close_program(context):
"""Close the program."""
return True
def check_cleaning_info(context):
"""Show all cleaning info."""
print('Mares:', context.mares)
print('Andres:', context.andres)
print('Austin:', context.austin)
print('Al:', context.al)
print('Jacob:', context.jacob)
def enter_new_cleaning_info(context):
"""Enter in additional cleaning information."""
while True:
name = input('Who needs cleaning info adjusted? ').capitalize()
if name == 'Mares':
context.mares += get_int('Add to Mares: ')
elif name == 'Andres':
context.andres += get_int('Add to Andres: ')
elif name == 'Austin':
context.austin += get_int('Add to Austin: ')
elif name == 'Al':
context.al += get_int('Add to Al: ')
elif name == 'Jacob':
context.jacob += get_int('Add to Jacob: ')
else:
continue
break
###############################################################################
class _Settings:
"""_Settings(*args, **kwargs) -> _Settings instance"""
#staticmethod
def _save(path, obj):
"""Save an object to the specified path."""
data = zlib.compress(pickletools.optimize(pickle.dumps(obj)), 9)
with open(path, 'wb') as file:
file.write(data)
#staticmethod
def _load(path):
"""Load an object from the specified path."""
with open(path, 'rb') as file:
data = file.read()
return pickle.loads(zlib.decompress(data))
class Namespace(_Settings):
"""Namespace(**schema) -> Namespace instance"""
def __init__(self, **schema):
"""Initialize the Namespace instance with a schema definition."""
self.__original, self.__dynamic, self.__static, self.__owner = \
{}, {}, {}, None
for name, value in schema.items():
if isinstance(value, _Settings):
if isinstance(value, Namespace):
if value.__owner is not None:
raise ValueError(repr(name) + 'has an owner!')
value.__owner = self
self.__original[name] = value
else:
raise TypeError(repr(name) + ' has bad type!')
def __setattr__(self, name, value):
"""Set a named Parameter with a given value to be validated."""
if name in {'_Namespace__original',
'_Namespace__dynamic',
'_Namespace__static',
'_Namespace__owner',
'state'}:
super().__setattr__(name, value)
elif '.' in name:
head, tail = name.split('.', 1)
self[head][tail] = value
else:
attr = self.__original.get(name)
if not isinstance(attr, Parameter):
raise AttributeError(name)
attr.validate(value)
if value == attr.value:
self.__dynamic.pop(name, None)
else:
self.__dynamic[name] = value
def __getattr__(self, name):
"""Get a Namespace or Parameter value by its original name."""
if '.' in name:
head, tail = name.split('.', 1)
return self[head][tail]
if name in self.__dynamic:
return self.__dynamic[name]
attr = self.__original.get(name)
if isinstance(attr, Namespace):
return attr
if isinstance(attr, Parameter):
return attr.value
raise AttributeError(name)
__setitem__ = __setattr__
__getitem__ = __getattr__
def save(self, path):
"""Save the state of the entire Namespace tree structure."""
if isinstance(self.__owner, Namespace):
self.__owner.save(path)
else:
self._save(path, {Namespace: self.state})
def load(self, path):
"""Load the state of the entire Namespace tree structure."""
if isinstance(self.__owner, Namespace):
self.__owner.load(path)
else:
self.state = self._load(path)[Namespace]
def __get_state(self):
"""Get the state of this Namespace and any child Namespaces."""
state = {}
for name, types in self.__static.items():
box = state.setdefault(name, {})
for type_, value in types.items():
box[type_] = value.state if type_ is Namespace else value
for name, value in self.__original.items():
box = state.setdefault(name, {})
if name in self.__dynamic:
value = self.__dynamic[name]
elif isinstance(value, Parameter):
value = value.value
else:
box[Namespace] = value.state
continue
box.setdefault(Parameter, {})[type(value)] = value
return state
def __set_state(self, state):
"""Set the state of this Namespace and any child Namespaces."""
dispatch = {Namespace: self.__set_namespace,
Parameter: self.__set_parameter}
for name, box in state.items():
for type_, value in box.items():
dispatch[type_](name, value)
def __set_namespace(self, name, state):
"""Set the state of a child Namespace."""
attr = self.__original.get(name)
if not isinstance(attr, Namespace):
attr = self.__static.setdefault(name, {})[Namespace] = Namespace()
attr.state = state
def __set_parameter(self, name, state):
"""Set the state of a child Parameter."""
attr = self.__original.get(name)
for type_, value in state.items():
if isinstance(attr, Parameter):
try:
attr.validate(value)
except TypeError:
pass
else:
if value == attr.value:
self.__dynamic.pop(name, None)
else:
self.__dynamic[name] = value
continue
if not isinstance(value, type_):
raise TypeError(repr(name) + ' has bad type!')
self.__static.setdefault(name, {}).setdefault(
Parameter, {})[type_] = value
state = property(__get_state, __set_state, doc='Namespace state property.')
class Parameter(_Settings):
"""Parameter(value, validator=lambda value: True) -> Parameter instance"""
def __init__(self, value, validator=lambda value: True):
"""Initialize the Parameter instance with a value to validate."""
self.__value, self.__validator = value, validator
self.validate(value)
def validate(self, value):
"""Check that value has same type and passes validator."""
if not isinstance(value, type(self.value)):
raise TypeError('Value has a different type!')
if not self.__validator(value):
raise ValueError('Validator failed the value!')
#property
def value(self):
"""Parameter value property."""
return self.__value
###############################################################################
if __name__ == '__main__':
main()

Proper multiprocess-safe file locking in python

I'm trying to implement thread- and multiprocess-safe disk cache for small application. It is only required to run on modern linux distributions, no compatibility with Windows or OS X is required.
Here is the code:
class LockedOpen(object):
READ = 0
WRITE = 1
def __init__(self, filename, mode=READ, block=True):
self._filename = filename
if mode == LockedOpen.READ:
self._open_mode = os.O_RDONLY
self._lock_op = fcntl.LOCK_SH
self._open_mode_str = 'rb'
else:
self._open_mode = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
self._lock_op = fcntl.LOCK_EX
self._open_mode_str = 'wb'
if not block:
self._lock_op = self._lock_op | fcntl.LOCK_NB
def __enter__(self,):
self._fd = os.open(self._filename, self._open_mode)
fcntl.flock(self._fd, self._lock_op)
self._file_obj = os.fdopen(self._fd, self._open_mode_str)
return self._file_obj
def __exit__(self, exc_type, exc_val, exc_tb):
self._file_obj.flush()
os.fdatasync(self._fd)
fcntl.flock(self._fd, fcntl.LOCK_UN)
self._file_obj.close()
class DiskCache(object):
def __init__(self, dirname):
self._dir = dirname
def _filename_from_key(self, key):
return os.path.join(self._dir, key)
def put(self, key, value):
with LockedOpen(self._filename_from_key(key), LockedOpen.WRITE) as f:
f.write(value)
def get(self, key):
with LockedOpen(self._filename_from_key(key)) as f:
return f.read()
def delete(self, key):
os.unlink(self._filename_from_key(key))
The question is, am I doing this right? Are there any errors/caveats/implications with this code that I should know about? Any advice regarding performance is welcome as well.
Thanks~

Categories