I am trying to run a static method on one of my classes in my Django application. I am using the multiprocessing module to make this task a little faster since the method will be iterating over a large amount of objects in my database. It works fine when I run it locally, but when I run it in production I get this pickling error...
Quick Note: I using python3.6 locally, and python3.4 in production. Would this affect the pickling of my objects ?
Traceback (most recent call last):
File "<console>", line 1, in <module>
File "/home/socialauto/social-auto-web/vehicle/models.py", line 193, in image_existence_check
p.map(Vehicle.check_image, enumerate(vehicles))
File "/usr/lib/python3.4/multiprocessing/pool.py", line 260, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/usr/lib/python3.4/multiprocessing/pool.py", line 599, in get
raise self._value
File "/usr/lib/python3.4/multiprocessing/pool.py", line 383, in _handle_tasks
put(task)
File "/usr/lib/python3.4/multiprocessing/connection.py", line 206, in send
self._send_bytes(ForkingPickler.dumps(obj))
File "/usr/lib/python3.4/multiprocessing/reduction.py", line 50, in dumps
cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <function Vehicle.check_image at 0x7f49974d5268>: attribute lookup check_image on vehicle.models failed
Por Que
Model method:
#staticmethod
def check_image(veh):
index = veh[0]
print(index)
veh = veh[1]
try:
images = veh.images.all()
if images.count() == 1:
image = images[0]
response = requests.get(image.image_url)
if response.status_code == 200:
veh.has_image = True
else:
veh.has_image = False
elif images.count() > 1:
has_image = True
for img in images:
response = requests.get(img.image_url)
if response != 200:
has_image = False
veh.has_image = has_image
else:
veh.has_image = False
veh.save()
except Exception as e:
logging.error(e)
pass
#staticmethod
def image_existence_check():
from time import time
from multiprocessing.pool import Pool
ts = time()
vehicles = Vehicle.objects.all()[:100]
# map(lambda (i, x): {'name': x, 'rank': i}, enumerate(ranked_users))
with Pool(10) as p:
print('Beginning')
p.map(Vehicle.check_image, enumerate(vehicles))
print('Took {}s'.format(time() - ts))
Related
I want to create a class Storage where each object has a dictionary orderbooks as a property.
I want to write on orderbooks from the main process by invoking the method write, but I want to defer this action to another process and ensuring that the dictionary orderbooks is accessible from the main process.
To do so, I create a Mananger() that I pass during the definition of the object and that is used to notify the processes about the changes of the dictionary. My code is the following:
from multiprocessing import Process, Manager
class Storage():
def __init__(self,manager):
self.manager = manager
self.orderbooks = self.manager.dict()
def store_value(self,el):
self.orderbooks[el[0]] = el[1]
def write(self,el:list):
p = Process(target=self.store_value,args=(el,))
p.start()
if __name__ == '__main__':
manager=Manager()
book1 = Storage(manager)
book1.write([0,1])
However, when I run this code, I get the following error
Traceback (most recent call last):
File "/Users/main_user/PycharmProjects/handle_queue/main.py", line 21, in <module>
book1.write([0,1])
File "/Users/main_user/PycharmProjects/handle_queue/main.py", line 13, in write
p.start()
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'weakref' object
What is wrong with my code?
Per Aaron's posted comment:
from multiprocessing import Process, Manager
class Storage():
def __init__(self, orderbooks):
self.orderbooks = orderbooks
def store_value(self, el):
self.orderbooks[el[0]] = el[1]
def write(self, el: list):
p = Process(target=self.store_value, args=(el,))
p.start()
# Ensure we do not return until store_value has
# completed updating the dictionary:
p.join()
if __name__ == '__main__':
manager = Manager()
orderbooks = manager.dict()
book1 = Storage(orderbooks)
book1.write([0, 1])
print(orderbooks)
Prints:
{0: 1}
I am trying to use multiprocessing in a class in the following code:
class test:
def __init__(self):
return
global calc_corr
#staticmethod
def calc_corr(idx, df1, df2):
arr1 = df1.iloc[idx:idx+5, :].values.flatten('F')
arr2 = df2.iloc[idx:idx+5, :].values.flatten('F')
df_tmp = pd.DataFrame([arr1, arr2]).T
df_tmp.dropna(how='any', inplace=True)
corr = df_tmp.corr().iloc[0, 1]
return corr
def aa(self):
df1 = pd.DataFrame(np.random.normal(size=(100, 6)))
df2 = pd.DataFrame(np.random.normal(size=(100, 6)))
with concurrent.futures.ProcessPoolExecutor() as executor:
results = [executor.submit(calc_corr, (i, df1, df2)) for i in range(20)]
for f in concurrent.futures.as_completed(results):
print(f.result())
if __name__ == '__main__':
t = test()
t.aa()
I am using a #staticmethod because it is not related to the class, it's just a computing tool. But using it raises the following error when running the code:
D:\anaconda3\python.exe C:/Users/jonas/Desktop/728_pj/test.py
concurrent.futures.process._RemoteTraceback:
"""
Traceback (most recent call last):
File "D:\anaconda3\lib\multiprocessing\queues.py", line 245, in _feed
obj = _ForkingPickler.dumps(obj)
File "D:\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'staticmethod' object
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\jonas\Desktop\728_pj\test.py", line 31, in <module>
t.aa()
File "C:\Users\jonas\Desktop\728_pj\test.py", line 26, in aa
print(f.result())
File "D:\anaconda3\lib\concurrent\futures\_base.py", line 438, in result
return self.__get_result()
File "D:\anaconda3\lib\concurrent\futures\_base.py", line 390, in __get_result
raise self._exception
File "D:\anaconda3\lib\multiprocessing\queues.py", line 245, in _feed
obj = _ForkingPickler.dumps(obj)
File "D:\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'staticmethod' object
Process finished with exit code 1
Can anyone help me fix this?
I think it is somehow caused by the staticmethod being declared as global. When I tried removing the global calc_corr line and changing
results = [executor.submit(calc_corr, (i, df1, df2)) for i in range(20)] to
results = [executor.submit(self.calc_corr, i, df1, df2) for i in range(20)] it seemed to work fine. I'm not actually sure of the reason what you wrote doesn't work but hopefully this will.
Note: Removing the tuple for the arguments is unrelated to this issue but was causing another issue afterwards.
I set up a try catch in my code, but it appears that my exception was not correct because it did not seem to catch it.
I am using an exception from a module, and perhaps I didn't import it correctly? Here is my code:
import logging
import fhirclient.models.bundle as b
from fhirclient.server import FHIRUnauthorizedException
logging.disable(logging.WARNING)
def get_all_resources(resource, struct, smart):
'''Perform a search on a resource type and get all resources entries from all retunred bundles.\n
This function takes all paginated bundles into consideration.'''
if smart.ready == False:
smart.reauthorize
search = resource.where(struct)
bundle = search.perform(smart.server)
resources = [entry.resource for entry in bundle.entry or []]
next_url = _get_next_url(bundle.link)
while next_url != None:
try:
json_dict = smart.server.request_json(next_url)
except FHIRUnauthorizedException:
smart.reauthorize
continue
bundle = b.Bundle(json_dict)
resources += [entry.resource for entry in bundle.entry or []]
next_url = _get_next_url(bundle.link)
return resources
Now when i ran the code I got the following error:
Traceback (most recent call last):
File "code.py", line 79, in <module>
main()
File "code.py", line 42, in main
reports = get_all_resources(dr.DiagnosticReport, search, smart)
File "somepath/fhir_tools/resource.py", line 23, in get_all_resources
json_dict = smart.server.request_json(next_url)
File "/usr/local/lib/python3.6/dist-packages/fhirclient/server.py", line 153, in request_json
res = self._get(path, headers, nosign)
File "/usr/local/lib/python3.6/dist-packages/fhirclient/server.py", line 181, in _get
self.raise_for_status(res)
File "/usr/local/lib/python3.6/dist-packages/fhirclient/server.py", line 256, in raise_for_status
raise FHIRUnauthorizedException(response)
server.FHIRUnauthorizedException: <Response [401]>
Shouldn't my exception catch this?
I have a scrapy script that works locally, but when I deploy it to Scrapinghub, it's giving all errors. Upon debugging, the error is coming from Yielding the item.
This is the error I get.
ERROR [scrapy.utils.signal] Error caught on signal handler: <bound method ?.item_scraped of <sh_scrapy.extension.HubstorageExtension object at 0x7fd39e6141d0>> Less
Traceback (most recent call last):
File "/usr/local/lib/python2.7/site-packages/twisted/internet/defer.py", line 150, in maybeDeferred
result = f(*args, **kw)
File "/usr/local/lib/python2.7/site-packages/pydispatch/robustapply.py", line 55, in robustApply
return receiver(*arguments, **named)
File "/usr/local/lib/python2.7/site-packages/sh_scrapy/extension.py", line 45, in item_scraped
item = self.exporter.export_item(item)
File "/usr/local/lib/python2.7/site-packages/scrapy/exporters.py", line 304, in export_item
result = dict(self._get_serialized_fields(item))
File "/usr/local/lib/python2.7/site-packages/scrapy/exporters.py", line 75, in _get_serialized_fields
value = self.serialize_field(field, field_name, item[field_name])
File "/usr/local/lib/python2.7/site-packages/scrapy/exporters.py", line 284, in serialize_field
return serializer(value)
File "/usr/local/lib/python2.7/site-packages/scrapy/exporters.py", line 290, in _serialize_value
return dict(self._serialize_dict(value))
File "/usr/local/lib/python2.7/site-packages/scrapy/exporters.py", line 300, in _serialize_dict
key = to_bytes(key) if self.binary else key
File "/usr/local/lib/python2.7/site-packages/scrapy/utils/python.py", line 117, in to_bytes
'object, got %s' % type(text).__name__)
TypeError: to_bytes must receive a unicode, str or bytes object, got int
It doesn't specify the field with issues, but by process of elimination, I came to realize it's this part of the code:
try:
item["media"] = {}
media_index = 0
media_content = response.xpath("//audio/source/#src").extract_first()
if media_content is not None:
item["media"][media_index] = {}
preview = item["media"][media_index]
preview["Media URL"] = media_content
preview["Media Type"] = "Audio"
media_index += 1
except IndexError:
print "Index error for media " + item["asset_url"]
I cleared some parts up to make it easier to tackle, but basically this part is the issue. Something it doesn't like about the item media.
I'm beginner in both Python and Scrapy. So sorry if this turns out to be silly basic Python mistake. Any idea?
EDIT: So after getting the answer from ThunderMind, the solution was to simply do str(media_index) for key
Yeah, right here:
item["media"][media_index] = {}
media_index is a mutable. and Keys can't be mutable.
Read Python dict, to know what should be used as keys.
I have a command as follows:
class AddChatMessages(Command):
arguments = [
('messages', AmpList([('message', Unicode()), ('type', Integer())]))]
And I have a responder for it in a controller:
def add_chat_messages(self, messages):
for i, m in enumerate(messages):
messages[i] = (m['message'], m['type'])
self.main.add_chat_messages(messages)
return {}
commands.AddChatMessages.responder(add_chat_messages)
I am writing a unit test for it. This is my code:
class AddChatMessagesTest(ProtocolTestMixin, unittest.TestCase):
command = commands.AddChatMessages
data = {'messages': [{'message': 'hi', 'type': 'None'}]}
def assert_callback(self, unused):
pass
Where ProtocolMixin is as follows:
class ProtocolTestMixin(object):
def setUp(self):
self.protocol = client.CommandProtocol()
def assert_callback(self, unused):
raise NotImplementedError("Has to be implemented!")
def test_responder(self):
responder = self.protocol.lookupFunction(
self.command.commandName)
d = responder(self.data)
d.addCallback(self.assert_callback)
return d
It works if AmpList is not involved, but when it is - I get following error:
======================================================================
ERROR: test_responder
----------------------------------------------------------------------
Traceback (most recent call last):
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/internet/defer.py", line 139, in maybeDeferred
result = f(*args, **kw)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/internet/utils.py", line 203, in runWithWarningsSuppressed
reraise(exc_info[1], exc_info[2])
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/internet/utils.py", line 199, in runWithWarningsSuppressed
result = f(*a, **kw)
File "/Users/<username>/Projects/space/tests/client_test.py", line 32, in test_responder
d = responder(self.data)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1016, in doit
kw = command.parseArguments(box, self)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1717, in parseArguments
return _stringsToObjects(box, cls.arguments, protocol)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 2510, in _stringsToObjects
argparser.fromBox(argname, myStrings, objects, proto)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1209, in fromBox
objects[nk] = self.fromStringProto(st, proto)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1465, in fromStringProto
boxes = parseString(inString)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 2485, in parseString
return cls.parse(StringIO(data))
TypeError: must be string or buffer, not list
Which makes sense, but how do I serialize a list in AddChatMessagesTest.data?
The responder expects to be called with a serialized box. It will then deserialize it, dispatch the objects to application code, take the object the application code returns, serialize it, and then return that serialized form.
For a few AMP types. most notably String, the serialized form is the same as the deserialized form, so it's easy to overlook this.
I think that you'll want to pass your data through Command.makeArguments in order to produce an object suitable to pass to a responder.
For example:
>>> from twisted.protocols.amp import Command, Integer
>>> class Foo(Command):
... arguments = [("bar", Integer())]
...
>>> Foo.makeArguments({"bar": 17}, None)
AmpBox({'bar': '17'})
>>>
If you do this with a Command that uses AmpList I think you'll find makeArguments returns an encoded string for the value of that argument and that the responder is happy to accept and parse that kind of string.