Multiprocessing map unorderable? - python

I have an operation using a dictionary that I want to parallelize, but multiprocessing.map is causing me headaches:
def dict_segmentor(dictionnary, n_processes):
    print("len dictionnary")
    print(len(dictionnary))
    if len(dictionnary) < n_processes:
        seg_size = len(dictionnary)
    else:
        seg_size = len(dictionnary) // n_processes
    print("segmenting dictionnary")
    print("seg_size " + str(seg_size))
    print("len(dictionnary) " + str(len(dictionnary)))
    itemlist = list(dictionnary.items())
    seg_ranges = [dict(itemlist[s:s+seg_size]) for s in range(1, len(dictionnary)+1, seg_size)]
    print("finished")
    return seg_ranges
def multiprocess_calc(n_processes, dictionnary, struc):
    dictionnary = dictionnary
    struc = struc
    seg_ranges1 = dict_segmentor(dictionnary, n_processes)
    # invoked to break the dict to be passed into a list of dicts. Works as expected
    print("seg_range_check")
    print("seg_ranges1 {}".format(type(seg_ranges1)))     # Returns a list as expected
    print("seg_ranges1 {}".format(type(seg_ranges1[0])))  # Returns a dict as expected
    print("seg_ranges1 {}".format(len(seg_ranges1)))      # Returns expected len=1
    print("seg_ranges1 {}".format(len(seg_ranges1[0])))   # Returns expected len
    processes = multiprocessing.Pool(n_processes)
    print("Mapping Building")
    processes.map(Builder, seg_ranges1, 1)
def main():
    file_multiprocess = 'pref_multiprocess.csv'
    n_CPUs = multiprocessing.cpu_count()
    n_processes = n_CPUs - 1
    print("\nNumber of CPUs detected:", n_CPUs)
    multiprocess_calc(n_processes, file_multiprocess, struc)

if __name__ == '__main__':
    main()
Here is the complete traceback:
Traceback (most recent call last):
File "<ipython-input-37-d0279721826c>", line 1, in <module>
runfile('Pyscript.py', wdir='C:/Python Scripts')
File "C:\Anaconda3\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 714, in runfile
execfile(filename, namespace)
File "C:\Anaconda3\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 89, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "Pyscript.py", line 1033, in <module>
main()
File "Pyscript.py", line 1025, in main
multiprocess_calc(n_processes, dictionnary,struc)
File "Pyscript.py", line 911, in multiprocess_calc
processes.map(Builder, seg_ranges1,1)
File "C:\Anaconda3\lib\multiprocessing\pool.py", line 260, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Anaconda3\lib\multiprocessing\pool.py", line 608, in get
raise self._value
TypeError: unorderable types: list() > int()
I don't understand this, even after reading the documentation carefully (https://docs.python.org/3.5/library/multiprocessing.html#module-multiprocessing).
Each chunk is a dict, so map should send it through to the builder.
Instead I get this error, and the traceback doesn't help. I looked into the code of pool.py but had no luck,
and my builder is not involved, since its first operation (a control print) is never displayed. The builder function seems to be ignored entirely (and there isn't even a syntax error).
So I conclude this is a problem with map.
In case I have misunderstood multiprocessing.map, and it first makes a chunk and then iterates over it, applying the function to each sub-element, what multiprocessing function could I use instead? apply uses only one worker at a time. Would that mean I have to do this manually?
Please feel free to correct my code and give me some insights. Thanks in advance.
Edit: here is the builder function:
def Builder(dictionary, struc=struc):
#def Builder(keys, dictionary=dictionary, struc=struc):  # Alternative
# Note: I even tried to use only the keys, passing the dictionary through a global variable, but it didn't work
    print("Building Tree")  # Not even displayed
    print("type dictionary " + str(type(dictionary)))
    frags = 0
    try:
        if True:
            print("Building")
            #for id in keys:  # Alternative
            for id in dictionary:
                seq = dictionary[id]
                for i in range(3):
                    frags += 1
                    if len(seq) <= 3:
                        break
                    seq = seq[i:-i]
                    struc.append(seq)
            print("Number of frags found {}".format(frags))
    except TypeError as e:
        print(e)
        print("error in Builder")

Related

Use multiprocess in class with python version == 3.9

I am trying to use multiprocessing in a class in the following code:
import concurrent.futures
import numpy as np
import pandas as pd

class test:
    def __init__(self):
        return
    global calc_corr
    @staticmethod
    def calc_corr(idx, df1, df2):
        arr1 = df1.iloc[idx:idx+5, :].values.flatten('F')
        arr2 = df2.iloc[idx:idx+5, :].values.flatten('F')
        df_tmp = pd.DataFrame([arr1, arr2]).T
        df_tmp.dropna(how='any', inplace=True)
        corr = df_tmp.corr().iloc[0, 1]
        return corr
    def aa(self):
        df1 = pd.DataFrame(np.random.normal(size=(100, 6)))
        df2 = pd.DataFrame(np.random.normal(size=(100, 6)))
        with concurrent.futures.ProcessPoolExecutor() as executor:
            results = [executor.submit(calc_corr, (i, df1, df2)) for i in range(20)]
            for f in concurrent.futures.as_completed(results):
                print(f.result())

if __name__ == '__main__':
    t = test()
    t.aa()
I am using a @staticmethod because it is not related to the class; it's just a computing tool. But running the code raises the following error:
D:\anaconda3\python.exe C:/Users/jonas/Desktop/728_pj/test.py
concurrent.futures.process._RemoteTraceback:
"""
Traceback (most recent call last):
File "D:\anaconda3\lib\multiprocessing\queues.py", line 245, in _feed
obj = _ForkingPickler.dumps(obj)
File "D:\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'staticmethod' object
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\jonas\Desktop\728_pj\test.py", line 31, in <module>
t.aa()
File "C:\Users\jonas\Desktop\728_pj\test.py", line 26, in aa
print(f.result())
File "D:\anaconda3\lib\concurrent\futures\_base.py", line 438, in result
return self.__get_result()
File "D:\anaconda3\lib\concurrent\futures\_base.py", line 390, in __get_result
raise self._exception
File "D:\anaconda3\lib\multiprocessing\queues.py", line 245, in _feed
obj = _ForkingPickler.dumps(obj)
File "D:\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'staticmethod' object
Process finished with exit code 1
Can anyone help me fix this?
I think it is somehow caused by the staticmethod being declared as global. When I tried removing the global calc_corr line and changing
results = [executor.submit(calc_corr, (i, df1, df2)) for i in range(20)]
to
results = [executor.submit(self.calc_corr, i, df1, df2) for i in range(20)]
it seemed to work fine. I'm not actually sure why what you wrote doesn't work, but hopefully this will.
Note: removing the tuple around the arguments is unrelated to this issue, but it was causing another issue afterwards.
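Putting those two changes together, a minimal runnable sketch of the fix (class and method names are the asker's; pandas and numpy are assumed to be installed):
import concurrent.futures
import numpy as np
import pandas as pd

class test:
    @staticmethod
    def calc_corr(idx, df1, df2):
        # correlation between two 5-row windows, flattened column-wise
        arr1 = df1.iloc[idx:idx+5, :].values.flatten('F')
        arr2 = df2.iloc[idx:idx+5, :].values.flatten('F')
        df_tmp = pd.DataFrame([arr1, arr2]).T
        df_tmp.dropna(how='any', inplace=True)
        return df_tmp.corr().iloc[0, 1]

    def aa(self):
        df1 = pd.DataFrame(np.random.normal(size=(100, 6)))
        df2 = pd.DataFrame(np.random.normal(size=(100, 6)))
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # submit takes the callable plus individual arguments, not a tuple
            futures = [executor.submit(self.calc_corr, i, df1, df2)
                       for i in range(20)]
            for f in concurrent.futures.as_completed(futures):
                print(f.result())

if __name__ == '__main__':
    t = test()
    t.aa()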

Calling a python script within another python script with args

Current Implementation which needs optimization
import subprocess

childprocess = subprocess.Popen(
    ['python',
     '/full_path_to_directory/called_script.py',
     'arg1',
     'arg2'],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE)
returnVal = childprocess.communicate()[0]
print(returnVal)
Is this a correct way to call another script (called_script.py) within the current working directory?
Is there a better way to call the other script? I used import script, but it gives me the error below.
called_script.py
def func(arg1, arg2, arg3):
    # doSomething
    # sys.out.write(returnVal)
    pass

if __name__ == "__main__":
    func(arg1, arg2, arg3)
Implementation 2 (throws an exception and errors out)
caller_script.py
Both of them are under the same path (i.e. /home/bin)
import called_script
returnVal = called_script.func(arg1,arg2,arg3)
print(returnVal)
Output:
nullNone
Traceback (most recent call last):
File "/path_to_caller/caller_script.py", line 89, in <module>
l.simple_bind_s(binddn, pw)
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 206, in simple_bind_s
msgid = self.simple_bind(who,cred,serverctrls,clientctrls)
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 200, in simple_bind
return self._ldap_call(self._l.simple_bind,who,cred,EncodeControlTuples(serverctrls),EncodeControlTuples(clientctrls))
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 96, in _ldap_call
result = func(*args,**kwargs)
TypeError: argument 2 must be string or read-only buffer, not None
Another alternative I used also gave me an error:
Implementation 3 (throws an exception and errors out)
caller_script.py
import ldap

returnVal = subprocess.call(['python', 'called_script.py', 'arg1', 'arg2'])
print(returnVal)
l = ldap.initialize(cp.get('some_config_ref', 'some_url'))
try:
    l.protocol_version = ldap.VERSION3
    l.simple_bind_s(binddn, returnVal)
except ldap.INVALID_CREDENTIALS:
    sys.stderr.write("Your username or password is incorrect.")
    sys.exit(1)
except ldap.LDAPError, e:
    if type(e.message) == dict and e.message.has_key('xyz'):
        sys.stderr.write(e.message['xyz'])
    else:
        sys.stderr.write(e)
    sys.exit(1)
Output:
returnVal0Traceback (most recent call last):
File "./path_to_script/caller_script.py", line 88, in <module>
l.simple_bind_s(binddn, pw)
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 206, in simple_bind_s
msgid = self.simple_bind(who,cred,serverctrls,clientctrls)
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 200, in simple_bind
return self._ldap_call(self._l.simple_bind,who,cred,EncodeControlTuples(serverctrls),EncodeControlTuples(clientctrls))
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 96, in _ldap_call
result = func(*args,**kwargs)
TypeError: argument 2 must be string or read-only buffer, not int
Here is an example where you call a function from another file: you pass one value, a list, which can contain an arbitrary amount of numbers, and you get back their sum. Make sure the two files are in the same directory, or you will need the full path. The function in your example called_script.py does not allow you to pass a value in.
called_script.py
def add_many(list_add):
    the_sum = sum(list_add)
    return the_sum
caller_script.py
import called_script
a_list = [1, 2, 3, 4]
the_sum = called_script.add_many(a_list)
print(the_sum)
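If the child really must run as a separate process, as in the subprocess version above, the called script has to read its arguments from sys.argv (your __main__ block currently calls func with undefined names). A hedged sketch with illustrative argument names:
import sys

def func(arg1, arg2):
    return "%s %s" % (arg1, arg2)

if __name__ == "__main__":
    # sys.argv[0] is the script path; the real arguments start at index 1
    sys.stdout.write(func(sys.argv[1], sys.argv[2]))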

Errors with Python and eyed3

import os
import eyed3

def files(path):
    for file in os.listdir(path):
        if os.path.isfile(os.path.join(path, file)):
            yield file

def title_alteration(music_artist_string):
    music_artist_string = music_artist_string.replace(';', ' feat. ', 1)
    semicolon_count = music_artist_string.count(';')
    music_artist_string = music_artist_string.replace(';', ', ', semicolon_count-1)
    music_artist_string = music_artist_string.replace(';', ' & ')
    return music_artist_string

def main():
    audio_files = eyed3.load(files('D:\\iTunes Music\\iTunes\\iTunes Media\\Music'))
    title_alteration(audio_files.tag.artist)

if __name__ == '__main__':
    main()
Can I get some help debugging this? I got it down to three distinct functions with some help from my last post; now I just need to know why it raises errors when I run it against this directory on my PC.
I'm getting these errors (TL;DR: it doesn't like line 20, the audio_files line):
Traceback (most recent call last):
File "D:/Pycharm Projects/Music Alterations v2.py", line 25, in <module>
main()
File "D:/Pycharm Projects/Music Alterations v2.py", line 20, in main
audio_files = eyed3.load(files('D:\\iTunes Music\\iTunes\\iTunes Media\\Music'))
File "C:\Users\cLappy\AppData\Local\Programs\Python\Python38\lib\site-packages\eyed3\core.py", line 74, in load
path = pathlib.Path(path)
File "C:\Users\cLappy\AppData\Local\Programs\Python\Python38\lib\pathlib.py", line 1038, in __new__
self = cls._from_parts(args, init=False)
File "C:\Users\cLappy\AppData\Local\Programs\Python\Python38\lib\pathlib.py", line 679, in _from_parts
drv, root, parts = self._parse_args(args)
File "C:\Users\cLappy\AppData\Local\Programs\Python\Python38\lib\pathlib.py", line 663, in _parse_args
a = os.fspath(a)
TypeError: expected str, bytes or os.PathLike object, not generator
Process finished with exit code 1
Your files function is a generator, to be used like an iterator. The eyed3.load function doesn't want a generator or an iterator; it wants a pathname or something like one. Passing a generator where a pathname is expected does not magically cause iteration over all the values the generator would generate. It would work better to make a list of all the pathnames of interest and then iterate over that list, calling eyed3.load for each pathname.
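A minimal sketch of that list-based approach (assuming eyed3 is installed; the directory path is the asker's, and files eyed3 cannot parse are skipped):
import os
import eyed3

def file_paths(path):
    # full pathnames, not bare filenames, so eyed3.load can open them
    return [os.path.join(path, f) for f in os.listdir(path)
            if os.path.isfile(os.path.join(path, f))]

def main():
    for p in file_paths('D:\\iTunes Music\\iTunes\\iTunes Media\\Music'):
        audio = eyed3.load(p)  # one pathname per call, not a generator
        if audio is not None and audio.tag is not None:
            print(audio.tag.artist)

if __name__ == '__main__':
    main()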

Scrapinghub Getting Error caught on signal handler: <bound method ? on Yield

I have a scrapy script that works locally, but when I deploy it to Scrapinghub it gives all kinds of errors. Upon debugging, the error comes from yielding the item.
This is the error I get.
ERROR [scrapy.utils.signal] Error caught on signal handler: <bound method ?.item_scraped of <sh_scrapy.extension.HubstorageExtension object at 0x7fd39e6141d0>> Less
Traceback (most recent call last):
File "/usr/local/lib/python2.7/site-packages/twisted/internet/defer.py", line 150, in maybeDeferred
result = f(*args, **kw)
File "/usr/local/lib/python2.7/site-packages/pydispatch/robustapply.py", line 55, in robustApply
return receiver(*arguments, **named)
File "/usr/local/lib/python2.7/site-packages/sh_scrapy/extension.py", line 45, in item_scraped
item = self.exporter.export_item(item)
File "/usr/local/lib/python2.7/site-packages/scrapy/exporters.py", line 304, in export_item
result = dict(self._get_serialized_fields(item))
File "/usr/local/lib/python2.7/site-packages/scrapy/exporters.py", line 75, in _get_serialized_fields
value = self.serialize_field(field, field_name, item[field_name])
File "/usr/local/lib/python2.7/site-packages/scrapy/exporters.py", line 284, in serialize_field
return serializer(value)
File "/usr/local/lib/python2.7/site-packages/scrapy/exporters.py", line 290, in _serialize_value
return dict(self._serialize_dict(value))
File "/usr/local/lib/python2.7/site-packages/scrapy/exporters.py", line 300, in _serialize_dict
key = to_bytes(key) if self.binary else key
File "/usr/local/lib/python2.7/site-packages/scrapy/utils/python.py", line 117, in to_bytes
'object, got %s' % type(text).__name__)
TypeError: to_bytes must receive a unicode, str or bytes object, got int
It doesn't specify the field with issues, but by process of elimination, I came to realize it's this part of the code:
try:
    item["media"] = {}
    media_index = 0
    media_content = response.xpath("//audio/source/@src").extract_first()
    if media_content is not None:
        item["media"][media_index] = {}
        preview = item["media"][media_index]
        preview["Media URL"] = media_content
        preview["Media Type"] = "Audio"
        media_index += 1
except IndexError:
    print "Index error for media " + item["asset_url"]
I trimmed some parts to make it easier to tackle, but basically this part is the issue; there is something about the item media that it doesn't like.
I'm a beginner in both Python and Scrapy, so sorry if this turns out to be a silly, basic Python mistake. Any idea?
EDIT: After getting the answer from ThunderMind, the solution was simply to use str(media_index) for the key.
Yeah, right here:
item["media"][media_index] = {}
media_index is an int. An int is a perfectly valid key for a plain Python dict, but the exporter serializes every key with to_bytes, which (as the traceback shows) accepts only unicode, str, or bytes, so convert the key with str(media_index).
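A short sketch of the fix the asker confirmed in their edit:
item["media"][str(media_index)] = {}  # string key, so the exporter's to_bytes accepts it
preview = item["media"][str(media_index)]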

How to assign a value to a sliced output signal?

I'm a beginner with MyHDL.
I'm trying to translate the following Verilog code to MyHDL:
module ModuleA(data_in, data_out, clk);
  input data_in;
  output reg data_out;
  input clk;

  always @(posedge clk) begin
    data_out <= data_in;
  end
endmodule

module ModuleB(data_in, data_out, clk);
  input [1:0] data_in;
  output [1:0] data_out;
  input clk;

  ModuleA instance1(data_in[0], data_out[0], clk);
  ModuleA instance2(data_in[1], data_out[1], clk);
endmodule
Currently, I have this code:
import myhdl

@myhdl.block
def ModuleA(data_in, data_out, clk):
    @myhdl.always(clk.posedge)
    def logic():
        data_out.next = data_in
    return myhdl.instances()

@myhdl.block
def ModuleB(data_in, data_out, clk):
    instance1 = ModuleA(data_in(0), data_out(0), clk)
    instance2 = ModuleA(data_in(1), data_out(1), clk)
    return myhdl.instances()

# Create signals
data_in = myhdl.Signal(myhdl.intbv()[2:])
data_out = myhdl.Signal(myhdl.intbv()[2:])
clk = myhdl.Signal(bool())

# Instantiate the DUT
dut = ModuleB(data_in, data_out, clk)

# Convert the DUT to Verilog
dut.convert()
But it doesn't work, because slicing a signal produces a read-only ShadowSignal (cf. MEP-105).
So what is the right way in MyHDL to get a writable slice of a signal?
Edit:
This is the error I get
$ python demo.py
Traceback (most recent call last):
File "demo.py", line 29, in <module>
dut.convert()
File "/home/killruana/.local/share/virtualenvs/myhdl_sandbox-dYpBu4o5/lib/python3.6/site-packages/myhdl-0.10-py3.6.egg/myhdl/_block.py", line 342, in convert
File "/home/killruana/.local/share/virtualenvs/myhdl_sandbox-dYpBu4o5/lib/python3.6/site-packages/myhdl-0.10-py3.6.egg/myhdl/conversion/_toVerilog.py", line 177, in __call__
File "/home/killruana/.local/share/virtualenvs/myhdl_sandbox-dYpBu4o5/lib/python3.6/site-packages/myhdl-0.10-py3.6.egg/myhdl/conversion/_analyze.py", line 170, in _analyzeGens
File "/usr/lib/python3.6/ast.py", line 253, in visit
return visitor(node)
File "/home/killruana/.local/share/virtualenvs/myhdl_sandbox-dYpBu4o5/lib/python3.6/site-packages/myhdl-0.10-py3.6.egg/myhdl/conversion/_analyze.py", line 1072, in visit_Module
File "/home/killruana/.local/share/virtualenvs/myhdl_sandbox-dYpBu4o5/lib/python3.6/site-packages/myhdl-0.10-py3.6.egg/myhdl/conversion/_misc.py", line 148, in raiseError
myhdl.ConversionError: in file demo.py, line 4:
Signal has multiple drivers: data_out
You can use an intermediate list of Signal(bool()) as a placeholder:
@myhdl.block
def ModuleB(data_in, data_out, clk):
    tsig = [myhdl.Signal(bool(0)) for _ in range(len(data_in))]
    instances = []
    for i in range(len(data_in)):
        instances.append(ModuleA(data_in(i), tsig[i], clk))

    @myhdl.always_comb
    def assign():
        for i in range(len(data_out)):
            data_out.next[i] = tsig[i]

    return myhdl.instances()
A quick (probably non-fulfilling) comment: the intbv is treated as a single entity that cannot have multiple drivers. Two references that might help shed some light:
http://jandecaluwe.com/hdldesign/counting.html
http://docs.myhdl.org/en/stable/manual/structure.html#converting-between-lists-of-signals-and-bit-vectors
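For the read direction, the second reference covers combining a list of signals back into a bit vector; a short hedged sketch (an aside, not part of the answer above) using myhdl.ConcatSignal, whose first argument becomes the most significant bit:
import myhdl

bits = [myhdl.Signal(bool(0)) for _ in range(2)]
vec = myhdl.ConcatSignal(bits[1], bits[0])  # read-only shadow; bits[1] is the MSB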
