Use multiprocessing in a class with Python 3.9

I am trying to use multiprocessing in a class in the following code:
import concurrent.futures
import numpy as np
import pandas as pd

class test:
    def __init__(self):
        return

    global calc_corr
    @staticmethod
    def calc_corr(idx, df1, df2):
        arr1 = df1.iloc[idx:idx+5, :].values.flatten('F')
        arr2 = df2.iloc[idx:idx+5, :].values.flatten('F')
        df_tmp = pd.DataFrame([arr1, arr2]).T
        df_tmp.dropna(how='any', inplace=True)
        corr = df_tmp.corr().iloc[0, 1]
        return corr

    def aa(self):
        df1 = pd.DataFrame(np.random.normal(size=(100, 6)))
        df2 = pd.DataFrame(np.random.normal(size=(100, 6)))
        with concurrent.futures.ProcessPoolExecutor() as executor:
            results = [executor.submit(calc_corr, (i, df1, df2)) for i in range(20)]
            for f in concurrent.futures.as_completed(results):
                print(f.result())

if __name__ == '__main__':
    t = test()
    t.aa()
I am using @staticmethod because calc_corr is not related to the class; it's just a computing tool. But using it raises the following error when running the code:
D:\anaconda3\python.exe C:/Users/jonas/Desktop/728_pj/test.py
concurrent.futures.process._RemoteTraceback:
"""
Traceback (most recent call last):
File "D:\anaconda3\lib\multiprocessing\queues.py", line 245, in _feed
obj = _ForkingPickler.dumps(obj)
File "D:\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'staticmethod' object
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\jonas\Desktop\728_pj\test.py", line 31, in <module>
t.aa()
File "C:\Users\jonas\Desktop\728_pj\test.py", line 26, in aa
print(f.result())
File "D:\anaconda3\lib\concurrent\futures\_base.py", line 438, in result
return self.__get_result()
File "D:\anaconda3\lib\concurrent\futures\_base.py", line 390, in __get_result
raise self._exception
File "D:\anaconda3\lib\multiprocessing\queues.py", line 245, in _feed
obj = _ForkingPickler.dumps(obj)
File "D:\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'staticmethod' object
Process finished with exit code 1
Can anyone help me fix this?

I think it is somehow caused by the staticmethod being declared as global. When I tried removing the global calc_corr line and changing
results = [executor.submit(calc_corr, (i, df1, df2)) for i in range(20)]
to
results = [executor.submit(self.calc_corr, i, df1, df2) for i in range(20)]
it seemed to work fine. My guess at the reason: the global statement inside the class body makes the decorator bind the raw staticmethod object in the module namespace, and in Python 3.9 a staticmethod object is neither callable nor picklable (it only became callable in 3.10); self.calc_corr, by contrast, resolves to the underlying plain function, which pickles by reference.
Note: Removing the tuple around the arguments is unrelated to this issue but would have caused another problem afterwards. A runnable sketch of the fixed version is below.
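A minimal runnable sketch of that working version, assuming the @staticmethod decorator is kept and only the global statement and the argument tuple are dropped:

import concurrent.futures
import numpy as np
import pandas as pd

class test:
    @staticmethod
    def calc_corr(idx, df1, df2):
        # Reached via self.calc_corr this resolves to the underlying
        # function, which pickles by its qualified name test.calc_corr.
        arr1 = df1.iloc[idx:idx+5, :].values.flatten('F')
        arr2 = df2.iloc[idx:idx+5, :].values.flatten('F')
        df_tmp = pd.DataFrame([arr1, arr2]).T
        df_tmp.dropna(how='any', inplace=True)
        return df_tmp.corr().iloc[0, 1]

    def aa(self):
        df1 = pd.DataFrame(np.random.normal(size=(100, 6)))
        df2 = pd.DataFrame(np.random.normal(size=(100, 6)))
        with concurrent.futures.ProcessPoolExecutor() as executor:
            results = [executor.submit(self.calc_corr, i, df1, df2)
                       for i in range(20)]
            for f in concurrent.futures.as_completed(results):
                print(f.result())

if __name__ == '__main__':
    test().aa()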

Related

How can I avoid numba error with recursion?

I have set up a function that iterates over combinations of characters to form strings.
It calls itself recursively; the recursive call looks like this:
testG(charNum - 1, arr2)
But when I call the function, I get this error:
>>> testSpeedGPU()
Traceback (most recent call last):
File "<pyshell#9>", line 1, in <module>
testSpeedGPU()
File "F:\Script Projects#\HASHFinder.py", line 90, in testSpeedGPU
testG(4, [''])
File "D:\Python\lib\site-packages\numba\cuda\dispatcher.py", line 40, in __call__
return self.compiled(*args, **kws)
File "D:\Python\lib\site-packages\numba\cuda\compiler.py", line 758, in __call__
kernel = self.specialize(*args)
File "D:\Python\lib\site-packages\numba\cuda\compiler.py", line 769, in specialize
kernel = self.compile(argtypes)
File "D:\Python\lib\site-packages\numba\cuda\compiler.py", line 784, in compile
kernel = compile_kernel(self.py_func, argtypes,
File "D:\Python\lib\site-packages\numba\core\compiler_lock.py", line 32, in _acquire_compile_lock
return func(*args, **kwargs)
TypeError: compile_kernel() got an unexpected keyword argument 'boundscheck'
Here is the function's body:
from numba import jit

# alp (the alphabet list) is defined elsewhere in the script
@jit(target="cuda")
def testG(charNum, inpArray) -> None:
    if charNum == 1:
        arr2 = []
        for s in range(len(inpArray)):
            for i in range(len(alp)):
                arr2.append(alp[i] + inpArray[s])
        return
    else:
        print("more than 1")
        arr2 = []
        for s in range(len(inpArray)):
            for i in range(len(alp)):
                arr2.append(alp[i] + inpArray[s])
        testG(charNum - 1, arr2)
I think it does have to do with the recursion, but I really don't know.
Thanks for your help!
PS: The function works when not marked with @jit(target="cuda") — a plain-Python sketch of that is below.
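To illustrate the PS: the same logic as a self-contained, decorator-free script runs without error, which points at the @jit(target="cuda") compilation path rather than the recursion logic itself. A minimal sketch (alp here is a hypothetical alphabet, since the original defines it elsewhere):

alp = list("abcdefghijklmnopqrstuvwxyz")  # hypothetical; the original script defines alp

def testG_plain(charNum, inpArray):
    # Same structure as testG, minus the numba decorator.
    if charNum == 1:
        arr2 = []
        for s in range(len(inpArray)):
            for i in range(len(alp)):
                arr2.append(alp[i] + inpArray[s])
        return
    else:
        print("more than 1")
        arr2 = []
        for s in range(len(inpArray)):
            for i in range(len(alp)):
                arr2.append(alp[i] + inpArray[s])
        testG_plain(charNum - 1, arr2)

if __name__ == "__main__":
    testG_plain(4, [''])  # recurses four levels, growing the candidate list each time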

Error when creating a Series in pandas

I am getting an error when working with a Series in pandas: whenever I try to display the Series I have created, an exception is raised.
The code I am running:
import pandas as pd
data2 = [1,2,3,4]
index = ['a','b','c','d']
s = pd.Series(data2, index)
print(s.shape)
s
The error:
Traceback (most recent call last):
File "<pyshell#6>", line 1, in <module>
s
File "C:\Python34\lib\idlelib\rpc.py", line 611, in displayhook
text = repr(value)
File "C:\Python34\lib\site-packages\pandas\core\base.py", line 80, in __repr__
return str(self)
File "C:\Python34\lib\site-packages\pandas\core\base.py", line 59, in __str__
return self.__unicode__()
File "C:\Python34\lib\site-packages\pandas\core\series.py", line 1060, in __unicode__
width, height = get_terminal_size()
File "C:\Python34\lib\site-packages\pandas\io\formats\terminal.py", line 33, in get_terminal_size
return shutil.get_terminal_size()
File "C:\Python34\lib\shutil.py", line 1071, in get_terminal_size
size = os.get_terminal_size(sys.__stdout__.fileno())
AttributeError: 'NoneType' object has no attribute 'fileno'
Your error is related to IDLE's pyshell, not to pandas: evaluating a bare s triggers the shell's displayhook, which asks pandas to render the Series, and that rendering queries the terminal size through sys.__stdout__, which is None under IDLE. Try running it through Python directly or in a Jupyter console; the code you provided is correct.
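For instance, the same code run as a standalone script (series_demo.py is a hypothetical filename, run with python series_demo.py) prints the Series without touching IDLE's displayhook — a minimal sketch:

import pandas as pd

data2 = [1, 2, 3, 4]
index = ['a', 'b', 'c', 'd']
s = pd.Series(data2, index)

print(s.shape)  # (4,)
print(s)        # explicit print; a bare `s` only echoes in an interactive shell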

APScheduler ValueError: The target callable does not accept the following keyword arguments:

I am running into an error when calling add_job with kwargs for a function. I created these functions to set up scheduled jobs:
from datetime import date

def initial_data_pull():
    # q is an RQ queue defined elsewhere in clock.py
    q.enqueue(initial_pull, timeout='3h')

def initial_data_load():
    # sched (the APScheduler scheduler) and date_between_list are defined elsewhere
    dates_list = date_between_list(start=date(2010, 1, 1), stop=date.today())
    naics = ['541611', '541618', '541613',
             '541511', '541512', '541513',
             '541519', '518210', '541612']
    fundings = ['3600', '97DH', '7504', '7505',
                '7522', '7523', '7524', '7526',
                '7527', '7528', '7529', '7530',
                '7570']
    for date_item in dates_list:
        for fund in fundings:
            for naic in naics:
                sched.add_job(initial_data_pull,
                              kwargs={'naics_code': naic,
                                      'funding_agency_id': fund,
                                      'date_signed': date_item})
The function initial_pull looks like:
def initial_pull(naics_code=None, funding_agency_id=None, date_signed=None):
    print('Gathering Contracts...')
    df = fpds_generic(naics_code=naics_code,
                      funding_agency_id=funding_agency_id,
                      date_signed=date_signed)
    # Code to process and load data
The function fpds_generic goes to FPDS and gathers data. I am getting the following error when I run the initial_data_load function:
Traceback (most recent call last):
File "clock.py", line 55, in <module>
initial_data_load()
File "clock.py", line 52, in initial_data_load
sched.add_job(initil_data_pull, kwargs=kw)
File "/home/spitfiredd/anaconda3/envs/g2x_flask/lib/python3.6/site-packages/apscheduler/schedulers/base.py", line 425, in add_job
job = Job(self, **job_kwargs)
File "/home/spitfiredd/anaconda3/envs/g2x_flask/lib/python3.6/site-packages/apscheduler/job.py", line 44, in __init__
self._modify(id=id or uuid4().hex, **kwargs)
File "/home/spitfiredd/anaconda3/envs/g2x_flask/lib/python3.6/site-packages/apscheduler/job.py", line 175, in _modify
check_callable_args(func, args, kwargs)
File "/home/spitfiredd/anaconda3/envs/g2x_flask/lib/python3.6/site-packages/apscheduler/util.py", line 385, in check_callable_args
', '.join(unmatched_kwargs))
ValueError: The target callable does not accept the following keyword arguments: naics_code, funding_agency_id, date_signed
Why is it saying that the function does not accept those keyword args when it does, and how do I fix this?
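One detail worth noting: APScheduler validates the kwargs against the callable actually handed to add_job — here initial_data_pull, which takes no parameters — not against initial_pull, which is never seen by the check. A minimal sketch of one way to satisfy it, assuming the intent is to forward the values through the queue (q, initial_pull, and the '3h' timeout are from the code above):

def initial_data_pull(naics_code=None, funding_agency_id=None, date_signed=None):
    # Accept the kwargs APScheduler passes and forward them to the worker.
    # timeout is assumed to be the RQ job-timeout argument in the RQ
    # version used here (newer RQ releases call it job_timeout).
    q.enqueue(initial_pull,
              naics_code=naics_code,
              funding_agency_id=funding_agency_id,
              date_signed=date_signed,
              timeout='3h')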

Class variable in multiprocessing

Here is my code:
import multiprocessing
import dill

class Some_class():
    class_var = 'Foo'

    def __init__(self, param):
        self.name = param

    def print_name(self):
        print("we are in object " + self.name)
        print(Some_class.class_var)

def run_dill_encoded(what):
    fun, args = dill.loads(what)
    return fun(*args)

def apply_async(pool, fun, args):
    return pool.apply_async(run_dill_encoded, (dill.dumps((fun, args)),))

if __name__ == '__main__':
    list_names = [Some_class('object_1'), Some_class('object_2')]
    pool = multiprocessing.Pool(processes=4)
    results = [apply_async(pool, Some_class.print_name, args=(x,)) for x in list_names]
    output = [p.get() for p in results]
    print(output)
It returns this error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Python34\lib\multiprocessing\pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "C:\...\temp_obj_output_standard.py", line 18, in run_dill_encoded
return fun(*args)
File "C:/...temp_obj_output_standard.py", line 14, in print_name
print(Some_class.class_var)
NameError: name 'Some_class' is not defined
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/...temp_obj_output_standard.py", line 31, in <module>
output = [p.get() for p in results]
File "C:/...temp_obj_output_standard.py", line 31, in <listcomp>
output = [p.get() for p in results]
File "C:\Python34\lib\multiprocessing\pool.py", line 599, in get
raise self._value
NameError: name 'Some_class' is not defined
Process finished with exit code 1
The code works fine without the line print(Some_class.class_var). What is wrong with accessing class variables? Both objects should have it, and I don't think the processes should conflict over it. Am I missing something? Any suggestions on how to troubleshoot it? Don't worry about run_dill_encoded and apply_async; I am using this solution until I can compile multiprocess on Python 3.x.
P.S. This is already enough, but Stack Overflow wants me to add more details; I'm not really sure what to put.
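Not a diagnosis of the NameError itself, but one way to sidestep the dill round-trip entirely is to keep the worker entry point a plain module-level function: it pickles by reference on any Python 3, and the class is resolved by the normal module re-import in the child process. A minimal sketch:

import multiprocessing

class Some_class():
    class_var = 'Foo'

    def __init__(self, param):
        self.name = param

    def print_name(self):
        print("we are in object " + self.name)
        print(Some_class.class_var)

def call_print_name(obj):
    # Module-level helper: pickled by reference, so the child process
    # re-imports this module and Some_class is defined there.
    return obj.print_name()

if __name__ == '__main__':
    list_names = [Some_class('object_1'), Some_class('object_2')]
    with multiprocessing.Pool(processes=4) as pool:
        results = [pool.apply_async(call_print_name, (x,)) for x in list_names]
        output = [p.get() for p in results]
    print(output)  # [None, None], plus the prints from the workers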

KeyError: 0 using multiprocessing in python

I have the following code, in which I try to call a function compute_cluster that does some computations and writes the results to a txt file (each process writes its results to a different txt file independently). However, when I run it:
from multiprocessing import Pool

# compute_cluster is defined elsewhere in RMSD_calc.py
def main():
    p = Pool(19)
    p.map(compute_cluster, [(l, r) for l in range(6, 25) for r in range(1, 4)])
    p.close()

if __name__ == "__main__":
    main()
it crashes with the following errors:
File "RMSD_calc.py", line 124, in <module>
main()
File "RMSD_calc.py", line 120, in main
p.map(compute_cluster, [(l, r) for l in range(6, 25) for r in range(1, 4)])
File "/usr/local/lib/python2.7/multiprocessing/pool.py", line 225, in map
return self.map_async(func, iterable, chunksize).get()
File "/usr/local/lib/python2.7/multiprocessing/pool.py", line 522, in get
raise self._value
KeyError: 0
When I searched online for the meaning of "KeyError: 0", I didn't find anything helpful, so any suggestion as to why this error happens is highly appreciated.
The KeyError happens inside compute_cluster() in a child process, and p.map() re-raises it for you in the parent:
from multiprocessing import Pool

def f(args):
    d = {}
    d[0]  # <-- raises KeyError

if __name__ == "__main__":
    p = Pool()
    p.map(f, [None])
Output
Traceback (most recent call last):
File "raise-exception-in-child.py", line 9, in <module>
p.map(f, [None])
File "/usr/lib/python2.7/multiprocessing/pool.py", line 227, in map
return self.map_async(func, iterable, chunksize).get()
File "/usr/lib/python2.7/multiprocessing/pool.py", line 528, in get
raise self._value
KeyError: 0
To see the full traceback, catch the exception in the child process:
import logging
from multiprocessing import Pool

def f(args):
    d = {}
    d[0]  # <-- raises KeyError

def f_mp(args):
    try:
        return f(args)
    except Exception:
        logging.exception("f(%r) failed" % (args,))

if __name__ == "__main__":
    p = Pool()
    p.map(f_mp, [None])
Output
ERROR:root:f(None) failed
Traceback (most recent call last):
File "raise-exception-in-child.py", line 10, in f_mp
return f(args)
File "raise-exception-in-child.py", line 6, in f
d[0] # <-- raises KeyError
KeyError: 0
It shows that d[0] caused the exception.
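The same idea can be packaged as a reusable decorator — a sketch; functools.wraps keeps the wrapper picklable under the original module-level name, and the re-raise preserves the failure for the parent:

import functools
import logging
from multiprocessing import Pool

def log_exceptions(func):
    # Log the full traceback from inside the child, then re-raise so
    # the parent's map() still sees the failure.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception:
            logging.exception("%s(%r) failed", func.__name__, args)
            raise
    return wrapper

@log_exceptions
def f(args):
    d = {}
    d[0]  # <-- raises KeyError

if __name__ == "__main__":
    p = Pool()
    p.map(f, [None])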
