Can anyone help me understand why this simple example of trying to speed up a for loop using python's multiprocessing module produces unstable results? I use a Manager.List to store the values from the child processes.
Clearly I'm doing at least one thing wrong. What would be the correct way to do this?
import numpy as np
import multiprocessing
from matplotlib import pyplot as plt
from functools import partial
from multiprocessing import Manager
def run_parallel(x_val, result):
    """Compute arctan(x_val) and append it to ``result``.

    Nothing is returned; the caller reads the values back from ``result``.
    """
    result.append(np.arctan(x_val))
def my_func(x_array, parallel=False):
    """Return arctan of every element of ``x_array`` as a list.

    With ``parallel`` false the values are computed in a plain loop, in
    input order. With ``parallel`` true a pool of 4 processes appends to
    a Manager list, and the list is returned in whatever order the
    appends happened.
    """
    if parallel:
        mgr = Manager()
        shared = mgr.list()
        workers = multiprocessing.Pool(4)
        # The return value of map() is ignored; the results are collected
        # only through the appends done by run_parallel.
        workers.map(partial(run_parallel, result=shared), x_array)
        return list(shared)

    return [np.arctan(k) for k in x_array]
# Driver: evaluate arctan on 50 points in [0.1, 1] both serially and with
# the process pool, then draw the two result lists against the same x axis.
test_x = np.linspace(0.1,1,50)
serial = my_func(test_x,parallel=False)
parallel = my_func(test_x,parallel=True)
plt.figure()
plt.plot(test_x, serial, label='serial')
plt.plot(test_x,parallel, label='parallel')
plt.legend(loc='best')
plt.show()
The output I'm getting looks like this
and it looks different every time this runs.
I added some print calls, and it turned out that the elements of x_array are processed in an arbitrary order... That's why the plot looks so weird. I think you should keep each argument together with its arctan value as a pair, and then sort the pairs by the argument value.
EDIT
I read more and it turned out that map returns values in order... This works as you wanted:
import numpy as np
import multiprocessing
from matplotlib import pyplot as plt
from functools import partial
from multiprocessing import Manager
def run_parallel(x_val, result):
    """Return arctan(x_val).

    ``result`` is accepted as a parameter but is not used in the body.
    """
    return np.arctan(x_val)
def my_func(x_array, parallel=False):
    """Return arctan of every element of ``x_array`` as a list, in input order.

    Args:
        x_array: iterable of numbers.
        parallel: when true, spread the work over a pool of 4 worker
            processes; otherwise compute in the calling process.

    Returns:
        A list with one arctan value per input element, ordered like
        ``x_array``.
    """
    if not parallel:
        return [np.arctan(k) for k in x_array]

    # Pool.map hands the results back in input order, so no shared Manager
    # list is needed; run_parallel does not use its ``result`` argument.
    # The ``with`` block shuts the worker processes down once map() is done.
    with multiprocessing.Pool(4) as pool:
        return pool.map(partial(run_parallel, result=None), x_array)
if __name__ == '__main__':
    # Guard the driver: with the "spawn" start method every worker process
    # imports this module, and unguarded top-level code would try to start
    # a new pool from inside each worker.
    test_x = np.linspace(0.1, 1, 50)
    parallel = my_func(test_x, parallel=True)

    plt.figure()
    plt.plot(test_x, parallel, label='parallel')
    plt.legend(loc='best')
    plt.show()
Related
I am working in a Jupyter notebook. I'm new to multiprocessing in python, and I'm trying to parallelize the calculation of a function for a grid of parameters. Here is a snippet of code quite representative of what I'm doing:
import os
import numpy as np
from concurrent.futures import ProcessPoolExecutor
def f(x,y):
    """Print the current process id, both arguments and their sum; return the sum."""
    total = x + y
    print(os.getpid(), x, y, total)
    return total
# Three (x, y) pairs: (5, 1), (6, 2), (7, 3).
xs = np.linspace(5, 7, 3).astype(int)
ys = np.linspace(1, 3, 3).astype(int)

# Adapter that unpacks one (x, y) tuple into the two arguments of f.
# NOTE(review): the callable handed to ProcessPoolExecutor.map has to be
# sent to the worker processes; a lambda generally cannot be pickled.
func = lambda p: f(*p)

with ProcessPoolExecutor() as executor:
    args = zip(xs, ys)
    results = executor.map(func, args)
    for res in results:
        print(res)
The executor doesn't even start.
No problem whatsoever if I serially execute the same with, e.g. list comprehension,
# Serial equivalent: call func on every (x, y) pair in the current process.
args = (arg for arg in zip(xs,ys))
results = [func(arg) for arg in args]
Are you running on Windows? I think your main problem is that each process is trying to re-execute your whole script, so you should include an `if __name__ == "__main__":` check. I think you have a second issue: you are trying to use a lambda function, which can't be pickled, and the processes communicate by pickling the data. There are work-arounds for that, but in this case it looks like you don't really need the lambda. Try something like this:
import os
import numpy as np
from concurrent.futures import ProcessPoolExecutor
def f(x, y):
    """Report pid, inputs and sum on stdout, then hand the sum back."""
    result = x + y
    print(os.getpid(), x, y, result)
    return result
if __name__ == '__main__':
    xs = np.linspace(5, 7, 3).astype(int)
    ys = np.linspace(1, 3, 3).astype(int)

    with ProcessPoolExecutor() as executor:
        # map() takes one iterable per positional argument of f, so xs and
        # ys are passed side by side instead of being zipped into tuples.
        results = executor.map(f, xs, ys)
        for res in results:
            print(res)
Is there any way to move the iterator in this example?
import tensorflow as tf
import numpy as np
from multiprocessing import Process, Queue
def store(batch, queue):
    """Put the same ``batch`` object onto ``queue`` over and over, forever."""
    while True:
        # ``batch`` is never refreshed inside the loop, so every item the
        # consumer reads is the value that was passed in.
        queue.put(batch)
if __name__=='__main__':
    pqueue = Queue()
    a1 = np.arange(1000)

    # Endlessly repeating dataset over a1, served one element per batch.
    m = tf.data.Dataset.from_tensor_slices(a1).repeat().batch(1)
    iter_m = m.make_one_shot_iterator()
    m_init_ops = iter_m.make_initializer(m)
    next_m = iter_m.get_next()

    with tf.Session() as sess:
        # next_m is evaluated exactly once here; the child process only
        # receives this single, already-fetched batch.
        batch = sess.run(next_m)
        pp_process = Process(target=store, args=(batch, pqueue))
        pp_process.daemon = True
        pp_process.start()
        for _ in range(10):
            print(pqueue.get())
My idea is to store processed data in the queue that can be accessed by tensorflow for training, unfortunately I could not advance the iterator. Any suggestions will be greatly appreciated.
The current output is
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
Tensorflow multithreading
The iterator is not advancing since you are technically only executing the get_next operation once: sess.run(next_m). If you were only using tensorflow multithreading, you could have obtained the desired results by simply moving it into the store function:
def store(sess, next_m, queue):
    """Forever: run ``next_m`` in ``sess`` and put the fetched value on ``queue``."""
    while True:
        batch = sess.run(next_m)
        queue.put(batch)
# batch = sess.run(next_m) <- Remove: store() now runs next_m itself on every iteration
pp_process = Thread(target=store,args=(sess, next_m, pqueue,)) # <- Thread, given the session, the get_next op and the queue
Tensorflow multiprocessing
However, for multiprocessing, you should also ensure that you never instantiate (fork) a new process after already having created a session since the session object is not serializable.
In your case, you can simply create a new session in the store function and start the main session after forking:
from multiprocessing import Process, Queue
import numpy as np
import tensorflow as tf
def store(next_m, queue):
    """Open a session of its own and keep putting fetched batches on ``queue``."""
    with tf.Session() as sess:
        while True:
            batch = sess.run(next_m)
            queue.put(batch)
if __name__ == '__main__':
    ...
    # Start the producer before any session exists in this process.
    pp_process = Process(target=store, args=(next_m, pqueue))
    pp_process.daemon = True
    pp_process.start()  # <- Fork before starting this session!

    with tf.Session() as sess:
        for _ in range(10):
            print(pqueue.get())
first a bit of context :
I'm trying to write down a python script to convert Image in greyscale (.tif) to a .jpeg with the so called ''jet'' colormap. I managed to do it with a for loop but it's a bit long for one image (millions of pixels to treat !), so I would like to use multiprocessing.
My problem here is that to convert each grey pixel into a coloured one I have to use two variables (the minimum value of light intensity ''min_img'' and an vector ''dx_cm'' to go from the initial grey scale to a 256 scale, corresponding to the jet colormap).
So to pass the information of ''min_img'' and ''dx_cm'' to the processes I try to use multiprocessing.Value() but in return I get the error :
RuntimeError: Synchronized objects should only be shared between processes through inheritance
I tried many different things from different sources and no matter the version of my code I'm struggling with that error. So I'm sorry if my code isn't clean, I would be very grateful if someone could help me with that.
My non-working code :
import multiprocessing
from PIL import Image
from matplotlib import cm
def fun(gr_list,dx,minp):
    """Convert a list of grey values into a flat list of jet colour components.

    ``dx`` and ``minp`` are read through their ``.value`` attribute. For
    each grey value the colormap result minus its last component is
    appended to the output.
    """
    step = dx.value
    lowest = minp.value
    rgb_res = []
    for grey in gr_list:
        colour = cm.jet(round(((grey - lowest) / step) - 1))
        rgb_res.extend(colour[0:-1])
    return rgb_res
if __name__ == '__main__':
    RGB_list = []
    n = multiprocessing.cpu_count()

    img = Image.open(r'some_path_to_a.tif')
    Img_grey = list(img.getdata())

    # Scale factor and offset wrapped in shared double-precision Values.
    dx_cm = multiprocessing.Value('d', (max(Img_grey) - min(Img_grey)) / 256)
    min_img = multiprocessing.Value('d', min(Img_grey))

    with multiprocessing.Pool(n) as p:
        # map() treats the 3-tuple as three separate work items, so the
        # Value objects themselves are sent to the workers.
        RGB_list = list(p.map(fun, (Img_grey, dx_cm, min_img)))

    size = (img.size[0], img.size[1])
    res = Image.frombytes("RGB", size, bytes([int(0.5 + 255*i) for i in RGB_list]))
    res.save('rgb_file.jpg')
PS: Here is an example of the initial for loop that I would like to parallelize:
from PIL import Image
from matplotlib import cm
if __name__ == '__main__':
    img = Image.open(r'some_path_to_a.tif')
    Img_grey = list(img.getdata())

    # Offset and step used to rescale the grey values onto the colormap.
    min_img = min(Img_grey)
    dx_cm = (max(Img_grey) - min(Img_grey)) / 256

    Img_rgb = []
    for grey in Img_grey:
        # Keep every component returned by the colormap except the last.
        Img_rgb.extend(cm.jet(round(((grey - min_img) / dx_cm) - 1))[0:-1])

    size = (img.size[0], img.size[1])
    res = Image.frombytes("RGB", size, bytes([int(0.5 + 255*i) for i in Img_rgb]))
    res.save('rgb_file.jpg')
Your fun method is looping over some list, but in this case it will receive a "part", an item from your list, so it should return only the result of its processing.
I have changed the working code to run with multiprocessing.
Since the fun method returns a list, p.map returns a list of lists (one result list per item), which needs to be flattened; in the original loop this was done with the list's extend method.
Tried with process pool and thread pool multiprocessing, in my scenario there wasn't any performance gains.
Process multiprocessing:
from PIL import Image
from matplotlib import cm
import multiprocessing
def fun(d):
    """Colour a single grey value.

    ``d`` is a ``(part, dx_cm, min_img)`` tuple. The return value is the
    ``cm.jet`` result for the rescaled value, without its last component.
    """
    grey, step, lowest = d
    index = round(((grey - lowest) / step) - 1)
    return cm.jet(index)[0:-1]
if __name__ == '__main__':
    img = Image.open(r'a.tif')
    Img_grey = list(img.getdata())

    def Gen(img_data):
        """Yield one (value, step, minimum) work item per grey value."""
        dx_cm = (max(img_data) - min(img_data)) / 256
        min_img = min(img_data)
        for part in img_data:
            yield part, dx_cm, min_img

    n = multiprocessing.cpu_count()
    with multiprocessing.Pool(n) as p:
        # map() returns one colour tuple per pixel; flatten them into a
        # single list of components.
        Img_rgb = []
        for sublist in p.map(fun, Gen(Img_grey)):
            Img_rgb.extend(sublist)

    size = (img.size[0], img.size[1])
    res = Image.frombytes("RGB", size, bytes([int(0.5 + 255*i) for i in Img_rgb]))
    res.save('b.jpg')
Thread multiprocessing:
from PIL import Image
from matplotlib import cm
import multiprocessing
from multiprocessing.pool import ThreadPool
if __name__ == '__main__':
    img = Image.open(r'a.tif')
    Img_grey = list(img.getdata())
    dx_cm = (max(Img_grey) - min(Img_grey)) / 256
    min_img = min(Img_grey)

    def fun(part):
        """Colour one grey value using the enclosing min_img and dx_cm."""
        index = round(((part - min_img) / dx_cm) - 1)
        return cm.jet(index)[0:-1]

    n = multiprocessing.cpu_count()
    with ThreadPool(n) as p:
        # Flatten the per-pixel colour tuples into one list of components.
        Img_rgb = []
        for sublist in p.map(fun, Img_grey):
            Img_rgb.extend(sublist)

    size = (img.size[0], img.size[1])
    res = Image.frombytes("RGB", size, bytes([int(0.5 + 255*i) for i in Img_rgb]))
    res.save('b.jpg')
So it seems that the computational burden isn't big enough for multiprocessing to be helpful.
Nevertheless, for those who come across this topic and are interested in the image-processing part of my question, I found another, much quicker way (15 to 20 times faster than the previous method) to do the same thing without a for loop:
from matplotlib import cm
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
from PIL import Image
# Vectorised conversion: the whole image goes through the colormap in a
# single NumPy expression instead of one call per pixel.
cm_jet = cm.get_cmap('jet')
img_src = Image.open(r'path to your grey image')
img_src.mode='I'
# The flat pixel list is only used to find the extreme grey values.
Img_grey = list(img_src.getdata())
max_img = max(Img_grey)
min_img = min(Img_grey)
# Rescale to [0, 1], apply the colormap, then scale to 0-255 bytes.
rgb_array=np.uint8(cm_jet(((np.array(img_src)-min_img)/(max_img-min_img)))*255)
ax = plt.subplot(111)
im = ax.imshow(rgb_array, cmap='jet')
# Colour bar in its own axes on the right-hand side of the image.
divider = make_axes_locatable(ax)
cax_plot = divider.append_axes("right", size="5%", pad=0.05)
cbar=plt.colorbar(im, cax=cax_plot, ticks=[0,63.75,127.5,191.25,255])
# Label the five ticks with the grey values they correspond to.
dx_plot=(max_img-min_img)/255
cbar.ax.set_yticklabels([str(min_img),str(round(min_img+63.75*dx_plot)),str(round(min_img+127.5*dx_plot)),str(round(min_img+191.25*dx_plot)), str(max_img)])
ax.axes.get_xaxis().set_visible(False)
ax.axes.get_yaxis().set_visible(False)
# NOTE(review): whether savefig accepts ``quality`` depends on the installed matplotlib version - confirm.
plt.savefig('test_jet.jpg', quality=95, dpi=1000)
I want to integrate a function that has no closed form solution with an unknown variable and then plot vs the unknown variable. To try a simpler test, I tried to use the integral of f(x,c) = (x^2+c), integrated with respect to x and plot with different values of c. However, the code below gets the error
only size-1 arrays can be converted to Python scalars
even though the integral of a number, e.g. integral(5), seems to return the correct scalar value.
import numpy as np
import matplotlib.pyplot as plt
from scipy import integrate
def f(x,c):
    """Integrand: x squared plus the constant c."""
    squared = x**2
    return squared + c
def integral(c):
    """Integrate f(x, c) over x from 0 to 10 and return the value only."""
    value, _abserr = integrate.quad(f, 0, 10, args=(c,))
    return value
# integral() receives the whole 200-element array as one value of c here.
y = np.linspace(0,20,200)
plt.plot(y, integral(y))
You pass a numpy array as the argument c while you wanted to integrate over x for all the items of c. Therefore you can use this:
def f(x,c):
    """Return x**2 + c, the function integrated over x."""
    x_squared = x**2
    return x_squared + c
def integrate_f(c):
    """Integrate f(x, c_i) over x from 0 to 10 for every c_i in ``c``.

    Args:
        c: iterable of values for the constant.

    Returns:
        1-D float array with one integral per element of ``c``, in order.
    """
    # quad documents ``args`` as a tuple; ``(item)`` without a trailing
    # comma is just ``item``, so the one-element tuple is spelled out.
    values = [integrate.quad(f, 0, 10, args=(item,))[0] for item in c]
    return np.array(values, dtype=float)
# Evaluate the integral for 200 values of c between 0 and 1 and plot it against c.
c_array = np.linspace(0,1,200)
plt.plot(c_array, integrate_f(c_array))
onno was a bit faster. But here is my similar solution. You need to loop over all the different c:
import numpy as np
import matplotlib.pyplot as plt
from scipy import integrate
def f(x,c):
    """Quadratic in x shifted by the constant c."""
    quad_term = x**2
    return quad_term + c
def getIntegral(c_list):
    """Integrate f(x, c) over x from 0 to 10 for every c in ``c_list``.

    Args:
        c_list: iterable of values for the constant.

    Returns:
        List with one integral value per element of ``c_list``, in order.
    """
    # quad documents ``args`` as a tuple, so each c is wrapped explicitly
    # instead of being passed as a bare scalar.
    return [integrate.quad(f, 0, 10, args=(c,))[0] for c in c_list]
if __name__ == "__main__":
    # 200 values of the constant between 0 and 20, one integral for each.
    c_list = np.linspace(0, 20, 200)
    integrals = getIntegral(c_list)
    plt.plot(c_list, integrals)
    plt.show()
I have a large number of files to process. I have written a script that get, sort and plot the datas I want. So far, so good. I have tested it and it gives the desired result.
Then I wanted to do this using multithreading. I have looked into the docs and examples on the internet, and using one thread in my program works fine. But when I use more, at some point I get random matplotlib errors. I suspect some conflict there, even though I use a function with named figures for the plots, and I can't see where the problem could be.
Here is the whole script should you need more comment, i'll add them. Thank you.
#!/usr/bin/python
import matplotlib
matplotlib.use('GTKAgg')
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
import matplotlib.colors as mcl
from matplotlib import rc #for latex
import time as tm
import sys
import threading
import Queue #queue in 3.2 and Queue in 2.7 !
import pdb #the debugger
rc('text', usetex=True)#for latex
map=0 #initialize the map index. It will be use to index the array like this: array[map,[x,y]]
time=np.zeros(1) #an array to store the time
middle_h=np.zeros((0,3)) #x phi c
#for the middle of the box
# Rows are collected in a list and stacked once at the end; stacking
# inside the loop copies the whole array for every line of the file.
profile_rows=[]
with open("single_void_cyl_periodic_phi_c_middle_h_out",'r') as current_file:
    for line in current_file:
        if line.startswith('# === time'):
            map+=1
            # np.append returns a new array, so the result has to be
            # rebound or the time value is lost.
            time=np.append(time,[float(line.strip('# === time '))])
        elif line.startswith('#'):
            pass
        else:
            v=np.fromstring(line,dtype=float,sep=' ')
            profile_rows.append(v[[1,3,4]])  # columns kept: x, phi, c
if profile_rows:
    middle_h=np.vstack(profile_rows)
middle_h=middle_h.reshape((map,-1,3)) #3d array: map, x, phi,c
#####
def load_and_plot(): #will load a map file, and plot it along with the corresponding profile loaded before
    """Worker loop: take map indices from ``tasks_queue`` and save one PNG per map.

    Runs until the module-level ``exit_flag`` becomes true. For each index
    the matching map file is loaded, negative values are clamped to 0, and
    a figure with the phi profile, the c profile and a filled contour of
    the map is written to ``single_void_cyl_<index>.png``. When the queue
    is empty the worker sleeps 2 seconds and checks again.

    NOTE(review): the figure is built through pyplot, which keeps global
    "current figure" state; calling it from several threads at once can
    interfere - confirm against the matplotlib documentation.
    """
    while not exit_flag:
        print("fecthing work ...")
        # Checking empty() and then calling a blocking get() is racy with
        # several workers: another thread can take the last item in
        # between and this one would block forever. Ask without blocking.
        try:
            map_index=tasks_queue.get_nowait()
        except Queue.Empty:
            print("nothing left to do, other threads finishing,sleeping 2 seconds...")
            tm.sleep(2)
            continue
        try:
            print("----> working on map: %s" %map_index)
            x,y,zp=np.loadtxt("single_void_cyl_growth_periodic_post_map_"+str(map_index),unpack=True, usecols=[1, 2,3])
            zp[zp<0.]=0.  # clamp negative values
            xv=np.unique(x)
            yv=np.unique(y)
            X,Y= np.meshgrid(xv,yv)
            Z = griddata((x, y), zp, (X, Y),method='nearest')
            figure=plt.figure(num=map_index,figsize=(14, 8))
            ax1=plt.subplot2grid((2,2),(0,0))
            ax1.plot(middle_h[map_index,:,0],middle_h[map_index,:,1],'*b')
            ax1.grid(True)
            ax1.axis([-15, 15, 0, 1])
            ax1.set_title('Profiles')
            ax1.set_ylabel(r'$\phi$')
            ax1.set_xlabel('x')
            ax2=plt.subplot2grid((2,2),(1,0))
            ax2.plot(middle_h[map_index,:,0],middle_h[map_index,:,2],'*r')
            ax2.grid(True)
            ax2.axis([-15, 15, 0, 1])
            ax2.set_ylabel('c')
            ax2.set_xlabel('x')
            ax3=plt.subplot2grid((2,2),(0,1),rowspan=2,aspect='equal')
            sub_contour=ax3.contourf(X,Y,Z,np.linspace(0,1,11),vmin=0.)
            figure.colorbar(sub_contour,ax=ax3)
            figure.savefig('single_void_cyl_'+str(map_index)+'.png')
            plt.close(map_index)
        finally:
            # Always account for the item, even when plotting raised;
            # otherwise tasks_queue.join() in the main thread never returns.
            tasks_queue.task_done()
#####
exit_flag = 0
nb_threads = 2
tasks_queue = Queue.Queue()
threads_list = []
jobs = list(range(map))  # each job is composed of a map

print("inserting jobs in the queue...")
for job in jobs:
    tasks_queue.put(job)
print("done")

# launch the threads
for i in range(nb_threads):
    working_bee = threading.Thread(target=load_and_plot)
    working_bee.daemon = True
    print("starting thread "+str(i)+' ...')
    threads_list.append(working_bee)
    working_bee.start()

# wait for all tasks to be treated
tasks_queue.join()

# flip the flag, so the threads know it's time to stop
exit_flag = 1
for t in threads_list:
    print("waiting for threads %s to stop..."%t)
    t.join()
print("all threads stopped")
Following David's suggestion, I did it with multiprocessing. I get a 5-fold speed-up with 8 processors. I believe the rest is due to the single-process work at the beginning of my script.
edit: However sometimes the script "hangs" at the last map, even though it produces the right maps, with the following error:
File "single_void_cyl_plot_mprocess.py", line 90, in tasks_queue.join()
File "/usr/local/epd-7.0-2-rh5-x86_64/lib/python2.7/multiprocessing/queues.py", line 316, in join self._cond.wait()
File "/usr/local/epd-7.0-2-rh5-x86_64/lib/python2.7/multiprocessing/synchronize.py", line 220, in wait self._wait_semaphore.acquire(True, timeout)
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
from matplotlib import rc #for latex
from multiprocessing import Process, JoinableQueue
import pdb #the debugger
rc('text', usetex=True)#for latex
map=0 #initialize the map index. It will be use to index the array like this: array[map,x,y,...]
time=np.zeros(1) #an array to store the time
middle_h=np.zeros((0,3)) #x phi c
#for the middle of the box
# Rows are collected in a list and stacked once at the end; stacking
# inside the loop copies the whole array for every line of the file.
profile_rows=[]
with open("single_void_cyl_periodic_phi_c_middle_h_out",'r') as current_file:
    for line in current_file:
        if line.startswith('# === time'):
            map+=1
            # np.append returns a new array, so the result has to be
            # rebound or the time value is lost.
            time=np.append(time,[float(line.strip('# === time '))])
        elif line.startswith('#'):
            pass
        else:
            v=np.fromstring(line,dtype=float,sep=' ')
            profile_rows.append(v[[1,3,4]])  # columns kept: x, phi, c
if profile_rows:
    middle_h=np.vstack(profile_rows)
middle_h=middle_h.reshape((map,-1,3)) #3d array: map, x, phi,c
#######
def load_and_plot(): #will load a map file, and plot it along with the corresponding profile loaded before
    """Worker loop: take map indices from ``tasks_queue`` and save one PNG per map.

    For each index the matching map file is loaded, negative values are
    clamped to 0, and a figure with the phi profile, the c profile and a
    filled contour of the map is written to ``single_void_cyl_<index>.png``.
    The worker returns once no item has arrived for one second. A map that
    fails is reported on stdout and still counted as done.
    """
    # Imported here so the block works on Python 2 and 3 without touching
    # the file's import header.
    try:
        from queue import Empty
    except ImportError:
        from Queue import Empty

    while True:
        print("fecthing work ...")
        # empty() followed by a blocking get() is racy: another process can
        # take the last item in between, leaving this one blocked forever.
        # A get() with a timeout either returns an item or ends the loop.
        try:
            map_index=tasks_queue.get(timeout=1) #get some work to do from the queue
        except Empty:
            break
        try:
            print("----> working on map: %s" %map_index)
            x,y,zp=np.loadtxt("single_void_cyl_growth_periodic_post_map_"+str(map_index),
                              unpack=True, usecols=[1, 2,3])
            zp[zp<0.]=0.  # clamp negative values
            xv=np.unique(x)
            yv=np.unique(y)
            X,Y= np.meshgrid(xv,yv)
            Z = griddata((x, y), zp, (X, Y),method='nearest')
            figure=plt.figure(num=map_index,figsize=(14, 8))
            ax1=plt.subplot2grid((2,2),(0,0))
            ax1.plot(middle_h[map_index,:,0],middle_h[map_index,:,1],'*b')
            ax1.grid(True)
            ax1.axis([-15, 15, 0, 1])
            ax1.set_title('Profiles')
            ax1.set_ylabel(r'$\phi$')
            ax1.set_xlabel('x')
            ax2=plt.subplot2grid((2,2),(1,0))
            ax2.plot(middle_h[map_index,:,0],middle_h[map_index,:,2],'*r')
            ax2.grid(True)
            ax2.axis([-15, 15, 0, 1])
            ax2.set_ylabel('c')
            ax2.set_xlabel('x')
            ax3=plt.subplot2grid((2,2), (0,1),rowspan=2,aspect='equal')
            sub_contour=ax3.contourf(X,Y,Z,np.linspace(0,1,11),vmin=0.)
            figure.colorbar(sub_contour,ax=ax3)
            figure.savefig('single_void_cyl_'+str(map_index)+'.png')
            plt.close(map_index)
        except Exception:
            print("failed this time: %s" %map_index)
        finally:
            # Every fetched item must be acknowledged, including failed
            # ones; a missing task_done() makes tasks_queue.join() in the
            # parent wait forever.
            tasks_queue.task_done() #work for this item finished
#######
nb_proc = 8  # number of processes
tasks_queue = JoinableQueue()  # a queue to pile up the work to do
jobs = list(range(map))  # each job is composed of a map

print("inserting jobs in the queue...")
for job in jobs:
    tasks_queue.put(job)
print("done")

# launch the processes
for i in range(nb_proc):
    current_process = Process(target=load_and_plot)
    current_process.start()

# wait for all tasks to be treated
tasks_queue.join()