Thread pool is slow and same speed as serial

Thread pool is slow and same speed as serial - python

I'm trying to speed up the computation for an extensive real-time object detection pipeline and the processing done on its results.
I'm using OpenCV with a thread pool and a producer/consumer pattern for the video capture, but the execution speed is the same as the serial version.
How can I improve the execution speed?
if __name__ == "__main__":
    # Capture loop: frames are read from the video, handed to `pipeline`
    # (through the thread pool, or through DummyTask when threading is
    # toggled off with the space bar) and the finished results are shown in
    # submission order. Esc quits.
    video_name = '2016-11-18_07-30-01.h264'
    cap = cv2.VideoCapture(video_name)
    det = detector.CarDetector()
    car_tracker = Sort_Algorithm.Sort()
    ped_tracker = Sort_Algorithm.Sort()
    df_region, df_line = load_filter()
    region = Region(df_region)
    distance = compute_max_polygon_diagonal(df_region) * 0.1
    region_buffered = region.buffer(distance)
    threadn = cv2.getNumberOfCPUs()
    pool = ThreadPool(processes = 2)
    pending = deque()
    threaded_mode = True
    lock = threading.Lock()
    # The perspective matrix is constant, so build it once instead of on
    # every captured frame.
    H = [-2.01134074616, -16.6502442427, -1314.05715739, -3.35391526592, -22.3546973012, 2683.63584335,
         -0.00130731963137, -0.0396207582264, 1]
    matrix = np.reshape(H, (3, 3))
    while True:
        # Show every finished result, oldest first.
        while len(pending) > 0 and pending[0].ready():
            res = pending.popleft().get()
            cv2.imshow('video ', res)
        if len(pending) < threadn:
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: `frame` is not usable.
                break
            # Computed for both modes so that `dst` and `matrix` are always
            # defined when the non-threaded branch is taken.
            dst = cv2.warpPerspective(frame.copy(), matrix, (frame.shape[1], frame.shape[0]))
            args = (lock, frame.copy(), car_tracker, ped_tracker, df_region,
                    region_buffered, df_line, det, dst, matrix)
            if threaded_mode:
                task = pool.apply_async(pipeline, args)
                cv2.imshow('dst', dst)
            else:
                task = DummyTask(pipeline, args)
            pending.append(task)
        ch = cv2.waitKey(1)
        if ch == ord(' '):
            threaded_mode = not threaded_mode
        if ch == 27:
            break
The code for pipeline:
def _ground_position(box, H):
    """Map the centre of a tracker box through the perspective matrix *H*.

    The centre is built as ((box[1] + box[3]) / 2, (box[0] + box[2]) / 2),
    passed to ``cv2.perspectiveTransform`` and the two resulting coordinates
    are returned as a ``(ptx, pty)`` tuple.
    """
    p = np.array([[((box[1] + box[3]) / 2, (box[0] + box[2]) / 2)]], dtype=np.float32)
    center_pt = cv2.perspectiveTransform(p, H)
    return center_pt.T.item(0), center_pt.T.item(1)


def pipeline(lock, img, car_tracker, ped_tracker, df_region, region_buffered, df_line, det, dst, H):
    """Detect, track and annotate one frame, then assess car/pedestrian interaction.

    Runs the detector on *img*, updates both trackers, draws the region and
    line on *dst* and the tracked boxes on *img*, and accumulates per-track
    rows from ``compute`` into the module-level ``genera_data_pd_cars`` /
    ``genera_data_pd_peds`` frames. When both filtered frames hold more than
    one row, a ``Behavior`` is built and the resulting interaction message is
    printed.

    Returns *img* with the boxes drawn on it.

    NOTE: the whole body runs while *lock* is held, so calls from several
    pool workers execute one after another. That protects the shared trackers
    and module-level state, but it also means a thread pool cannot run this
    function faster than a serial loop.
    """
    global frame_idx
    global genera_data_pd_cars
    global genera_data_pd_peds
    # `with` releases the lock even if a step below raises; a bare
    # acquire()/release() pair would leave every other worker blocked forever.
    with lock:
        car_box, ped_box = det.get_localization(img)
        car_detections = car_tracker.update(np.array(car_box))
        ped_detections = ped_tracker.update(np.array(ped_box))
        saved_region = df_region.values
        saved_region = np.delete(saved_region, 2, 1)
        frame_idx += 1
        # NOTE(review): the result of this call is discarded; kept unchanged.
        cv2.warpPerspective(np.array(df_line, dtype=np.float32), H, (df_line.shape[1], df_line.shape[0]))
        cv2.polylines(dst, np.int32([[saved_region]]), False, color=(255, 0, 0))
        cv2.polylines(dst, np.int32([np.array(df_line, dtype=np.float32)]), False, color=(255, 0, 0))
        # Draw the bounding boxes on the frame.
        for trk in car_detections:
            trk = trk.astype(np.int32)
            helpers.draw_box_label(img, trk, trk[4])
        for other in ped_detections:
            other = other.astype(np.int32)
            helpers.draw_box_label(img, other, other[4])
        # NOTE(review): DataFrame.append was removed in pandas 2.0; switch to
        # pd.concat once the type returned by compute() is confirmed.
        for trk in car_detections:
            trk = trk.astype(np.int32)
            ptx, pty = _ground_position(trk, H)
            df_cars = compute(trk[4], ptx, pty, frame_idx, df_region, region_buffered, df_line)
            genera_data_pd_cars = genera_data_pd_cars.append(df_cars)
        for other in ped_detections:
            other = other.astype(np.int32)
            ptx, pty = _ground_position(other, H)
            df_peds = compute(other[4], ptx, pty, frame_idx, df_region, region_buffered, df_line)
            # Fixed: pedestrian rows were appended to the *cars* frame, which
            # overwrote the pedestrian history with car data on every update.
            genera_data_pd_peds = genera_data_pd_peds.append(df_peds)
        query = "is_in_region == True and is_in_region_now == True"
        df_peds = genera_data_pd_peds.query(query)
        query = " is_in_region == True"
        df_cars = genera_data_pd_cars.query(query)
        if len(df_cars) > 1 and len(df_peds) > 1:
            df_car_in_t_range_ped = select_slice(df_cars, df_peds)
            df_ped_in_t_range_car = select_slice(df_peds, df_cars)
            t_abs_crossing_car = df_cars['t_abs_at_crossing'].iloc[0]
            t_abs_crossing_ped = df_peds['t_abs_at_crossing'].iloc[0]
            dt_crossing = t_abs_crossing_car - t_abs_crossing_ped
            is_able_to_pass_before_ped = \
                ((df_car_in_t_range_ped['t_abs_at_crossing_estimated'] -
                  t_abs_crossing_ped) > 0).any()
            behavior = Behavior(  # is_passed_before_ped
                dt_crossing < 0,
                # is_able_to_stop
                df_car_in_t_range_ped['is_able_to_halt'].any(),
                # is_too_fast
                df_car_in_t_range_ped['is_too_fast'].any(),
                # is_close_enough
                df_car_in_t_range_ped['is_close_enough'].any(),
                # is_able_to_pass_before_ped
                is_able_to_pass_before_ped)
            # `trk` / `other` are the last car / pedestrian tracks of this frame.
            interaction = Interaction(trk[4], other[4])
            interaction = interaction.assess_behavior(behavior)
            code, res, msg = interaction.code, interaction.res, interaction.msg
            print(msg)
            # NOTE(review): the source lost its indentation; the reset is
            # assumed to belong to this branch (clear history once an
            # interaction has been assessed) -- confirm.
            genera_data_pd_cars = genera_data_pd_cars.iloc[0:0]
            genera_data_pd_peds = genera_data_pd_peds.iloc[0:0]
    return img

Multi-threading in Python for CPU-bound tasks is limited by the GIL, which effectively lets only one thread run at a time.
Of course, if you launch multiple threads for CPU-bound tasks, the performance is going to be even worse, because there is a lot of overhead for both the kernel and the Python interpreter in managing these threads.
The kernel wants to schedule these threads while Python wants to keep them from running simultaneously, and this results in a lot of context switches, which degrades performance.
If you are using just numpy in the threads then you would be fine, as numpy is largely unaffected by the GIL because it releases it while running many of its native routines, but I am not sure if that is true for OpenCV as well.
Threads in Python aren't meant for computation tasks.
This is a classic problem with threads in Python; consider using multiprocessing. There are a number of articles on this topic, and you might want to check a few of them.

Threads aren't executed in parallel in cpython. Try using the ProcessPoolExecutor instead.

Related

opencv - cuda python: Erode and Dilate are slower on GPU than CPU. Why?

I try to move my code from CPU to Cuda on Opencv-python (v 4.4.5) with a Quadro P1000.
I experienced a massive gain in speed for background subtraction, but morphological operations are slower than on CPU.
There are several related questions to this, but most of them are related to c++ or do not propose any useful ideas. (Q1, Q2, Q3)
My code for testing returns that the CPU implementation for the morphological operator is at 20 fps, while the GPU implementation is at 10 FPS on 4k video stream.
import cv2
from vidgear.gears import VideoGear
from tqdm import tqdm
from time import time
#print(cv2.__file__)
# Select CUDA device 1 and print its properties.
cv2.cuda.setDevice(1)
cv2.cuda.printCudaDeviceInfo(1)

# Rectangular structuring elements of side (2 * size + 1), anchored at the
# centre, shared by the CPU and the GPU filters.
erosion_size = 3
erosion_shape = cv2.MORPH_RECT
_erosion_side = 2 * erosion_size + 1
element_erosion = cv2.getStructuringElement(
    erosion_shape,
    (_erosion_side, _erosion_side),
    (erosion_size, erosion_size),
)

dilatation_size = 3
dilation_shape = cv2.MORPH_RECT
_dilation_side = 2 * dilatation_size + 1
element_dilation = cv2.getStructuringElement(
    dilation_shape,
    (_dilation_side, _dilation_side),
    (dilatation_size, dilatation_size),
)
def morphological_filter(img):
    """CPU morphology: one erosion, a 3-iteration dilation, then one more dilation.

    Uses the module-level ``element_erosion`` / ``element_dilation`` kernels
    and returns the filtered image.
    """
    eroded = cv2.erode(img, element_erosion, iterations=1)
    grown = cv2.dilate(eroded, element_dilation, iterations=3)
    return cv2.dilate(grown, element_dilation)
# Open the video and prepare the GPU-side objects used by both benchmarks.
path = "path/to/vid"
cap = VideoGear(source=path, backend=cv2.CAP_FFMPEG).start()
backSub = cv2.cuda.createBackgroundSubtractorMOG2()

# Upload the first frame and convert it to gray on the GPU; its size and type
# are used to configure the buffers and filters below.
img_c = cv2.cuda_GpuMat()
img = cap.read()
img_c.upload(img)
img_c = cv2.cuda.cvtColor(img_c, cv2.COLOR_BGR2GRAY)
background = cv2.cuda_GpuMat(img_c.size(), img_c.type())
print(img_c.size())

dilation_cuda = cv2.cuda.createMorphologyFilter(
    cv2.MORPH_DILATE, img_c.type(), element_dilation)
erosion_cuda = cv2.cuda.createMorphologyFilter(
    cv2.MORPH_ERODE, img_c.type(), element_erosion)
def morphological_filter_gpu(img):
    """GPU morphology: one erosion followed by two dilations.

    Applies the pre-built ``erosion_cuda`` / ``dilation_cuda`` filters to
    *img* and returns the result of the last filter.
    """
    eroded = erosion_cuda.apply(img)
    once_dilated = dilation_cuda.apply(eroded)
    return dilation_cuda.apply(once_dilated)
# Benchmark 1: background subtraction on the GPU, morphology on the CPU.
start = time()
frames_done = 0
for i in tqdm(range(1000)):
    img = cap.read()
    # `type(img) == None` is never true (a type object is not None), so the
    # end of the stream was never detected; compare the frame itself.
    if img is None:
        break
    img_c.upload(img)
    img_c = cv2.cuda.cvtColor(img_c, cv2.COLOR_BGR2GRAY)
    img_d = backSub.apply(img_c, -1, cv2.cuda_Stream.Null())
    res = img_d.download()
    res = morphological_filter(res)
    frames_done += 1
end = time()
cv2.destroyAllWindows()
print("time elapsed: ", end-start)
# Divide by the frames actually processed, not a hard-coded 1000, so the
# figure stays correct when the video ends early.
print("FPS ",frames_done/(end-start))
# Benchmark 2: background subtraction and morphology both on the GPU.
path = "path/to/vid"
cap = VideoGear(source=path,backend = cv2.CAP_FFMPEG).start()
start = time()
frames_done = 0
for i in tqdm(range(1000)):
    img = cap.read()
    # `type(img) == None` is never true (a type object is not None), so the
    # end of the stream was never detected; compare the frame itself.
    if img is None:
        break
    img_c.upload(img)
    img_c = cv2.cuda.cvtColor(img_c, cv2.COLOR_BGR2GRAY)
    img_d = backSub.apply(img_c, -1, cv2.cuda_Stream.Null())
    img_d = morphological_filter_gpu(img_d)
    res = img_d.download()
    frames_done += 1
end = time()
cv2.destroyAllWindows()
print("time elapsed: ", end-start)
# Divide by the frames actually processed, not a hard-coded 1000, so the
# figure stays correct when the video ends early.
print("FPS ",frames_done/(end-start))
Now the final question: Is it due to the cuda implementation in opencv or do I use something wrong and if so: how to accelerate it?

Array creation too slow

I am trying to create an image array from scratch.
I got the code running, but it takes around 30 seconds to run.
I feel it could be faster if it used native numpy functions.
How can I do this?
import cv2
import numpy as np
import time
# Random input series and output image geometry.
volumes = np.random.randint(low=0, high=200, size=10000)
print(volumes)
image_heigh, image_width, image_channel = 128, 256, 3
show_img = False


def nomralized(data, data_min, data_max, maximum_value):
    """Linearly rescale *data* from [data_min, data_max] to [0, maximum_value]."""
    span = data_max - data_min
    return maximum_value * ((data - data_min) / span)
# Render one image per sliding window of `image_width` samples and time it.
start_time = time.time()
for ii in range(len(volumes) - image_width):
    # ===================== part to optimize start
    final_image = np.zeros((image_heigh, image_width, image_channel))
    start, end = ii, ii + image_width
    current_vols = volumes[start:end]
    # normalize the window against its own maximum
    vol_min = 0
    vol_max = np.max(current_vols)
    vol_norm = nomralized(current_vols, vol_min, vol_max, image_heigh)
    # Fill each column from row 0 up to its normalized height.
    for xxx, level in enumerate(vol_norm):
        final_image[:int(level), xxx, :] = 1
    # ===================== part to optimize end
    if show_img:
        image = cv2.cvtColor(np.float32(final_image), cv2.COLOR_BGR2RGB)
        cv2.imshow("ok", image)
        cv2.waitKey(27)
print("total running time: ", (time.time() - start_time))
How can I do to make this image array creation faster?
I need to create the image every timesteps because I want to simulate real live data stream that come every new timesteps.
This is why I would like to optimize only this part of the code :
for xxx in range(image_width):
final_image[:int(vol_norm[xxx]), xxx, :] = 1
How can I do this?

The simplest first optimizations are the following:
1) Compare the values against np.arange(...) instead of running the inner loop.
2) Use a gray image instead of a 3-channel RGB one: 3 times less data to process.
3) Use the np.uint8 type instead of np.float32, which is faster to process and doesn't need a conversion to float32 for visualizing with cv2.
Together, the optimizations above give a huge speedup (about 10x): my running time is 2.6 sec instead of 27 sec before.
Another very useful optimization, which I didn't implement here, is that you don't need to recompute the previous image pixels when the max/min of the data within the current window hasn't changed. You need to recompute the previous image data only if the max/min changed. I expect that your real-life data changes gradually, like Forex or Bitcoin prices, so the max/min within a window changes quite rarely.
Optimizations 1)-3) mentioned above are implemented in the following code:
import cv2
import numpy as np
import time
# Random input series and output image geometry.
volumes = np.random.randint(low=0, high=200, size=10000)
print(volumes)
image_heigh, image_width, image_channel = 128, 256, 3
show_img = False


def nomralized(data, data_min, data_max, maximum_value):
    """Linearly rescale *data* from [data_min, data_max] to [0, maximum_value]."""
    offset = data - data_min
    return maximum_value * (offset / (data_max - data_min))
start_time = time.time()
# Column vector of row indices; comparing it with a row of heights yields the
# whole bar image in a single broadcast.
aranges = np.arange(image_heigh, dtype=np.int32)[:, None]
for ii in range(len(volumes) - image_width):
    # ===================== part to optimize start
    start, end = ii, ii + image_width
    current_vols = volumes[start:end]
    # normalize the window against its own maximum
    vol_min = 0
    vol_max = np.max(current_vols)
    vol_norm = nomralized(current_vols, vol_min, vol_max, image_heigh)
    heights = vol_norm[None, :].astype(np.int32)
    below_bar = aranges < heights
    final_image = below_bar.astype(np.uint8) * 255
    # ===================== part to optimize end
    if show_img:
        cv2.imshow('ok', final_image)
        cv2.waitKey(27)
print("total running time: ", (time.time() - start_time))
For the code above I made one more optimization of the inner loop, which sped it up a further 2x, to about 1.3 sec. When I put back the 3 channels and float32, the running time rose again to about 2.8 sec.
A further optimization is possible if re-computing old image data is not needed.
The main thing to optimize is that you were re-computing almost the same whole image on each step, shifted by just 1 pixel along the width. Instead, compute the whole image once, and advance not by 1 pixel but by the whole image width.
With this optimization the running time is 0.08 sec.
Do the 1-pixel stepping only for showing the animation, not for computing the image data; the image data should be computed just once if you need speed.
import cv2
import numpy as np
import time
# Random input series; the image is now as wide as the whole series and only
# a `screen_width` slice of it is displayed at a time.
volumes = np.random.randint(low=0, high=200, size=10000)
print(volumes)
image_heigh = 128
image_width = volumes.size  # 256
image_channel = 3
screen_width = 256
show_img = False


def nomralized(data, data_min, data_max, maximum_value):
    """Linearly rescale *data* from [data_min, data_max] to [0, maximum_value]."""
    span = data_max - data_min
    return maximum_value * ((data - data_min) / span)
# Render the whole series in steps of `image_width` columns, then (optionally)
# scroll a `screen_width` wide view across the finished image.
start_time = time.time()
for ii in range(0, len(volumes), image_width):
    # ===================== part to optimize start
    final_image = np.zeros((image_heigh, image_width, image_channel))
    start, end = ii, ii + image_width
    current_vols = volumes[start:end]
    # normalize the data against its own maximum
    vol_min = 0
    vol_max = np.max(current_vols)
    vol_norm = nomralized(current_vols, vol_min, vol_max, image_heigh)
    for xxx in range(image_width):
        bar_top = int(vol_norm[xxx])
        final_image[:bar_top, xxx, :] = 1
    # ===================== part to optimize end
    if show_img:
        last_start = final_image.shape[1] - screen_width
        for start in range(0, last_start):
            view = final_image[:, start : start + screen_width]
            image = cv2.cvtColor(np.float32(view), cv2.COLOR_BGR2RGB)
            cv2.imshow("ok", image)
            cv2.waitKey(27)
print("total running time: ", (time.time() - start_time))
I also created animation image out of your data:
If you want to create same animation just append next piece of code to the end of script above:
# Needs: python -m pip install pillow
import PIL.Image
# One frame every 6 columns: a `screen_width` wide window of the rendered
# image, scaled from 0/1 to 0/255.
imgs = []
for _left in range(0, final_image.shape[1] - screen_width, 6):
    _window = final_image[:, _left : _left + screen_width]
    imgs.append(PIL.Image.fromarray(_window.astype(np.uint8) * 255))
# Save the first frame and append the rest as animation frames.
imgs[0].save('result.png', append_images = imgs[1:], save_all = True, lossless = True, duration = 100)
I've implemented also simulation of real-time live stream data rendering/visualizing.
live_stream() generator spits out random amount of data at random points of time, this is to simulate data generation process.
stream_fetcher() listens to live stream and records all data received to python queue q0, this fetcher is run in one thread.
renderer() gets data recorded by fetcher and renders it into image through your mathematical formulas and normalization process, it renders as much data as available, resulting in images with varying widths, rendered images are saved to another queue q1.
visualizer() visualizes rendered data by fetching as much rendered images as available.
All functions run in separate threads not to block whole process. Also if any of threads works to slow then it skips some of data to catch-up with current real-time data, thus every queue doesn't overflow.
Also you may see that visualized process is jumpy, it is not because functions are somewhat slow, but because live stream spits out different amount of data in each time step, this is how usually real-time data may behave.
In the next code I did also extra optimization mentioned before, that is not-recomputing image if min/max didn't change.
import cv2, numpy as np
import time, random, threading, queue
image_height, image_width = 256, 512
# Make results reproducible and deterministic
np.random.seed(0)
random.seed(0)


def live_stream():
    """Yield an endless random walk in chunks of 1 to 20 samples.

    Each chunk continues from the last value of the previous one; after every
    chunk the generator sleeps for a random time below 0.1 s.
    """
    last = 0.
    while True:
        count = random.randint(1, 20)
        steps = np.random.uniform(low=-1., high=1., size=count)
        a = steps.astype(np.float64).cumsum() + last
        yield a
        last = a[-1]
        time.sleep(random.random() * 0.1)
# Raw chunks received from the live stream.
q0 = queue.Queue()


def stream_fetcher():
    """Forward every chunk produced by ``live_stream()`` into ``q0``."""
    for chunk in live_stream():
        q0.put(chunk)


_fetcher_thread = threading.Thread(target=stream_fetcher, daemon=True)
_fetcher_thread.start()
# Column vector of row indices used to rasterize bars by broadcasting.
aranges = np.arange(image_height, dtype=np.int32)[:, None]
# Rendered images, consumed by the visualizer.
q1 = queue.Queue()


def renderer():
    """Turn chunks from ``q0`` into bar images and push them to ``q1``.

    Every pass takes all chunks currently queued, keeps at most
    ``image_width`` samples, and renders only the new samples unless the
    min/max of the window moved outside the previous range, in which case the
    whole window is rendered again.
    """
    def normalized(data, data_min, data_max, maximum_value):
        return maximum_value * ((data - data_min) / (data_max - data_min))

    tolerance = 10 ** -8
    prev_image = np.zeros((image_height, 0), dtype=np.uint8)
    prev_vols = np.zeros((0,), dtype=np.float64)
    while True:
        # Block for one chunk, then drain whatever else is already queued.
        data = [q0.get()]
        while True:
            try:
                data.append(q0.get(block=False))
            except queue.Empty:
                break
        vols = np.concatenate(data)[-image_width:]
        # Keep only as much history as still fits next to the new samples
        # (`or prev_vols.size` turns a zero-width slice start into "keep none").
        room_left = image_width - vols.size
        prev_vols = prev_vols[-room_left or prev_vols.size:]
        concat_vols = np.concatenate((prev_vols, vols))[-image_width:]
        vols_min, vols_max = np.amin(concat_vols), np.amax(concat_vols)
        range_moved = prev_vols.size > 0 and (
            vols_min < np.amin(prev_vols) - tolerance
            or vols_max > np.amax(prev_vols) + tolerance
        )
        if range_moved:
            # Old columns were scaled with a different range: redraw them too.
            vols = concat_vols
            prev_image = prev_image[:, :-prev_vols.size]
            prev_vols = prev_vols[:0]
        vols_norm = normalized(vols, vols_min, vols_max, image_height)
        heights = vols_norm.astype(np.int32)[None, :]
        image = (aranges < heights).astype(np.uint8) * 255
        whole_image = np.concatenate((prev_image, image), axis=1)[:, -image_width:]
        q1.put(whole_image)
        prev_image = whole_image
        prev_vols = concat_vols


_renderer_thread = threading.Thread(target=renderer, daemon=True)
_renderer_thread.start()
def visualizer():
    """Show the newest rendered image and record the first 1000 frames.

    Every pass takes all images currently queued in ``q1``, keeps the last
    ``image_width`` columns and displays them. If Pillow is installed the
    displayed frames are also collected; once 1000 are available they are
    saved to ``result.png`` and recording stops.
    """
    # Try the optional dependency once, and catch only the failure that means
    # "not installed" (the previous bare `except:` hid every other error and
    # was re-run on each frame).
    try:
        # Needs: python -m pip install pillow
        import PIL.Image
    except ImportError:
        imgs = None  # recording disabled
    else:
        imgs = []
    while True:
        # Block for one image, then drain whatever else is already queued.
        data = [q1.get()]
        try:
            while True:
                data.append(q1.get(block = False))
        except queue.Empty:
            pass
        image = np.concatenate(data, axis = 1)[:, -image_width:]
        cv2.imshow('ok', image)
        cv2.waitKey(1)
        if imgs is not None:
            # Left-pad narrow frames so that every recorded frame has the
            # same width.
            imgs.append(PIL.Image.fromarray(np.pad(image, ((0, 0), (image_width - image.shape[1], 0)), constant_values = 0)))
            if len(imgs) >= 1000:
                print('saving...', flush = True)
                imgs[0].save('result.png', append_images = imgs[1:], save_all = True, lossless = True, duration = 100)
                imgs = None
                print('saved!', flush = True)


threading.Thread(target = visualizer, daemon = True).start()
# Keep the main thread alive; all work happens in the daemon threads.
while True:
    time.sleep(0.1)
Above live process simulation is rendered into result.png which I show down below:
I've also decided to improve visualization, by using more advanced matplotlib instead of cv2 to be able to show axes and doing real-time plot drawing. Visualization image is down below:
Next is a matplotlib-based code corresponding to last image above:
import cv2, numpy as np
import time, random, threading, queue
image_height, image_width = 256, 512
save_nsec = 20
dpi, fps = 100, 15
# Make results reproducible and deterministic
np.random.seed(0)
random.seed(0)


def live_stream():
    """Yield ``(chunk, first_pos, last_pos)`` for an endless random walk.

    Chunks hold 1 to 30 samples and continue from the previous chunk's last
    value; the two positions are the indices of the chunk's first and last
    sample within the whole stream. After every chunk the generator sleeps
    for a random time below ``2.2 / fps`` seconds.
    """
    last = 0.
    pos = 0
    while True:
        count = random.randint(1, 30)
        steps = np.random.uniform(low=-1., high=1., size=count)
        a = steps.astype(np.float64).cumsum() + last
        yield a, pos, pos + a.size - 1
        pos += a.size
        last = a[-1]
        time.sleep(random.random() * 2.2 / fps)
# Raw (chunk, first_pos, last_pos) items received from the live stream.
q0 = queue.Queue()


def stream_fetcher():
    """Forward every item produced by ``live_stream()`` into ``q0``."""
    for item in live_stream():
        q0.put(item)


_fetcher_thread = threading.Thread(target=stream_fetcher, daemon=True)
_fetcher_thread.start()
# Column vector of row indices used to rasterize bars by broadcasting.
aranges = np.arange(image_height, dtype=np.int32)[:, None]
# Rendered (image, minx, maxx, miny, maxy) items, consumed by the visualizer.
q1 = queue.Queue()


def renderer():
    """Turn items from ``q0`` into bar images plus axis bounds and queue them in ``q1``.

    Every pass takes all items currently queued, keeps at most ``image_width``
    samples, and renders only the new samples unless the min/max of the
    window moved outside the previous range, in which case the whole window
    is rendered again. Each output item carries the image, the stream
    positions of its first and last column, and the value range used.
    """
    def normalized(data, data_min, data_max, maximum_value):
        return maximum_value * ((data - data_min) / (data_max - data_min))

    tolerance = 10 ** -8
    prev_image = np.zeros((image_height, 0), dtype=np.uint8)
    prev_vols = np.zeros((0,), dtype=np.float64)
    while True:
        # Block for one item, then drain whatever else is already queued.
        data = [q0.get()]
        while True:
            try:
                data.append(q0.get(block=False))
            except queue.Empty:
                break
        data_vols = [item[0] for item in data]
        data_minx, data_maxx = data[0][1], data[-1][2]
        vols = np.concatenate(data_vols)[-image_width:]
        # Keep only as much history as still fits next to the new samples
        # (`or prev_vols.size` turns a zero-width slice start into "keep none").
        room_left = image_width - vols.size
        prev_vols = prev_vols[-room_left or prev_vols.size:]
        concat_vols = np.concatenate((prev_vols, vols))[-image_width:]
        vols_min, vols_max = np.amin(concat_vols), np.amax(concat_vols)
        range_moved = prev_vols.size > 0 and (
            vols_min < np.amin(prev_vols) - tolerance
            or vols_max > np.amax(prev_vols) + tolerance
        )
        if range_moved:
            # Old columns were scaled with a different range: redraw them too.
            vols = concat_vols
            prev_image = prev_image[:, :-prev_vols.size]
            prev_vols = prev_vols[:0]
        vols_norm = normalized(vols, vols_min, vols_max, image_height)
        heights = vols_norm.astype(np.int32)[None, :]
        image = (aranges < heights).astype(np.uint8) * 255
        whole_image = np.concatenate((prev_image, image), axis=1)[:, -image_width:]
        first_column_pos = data_maxx - whole_image.shape[1] + 1
        q1.put((whole_image, first_column_pos, data_maxx, vols_min, vols_max))
        prev_image = whole_image
        prev_vols = concat_vols


_renderer_thread = threading.Thread(target=renderer, daemon=True)
_renderer_thread.start()
def visualizer():
    """Animate the rendered images with matplotlib and save them as a GIF.

    Frames come from ``q1``; every frame merges all items currently queued,
    keeps the last ``image_width`` columns and updates the image together
    with its axis extent. ``save_nsec * fps`` frames are written to
    ``result.gif`` before the window is shown.
    """
    import matplotlib.pyplot as plt
    import matplotlib.animation

    def images():
        """Yield ``(image, minx, maxx, miny, maxy)`` for every animation frame."""
        while True:
            # Block for one item, then drain whatever else is already queued.
            data = [q1.get()]
            try:
                while True:
                    data.append(q1.get(block = False))
            except queue.Empty:
                pass
            minx = min([e[1] for e in data])
            # Fixed: the upper bounds were computed with min(), which
            # under-reported the x/y extent whenever several items were
            # merged into one frame.
            maxx = max([e[2] for e in data])
            miny = min([e[3] for e in data])
            maxy = max([e[4] for e in data])
            image = np.concatenate([e[0] for e in data], axis = 1)[:, -image_width:]
            # Left-pad narrow frames to the full width, then expand to 3 channels.
            image = np.pad(image, ((0, 0), (image_width - image.shape[1], 0)), constant_values = 0)
            image = np.repeat(image[:, :, None], 3, axis = -1)
            yield image, minx, maxx, miny, maxy

    it = images()
    im = None
    fig = plt.figure(figsize = (image_width / dpi, image_height / dpi), dpi = dpi)

    def animate_func(i):
        nonlocal it, im, fig
        image, minx, maxx, miny, maxy = next(it)
        print(f'.', end = '', flush = True)
        if im is None:
            im = plt.imshow(image, interpolation = 'none', aspect = 'auto')
        else:
            im.set_array(image)
        # NOTE(review): the source lost its indentation; the extent update is
        # assumed to apply to every frame, including the first -- confirm.
        im.set_extent((minx, maxx, miny, maxy))
        return [im]

    anim = matplotlib.animation.FuncAnimation(fig, animate_func, frames = round(save_nsec * fps), interval = 1000 / fps)
    print('saving...', end = '', flush = True)
    #anim.save('result.mp4', fps = fps, dpi = dpi, extra_args = ['-vcodec', 'libx264'])
    anim.save('result.gif', fps = fps, dpi = dpi, writer = 'imagemagick')
    print('saved!', end = '', flush = True)
    plt.show()


threading.Thread(target = visualizer, daemon = True).start()
# Keep the main thread alive; all work happens in the daemon threads.
while True:
    time.sleep(0.1)
Then I've decided to play a bit and colored last image with RGB palette, the higher the peak is more red-ish it is, if it is more in the middle then it is more green-ish, if it is low enough then it is more blue-ish. Resulting image below was achieved by this coloring code:
And another one colored animation below, line-style instead of bar-style, with the help of this code:

Python Script for Art Museum Installation intermittently locks up, Removing Thermal Camera Sensor read function seems to work?

I have a Python script for an installation in an art museum that is meant to run continuously, playing sounds, driving an LED matrix, and sensing people via OpenCV and a thermal camera.
Each part of the script works, and all of them work together, but at random the script locks up and I need to restart it. I want the script not to lock up, so that no one has to reset it during the exhibition.
I have the code running on a spare Raspberry Pi with a spare LED matrix, and there it continues to cycle through fine. The only changes I made there were commenting out the start of the thread that checks the IR sensor and the call to the function that gets the max temp from the sensor.
To be clear, if I leave these bits of code in, the script runs fine 1-3 or sometimes 10 times. But it seems to lock up in the first "state", when IRcount = 0.
I am stuck. Any help is greatly appreciated.
```
#!/usr/bin/python
import glob
import queue
import sys
import pygame
import cv2
import random
import math
import colorsys
import time
from rpi_ws281x import *
from PIL import Image
import numpy as np
import threading
global thresh
sys.path.insert(0, "/home/pi/irpython/build/lib.linux-armv7l-3.5")
import MLX90640 as mlx
# Wall-clock timestamps in milliseconds, each read once at start-up.
currentTime = int(round(time.time() * 1000))
InflateWait = int(round(time.time() * 1000))
# NOTE(review): presumably lower/upper bounds (in ms) for a wait interval --
# confirm against the main loop, which is not visible here.
minTime = 6000
maxTime = 12000
lineHeight1 = 0
# Random integer in [1, 359] divided by 255.
# NOTE(review): the result can exceed 1.0; confirm the intended hue scale.
lineHue1 = float(random.randrange(1,360))/255
# IR Functions
# Function to just grab the Max Temp detected. If over threshold then start
# the sequence, if not stay in state 0
def maxTemp():
    """Read one frame from the MLX90640 and return its highest value as an int."""
    mlx.setup(8)  # set frame rate of MLX90640
    try:
        f = mlx.get_frame()
    finally:
        # Always release the sensor, even when the read fails; otherwise the
        # next setup() call starts from a sensor that was never cleaned up.
        mlx.cleanup()
    return int(max(f))
# Function to detect individual people's heat blob group of pixels
# run in a thread only at the end of the script
def irCounter():
    """Read one thermal frame and count the warm blobs found in it.

    The frame is copied into a 24x32 grayscale image, upscaled to 240x320,
    blurred, thresholded and cleaned up with erode / dilate / close before
    contours are detected. The processed image is shown in the "Combined"
    window.

    Returns the number of contours as a string (the value the original code
    computed but never returned, although ``TempTask`` stores this function's
    return value).

    NOTE(review): this function calls cv2.imshow / cv2.waitKey and is started
    from a worker thread; check whether the GUI calls are safe off the main
    thread on the target system.
    """
    mlx.setup(8)  # set frame rate of MLX90640
    try:
        f = mlx.get_frame()
    finally:
        # Always release the sensor, even when the read fails.
        mlx.cleanup()
    img = Image.new('L', (24, 32), "black")  # make IR image
    for x in range(24):
        for y in range(32):
            val = f[32 * (23 - x) + y]
            img.putpixel((x, y), int(val))
    # convert raw temp data to numpy array
    imgIR = np.array(img)
    # increase the 24x32 px image to 240x320px for ease of seeing
    # Fixed: this resized the undefined name `depth_uint8` instead of the
    # thermal image built just above.
    bigIR = cv2.resize(imgIR, dsize=(240,320), interpolation=cv2.INTER_CUBIC)
    # Use a bilateral filter to blur while hopefully retaining edges
    brightBlurIR = cv2.bilateralFilter(bigIR,9,150,150)
    # Threshold the image to black and white
    retval, threshIR = cv2.threshold(brightBlurIR, 26, 255, cv2.THRESH_BINARY)
    # Define kernel for erosion and dilation and closing operations
    kernel = np.ones((5,5),np.uint8)
    erosionIR = cv2.erode(threshIR,kernel,iterations = 1)
    dilationIR = cv2.dilate(erosionIR,kernel,iterations = 1)
    closingIR = cv2.morphologyEx(dilationIR, cv2.MORPH_CLOSE, kernel)
    # Detect contours
    contours, hierarchy = cv2.findContours(closingIR, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    # Get the number of contours ( contours count when touching edge of image while blobs don't)
    ncontours = str(len(contours))
    # Show images in window during testing
    cv2.imshow("Combined", closingIR)
    cv2.waitKey(1)
    return ncontours
#initialize pygame
# Start pygame and its mixer, and reserve 30 mixer channels.
pygame.init()
pygame.mixer.init()
pygame.mixer.set_num_channels(30)
print("pygame initialized")
# Assign sound channels for pygame: one Channel handle per index 0..28
# (30 channels were reserved above; index 29 gets no handle here).
channel0 = pygame.mixer.Channel(0)
channel1 = pygame.mixer.Channel(1)
channel2 = pygame.mixer.Channel(2)
channel3 = pygame.mixer.Channel(3)
channel4 = pygame.mixer.Channel(4)
channel5 = pygame.mixer.Channel(5)
channel6 = pygame.mixer.Channel(6)
channel7 = pygame.mixer.Channel(7)
channel8 = pygame.mixer.Channel(8)
channel9 = pygame.mixer.Channel(9)
channel10 = pygame.mixer.Channel(10)
channel11 = pygame.mixer.Channel(11)
channel12 = pygame.mixer.Channel(12)
channel13 = pygame.mixer.Channel(13)
channel14 = pygame.mixer.Channel(14)
channel15 = pygame.mixer.Channel(15)
channel16 = pygame.mixer.Channel(16)
channel17 = pygame.mixer.Channel(17)
channel18 = pygame.mixer.Channel(18)
channel19 = pygame.mixer.Channel(19)
channel20 = pygame.mixer.Channel(20)
channel21 = pygame.mixer.Channel(21)
channel22 = pygame.mixer.Channel(22)
channel23 = pygame.mixer.Channel(23)
channel24 = pygame.mixer.Channel(24)
channel25 = pygame.mixer.Channel(25)
channel26 = pygame.mixer.Channel(26)
channel27 = pygame.mixer.Channel(27)
channel28 = pygame.mixer.Channel(28)
# Load the sound files; every .ogg is read into a Sound object up front.
echoballs = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/echo balls FIX.ogg")
organbounce = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/ORGAN BOUNCE fix.ogg")
jar = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/jar whoop fix.ogg")
garland = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/GARLAND_fix.ogg")
dribble= pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/dribble.ogg")
cowbell = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/cowbell fix.ogg")
clackyballs = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/clacky balls boucne.ogg")
burpees = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/burpees_fix.ogg")
brokensynth = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/broken synth bounce.ogg")
woolballs = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/wool balls in jar FIX.ogg")
wiimoye = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/wiimoye_fix.ogg")
warpyorgan = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/warpy organ bounce#.2.ogg")
vibrate = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/vibrate fix.ogg")
turtlesbounce = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/turtles fix.ogg")
timer = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/timer.ogg")
tape = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/tape fix.ogg")
tambourine = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/TAMBOURINE.ogg")
springybounce = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/springy bounce.ogg")
smash3 = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/smash fix.ogg")
bristle2 = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/BRISTLE FIX.ogg")
blackkeys = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/black keys FIX.ogg")
zipper = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/zipper.ogg")
presatisfactionsweep = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/pre-satisfaction sweep .ogg")
satisfaction = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/SATISFACTION.ogg")
altsatisfaction = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/alt_satisfaction_trimmed.ogg")
solosatisfaction = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/SOLO_SATISFACTION.ogg")
print("sound files loaded")
# Initializing the sounds list.
# NOTE(review): `zipper` is listed twice and `tape` is loaded above but not
# listed -- confirm whether that is intentional.
soundsList = [echoballs, organbounce, zipper, jar, garland, dribble, cowbell, clackyballs, burpees, brokensynth, woolballs,
wiimoye, warpyorgan, vibrate, turtlesbounce, timer, tambourine, springybounce, smash3, bristle2, blackkeys, zipper ]
IRcount = 0 # define initial state for main loop
# Open a 32x8 pygame display surface.
pygame.display.set_mode((32, 8))
print("pygame dispaly open")
# LED strip configuration:
LED_COUNT = 256 # Number of LED pixels.
LED_PIN = 18 # GPIO pin connected to the pixels (18 uses PWM!).
#LED_PIN = 10 # GPIO pin connected to the pixels (10 uses SPI /dev/spidev0.0).
LED_FREQ_HZ = 800000 # LED signal frequency in hertz (usually 800khz)
LED_DMA = 10 # DMA channel to use for generating signal (try 10)
LED_BRIGHTNESS = 100 # Set to 0 for darkest and 255 for brightest
LED_INVERT = False # True to invert the signal (when using NPN transistor level shift)
LED_CHANNEL = 0 # set to '1' for GPIOs 13, 19, 41, 45 or 53
# Define functions which animate LEDs in various ways.
# PNG to LED function used to shuffle througfh folders of numbered PNGs exported
# from animations created
def pngToLED (strip, pngfile):
    """Load *pngfile* and push its pixels to *strip*.

    The image is converted to RGB, every pixel becomes a ``Color``, the
    values are laid out on a (32, 8) grid in column-major order, and every
    second row of that grid is reversed before the grid is flattened and
    written to the LEDs.
    """
    pixels = np.array(Image.open(pngfile).convert('RGB'))
    colours = [Color(px[0], px[1], px[2]) for row in pixels for px in row]
    grid = np.reshape(colours, (32, 8), order='F')
    # Reverse every odd row of the grid.
    grid[1::2, :] = grid[1::2, ::-1]
    pic = grid.flatten('C')
    for led in range(strip.numPixels()):  # iterate over all LEDs
        strip.setPixelColor(led, int(pic[led]))
    # NOTE(review): the source lost its indentation; a single show() after
    # all pixels are set is assumed -- confirm.
    strip.show()
def colorWipe(strip, color,wait_ms=10):
"""Wipe color across display a pixel at a time.

Sets every pixel of *strip* to *color*, calls ``strip.show()`` and sleeps
for one second.
"""
# NOTE(review): `wait_ms` is accepted but never used; the delay is the fixed
# time.sleep(1) below.
# NOTE(review): indentation was lost in this copy, so it cannot be told from
# here whether show()/sleep() run once per pixel or once after the loop --
# confirm against the original script.
for i in range(strip.numPixels()):
strip.setPixelColor(i, color)
strip.show()
time.sleep(1)
def theaterChase(strip, color, wait_ms, iterations=10):
    """Movie theater light style chaser animation.

    For each of *iterations* rounds and each of the three phase offsets,
    every third pixel (shifted by the offset) is lit with *color*, shown for
    *wait_ms* milliseconds, and then switched off again.
    """
    for _ in range(iterations):
        for offset in (0, 1, 2):
            for base in range(0, strip.numPixels(), 3):
                strip.setPixelColor(base + offset, color)
            strip.show()
            time.sleep(wait_ms / 1000.0)
            for base in range(0, strip.numPixels(), 3):
                strip.setPixelColor(base + offset, 0)
def wheel(pos):
    """Generate rainbow colors across 0-255 positions.

    The range is split into three bands (below 85, below 170, the rest); in
    each band two colour components are cross-faded while the third stays 0.
    """
    if pos < 85:
        return Color(pos * 3, 255 - pos * 3, 0)
    if pos < 170:
        offset = pos - 85
        return Color(255 - offset * 3, 0, offset * 3)
    offset = pos - 170
    return Color(0, offset * 3, 255 - offset * 3)
def rainbow(strip, wait_ms=20, iterations=1):
    """Draw rainbow that fades across all pixels at once.

    Runs ``256 * iterations`` frames; in every frame pixel ``i`` gets the
    wheel colour at ``(i + frame) & 255``, then the strip is shown and the
    function sleeps for *wait_ms* milliseconds.
    """
    total_frames = 256 * iterations
    for frame in range(total_frames):
        for led in range(strip.numPixels()):
            strip.setPixelColor(led, wheel((led + frame) & 255))
        strip.show()
        time.sleep(wait_ms / 1000.0)
def rainbowCycle(strip, wait_ms=20, iterations=5):
    """Spread one full rainbow evenly over the strip and rotate it.

    Runs ``256 * iterations`` steps; on each step every pixel gets the wheel
    colour for its proportional position along the strip plus the step, the
    strip is refreshed and the function sleeps ``wait_ms`` milliseconds.
    """
    total_steps = 256 * iterations
    for step in range(total_steps):
        for led in range(strip.numPixels()):
            # Scale the pixel index onto the 0-255 wheel.
            spread = int(led * 256 / strip.numPixels())
            strip.setPixelColor(led, wheel((spread + step) & 255))
        strip.show()
        time.sleep(wait_ms / 1000.0)
def theaterChaseRainbow(strip, wait_ms=90):
    """Theater-marquee chase whose lit pixels cycle through rainbow colours.

    For each of 256 colour steps the three phases 0, 1, 2 are played: every
    third pixel (offset by the phase) is set to ``wheel((base + step) % 255)``,
    the strip is refreshed, the function sleeps ``wait_ms`` milliseconds, and
    the same pixels are then set back to 0.
    """
    for step in range(256):
        for phase in range(3):
            for base in range(0, strip.numPixels(), 3):
                strip.setPixelColor(base + phase, wheel((base + step) % 255))
            strip.show()
            time.sleep(wait_ms / 1000.0)
            # Clear this phase's pixels before moving to the next phase.
            for base in range(0, strip.numPixels(), 3):
                strip.setPixelColor(base + phase, 0)
# Plasma LED Function from Root 42
def plasmaLED(plasmaTime=None):
    """Render one frame of the plasma effect on the module-level ``strip``.

    Args:
        plasmaTime: Animation clock used as the phase of the sine terms.
            When omitted, the module-level ``plasmaTime`` variable is used.
            The main loop calls ``plasmaLED()`` without arguments and
            advances that variable itself, which raised ``TypeError`` while
            the parameter was mandatory.

    A hue is computed for every cell of an 8 x 32 grid, converted to RGB at a
    brightness of 100 and stored in a flat buffer in which odd columns are
    written in reverse order.  The buffer is then copied to ``strip`` and
    shown.
    """
    if plasmaTime is None:
        # The parameter shadows the global of the same name, so the global
        # has to be fetched explicitly.
        plasmaTime = globals()['plasmaTime']
    h = 8
    w = 32
    out = [Color(0, 0, 0) for _ in range(h * w)]
    plasmaBright = 100.0
    for x in range(h):
        for y in range(w):
            hue = (4.0
                   + math.sin(plasmaTime + x)
                   + math.sin(plasmaTime + y / 4.5)
                   + math.sin(x + y + plasmaTime)
                   + math.sin(math.sqrt((x + plasmaTime) ** 2.0
                                        + (y + 1.5 * plasmaTime) ** 2.0) / 4.0)) / 8
            rgb = colorsys.hsv_to_rgb(hue, 1, 1)
            pixel = Color(*[int(round(c * plasmaBright)) for c in rgb])
            if y % 2 == 0:  # even column: stored top to bottom
                out[x + (h * y)] = pixel
            else:  # odd column: stored in reverse
                out[(y * h) + (h - 1 - x)] = pixel
    for i in range(strip.numPixels()):
        strip.setPixelColor(i, out[i])
    strip.show()
# variables for plasma
plasmaTime = 5.0 # time
plasmaSpeed = 0.05 # speed of time
# thread for IRcounter function
class TempTask:
    """Background poller that keeps the latest ``irCounter()`` reading.

    Attributes are plain fields: ``ir_temp`` holds the most recent reading
    (0 until the first poll), ``lock`` guards writes to it, and ``thread`` is
    the worker that runs ``update_temp``.
    """

    def __init__(self):
        self.ir_temp = 0
        self.lock = threading.Lock()  # guards ir_temp across threads
        self.thread = threading.Thread(target=self.update_temp)
        # The worker loops forever; as a daemon it no longer keeps the
        # interpreter alive after the main program finishes (e.g. after the
        # Ctrl-C cleanup path).
        self.thread.daemon = True

    def update_temp(self):
        """Poll ``irCounter()`` forever, publishing each result to ``ir_temp``."""
        while True:
            # Read outside the lock so a slow read never blocks a reader.
            reading = irCounter()
            with self.lock:
                self.ir_temp = reading
            time.sleep(0.1)

    def start(self):
        """Start the polling thread."""
        self.thread.start()
# Millis timer count function
def CheckTime(lastTime, wait, now=None):
    """Return True once at least ``wait`` has elapsed since ``lastTime``.

    Args:
        lastTime: Timestamp of the reference event.
        wait: Required interval, in the same unit as ``lastTime``.
        now: Current timestamp.  When omitted, the module-level
            ``currentTime`` is used, as before.

    The function never modifies the caller's timer: integers are passed by
    value, so the former ``lastTime += wait`` had no effect and was removed.
    Callers must reset their own timestamp.
    """
    if now is None:
        now = currentTime
    return now - lastTime >= wait
# Main program logic follows:
if __name__ == '__main__':
# Entry point: a polling state machine keyed on IRcount. Every pass of the loop below refreshes
# currentTime (milliseconds), runs the LED/sound action for the current state and uses CheckTime
# to decide when IRcount is advanced.
# NOTE(review): IRcount, InflateWait, minTime, maxTime, soundsList, the channelN objects and the
# Sound objects used below are not defined in this part of the file -- presumably set up earlier; confirm.
# NOTE(review): no branches for IRcount 8-13 or 17 appear in this excerpt.
# not currently starting the thread because program is locking up without it
# want to figure out initial problem first
#start thread
#task = TempTask()
#task.start()
# Create NeoPixel object with appropriate configuration.
strip = Adafruit_NeoPixel(LED_COUNT, LED_PIN, LED_FREQ_HZ, LED_DMA, LED_INVERT, LED_BRIGHTNESS, LED_CHANNEL)
# Initialize the library (must be called once before other functions).
strip.begin()
print ('Press Ctrl-C to quit.')
try:
while True:
currentTime = int(round(time.time() * 1000))
if IRcount == 0:
#random solid color
colorWipe(strip, Color(random.randint(60,255), random.randint(60,255), random.randint(60,255)))
# use random.sample() to shuffle sounds list
shuffledSounds = random.sample(soundsList, len(soundsList))
if pygame.mixer.Channel(0).get_busy() == False: channel0.play(shuffledSounds[0],loops = -1)
thresh = 0
'''
# the threshold check below is the only thing I have taken out of
# Program on my test Raspberry Pi. It seems to not lock up without it
# not sure why this would be a problem.
thresh = int(maxTemp())
print (thresh)
if thresh >= 27:
InflateWait = int(round(time.time() * 1000))
print (thresh)
IRcount = 1
print("Threshold Temp Detected: Begin Sound Sequence")
else:
IRcount = 0
'''
# With the threshold check disabled above, state 0 is left on a random timer instead.
if CheckTime(InflateWait,random.randint(minTime, maxTime)):
InflateWait = int(round(time.time() * 1000))
IRcount += 1
print(IRcount)
elif IRcount == 1:
LEDimages = glob.glob("/home/pi/ruff-wavs/Crystal_Mirror/*.png")
for LEDimage in sorted(LEDimages):
pngToLED (strip, LEDimage)
if pygame.mixer.Channel(1).get_busy() == False:
channel1.play(shuffledSounds[1],loops = -1)
waitTime = random.randint(minTime, maxTime)
if CheckTime(InflateWait,waitTime):
InflateWait = int(round(time.time() * 1000))
IRcount += 1
print(IRcount)
elif IRcount == 2:
LEDimages = glob.glob("/home/pi/ruff-wavs/Mercury_Loop/*.png")
for LEDimage in sorted(LEDimages):
pngToLED (strip, LEDimage)
if pygame.mixer.Channel(2).get_busy() == False:
channel2.play(shuffledSounds[2],loops = -1)
waitTime = random.randint(minTime, maxTime)
if CheckTime(InflateWait,waitTime):
InflateWait = int(round(time.time() * 1000))
IRcount += 1
print(IRcount)
elif IRcount == 3:
LEDimages = glob.glob("/home/pi/ruff-wavs/Pink_Lava/*.png")
for LEDimage in sorted(LEDimages):
pngToLED (strip, LEDimage)
if pygame.mixer.Channel(3).get_busy() == False:
channel3.play(shuffledSounds[3],loops = -1)
waitTime = random.randint(minTime, maxTime)
if CheckTime(InflateWait,waitTime):
InflateWait = int(round(time.time() * 1000))
IRcount += 1
print(IRcount)
elif IRcount == 4:
LEDimages = glob.glob("/home/pi/ruff-wavs/Horiz_Mosaic/*.png")
for LEDimage in sorted(LEDimages):
pngToLED (strip, LEDimage)
if pygame.mixer.Channel(4).get_busy() == False:
channel4.play(shuffledSounds[4],loops = -1)
waitTime = random.randint(minTime, maxTime)
if CheckTime(InflateWait,waitTime):
InflateWait = int(round(time.time() * 1000))
IRcount += 1
print(IRcount)
elif IRcount == 5:
# NOTE(review): plasmaLED is called with no argument here and in states 14 and 15 -- confirm its
# signature allows that; the animation clock plasmaTime is advanced on the following line.
plasmaLED()
plasmaTime = plasmaTime + plasmaSpeed # increment plasma time
if pygame.mixer.Channel(5).get_busy() == False:
channel5.play(shuffledSounds[5],loops = -1)
waitTime = random.randint(minTime, maxTime)
if CheckTime(InflateWait,waitTime):
InflateWait = int(round(time.time() * 1000))
IRcount += 1
print(IRcount)
elif IRcount == 6:
LEDimages = glob.glob("/home/pi/ruff-wavs/Radio_Loop/*.png")
for LEDimage in sorted(LEDimages):
pngToLED (strip, LEDimage)
if pygame.mixer.Channel(6).get_busy() == False:
channel6.play(shuffledSounds[6],loops = -1)
waitTime = random.randint(minTime, maxTime)
if CheckTime(InflateWait,waitTime):
InflateWait = int(round(time.time() * 1000))
IRcount += 1
print(IRcount)
elif IRcount == 7:
LEDimages = glob.glob("/home/pi/ruff-wavs/Star_Loop/*.png")
for LEDimage in sorted(LEDimages):
pngToLED (strip, LEDimage)
if pygame.mixer.Channel(7).get_busy() == False:
channel7.play(shuffledSounds[7],loops = -1)
waitTime = random.randint(minTime, maxTime)
if CheckTime(InflateWait,waitTime):
InflateWait = int(round(time.time() * 1000))
IRcount += 1
elif IRcount == 14:
plasmaLED()
plasmaTime = plasmaTime + plasmaSpeed # increment plasma time
if pygame.mixer.Channel(14).get_busy() == False:
channel14.play(shuffledSounds[14],loops = -1)
waitTime = random.randint(minTime, maxTime)
if CheckTime(InflateWait,waitTime):
InflateWait = int(round(time.time() * 1000))
IRcount += 1
print(IRcount)
print (thresh)
elif IRcount == 15:
plasmaLED()
plasmaTime = plasmaTime + plasmaSpeed # increment plasma time
if pygame.mixer.Channel(15).get_busy() == False:
channel15.play(shuffledSounds[15],loops = -1)
waitTime = random.randint(minTime, maxTime)
if CheckTime(InflateWait,waitTime):
InflateWait = int(round(time.time() * 1000))
IRcount += 1
print(IRcount)
elif IRcount == 16:
# random color theater chase increment random ms to speed up with sounds
theaterChase(strip, Color(random.randint(1,255), random.randint(1,255), random.randint(1,255)), random.randint(40,50))
pygame.mixer.fadeout(45000)
if pygame.mixer.Channel(22).get_busy() == False:
channel22.play(presatisfactionsweep)
IRcount = 17
print(IRcount)
print("sweep end start")
elif IRcount == 18:
# random color theater chase increment random ms to speed up with sounds
theaterChase(strip, Color(random.randint(1,255), random.randint(1,255), random.randint(1,255)), random.randint(30,40))
if pygame.mixer.Channel(22).get_busy() == False:
pygame.mixer.stop()
channel23.play(satisfaction)
IRcount = 19
print(IRcount)
print("Play Satisfaction Sount")
elif IRcount == 19:
rainbowCycle(strip, 5)
# Once channel 23 has finished playing, the sequence restarts from state 0.
if pygame.mixer.Channel(23).get_busy() == False: IRcount = 0
except KeyboardInterrupt:
# Ctrl-C: blank the LEDs and shut the mixer down before exiting.
colorWipe(strip, Color(0,0,0), 1)
pygame.mixer.stop()
pygame.quit()
```
Update 1 - Suspected Function(s)
When I left the script running overnight and came to the exhibit in the morning, it would be stuck in the first state (IRcount = 0). The only things that happen in that state are the maxTemp() call to get the max temperature and the LED color wipe function that cycles colors.
When I would come in in the morning it would be stuck, playing a single sound from pygame, as it should, but it would not be cycling colors. I removed the maxTemp() from my test Pi and it has been working fine.
def maxTemp():
    """Read one frame from the MLX90640 and return its largest value as an int.

    The sensor is set up before the read and cleaned up afterwards.  The
    cleanup runs in a ``finally`` clause so that a failing ``get_frame()``
    does not leave the sensor set up.
    """
    mlx.setup(8)  # set frame rate of MLX90640
    try:
        f = mlx.get_frame()
    finally:
        mlx.cleanup()
    # Only the maximum is needed; the minimum (min(f)) is intentionally unused.
    return int(max(f))
Update # 2
I thought that the thread might be the problem so I commented out the thread start call. That is why I made the simpler maxTemp() function to see if that would work better than the thread. So when I was using the max temp then the thread wasn't being called.
I don't understand threads very well. Is it possible to have the max temp variable update continuously and have the simple OpenCV numPy manipulations running continuously? That would be ideal. When I originally added the thread it seemed to stop after a few cycles.
I do not have a join on the thread. I know threads don't "restart" but do I need to call it again as the state machine starts again?
# not currently starting the thread because program is locking up without it
# want to figure out initial problem first
#start thread
#task = TempTask()
#task.start()
Update #3
I Uploaded new code that eliminated the duplicate functions. Everything is handled in the thread temp.task now. That seems to work fine. I also put the github suggestion of polling the thermal sensor if the image is a duplicate but that has not happened.
I left the program run over night and when I came in in the morning it was locked up. The SD card is set to read only mode. I ssh'd into the pi. I have my auto start python script in /etc/profile
It seems to start the script each time I log in over ssh. When I logged in this morning to see if the Pi was still up, it gave an out of memory error.
```
Traceback (most recent call last):
File "/home/pi/ruff-wavs/shufflewavdemo.py", line 210, in <module>
altsatisfaction = pygame.mixer.Sound("/home/pi/ruff-wavs/sounds/alt_satisfaction_trimmed.ogg")
pygame.error: Unable to open file '/home/pi/ruff-wavs/sounds/alt_satisfaction_trimmed.ogg'
OSError: [Errno 28] No space left on device
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.5/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.5/dist-packages/virtualenvwrapper/hook_loader.py", line 223, in <module>
main()
File "/usr/local/lib/python3.5/dist-packages/virtualenvwrapper/hook_loader.py", line 145, in main
output.close()
OSError: [Errno 28] No space left on device
-bash: cannot create temp file for here-document: No space left on device
Could that be because it is in read only mode?
I used this script to switch from writable to read only and back.
[https://github.com/JasperE84/root-ro][1]
[1]: https://github.com/JasperE84/root-ro

I suspect the issue is that you're accessing the mlx device both in the main thread via maxTemp() as well as in the irCounter() thread. The fact that it works when you take out the maxTemp call, and that that call happens in the if IRcount == 0: state supports this.
I would add the maxTemp functionality to the irCounter thread, so that the device is accessed from only a single thread, and update a global variable (protected by a lock) with the maxTemp results if you need to retain this functionality.

pool map no performance gain

I'm doing video capture from opencv using the following
# Read frames until the counter f reaches frame_count; frames with f > 41300 are warped with
# `matrix` and passed through pipeline().
# NOTE(review): f, cap, matrix, the pools, trackers and pipeline are defined outside this excerpt.
# NOTE(review): ret is never checked before frame.copy() is called -- confirm that is intended.
# NOTE(review): indentation was lost in this paste, so it is unclear which of the lines after the
# `if` belong to its body.
frame_count = 90000
while f < frame_count:
ret, frame = cap.read()
f+=1
if (f > 41300):
dst = cv2.warpPerspective(frame.copy(), matrix, (frame.shape[1], frame.shape[0]))
pipeline(pool_1, pool_2, frame, car_tracker, ped_tracker, df_region, region_buffered, df_line_car, df_line_ped, det, dst, matrix)
cv2.imshow("frame", frame)
cv2.imshow('dst', dst)
cv2.waitKey(1)
In function pipeline:
df_cars= pool_1.map(compute, cars)
df_peds = pool_2.map(compute, peds)
compute is:
def compute(v):
    """Record one observation and compute the parameters of its trajectory.

    ``v`` is read positionally: ``v[0]`` trajectory id, ``v[1]`` x, ``v[2]`` y,
    ``v[3]`` frame id, ``v[4]`` df_region, ``v[5]`` region_buffered and
    ``v[6]`` df_line.  The observation is appended as a new row of the global
    ``general_pd``; all rows sharing the trajectory id are then handed to
    ``RawParameterProcessor`` and the result of its ``compute()`` is returned.
    """
    global general_pd
    group_by = ('trajectory_id',)
    track_id, pos_x, pos_y, frame_id = v[0], v[1], v[2], v[3]
    general_pd.loc[len(general_pd)] = [track_id, pos_x, pos_y, frame_id]
    history = general_pd.loc[general_pd['trajectory_id'] == track_id]
    df_region, region_buffered, df_line = v[4], v[5], v[6]
    # NOTE(review): frame_idx is a module-level name, not the frame_id taken
    # from v -- confirm that is intended.
    processor = RawParameterProcessor(history, df_line, frame_idx, df_region,
                                      region_buffered, gb=group_by)
    return processor.compute()
I don't know what I'm doing wrong, but I can't get the expected performance gain: I launch two processes, and for each frame of the video capture I launch a process and run the jobs asynchronously, yet I don't get any performance gain.

Python multiprocess inside of tkinter - won't work in Windows but in Linux

I have an application in Tkinter.
Part of this application is a method:
It basically takes long lists of random values and checks if the random values are inside of a previously defined grid. Afterwards it writes them into another variable to export it.
This is a rather long process. So I would like to multiprocess it.
Read some stuff about how to do that. Here's the resulting code:
I've read around SO for stuff that might be relevant. I am running an up-to-date Spyder with Python 3.7 as part of the Anaconda-suite on both machines, all (at least included) packages are up-to-date and I've included the
if __name__ == '__main__':
-line. I've also experimented with indentation of
p.start()
and
processes.append(p)
Simply can't get it to work.
def ParallelStuff(myIn1, myIn2, myIn3, myIn4, anotherIn1, anotherIn2, anotherIn3, return_dict, processIterator):
    """Filter one chunk of samples against the grid and publish the matches.

    For every index ``i`` the pair ``(myIn3[i], myIn4[i])`` is tested: the
    entries of ``anotherIn3`` at positions where ``anotherIn1`` equals
    ``myIn3[i]`` are collected, and the sample is accepted when ``myIn4[i]``
    is among them.  Accepted samples are packed at the front of three
    zero-initialised output arrays of length ``len(myIn1)``.

    Args:
        myIn1, myIn2, myIn3, myIn4: Equal-length chunk arrays.
        anotherIn1, anotherIn3: Grid arrays used for the membership test.
        anotherIn2: Unused; kept for interface compatibility.
        return_dict: Mapping that receives the result.
        processIterator: Key under which the result is stored.

    Stores ``[tempOut1, tempOut2, tempOut3]`` in ``return_dict``.  The second
    slot previously held ``tempOut1`` again, so ``tempOut2`` never reached the
    caller.
    """
    tempOut1 = np.zeros(len(myIn1))
    tempOut2 = np.zeros(len(myIn1))
    tempOut3 = np.zeros(len(myIn1))
    bb = 0  # next free slot in the output arrays
    for i in range(len(myIn3)):
        xx = myIn3[i]
        yy = myIn4[i]
        hits = np.isin(anotherIn1, xx)
        goodY = anotherIn3[np.where(hits == 1)]
        if np.isin(yy, goodY):
            tempOut1[bb] = myIn1[i]
            tempOut2[bb] = myIn2[i]
            # NOTE(review): this assigns the whole anotherIn3 array to one
            # slot, which only works when it holds a single element --
            # confirm which value was intended.
            tempOut3[bb] = anotherIn3
            bb += 1
    return_dict[processIterator] = [tempOut1, tempOut2, tempOut3]
nCores = multiprocessing.cpu_count()
def export_Function(self):
# Splits the input arrays into nCores chunks, runs ParallelStuff on each chunk in its own
# process, and concatenates the non-zero entries of each process's three result arrays.
# NOTE(review): N, xRand, in1..in4 and anotherIn1..anotherIn3 are not defined in the visible
# code -- presumably produced by the elided section below; confirm.
# NOTE(review): indentation was lost in this paste, so the exact nesting of the statements
# below cannot be told from here.
out1 = np.array([])
out2 = np.array([])
out3 = np.array([])
for loop_one in range(0, N):
# ...
# stuff that works on both systems with only one core...
# ... and on linux with all cores
processes = []
nTotal = int(len(xRand))
# Chunk size o: an even split when nTotal divides by nCores, otherwise sized for nCores-1 chunks.
if nTotal%nCores == 0:
o = int(nTotal/nCores)
else:
o = int(nTotal/(nCores-1))
# Manager-backed dict through which the worker processes hand back their results.
manager = multiprocessing.Manager()
return_dict = manager.dict()
for processIterator in range (nCores):
# NOTE(review): `i` is not defined anywhere in the visible code; the loop variable here is
# processIterator -- confirm which one was meant.
offset = o*i
myIn1 = in1[offset : min(nTotal, offset + o)]
myIn2 = in2[offset : min(nTotal, offset + o)]
myIn3 = in3[offset : min(nTotal, offset + o)]
myIn4 = in4[offset : min(nTotal, offset + o)]
# NOTE(review): this guard sits inside a method rather than at module level -- confirm it has
# the intended effect on platforms that start worker processes by re-importing the main module.
if __name__ == '__main__':
p = multiprocessing.Process(target = ParallelStuff, args = (myIn1, myIn2, myIn3, myIn4, anotherIn1, anotherIn2, anotherIn3, return_dict, processIterator))
p.start()
processes.append(p)
# Wait for each worker in turn and collect its results; entries equal to 0 are dropped.
for p in range(len(processes)):
processes[p].join()
myOut1 = return_dict[p][0]
myOut2 = return_dict[p][1]
myOut3 = return_dict[p][2]
out1 = np.concatenate((out1, myOut1[np.where(myOut1 != 0)]))
out2 = np.concatenate((out2, myOut2[np.where(myOut2 != 0)]))
out3 = np.concatenate((out3, myOut3[np.where(myOut3 != 0)]))
When I run my program on my Linux machine it does exactly what it's supposed to do: it distributes the work to all 8 cores, computes, concatenates the 3 results in the respective arrays, and exports.
When I run my program on my Windows machine the application's window freezes, the process becomes inactive, a new kernel automatically opens and a new window appears.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Thread pool is slow and same speed as serial - python

Threads aren't executed in parallel in CPython (the global interpreter lock lets only one thread run Python bytecode at a time). Try using the ProcessPoolExecutor instead.

Related

opencv - cuda python: Erode and Dilate are slower on GPU then CPU. Why?

Array creation too slow

Python Script for Art Museum Installation intermittently locks up, Removing Thermal Camera Sensor read function seems to work?

pool map no performance gain

Python multiprocess inside of tkinter - won't work in Windows but in Linux

Categories

Resources