Dask delayed + Matplotlib.savefig() -> FAIL - python

My goal is to produce multiple png files from respectively multiple numpy arrays, loaded from medical images in my HD.
To make things quicker, I'm using dask delayed.
Here's my working code:
import os.path
from glob import glob
import nibabel as nib
import numpy as np
from dask import delayed
def process(data):
# Need to have the import inside so that multiprocessing works.
# Apparently doesn't solve the issue anyway..
import matplotlib.pyplot as plt
outpath = '/Users/user/outputdir/'
name = os.path.basename(data.get_filename())
savename = name[:name.index('.')] + '.png'
plt.imshow(np.rot90(data.get_data()[15:74, 6:82, 18, 0]),
extent=[0, 1, 0, 1], aspect=1.28, cmap='gray')
plt.axis('off')
out = os.path.join(outpath, savename)
plt.savefig(out)
plt.close()
return out
L = []
for fn in glob("/Users/user/imagefiles/mb*.nii.gz"):
nifti = delayed(nib.load)(fn)
outpng = delayed(process)(nifti)
L.append(outpng)
results = delayed(print)(L)
results.compute()
My problem is that after each run some of the output images are empty (nothing in the png), and which images are empty seem pretty random, since all input data is valid.
I suspect this is a problem of multiprocessing and matplotlib, as seen in other related thread.
Does anyone have a suggestion on how to get this working with dask?
EDIT: Minimal working example
import os.path
import random
import string
import numpy as np
from dask import delayed
def gendata(fn):
return
def process(data):
# Need to have the import inside so that multiprocessing works.
import matplotlib.pyplot as plt
outpath = '/Users/user/Pictures/test/'
name = ''.join(random.choices(string.ascii_lowercase, k=10))
savename = name + '.png'
data = np.random.randint(0, 255, size=(100,100,20,2))
plt.imshow(np.rot90(data[15:74, 6:82, 18, 0]),
extent=[0, 1, 0, 1], aspect=1.28, cmap='gray')
plt.axis('off')
out = os.path.join(outpath, savename)
plt.savefig(out)
plt.close()
return out
L = []
for fn in range(0, 10):
nifti = delayed(gendata)(fn)
outpng = delayed(process)(nifti)
L.append(outpng)
results = delayed(print)(L)
results.compute()

Related

scipy and numpy not error with as_matrix function

This is a "working example" that does not work. Why does this not run? scipy seems to not work.
i get this error:
File "display_map.py", line 35, in
rot_cw = R.from_quat(keyframe["rot_cw"]).as_matrix()
AttributeError: 'Rotation' object has no attribute 'as_matrix'
please can someone help me me change it. I tried reducing the version of scipy
import msgpack
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from numpy.linalg import inv
from scipy.spatial.transform import Rotation as R
import open3d as o3d
import sys
if len(sys.argv) < 2:
print(
"ERROR: Please provide path to .msg file. Example usage is; python3 visualize_openvslam_map.py path_to.msg"
)
exit()
with open(sys.argv[1], "rb") as f:
upacked_msg = msgpack.Unpacker(f)
packed_msg = upacked_msg.unpack()
keyfarmes = packed_msg["keyframes"]
landmarks = packed_msg["landmarks"]
# FILL IN KEYFRAME POINTS(ODOMETRY) TO ARRAY
keyframe_points = []
keyframe_points_color = []
for keyframe in keyfarmes.values():
# get conversion from camera to world
trans_cw = np.matrix(keyframe["trans_cw"]).T
rot_cw = R.from_quat(keyframe["rot_cw"]).as_matrix()
# compute conversion from world to camera
rot_wc = rot_cw.T
trans_wc = -rot_wc * trans_cw
keyframe_points.append((trans_wc[0, 0], trans_wc[1, 0], trans_wc[2, 0]))
keyframe_points = np.array(keyframe_points)
keyframe_points_color = np.repeat(np.array([[0., 1., 0.]]),
keyframe_points.shape[0],
axis=0)
# FILL IN LANDMARK POINTS TO ARRAY
landmark_points = []
landmark_points_color = []
for lm in landmarks.values():
landmark_points.append(lm["pos_w"])
landmark_points_color.append([
abs(lm["pos_w"][1]) * 4,
abs(lm["pos_w"][1]) * 2,
abs(lm["pos_w"][1]) * 3
])
landmark_points = np.array(landmark_points)
landmark_points_color = np.array(landmark_points_color)
# CONSTRUCT KEYFRAME(ODOMETRY) FOR VISUALIZTION
keyframe_points_pointcloud = o3d.geometry.PointCloud()
keyframe_points_pointcloud.points = o3d.utility.Vector3dVector(keyframe_points)
keyframe_points_pointcloud.colors = o3d.utility.Vector3dVector(
keyframe_points_color)
# CONSTRUCT LANDMARK POINTCLOUD FOR VISUALIZTION
landmark_points_pointcloud = o3d.geometry.PointCloud()
landmark_points_pointcloud.points = o3d.utility.Vector3dVector(landmark_points)
landmark_points_pointcloud.colors = o3d.utility.Vector3dVector(
landmark_points_color)
# VISULIZE MAP
o3d.visualization.draw_geometries([
keyframe_points_pointcloud, landmark_points_pointcloud,
o3d.geometry.TriangleMesh.create_coordinate_frame()
])
In scipy.spatial.Rotation methods from_dcm, as_dcm were renamed to from_matrix, as_matrix respectively.

Python 3.7 : multiprocessing a for loop with shared variables

first a bit of context :
I'm trying to write down a python script to convert Image in greyscale (.tif) to a .jpeg with the so called ''jet'' colormap. I managed to do it with a for loop but it's a bit long for one image (millions of pixels to treat !), so I would like to use multiprocessing.
My problem here is that to convert each grey pixel into a coloured one I have to use two variables (the minimum value of light intensity ''min_img'' and an vector ''dx_cm'' to go from the initial grey scale to a 256 scale, corresponding to the jet colormap).
So to pass the information of ''min_img'' and ''dx_cm'' to the processes I try to use multiprocessing.Value() but in return I get the error :
RuntimeError: Synchronized objects should only be shared between processes through inheritance
I tried many different things from different sources and no matter the version of my code I'm struggling with that error. So I'm sorry if my code isn't clean, I would be very grateful if someone could help me with that.
My non-working code :
import multiprocessing
from PIL import Image
from matplotlib import cm
def fun(gr_list,dx,minp):
dx_cmp = dx.value
min_imgp = minp.value
rgb_res=list()
for i in range(len(gr_list)):
rgb_res.extend(cm.jet(round(((gr_list[i]-min_imgp)/dx_cmp)-1))[0:-1])
return rgb_res
if __name__ == '__main__':
RGB_list=list()
n = multiprocessing.cpu_count()
img = Image.open(r'some_path_to_a.tif')
Img_grey=list(img.getdata())
dx_cm = multiprocessing.Value('d',(max(Img_grey)-min(Img_grey))/256)
min_img = multiprocessing.Value('d',min(Img_grey))
with multiprocessing.Pool(n) as p:
RGB_list = list(p.map(fun, (Img_grey,dx_cm,min_img)))
res = Image.frombytes("RGB", (img.size[0], img.size[1]), bytes([int(0.5 + 255*i) for i in RGB_list]))
res.save('rgb_file.jpg')
PS : Here is an example of the the initial for loop that I would like to parallelize :
from PIL import Image
from matplotlib import cm
if __name__ == '__main__':
img = Image.open(r'some_path_to_a.tif')
Img_grey = list(img.getdata())
dx_cm = (max(Img_grey)-min(Img_grey))/256
min_img = min(Img_grey)
Img_rgb = list()
for i in range(len(Img_grey)):
Img_rgb.extend(cm.jet(round(((Img_grey[i]-min_img)/dx_cm)-1))[0:-1])
res = Image.frombytes("RGB", (img.size[0], img.size[1]), bytes([int(0.5 + 255*i) for i in Img_rgb]))
res.save('rgb_file.jpg')
Your fun method is looping over some list, but in this case it will receive a "part", an item from your list, so it should return only the result of its processing.
I have changed the working code to run with multiprocessing.
As the fun method returns a list, the p.map will return a list of lists (a list of results) and that need to be flatten, were done with list extends method before.
Tried with process pool and thread pool multiprocessing, in my scenario there wasn't any performance gains.
Process multiprocessing:
from PIL import Image
from matplotlib import cm
import multiprocessing
def fun(d):
part, dx_cm, min_img = d
return cm.jet(round(((part-min_img)/dx_cm)-1))[0:-1]
if __name__ == '__main__':
img = Image.open(r'a.tif')
Img_grey = list(img.getdata())
def Gen(img_data):
dx_cm = (max(img_data)-min(img_data))/256
min_img = min(img_data)
for part in img_data:
yield part, dx_cm, min_img
n = multiprocessing.cpu_count()
with multiprocessing.Pool(n) as p:
Img_rgb = [item for sublist in p.map(fun, Gen(Img_grey)) for item in sublist]
res = Image.frombytes("RGB", (img.size[0], img.size[1]), bytes([int(0.5 + 255*i) for i in Img_rgb]))
res.save('b.jpg')
Thread multiprocessing:
from PIL import Image
from matplotlib import cm
import multiprocessing
from multiprocessing.pool import ThreadPool
if __name__ == '__main__':
img = Image.open(r'a.tif')
Img_grey = list(img.getdata())
dx_cm = (max(Img_grey)-min(Img_grey))/256
min_img = min(Img_grey)
def fun(part):
return cm.jet(round(((part-min_img)/dx_cm)-1))[0:-1]
n = multiprocessing.cpu_count()
with ThreadPool(n) as p:
Img_rgb = [item for sublist in p.map(fun, Img_grey) for item in sublist]
res = Image.frombytes("RGB", (img.size[0], img.size[1]), bytes([int(0.5 + 255*i) for i in Img_rgb]))
res.save('b.jpg')
So it seems that the computational burden isn't big enough for multiprocessing to be helpful.
Nevertheless, for those coming across this topic interested in the image processing part of my question, I found another much quicker way (15 to 20 x than previous method) to do the same thing without a for loop :
from matplotlib import cm
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
from PIL import Image
cm_jet = cm.get_cmap('jet')
img_src = Image.open(r'path to your grey image')
img_src.mode='I'
Img_grey = list(img_src.getdata())
max_img = max(Img_grey)
min_img = min(Img_grey)
rgb_array=np.uint8(cm_jet(((np.array(img_src)-min_img)/(max_img-min_img)))*255)
ax = plt.subplot(111)
im = ax.imshow(rgb_array, cmap='jet')
divider = make_axes_locatable(ax)
cax_plot = divider.append_axes("right", size="5%", pad=0.05)
cbar=plt.colorbar(im, cax=cax_plot, ticks=[0,63.75,127.5,191.25,255])
dx_plot=(max_img-min_img)/255
cbar.ax.set_yticklabels([str(min_img),str(round(min_img+63.75*dx_plot)),str(round(min_img+127.5*dx_plot)),str(round(min_img+191.25*dx_plot)), str(max_img)])
ax.axes.get_xaxis().set_visible(False)
ax.axes.get_yaxis().set_visible(False)
plt.savefig('test_jet.jpg', quality=95, dpi=1000)

How to display a set of files with MatPlotLib in a loop

I have a set of .txt named "occupancyGrid_i", i being a number from 0-100.
What I'd like to do is to open every one of them and show them for 3 seconds. The data of the .txt is a [N x M] matrix.
import numpy
import matplotlib.pyplot as plt
import time
while True:
matrix = numpy.loadtxt('res/matrix_' + str(i) + '.txt')
plt.clf()
plt.imshow(matrix)
plt.show()
time.sleep(3)
i=i+1
What I have done so far doesn't seem to be enough. What am I doing wrong?
You can try something like this, adapting the code suggested in this answer:
import os
import numpy as np
import pylab as plt
N_IMAGES = 100
VMIN, VMAX = 0, 1 # range of values in matrices
i = 0
while True:
if i < N_IMAGES:
path = 'res/matrix_' + str(i) + '.txt'
if os.path.exists(path): # check if file exists
matrix = np.loadtxt('matrices/matrix_' + str(i) + '.txt')
plt.imshow(matrix, vmin=VMIN, vmax=VMAX)
plt.title("Matrix {}".format(i))
plt.pause(3)
i += 1
else:
# terminate you program or start from the beginning
break
# i = 0
# continue
I dont know what exactly your goal is. But to display text in matplotlib you can use text from pyplot.
`
import numpy
import matplotlib.pyplot as plt
import time
for i in range(1,5):
s = ''
with open(str(i)+'.txt','r') as f:
s=f.read()
plt.text(0.5, 0.67,s,transform=plt.gca().transAxes)
plt.show()
time.sleep(3)
First 2 argument (0.5 ,0.67) are cordinate of displayed text.
I think you should find some other way of displaying text. Just print them on your console, plotting them is not the best way to represent text data.

python program to convert mapper and reducer program for hadoop

I have two programs that the first program is about to convert .h5 file to .tiff file and second program is predict temperature using arima model.
here is first program for converting .h5 file to .tiff file and min and max .tiff and it also call geo tiff.
from osgeo import gdal
import numpy as np
import os
import h5py
from collections import defaultdict
from osgeo import osr
import datetime
in_dir = r'/Users/sunnybhargav/Desktop/jan'
out_dir = r'/Users/sunnybhargav/Desktop/new_output'
#in_dir = input('Enter input directory path where hdf files are stored: ')
#out_dir = input('Enter output directory path where geotiff files are to be stored: ')
def arrayToTif(array,tifFilePath,proj,transform,nodatavalue):
with open(tifFilePath,'a') as file:
pass
# write raster
out_ds = gdal.GetDriverByName('GTiff').Create(tifFilePath,
array.shape[1],
array.shape[0],
1, #Number of bands
gdal.GDT_Float32)
out_ds.GetRasterBand(1).WriteArray(array)
out_ds.GetRasterBand(1).SetNoDataValue(nodatavalue)
# close tif to write into disk (free tif file)
out_ds = None
dates_dict = defaultdict(list)
for root,directories,filenames in os.walk(in_dir):
for filename in filenames:
if (filename.endswith('.h5')):
hdffileDate = filename[6:15]
hdfdate = (int(hdffileDate[0:2]))
dates_dict[hdfdate].append(filename)
print(filename)
for key in dates_dict.keys():
file_list = dates_dict[key]
min_lst = 1000*np.ones((2816,2805))
max_lst = -1000*np.ones((2816,2805))
for v in file_list:
hdf_ds = h5py.File(os.path.join(in_dir,v))
lst = np.array(hdf_ds['LST'])[0,:,:]
hdf_ds = gdal.Open(os.path.join(in_dir,v))
metadata = hdf_ds.GetMetadata_Dict()
lst = lst.astype('Float32')
max_lst = np.maximum(max_lst,lst)
lst[lst==-999] = 999
min_lst = np.minimum(min_lst,lst)
min_lst[min_lst==999] = -999
transform = (0,1,0,0,0,-1)
proj = None
nodatavalue = -999
tiffileDate = v[6:15]
MinName = 'MIN' +v[0:2]+str.lower(tiffileDate) + '.tif'
MaxName = 'MAX' +v[0:2]+str.lower(tiffileDate) + '.tif'
arrayToTif
(max_lst,os.path.join(out_dir,MaxName),proj,transform,nodatavalue)
arrayToTif
(min_lst,os.path.join(out_dir,MinName),proj,transform,nodatavalue)
del lst
del min_lst
del max_lst
second program
in second program is for tiff to get ndarray and then get out put for particular all max temprature and predict next 5 day prediction.
import pandas as pd
import seaborn as sns
import matplotlib
import numpy as np
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LinearRegression
from numpy import genfromtxt
import csv
import datetime
from datetime import datetime
import time
from matplotlib import pyplot
from pandas import Series
from statsmodels.tsa.arima_model import ARIMA
import numpy
from statsmodels.tsa.stattools import adfuller
import matplotlib.pylab as plt
from statsmodels.tsa.stattools import acf, pacf
from sklearn.metrics import mean_squared_error
import numpy as np
import subprocess
import gdal,osr
from gdalconst import *
import os
import numpy as np
from PIL import Image
import scipy.misc
from datetime import datetime
# import timeseries as ts
count = 1
max_temp = []
min_temp = []
filename = []
filenamer = []
max_temp_points = []
min_temp_points = []
source = r'/Volumes/bhargav 1/data/NEW_MAX'
for root, dirs, filenames in os.walk(source):
print(filenames)
for f in filenames:
print (f)
dataset = gdal.Open( source + '//' + f ,gdal.GA_ReadOnly)
#print(dataset)
geotransform = dataset.GetGeoTransform()
band = dataset.GetRasterBand(1)
data = band.ReadAsArray(0,0,dataset.RasterXSize,dataset.RasterYSize).astype(np.float64)
#print(np.histogram(data,bins=500))
print(np.shape(data))
max_temp_point = data[793][1160]
max_temp_point = max_temp_point - 273
print(max_temp_point)
print("Count:",count)
max_temp_points.append(max_temp_point)
count = count + 1
print(np.shape(max_temp_points))
print(np.mean(max_temp_points))
count = 1
np.save("Max_temp_points_1",max_temp_points)
X = max_temp_points
model = ARIMA(X, order=(5,0,4))
model_fit = model.fit(disp=-1)
# print summary of fit model
print(model_fit.summary())
forecast = model_fit.predict()
print (forecast)
# plot
start_index = len(X)
end_index = start_index + 6
predict_val = model_fit.predict(start=start_index, end=end_index)
print('Prediction:',predict_val)
pyplot.plot(X)
pyplot.plot(forecast, color='red')
pyplot.show()

how to use multi-threading for optimizing face detection?

I have a code which uses a list of image URLs from a CSV file and then performs face detection on those images after which it loads some models and does predictions on those images.
I did some load tests and found that the get_face function in the code takes a major chunk of the time required to produce the results and the extra time is taken by the pickle file created for predictions.
Question: Is there a possibility that by running these processes in threads, time can be reduced and also how this can be done in a multi threading way?
Here is the code example:
from __future__ import division
import numpy as np
from multiprocessing import Process, Queue, Pool
import os
import pickle
import pandas as pd
import dlib
from skimage import io
from skimage.transform import resize
df = pd.read_csv('/home/instaurls.csv')
detector = dlib.get_frontal_face_detector()
img_width, img_height = 139, 139
confidence = 0.8
def get_face():
output = None
data1 = []
for row in df.itertuples():
img = io.imread(row[1])
dets = detector(img, 1)
for i, d in enumerate(dets):
img = img[d.top():d.bottom(), d.left():d.right()]
img = resize(img, (img_width, img_height))
output = np.expand_dims(img, axis=0)
break
data1.append(output)
data1 = np.concatenate(data1)
return data1
get_face()
csv sample
data
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/23101834_1502115223199537_1230866541029883904_n.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/17883193_940000882769400_8455736118338387968_a.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/22427207_1737576603205281_7879421442167668736_n.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/12976287_1720757518213286_1180118177_a.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/23101834_1502115223199537_1230866541029883904_n.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/16788491_748497378632253_566270225134125056_a.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/21819738_128551217878233_9151523109507956736_n.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/14295447_318848895135407_524281974_a.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/18160229_445050155844926_2783054824017494016_a.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/23101834_1502115223199537_1230866541029883904_n.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/17883193_940000882769400_8455736118338387968_a.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/22427207_1737576603205281_7879421442167668736_n.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/12976287_1720757518213286_1180118177_a.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/23101834_1502115223199537_1230866541029883904_n.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/16788491_748497378632253_566270225134125056_a.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/21819738_128551217878233_9151523109507956736_n.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/14295447_318848895135407_524281974_a.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/18160229_445050155844926_2783054824017494016_a.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/23101834_1502115223199537_1230866541029883904_n.jpg
Here is how you could try to do it in parallel:
from __future__ import division
import numpy as np
from multiprocessing import Process, Queue, Pool
import os
import pickle
import pandas as pd
import dlib
from skimage import io
from skimage.transform import resize
from csv import DictReader
df = DictReader(open('/home/instaurls.csv')) # DictReader is iterable
detector = dlib.get_frontal_face_detector()
img_width, img_height = 139, 139
confidence = 0.8
def get_face(row):
"""
Here row is dictionary where keys are CSV header names
and values are values from current CSV row.
"""
output = None
img = io.imread(row[1]) # row[1] has to be changed to row['data']?
dets = detector(img, 1)
for i, d in enumerate(dets):
img = img[d.top():d.bottom(), d.left():d.right()]
img = resize(img, (img_width, img_height))
output = np.expand_dims(img, axis=0)
break
return output
if __name__ == '__main__':
pool = Pool() # default to number CPU cores
data = list(pool.imap(get_face, df))
print np.concatenate(data)
Pay attention to get_face and argument that it has. Also, to what it returns. This is what I meant when I said smaller chunks of work. Now get_face processes one row from CSV.
When you run this script, pool will be a reference to a instance of a Pool and you then call get_face for each row/tuple in df.itertuples().
After everything is done, data holds processing data and then you do np.concatenate on it.

Categories