matplotlib and python multithread / multiprocessing file processing - python

I have a large number of files to process. I have written a script that get, sort and plot the datas I want. So far, so good. I have tested it and it gives the desired result.
Then I wanted to do this using multithreading. I have looked into the doc and examples on the internet, and using one thread in my program works fine. But when I use more, at some point I get random matplotlib error, and I suspect some conflict there, even though I use a function with names for the plots, and iI can't see where the problem could be.
Here is the whole script should you need more comment, i'll add them. Thank you.
#!/usr/bin/python
import matplotlib
matplotlib.use('GTKAgg')
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
import matplotlib.colors as mcl
from matplotlib import rc #for latex
import time as tm
import sys
import threading
import Queue #queue in 3.2 and Queue in 2.7 !
import pdb #the debugger
rc('text', usetex=True)#for latex
map=0 #initialize the map index. It will be use to index the array like this: array[map,[x,y]]
time=np.zeros(1) #an array to store the time
middle_h=np.zeros((0,3)) #x phi c
#for the middle of the box
current_file=open("single_void_cyl_periodic_phi_c_middle_h_out",'r')
for line in current_file:
if line.startswith('# === time'):
map+=1
np.append(time,[float(line.strip('# === time '))])
elif line.startswith('#'):
pass
else:
v=np.fromstring(line,dtype=float,sep=' ')
middle_h=np.vstack( (middle_h,v[[1,3,4]]) )
current_file.close()
middle_h=middle_h.reshape((map,-1,3)) #3d array: map, x, phi,c
#####
def load_and_plot(): #will load a map file, and plot it along with the corresponding profile loaded before
while not exit_flag:
print("fecthing work ...")
#try:
if not tasks_queue.empty():
map_index=tasks_queue.get()
print("----> working on map: %s" %map_index)
x,y,zp=np.loadtxt("single_void_cyl_growth_periodic_post_map_"+str(map_index),unpack=True, usecols=[1, 2,3])
for i,el in enumerate(zp):
if el<0.:
zp[i]=0.
xv=np.unique(x)
yv=np.unique(y)
X,Y= np.meshgrid(xv,yv)
Z = griddata((x, y), zp, (X, Y),method='nearest')
figure=plt.figure(num=map_index,figsize=(14, 8))
ax1=plt.subplot2grid((2,2),(0,0))
ax1.plot(middle_h[map_index,:,0],middle_h[map_index,:,1],'*b')
ax1.grid(True)
ax1.axis([-15, 15, 0, 1])
ax1.set_title('Profiles')
ax1.set_ylabel(r'$\phi$')
ax1.set_xlabel('x')
ax2=plt.subplot2grid((2,2),(1,0))
ax2.plot(middle_h[map_index,:,0],middle_h[map_index,:,2],'*r')
ax2.grid(True)
ax2.axis([-15, 15, 0, 1])
ax2.set_ylabel('c')
ax2.set_xlabel('x')
ax3=plt.subplot2grid((2,2),(0,1),rowspan=2,aspect='equal')
sub_contour=ax3.contourf(X,Y,Z,np.linspace(0,1,11),vmin=0.)
figure.colorbar(sub_contour,ax=ax3)
figure.savefig('single_void_cyl_'+str(map_index)+'.png')
plt.close(map_index)
tasks_queue.task_done()
else:
print("nothing left to do, other threads finishing,sleeping 2 seconds...")
tm.sleep(2)
# except:
# print("failed this time: %s" %map_index+". Sleeping 2 seconds")
# tm.sleep(2)
#####
exit_flag=0
nb_threads=2
tasks_queue=Queue.Queue()
threads_list=[]
jobs=list(range(map)) #each job is composed of a map
print("inserting jobs in the queue...")
for job in jobs:
tasks_queue.put(job)
print("done")
#launch the threads
for i in range(nb_threads):
working_bee=threading.Thread(target=load_and_plot)
working_bee.daemon=True
print("starting thread "+str(i)+' ...')
threads_list.append(working_bee)
working_bee.start()
#wait for all tasks to be treated
tasks_queue.join()
#flip the flag, so the threads know it's time to stop
exit_flag=1
for t in threads_list:
print("waiting for threads %s to stop..."%t)
t.join()
print("all threads stopped")

Following David's suggestion, I did it in multiprocessing. I get a 5 times speed up with 8 processors. I believe the rest is do to the single-process work at the begining of my script.
edit: However sometimes the script "hangs" at the last map, even though it produces the right maps, with the following error:
File "single_void_cyl_plot_mprocess.py", line 90, in tasks_queue.join()
File "/usr/local/epd-7.0-2-rh5-x86_64/lib/python2.7/multiprocessing/queues.py", line 316, in join self._cond.wait()
File "/usr/local/epd-7.0-2-rh5-x86_64/lib/python2.7/multiprocessing/synchronize.py", line 220, in wait self._wait_semaphore.acquire(True, timeout)
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
from matplotlib import rc #for latex
from multiprocessing import Process, JoinableQueue
import pdb #the debugger
rc('text', usetex=True)#for latex
map=0 #initialize the map index. It will be use to index the array like this: array[map,x,y,...]
time=np.zeros(1) #an array to store the time
middle_h=np.zeros((0,3)) #x phi c
#for the middle of the box
current_file=open("single_void_cyl_periodic_phi_c_middle_h_out",'r')
for line in current_file.readlines():
if line.startswith('# === time'):
map+=1
np.append(time,[float(line.strip('# === time '))])
elif line.startswith('#'):
pass
else:
v=np.fromstring(line,dtype=float,sep=' ')
middle_h=np.vstack( (middle_h,v[[1,3,4]]) )
current_file.close()
middle_h=middle_h.reshape((map,-1,3)) #3d array: map, x, phi,c
#######
def load_and_plot(): #will load a map file, and plot it along with the corresponding profile loaded before
while tasks_queue.empty()==False:
print("fecthing work ...")
try:
map_index=tasks_queue.get() #get some work to do from the queue
print("----> working on map: %s" %map_index)
x,y,zp=np.loadtxt("single_void_cyl_growth_periodic_post_map_"+str(map_index),\
unpack=True, usecols=[1, 2,3])
for i,el in enumerate(zp):
if el<0.:
zp[i]=0.
xv=np.unique(x)
yv=np.unique(y)
X,Y= np.meshgrid(xv,yv)
Z = griddata((x, y), zp, (X, Y),method='nearest')
figure=plt.figure(num=map_index,figsize=(14, 8))
ax1=plt.subplot2grid((2,2),(0,0))
ax1.plot(middle_h[map_index,:,0],middle_h[map_index,:,1],'*b')
ax1.grid(True)
ax1.axis([-15, 15, 0, 1])
ax1.set_title('Profiles')
ax1.set_ylabel(r'$\phi$')
ax1.set_xlabel('x')
ax2=plt.subplot2grid((2,2),(1,0))
ax2.plot(middle_h[map_index,:,0],middle_h[map_index,:,2],'*r')
ax2.grid(True)
ax2.axis([-15, 15, 0, 1])
ax2.set_ylabel('c')
ax2.set_xlabel('x')
ax3=plt.subplot2grid((2,2), (0,1),rowspan=2,aspect='equal')
sub_contour=ax3.contourf(X,Y,Z,np.linspace(0,1,11),vmin=0.)
figure.colorbar(sub_contour,ax=ax3)
figure.savefig('single_void_cyl_'+str(map_index)+'.png')
plt.close(map_index)
tasks_queue.task_done() #work for this item finished
except:
print("failed this time: %s" %map_index)
#######
nb_proc=8 #number of processes
tasks_queue=JoinableQueue() #a queue to pile up the work to do
jobs=list(range(map)) #each job is composed of a map
print("inserting jobs in the queue...")
for job in jobs:
tasks_queue.put(job)
print("done")
#launch the processes
for i in range(nb_proc):
current_process=Process(target=load_and_plot)
current_process.start()
#wait for all tasks to be treated
tasks_queue.join()

Related

Python multiprocessing producing unstable results

Can anyone help me understand why this simple example of trying to speed up a for loop using python's multiprocessing module produces unstable results? I use a Manager.List to store the values from the child processes.
Clearly I'm doing at least one thing wrong. What would be the correct way to do this?
import numpy as np
import multiprocessing
from matplotlib import pyplot as plt
from functools import partial
from multiprocessing import Manager
def run_parallel(x_val, result):
val = np.arctan(x_val)
result.append(val)
def my_func(x_array, parallel=False):
if not parallel:
result = []
for k in x_array:
result.append(np.arctan(k))
return result
else:
manager = Manager()
m_result = manager.list()
pool = multiprocessing.Pool(4)
pool.map(partial(run_parallel, result=m_result), x_array)
return list(m_result)
test_x = np.linspace(0.1,1,50)
serial = my_func(test_x,parallel=False)
parallel = my_func(test_x,parallel=True)
plt.figure()
plt.plot(test_x, serial, label='serial')
plt.plot(test_x,parallel, label='parallel')
plt.legend(loc='best')
plt.show()
The output I'm getting looks like this
and it looks different every time this runs.
I added some print functions and it turned out that the order of elements from x_array is arbitrary... That's why it looks so weird. I think you should keep argument and value of arctan pairs and then order it by argument value
EDIT
I read more and it turned out that map returns values in order... This works as you wanted:
import numpy as np
import multiprocessing
from matplotlib import pyplot as plt
from functools import partial
from multiprocessing import Manager
def run_parallel(x_val, result):
val = np.arctan(x_val)
return val
def my_func(x_array, parallel=False):
if not parallel:
result = []
for k in x_array:
result.append(np.arctan(k))
return result
else:
manager = Manager()
m_result = manager.list()
pool = multiprocessing.Pool(4)
x = pool.map(partial(run_parallel, result=m_result), x_array)
return list(x)
test_x = np.linspace(0.1,1,50)
parallel = my_func(test_x,parallel=True)
plt.figure()
plt.plot(test_x,parallel, label='parallel')
plt.legend(loc='best')
plt.show()

Python - ProcessPoolExecutor hangs when called from a mpl_connect handler

I'm using parallel processing to generate a plot of functions using complex numbers. My script allows you to zoom in on an area of the plot using the standard matplotlib controls and then regenerate the plot within the new limits to improve resolution.
This is my first foray into parallel processing and I've got as far as understanding that I need to preface with if __name__ == __main__: to allow the module to be imported properly. When running my script, the first plot is successfully generated and appears as expected. However, when the plotting function is called again from my event handler it instead hangs indefinitely. I assume that the hang is caused by some similar issue to that of requiring if __name__ == __main__:, as the parallel processes are being spawned from outside the main body of the script, but I haven't figured out anything further than this.
import numpy as np
import matplotlib.pyplot as plt
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
res = [1000, 1000]
base_factor = 2.
cpuNum = multiprocessing.cpu_count()
def brot(c, depth=200):
z = complex(0)
for i in range(depth):
z = pow(z, 2) + c
if abs(z) > 2:
return i
return -1
def brot_gen(span):
re_span = span[0]
im_span = span[1]
mset = np.zeros([len(im_span), len(re_span)])
for re in range(len(re_span)):
for im in range(len(im_span)):
mset[im][re] = brot(complex(re_span[re], im_span[im]))
return mset
def brot_gen_parallel(re_lim, im_lim):
re_span = np.linspace(re_lim[0], re_lim[1], res[0])
im_span = np.linspace(im_lim[0], im_lim[1], res[1])
split_re_span = np.array_split(re_span, cpuNum)
packages = [(sec, im_span) for sec in split_re_span]
print("Generating set between", re_lim, "and", im_lim, "...")
with ProcessPoolExecutor(max_workers = cpuNum) as executor:
result = executor.map(brot_gen, packages)
mset = np.concatenate(list(result), axis=1)
print("Set generated")
return mset
def handler(ax):
def action(event):
if event.button == 2:
cur_re_lim = ax.get_xlim()
cur_im_lim = ax.get_ylim()
mset = brot_gen_parallel(cur_re_lim, cur_im_lim)
ax.cla()
ax.imshow(mset, extent=[cur_re_lim[0], cur_re_lim[1], cur_im_lim[0], cur_im_lim[1]], origin="lower", vmin=0, vmax=200, interpolation="bilinear")
plt.draw()
fig = ax.get_figure()
fig.canvas.mpl_connect('button_release_event', action)
return action
if __name__ == "__main__":
re_lim = np.array([-2.5, 2.5])
im_lim = res[1]/res[0] * re_lim
mset = brot_gen_parallel(re_lim, im_lim)
plt.imshow(mset, extent=[re_lim[0], re_lim[1], im_lim[0], im_lim[1]], origin="lower", vmin=0, vmax=200, interpolation="bilinear")
ax = plt.gca()
f = handler(ax)
plt.show()
EDIT: I wondered if there was a bug in the code causing an exception, but that this might not be being successfully passed back to the console, however I tested this by running the same task without splitting it into parallel tasks and it completed successfully.
I have discovered the answer to my own question. The answer lies in the IDE I was using. In my experience, in most IDEs plt.show() blocks execution by default, however in Spyder the default seems to be the equivalent of plt.show(block=False), meaning that the script completed and so whatever was required to successfully start the parallel processes was no longer available, causing the hang. This was solved by simply changing the statement to plt.show(block=True), meaning that the script was still live.
I'm still very new to parallel processing so I'd be very interested in any more information anyone can give on what was lacking to stop the parallel processing from working.

How to display a set of files with MatPlotLib in a loop

I have a set of .txt named "occupancyGrid_i", i being a number from 0-100.
What I'd like to do is to open every one of them and show them for 3 seconds. The data of the .txt is a [N x M] matrix.
import numpy
import matplotlib.pyplot as plt
import time
while True:
matrix = numpy.loadtxt('res/matrix_' + str(i) + '.txt')
plt.clf()
plt.imshow(matrix)
plt.show()
time.sleep(3)
i=i+1
What I have done so far doesn't seem to be enough. What am I doing wrong?
You can try something like this, adapting the code suggested in this answer:
import os
import numpy as np
import pylab as plt
N_IMAGES = 100
VMIN, VMAX = 0, 1 # range of values in matrices
i = 0
while True:
if i < N_IMAGES:
path = 'res/matrix_' + str(i) + '.txt'
if os.path.exists(path): # check if file exists
matrix = np.loadtxt('matrices/matrix_' + str(i) + '.txt')
plt.imshow(matrix, vmin=VMIN, vmax=VMAX)
plt.title("Matrix {}".format(i))
plt.pause(3)
i += 1
else:
# terminate you program or start from the beginning
break
# i = 0
# continue
I dont know what exactly your goal is. But to display text in matplotlib you can use text from pyplot.
`
import numpy
import matplotlib.pyplot as plt
import time
for i in range(1,5):
s = ''
with open(str(i)+'.txt','r') as f:
s=f.read()
plt.text(0.5, 0.67,s,transform=plt.gca().transAxes)
plt.show()
time.sleep(3)
First 2 argument (0.5 ,0.67) are cordinate of displayed text.
I think you should find some other way of displaying text. Just print them on your console, plotting them is not the best way to represent text data.

numpy plot create figure failed after several minutes idel

I write a small program using web.py, and in one of classes I use numpy/plot.
I found that every first I visit the page , it works fine. but after several minutes, the function of plt.figure() frozen! this function will never return! That's so weird.
please have a look of my codes:
def DrawMapMain(MapParameter,inputfile='out.txt',imgfile='out.png'):
print "DrawMapMain..."
plt.ioff() # turn off interactive mode
plt.close('all')
xmin,xmax,ymin,ymax = MapParameter['xmin'],MapParameter['xmax'],MapParameter['ymin'],MapParameter['ymax']
print('LevelFile:',MapParameter['LevelFile'])
LonCenter = (xmin+xmax)/2.0
LatCenter = (ymin+ymax)/2.0
nx, ny = 200,200
if(not os.path.isfile(inputfile)):
print(u'输入文件%s不存在,请检查!'%(inputfile))
sys.exit(0)
Region = np.loadtxt(inputfile)
#print(Region)
x,y,z = Region[:,1],Region[:,2],Region[:,3]
lon_array = np.linspace(xmin, xmax, nx)
lat_array = np.linspace(ymin, ymax, ny)
print('Data lon/lat box :',x.min(),x.max(),y.min(),y.max())
print(u'离散点插值到网格')
zi,xi,yi = Interpolater.griddata_all(x,y,z,lon_array,lat_array,func='line_rbf')#scipy_idw')# #line_rbf
print(u'扩展矩阵插值: ')
zi,xi,yi,lon_array,lat_array,nx,ny=Interpolater.extened_grid(zi,lon_array,lat_array,zoom=int(2)) #
print(u'mask非绘图区域')
grid1 = Interpolater.build_inside_mask_array(MapParameter['ShapeFile'],lon_array,lat_array)
zi[np.logical_not(grid1)]=np.NaN
#-----------------------------------------------------------------------------------
print(u'Create figure...')
#fig = plt.figure(num=1,figsize=(12, 9), dpi=100)
fig = plt.figure(figsize=(12, 9), dpi=100)
#fig = plt.figure()
print(u'Create figure...Done')
.........skipped
first time I visit the page, I got:
mask非绘图区域
Create figure...
Create figure...Done
(104, 35, 108, 39.5)
this is ok, but after a while, visit again, I got:
mask非绘图区域
Create figure...
and I can see the process 'python' take 25% of my cpu(which have 4 core), that means it falls into a deadloop!
this is my web.py class, , for reference:
class Month:
def POST(self):
form = ParameterForm()
if not form.validates():
return render.Month(form)
else:
StationInfoFile='./StationsId.txt' # make sure this file is exist.
if(not os.path.isfile(StationInfoFile)):
print(u'StationInfoFile 文件%s不存在!'%(inputfile))
sys.exit(0)
StationsInfo = np.loadtxt(StationInfoFile) # load all data as integer and float, not string
StationsId,StationsLon,StationsLat = StationsInfo[:,0].astype(np.int64),StationsInfo[:,1],StationsInfo[:,2]
basedir, DataCats, DataCatsDict=u'D:/测试数据',[ u'逐日平均', u'逐日降水'],{ u'逐日平均':'td', u'逐日降水':'rd'}
iFrom,iEnd= \
int(form['Start Year'].value)*10000+ int(form['Start Month'].value)*100+ int(form['Start Day'].value), \
int(form['End Year'].value)*10000+ int(form['End Month'].value)*100+ int(form['End Day'].value) # value from form is string!
MapParameter=GetMapParameter()
if (u'温度' == form['Data Source'].value):
d=u'逐日平均'
tmpDataTxt='Test_temp.txt'
tmpOutPNG='./static/'+'Test_temp.png'
MapParameter['LevelFile']='.\maplev_temp.LEV'
MapParameter['Title']=u'逐日平均'
elif (u'降水(mm)' == form['Data Source'].value):
d=u'逐日降水'
tmpDataTxt='Test_pred.txt'
tmpOutPNG='./static/'+'Test_temp.png'
MapParameter['LevelFile']='.\maplev_rain.LEV'
MapParameter['Title']=u'逐日降水'
else:
print "form['Data Source'].value=",form['Data Source'].value
print "----------- PROCESSING FOR CATEGORY:",d
tmpMeanVal=[]
for i in range(len(StationsId)):
s,lo,la=StationsId[i],StationsLon[i],StationsLat[i]
#print basedir,d, str(s),DataCatsDict[d]+'.txt'
datafile=os.path.join(basedir,d, str(s))+DataCatsDict[d]+'.txt'
print datafile,iFrom,iEnd
data=getdata.GetData(datafile,iFrom,iEnd)
a=np.mean(np.array(data)[:,1])*0.1
tmpMeanVal.append([s,lo,la,a])
rec=np.array(tmpMeanVal,dtype=[('int','int'),('float','float')])
print 'Writing data ...'
np.savetxt(tmpDataTxt,tmpMeanVal,fmt="%6i %-7.2f %-7.2f %8.2f")
print 'Writing data ... Done.'
sssss=open(tmpDataTxt,'r')
print sssss.read()
sssss.close()
DrawMapMain(MapParameter,inputfile=tmpDataTxt,imgfile=tmpOutPNG)
return render.Reports(tmpOutPNG)
First I suspect that the plt.figure may have some memory leak problems, so I us clf,plt.close('all') at the beginning and end of the function both! I even wrote a segment of test code :
if __name__ == "__main__":
MapParameter=GetMapParameter()
MapParameter['LevelFile']='.\maplev_rain.LEV'
MapParameter['Title']=u'逐日降水'
for iloop in range(0,10):
DrawMapMain(MapParameter,inputfile='Test_pred.txt',imgfile='c:/Test_pred'+str(iloop)+'.png')
MapParameter['LevelFile']='.\maplev_temp.LEV'
MapParameter['Title']=u'逐日temp'
for iloop in range(0,10):
DrawMapMain(MapParameter,inputfile='Test_temp.txt',imgfile='c:/Test_temp'+str(iloop)+'.png')
this code works fine. It's so wired, does anybody know some clue? very thanks!
This question has an answer in the comments:
What matplotlib backend are you using? You should be using one of the non-interactive ones if you're running things from a webserver. E.g. do import matplotlib; matplotlib.use('Agg') before import matplotlib.pyplot as plt. – Joe Kington Jan 16 at 17:56
For more information on matplotlib backends, see: http://matplotlib.org/faq/usage_faq.html#what-is-a-backend – Joe Kington Jan 17 at 15:21

Plot really big file in python (5GB) with x axis offset

I am trying to plot a very big file (~5 GB) using python and matplotlib. I am able to load the whole file in memory (the total available in the machine is 16 GB) but when I plot it using simple imshow I get a segmentation fault. This is most probable to the ulimit which I have set to 15000 but I cannot set higher. I have come to the conclusion that I need to plot my array in batches and therefore made a simple code to do that. My main isue is that when I plot a batch of the big array the x coordinates start always from 0 and there is no way I can overlay the images to create a final big one. If you have any suggestion please let me know. Also I am not able to install new packages like "Image" on this machine due to administrative rights. Here is a sample of the code that reads the first 12 lines of my array and make 3 plots.
import os
import sys
import scipy
import numpy as np
import pylab as pl
import matplotlib as mpl
import matplotlib.cm as cm
from optparse import OptionParser
from scipy import fftpack
from scipy.fftpack import *
from cmath import *
from pylab import *
import pp
import fileinput
import matplotlib.pylab as plt
import pickle
def readalllines(file1,rows,freqs):
file = open(file1,'r')
sizer = int(rows*freqs)
i = 0
q = np.zeros(sizer,'float')
for i in range(rows*freqs):
s =file.readline()
s = s.split()
#print s[4],q[i]
q[i] = float(s[4])
if i%262144 == 0:
print '\r ',int(i*100.0/(337*262144)),' percent complete',
i += 1
file.close()
return q
parser = OptionParser()
parser.add_option('-f',dest="filename",help="Read dynamic spectrum from FILE",metavar="FILE")
parser.add_option('-t',dest="dtime",help="The time integration used in seconds, default 10",default=10)
parser.add_option('-n',dest="dfreq",help="The bandwidth of each frequency channel in Hz",default=11.92092896)
parser.add_option('-w',dest="reduce",help="The chuncker divider in frequency channels, integer default 16",default=16)
(opts,args) = parser.parse_args()
rows=12
freqs = 262144
file1 = opts.filename
s = readalllines(file1,rows,freqs)
s = np.reshape(s,(rows,freqs))
s = s.T
print s.shape
#raw_input()
#s_shift = scipy.fftpack.fftshift(s)
#fig = plt.figure()
#fig.patch.set_alpha(0.0)
#axes = plt.axes()
#axes.patch.set_alpha(0.0)
###plt.ylim(0,8)
plt.ion()
i = 0
for o in range(0,rows,4):
fig = plt.figure()
#plt.clf()
plt.imshow(s[:,o:o+4],interpolation='nearest',aspect='auto', cmap=cm.gray_r, origin='lower')
if o == 0:
axis([0,rows,0,freqs])
fdf, fdff = xticks()
print fdf
xticks(fdf+o)
print xticks()
#axis([o,o+4,0,freqs])
plt.draw()
#w, h = fig.canvas.get_width_height()
#buf = np.fromstring(fig.canvas.tostring_argb(), dtype=np.uint8)
#buf.shape = (w,h,4)
#buf = np.rol(buf, 3, axis=2)
#w,h,_ = buf.shape
#img = Image.fromstring("RGBA", (w,h),buf.tostring())
#if prev:
# prev.paste(img)
# del prev
#prev = img
i += 1
pl.colorbar()
pl.show()
If you plot any array with more than ~2k pixels across something in your graphics chain will down sample the image in some way to display it on your monitor. I would recommend down sampling in a controlled way, something like
data = convert_raw_data_to_fft(args) # make sure data is row major
def ds_decimate(row,step = 100):
return row[::step]
def ds_sum(row,step):
return np.sum(row[:step*(len(row)//step)].reshape(-1,step),1)
# as per suggestion from tom10 in comments
def ds_max(row,step):
return np.max(row[:step*(len(row)//step)].reshape(-1,step),1)
data_plotable = [ds_sum(d) for d in data] # plug in which ever function you want
or interpolation.
Matplotlib is pretty memory-inefficient when plotting images. It creates several full-resolution intermediate arrays, which is probably why your program is crashing.
One solution is to downsample the image before feeding it into matplotlib, as #tcaswell suggests.
I also wrote some wrapper code to do this downsampling automatically, based on your screen resolution. It's at https://github.com/ChrisBeaumont/mpl-modest-image, if it's useful. It also has the advantage that the image is resampled on the fly, so you can still pan and zoom without sacrificing resolution where you need it.
I think you're just missing the extent=(left, right, bottom, top) keyword argument in plt.imshow.
x = np.random.randn(2, 10)
y = np.ones((4, 10))
x[0] = 0 # To make it clear which side is up, etc
y[0] = -1
plt.imshow(x, extent=(0, 10, 0, 2))
plt.imshow(y, extent=(0, 10, 2, 6))
# This is necessary, else the plot gets scaled and only shows the last array
plt.ylim(0, 6)
plt.colorbar()
plt.show()

Categories