labeled intervals in matplotlib

labeled intervals in matplotlib - python

I'm referring to the question "Plotting labeled intervals in matplotlib/gnuplot". The problem with the solution given there is that it doesn't work when the data file contains only one line. This is the code I'm trying:
#!/usr/bin/env python
#
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter, MinuteLocator, SecondLocator
import numpy as np
from StringIO import StringIO
import datetime as dt
a=StringIO("""MMEX 2016-01-29T12:38:22 2016-01-29T12:39:03 SUCCESS
""")
#Converts str into a datetime object.
conv = lambda s: dt.datetime.strptime(s, '%Y-%m-%dT%H:%M:%S')
#Use numpy to read the data in.
#Columns 1 and 2 (the start/stop timestamps) are passed through conv;
#names= labels the four whitespace-separated columns.
data = np.genfromtxt(a, converters={1: conv, 2: conv},
names=['caption', 'start', 'stop', 'state'], dtype=None)
#NOTE(review): with a single input row, as in this example, these fields
#come back as 0-dimensional arrays instead of 1-D arrays.
cap, start, stop = data['caption'], data['start'], data['stop']
#Check the status, because we paint all lines with the same color
#together
is_ok = (data['state'] == 'SUCCESS')
not_ok = np.logical_not(is_ok)
#Get unique captions and their indices and the inverse mapping
#(the two positional 1s are presumably return_index and return_inverse).
captions, unique_idx, caption_inv = np.unique(cap, 1, 1)
#Build y values from the number of unique captions.
#Caption k (0-based) is placed at height (k + 1) / (n + 1), i.e. inside (0, 1).
y = (caption_inv + 1) / float(len(captions) + 1)
#Plot function
def timelines(y, xstart, xstop, color='b'):
    """Plot timelines at y from xstart to xstop with given color.

    For every interval a thick horizontal bar is drawn with ``plt.hlines``
    and a short vertical end cap is drawn at each end with ``plt.vlines``.
    Everything is drawn through the pyplot state machine, i.e. on the
    current axes.

    Parameters
    ----------
    y : array-like
        Vertical position of each interval.
    xstart, xstop : array-like
        Start and end x-coordinate of each interval.
    color : str, optional
        Colour passed to ``hlines``/``vlines`` (default ``'b'``).
    """
    # Main bar of each interval.
    plt.hlines(y, xstart, xstop, color, lw=4)
    # End caps extend 0.005 above and below the bar.
    plt.vlines(xstart, y+0.005, y-0.005, color, lw=2)
    plt.vlines(xstop, y+0.005, y-0.005, color, lw=2)
#Plot ok tl black
#("tl" = timelines.) NOTE(review): with a single-row input this boolean
#indexing is the line that raises the ValueError shown in the traceback.
timelines(y[is_ok], start[is_ok], stop[is_ok], 'k')
#Plot fail tl red
timelines(y[not_ok], start[not_ok], stop[not_ok], 'r')
#Setup the plot
ax = plt.gca()
#Treat the x axis as dates and label ticks with full ISO-style timestamps.
ax.xaxis_date()
myFmt = DateFormatter('%Y-%m-%dT%H:%M:%S')
ax.xaxis.set_major_formatter(myFmt)
ax.xaxis.set_major_locator(SecondLocator(interval=3600)) # used to be SecondLocator(0, interval=20)
#To adjust the xlimits a timedelta is needed.
#A tenth of the total time span is used as padding on both sides.
delta = (stop.max() - start.min())/10
#One y tick per unique caption, labelled with the caption text.
plt.yticks(y[unique_idx], captions)
plt.ylim(0,1)
plt.xlim(start.min()-delta, stop.max()+delta)
plt.xlabel('Time')
plt.xticks(rotation=70)
plt.show(block=True)
When I try this code, I get the following error:
Traceback (most recent call last):
File "./testPlot.py", line 49, in <module>
timelines(y[is_ok], start[is_ok], stop[is_ok], 'k')
ValueError: boolean index array should have 1 dimension
Also, when I add a dummy line to the data, let's say "MMEX 2016-01-01T00:00:00 2016-01-01T00:00:00 SUCCESS", the plot works but doesn't look good.
Any suggestions? I tried to put this question on the same post when I found the solution, but I don't have enough reputation...
Thanks in advance

The issue is that when np.genfromtxt reads only one row, it produces 0-dimensional arrays (scalars). We need them to be at least 1D.
You can add these lines just above where you define your timelines function, and then everything works ok.
This makes use of the numpy function np.atleast_1d(), to turn the scalars into 1D numpy arrays.
#Check the dimensions are at least 1D (for 1-item data input)
#np.atleast_1d turns a 0-d array into a 1-element 1-D array, so the boolean
#indexing further down (e.g. start[is_ok]) works for single-row input too.
if start.ndim < 1:
    start = np.atleast_1d(start)
if stop.ndim < 1:
    stop = np.atleast_1d(stop)
if is_ok.ndim < 1:
    is_ok = np.atleast_1d(is_ok)
if not_ok.ndim < 1:
    #Promote not_ok itself; using is_ok here would mark every row as failed
    #exactly when it succeeded.
    not_ok = np.atleast_1d(not_ok)
The output:

Related

how to change the color of subplot datafile in matplotlib

In Short:
I want to change the color of blue marker in the graph. So that I can do comparison with other plots easily.
You can download the data files and script from this link
Problem Explanation
I have two data files, full.dat and part.dat(Note: part.dat is also there in full.dat).
I got the plotting scripts from the internet, and it is working very well. But as a noob in Python and Matplotlib, I am facing difficulties in changing the color of part.dat.
Please see the graph first, then the following scripts.
Script-1: Function and definitions: let's say: "func.py"
# This was written by Levi Lentz for the Kolpak Group at MIT
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import matplotlib.gridspec as gs
import sys
#This function extracts the high symmetry points from the output of bandx.out
def Symmetries(fstring):
    """Return the high-symmetry point coordinates listed in a text file.

    Every line of the file that contains the substring ``"high-symmetry"``
    is split on whitespace and its last field is converted to ``float``.

    Parameters
    ----------
    fstring : str
        Path of the file to scan.

    Returns
    -------
    numpy.ndarray
        1-D float array with one value per matching line, in file order.
        The array is empty when no line matches.

    Raises
    ------
    ValueError
        If the last field of a matching line cannot be parsed as a float.
    """
    # The context manager closes the file even when float() raises.
    with open(fstring, 'r') as f:
        points = [float(line.split()[-1]) for line in f if "high-symmetry" in line]
    # Build the array once instead of re-allocating it with np.append per line.
    return np.array(points, dtype=float)
# This function takes in the datafile, the fermi energy, the symmetry file, a subplot, and the label
# It then extracts the band data, and plots the bands, the fermi energy in red, and the high symmetry points
def bndplot(datafile_full,datafile,fermi,symmetryfile,subplot,**kwargs):
    """Plot bands, weighted markers, Fermi level and symmetry lines on ``subplot``.

    Parameters
    ----------
    datafile_full : str
        File read with ``np.loadtxt``; column 0 is the x coordinate of a
        point and column 1 the band value at that point.
    datafile : str
        Three-column file read with ``np.loadtxt(..., unpack=True)`` as
        (x, value, weight). Each row becomes one scatter marker whose area
        is ``70 * weight``.
    fermi : float or str
        Fermi energy; converted with ``float``.
    symmetryfile : str
        File passed to ``Symmetries`` to obtain the x positions of the
        vertical dashed lines and of the x ticks.
    subplot : matplotlib axes
        Axes that receives all drawing.
    **kwargs
        shift_fermi : truthy to subtract the Fermi energy from all values
            (default 0).
        color : colour of the band lines (default ``'black'``).
        linestyle : line style of the band lines (default ``'solid'``).
        marker_color : colour of the scatter markers; when omitted the
            markers use matplotlib's default colour.
        legend : label for the band lines; when given a legend is drawn.
        name_k_points : tick labels, used only if their number equals the
            number of symmetry points.
        range : two-element sequence with the y limits.
    """
    bool_shift_efermi = kwargs.get('shift_fermi', 0)
    color_bnd = kwargs.get('color', 'black')
    line_bnd = kwargs.get('linestyle', 'solid')
    marker_color = kwargs.get('marker_color')

    z = np.loadtxt(datafile_full) #This loads the full.dat file
    x = np.unique(z[:,0]) #This is all the unique x-points
    a, b, w = np.loadtxt(datafile, unpack=True) #Weight
    bndl = len(z[z[:,0]==x[1]]) #This gives the number of bands in the calculation
    Fermi = float(fermi)
    fermi_shift = Fermi if bool_shift_efermi else 0
    axis = [min(x),max(x)]

    #This is where we store the bands: one (len(x), 2) array per band
    bands = [np.zeros([len(x),2]) for _ in range(bndl)]
    for i in range(len(x)):
        sel = z[z[:,0] == x[i]] #Here is the energies for a given x
        for j in range(bndl): #This separates it out into a single band
            bands[j][i][0] = x[i]
            bands[j][i][1] = sel[j][1]

    #Here we plot the bands
    for band in bands:
        subplot.plot(band[:,0],band[:,1]-fermi_shift,color=color_bnd,linestyle=line_bnd, linewidth=0.7,alpha=0.5)
    if 'legend' in kwargs:
        #empty plot to generate legend
        subplot.plot([None],[None],color=color_bnd,linestyle=line_bnd,label=kwargs['legend'])

    temp = Symmetries(symmetryfile)
    for j in temp: #This is the high symmetry lines
        subplot.axvline(x=j,linestyle='dashed',color='black',alpha=0.75)
    #Fermi level (at zero when the values are shifted)
    subplot.plot([min(x),max(x)],[Fermi-fermi_shift,Fermi-fermi_shift],color='red',linestyle='dotted')

    subplot.set_xticks(temp)
    subplot.set_xticklabels([])
    if 'name_k_points' in kwargs:
        if len(kwargs['name_k_points'])==len(temp):
            subplot.set_xticklabels(kwargs['name_k_points'])
    if 'range' in kwargs:
        range_plot=kwargs['range']
        subplot.set_ylim([range_plot[0],range_plot[1]])
    subplot.set_xlim([axis[0],axis[1]])
    subplot.set_xlabel('k')
    subplot.set_ylabel('E-E$_f$')

    #Weighted markers are drawn on the axes that was passed in (not on
    #whatever pyplot considers current). The colour is only forwarded when
    #the caller asked for one, so the default appearance is unchanged.
    scatter_kwargs = {'s': 70*np.array(w)}
    if marker_color is not None:
        scatter_kwargs['color'] = marker_color
    subplot.scatter(a,b-fermi_shift,**scatter_kwargs)
    if 'legend' in kwargs:
        subplot.legend()
script-2 Plotting script: let's say: "plot.py"
#!/usr/bin/python3
# Driver script: draws one band-structure figure with bndplot and saves it.
# The star import supplies bndplot as well as the np and plt names that
# func.py itself imports.
from func import *
El='el'
orb='orb'
plt.rcParams["figure.figsize"]=(4,15)
datafile_full='bands.dat.gnu'
#datafile=El+'_'+orb+'.dat.all'
# Weight file name is assembled from the two labels: "el_orb.dat.all".
datafile=El+'_'+orb+'.dat.all'
fermi = 10.2382
symmetryfile='band.out'
# NOTE(review): this variable is not passed to bndplot; the call below
# sets shift_fermi=1 explicitly.
bool_shift_efermi= True
fig, ax = plt.subplots()
#bndplot(datafile,fermi,symmetryfile,ax)
# `color` is applied by bndplot to the band lines only, not to the scatter
# markers, which is why changing it here does not recolour the circles.
bndplot(datafile_full,datafile,fermi,symmetryfile,ax,shift_fermi=1,color='black',linestyle='solid',name_k_points=['K','G','M','K','H','A','L','H'], legend=El+', '+orb+'-orbital')
#ax.set_ylim(-5,5)
ax.set_ylim(-10,12)
# Final figure size (inches) is set here, after the rcParams value above.
fig.set_figheight(6)
fig.set_figwidth(4)
# NOTE(review): the font size is changed after everything has been drawn;
# presumably it does not affect text that already exists — confirm.
plt.rcParams.update({'font.size': 22})
fig.savefig("el-orb.eps")
plt.show()
In script-2 there is an option to change the color; however, I want to change the color of the blue markers/solid circles (please see the graph) so that I can compare with other graphs.
Whenever I change the color, only the line color changes.
Please help me out. I have been trying to understand Matplotlib usage and examples for the past few hours, but as a beginner I was not able to figure out how to do this.

Python : Multiple Line\any form of graph by defining Matching column values with large datasets

I am trying to plot this raw data (a CSV export) in a single graph; I don't want an individual graph for each entity/LDEV value, because that would make comparison difficult. For example, I want one line for LDEV_NUMBER 00:42:26 with the record time on the X axis and "READ_IO_COUNT" on the Y axis; the next line in the same graph would represent the next LDEV, and so on.
import io
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
data = b'RECORD_TIME,LDEV_NUMBER,READ_IO_COUNT,WRITE_IO_COUNT,READ_MBYTES,WRITE_MBYTES,READ_RESPONSE_RATE,WRITE_RESPONSE_RATE,TOTAL_RESPONSE_RATE\r\ntime_t,string(16),ulong,ulong,ulong,ulong,float,float,float\r\n2020-03-16 00:00:42,"00:42:26",217,0,1,0,9.200517E+01,0.000000E+00,9.200517E+01\r\n2020-03-16 00:01:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:02:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:03:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:04:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:05:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:06:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:07:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:08:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:09:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:10:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:11:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:12:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:13:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:14:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:15:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:16:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:17:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:18:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:19:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:20:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 
00:21:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:22:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:23:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:24:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:25:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:26:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:27:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:28:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:29:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:30:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:31:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:32:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:33:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:34:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:35:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:36:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:37:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:38:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:39:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:40:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:41:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:42:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:43:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:44:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 
00:45:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:46:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:47:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:48:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:49:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:50:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:51:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:52:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:53:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:54:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:55:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:56:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:57:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:58:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:59:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:00:33,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:01:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:02:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:03:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:04:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:05:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:06:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:07:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:08:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 
01:09:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:10:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:11:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:12:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:13:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:14:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:15:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:16:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:17:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:18:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:19:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:20:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:21:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:22:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:23:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:24:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:25:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:26:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:27:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:28:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:29:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:30:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:31:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:32:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 
01:33:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:34:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:35:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:36:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:37:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:38:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:39:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:40:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:41:02,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:42:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:43:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:44:01,"00:42:26",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:55:02,"00:42:28",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:56:02,"00:42:28",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:57:01,"00:42:28",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:58:01,"00:42:28",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:59:02,"00:42:28",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 02:00:33,"00:42:28",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:17:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:18:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:19:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:20:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:21:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:22:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 
00:23:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:24:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:25:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:26:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:27:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:28:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:29:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:30:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:31:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:32:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:33:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:34:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:35:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:36:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:37:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:38:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:39:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:40:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:41:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:42:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:43:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:44:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:45:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:46:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 
00:47:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:48:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:49:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:50:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:51:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:52:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:53:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:54:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:55:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:56:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:57:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:58:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 00:59:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:00:33,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:01:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:02:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:03:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:04:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:05:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:06:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:07:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:08:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:09:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:10:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 
01:11:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:12:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:13:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:14:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:15:01,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:16:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:17:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:18:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 01:59:02,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n2020-03-16 02:00:33,"00:42:2B",0,0,0,0,0.000000E+00,0.000000E+00,0.000000E+00\r\n\r\n'
# NOTE(review): the second line of the CSV text is a type-descriptor row
# ("time_t,string(16),ulong,..."), so it ends up as the first data row of df.
df = pd.read_csv(io.StringIO(data.decode('ascii')))
# df1 is df without that descriptor row.
df1=df.drop(df.index[0])
Lun_name=["00:42:2A","00:42:2B","00:42:27","00:42:26","00:42:28","00:42:29"]
# Draw one line per LDEV on the same axes.
# NOTE(review): the plot reads from df, not df1, and READ_IO_COUNT is
# presumably still of object (string) dtype here, which is what scrambles
# the axis values described below.
for lun in Lun_name:
    rows = df.loc[df['LDEV_NUMBER'] == '%s' % lun]
    plt.plot(rows['RECORD_TIME'], rows['READ_IO_COUNT'])
plt.show()
The axis values are jumbled when I try to plot this using the code above.
[READ IOPS]

With the help of my colleague, I resolved it. The issue is that the values plotted on the Y axis (for example TOTAL_MBYTES) have dtype object; after converting them to integers, the axes look fine. We made a slight modification to the code, as shown below:
# NOTE: `dates` is presumably matplotlib.dates (e.g. `import matplotlib.dates
# as dates`); df1 and portname come from the poster's full script.
# Configure the date axis once, before any series is drawn, so that every
# line shares the same axes.
ax = plt.axes()
ax.xaxis.set_minor_locator(dates.HourLocator(interval=4)) # every 4 hours
ax.xaxis.set_minor_formatter(dates.DateFormatter('%H:%M')) # hours and minutes
ax.xaxis.set_major_locator(dates.DayLocator(interval=1)) # every day
ax.xaxis.set_major_formatter(dates.DateFormatter('\n%d-%m-%Y'))
for name in portname:
    # plt.plot(df.loc[df['LDEV_NUMBER'] =='%s'%Lun_name[i]]['RECORD_TIME'], df.loc[df['LDEV_NUMBER'] == '%s'%Lun_name[i]]['READ_IO_COUNT'])
    rows = df1.loc[df1['PORT_NAME'] == '%s' % name]
    X = list(rows['DATETIME'])
    # The column holds strings (object dtype); convert to int so the y axis
    # is numeric. A separate loop variable avoids clobbering the port index.
    Y = [int(value) for value in rows['TOTAL_MBYTES']]
    plt.plot(X, Y)
plt.ylabel('Processor throughput')
plt.xlabel('Time')
plt.legend(portname)
plt.savefig('port_throughput.png')

IndexError: too many indices for array for an array that is definitely as big

I'm trying to make a movie by taking png images of an updating plot and stitching them together. There are three variables: degrees, ksB, and mp. Only mp changes each frame; the other two are constant. The data for mp for all times is stored in X. This is the relevant part of the code:
def plot(fname, haveMLPY=False):
    """Plot MP trajectories and render one 3-D fit frame per time step.

    Loads the arrays "X", "T", "vipWeights" and "ksB" from the ``.npz``
    file ``fname``, plots variable 0 of ``X`` against ``T``, then for each
    of 10**4 time steps fits a surface to (degrees, ksB, mp), saves the
    frame as ``_tmpNNN.png``, and finally calls ``mencoder`` to build
    ``animation.mpg`` and deletes the frames.

    Parameters
    ----------
    fname : str
        Path of the ``.npz`` file.
    haveMLPY : bool, optional
        When true, additionally show the wavelet spectrum through
        ``circadian.analysis.Signal``.
    """
    # Load data from .npz file.
    data = np.load(fname)
    X = data["X"]
    T = data["T"]
    N = X.shape[1]
    A = data["vipWeights"]
    degrees = A.sum(1)
    ksB = data["ksB"]
    # Initialize a figure.
    figure = plt.figure()
    # Generate a plottable axis as the first subplot in 1 rows and 1 columns.
    axis = figure.add_subplot(1,1,1)
    # MP is the first (0th) variable. Plot one trajectory for each cell over time.
    axis.plot(T, X[:,:,0], color="black")
    # Decorate the plot.
    axis.set_xlabel("time [hours]")
    axis.set_ylabel("MP [nM]")
    axis.set_title("PER mRNA concentration across all %d cells" % N)
    firstInd = int(T.size / 2)
    if haveMLPY:
        import circadian.analysis
        # Generate a and plot Signal object, which encapsulates wavelet analysis.
        signal = circadian.analysis.Signal(X[firstInd:, 0, 0], T[firstInd:])
        signal.showSpectrum(show=False)
    files=[]
    # filename for the name of the resulting movie
    filename = 'animation'
    mp = X[10**4-1,:,0]
    from mpl_toolkits.mplot3d import Axes3D
    for i in range(10**4):
        print(i)
        mp = X[i,:,0]
        data2 = np.c_[degrees, ksB, mp]
        # Find best fit surface for data2
        # regular grid covering the domain of the data
        mn = np.min(data2, axis=0)
        mx = np.max(data2, axis=0)
        # NOTE(review): this assignment rebinds X (the loaded data array) to
        # the 2-D meshgrid, so X[i,:,0] at the top of the next iteration no
        # longer indexes the original data.
        X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))
        XX = X.flatten()
        YY = Y.flatten()
        order = 2 # 1: linear, 2: quadratic
        if order == 1:
            # best-fit linear plane
            A = np.c_[data2[:,0], data2[:,1], np.ones(data2.shape[0])]
            C,_,_,_ = scipy.linalg.lstsq(A, data2[:,2]) # coefficients
            # evaluate it on grid
            Z = C[0]*X + C[1]*Y + C[2]
            # or expressed using matrix/vector product
            #Z = np.dot(np.c_[XX, YY, np.ones(XX.shape)], C).reshape(X.shape)
        elif order == 2:
            # best-fit quadratic curve
            A = np.c_[np.ones(data2.shape[0]), data2[:,:2], np.prod(data2[:,:2], axis=1), data2[:,:2]**2]
            C,_,_,_ = scipy.linalg.lstsq(A, data2[:,2])
            # evaluate it on a grid
            Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX*YY, XX**2, YY**2], C).reshape(X.shape)
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2)
        ax.scatter(degrees, ksB, mp)
        ax.set_xlabel('degrees')
        ax.set_ylabel('ksB')
        ax.set_zlabel('mp')
        # form a filename
        fname2 = '_tmp%03d.png'%i
        # save the frame
        savefig(fname2)
        # release the frame's figure so 10**4 figures do not pile up in memory
        plt.close(fig)
        # append the filename to the list
        files.append(fname2)
    # call mencoder
    os.system("mencoder 'mf://_tmp*.png' -mf type=png:fps=10 -ovc lavc -lavcopts vcodec=wmv2 -oac copy -o " + filename + ".mpg")
    # cleanup
    for fname2 in files:
        os.remove(fname2)
Basically, all the data is stored in X. The format X[i, i, i] means X[time, neuron, data type]. Each time through the loop, I want to update the time, but still plot mp (the 0th variable) for all the neurons.
When I run this code, I get "IndexError: too many indices for array". I asked it to print i to see when the code was going wrong. I get an error when i = 1, meaning that the code loops through once but then has the error the second time.
However, I have data for 10^4 time steps. You can see in the first line of the provided code, I access X[10**4-1, :, 0] successfully. That's why it's confusing to me why X[1,:,0] would be out of range. If anybody could explain why/help me get around this, that would be great.
The traceback error is
Traceback (most recent call last):
File"/Users/angadanand/Documents/LiClipseWorkspace/Circadian/scripts /runMeNets.py", line 196, in module
plot(fname)
File"/Users/angadanand/Documents/LiClipseWorkspace/Circadian/scripts /runMeNets.py", line 142, in plot
mp = X[i,:,0]
IndexError: too many indices for array
Thanks!

Your problem is that you overwrite your X inside your loop:
X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))
So afterwards it has a different shape and contains different data. I would suggest renaming this second X to X_grid (and Y to Y_grid) and checking where you need this "other" X and where you need the original.
for example:
X_grid, Y_grid = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))

Adding a single label to the legend for a series of different data points plotted inside a designated bin in Python using matplotlib.pyplot.plot()

I have a script for plotting astronomical data of redmapping clusters using a csv file. I could get the data points in it and want to plot them using different colors depending on their redshift values: I am binning the dataset into 3 bins (0.1-0.2, 0.2-0.25, 0.25,0.31) based on the redshift.
The problem arises with my code after I distinguish to what bin the datapoint belongs: I want to have 3 labels in the legend corresponding to red, green and blue data points, but this is not happening and I don't know why. I am using plot() instead of scatter() as I also had to do the best fit from the data in the same figure. So everything needs to be in 1 figure.
import numpy as np
import matplotlib.pyplot as py
import csv
z = open("Sheet4CSV.csv","rU")
data = csv.reader(z)
x = []
y = []
ylow = []
yupp = []
xlow = []
xupp = []
redshift = []
# Column layout used below: 1 = redshift, 2 = x, 3/4 = lower/upper x bound,
# 5 = y, 6/7 = lower/upper y bound.
for r in data:
    x.append(float(r[2]))
    y.append(float(r[5]))
    xlow.append(float(r[3]))
    xupp.append(float(r[4]))
    ylow.append(float(r[6]))
    yupp.append(float(r[7]))
    redshift.append(float(r[1]))
# All rows have been consumed; release the file handle.
z.close()
from operator import sub
# Asymmetric error bar lengths: distance from each value to its bounds.
xerr_l = map(sub,x,xlow)
xerr_u = map(sub,xupp,x)
yerr_l = map(sub,y,ylow)
yerr_u = map(sub,yupp,y)
py.xlabel("$Original\ Tx\ XCS\ pipeline\ Tx\ keV$")
py.ylabel("$Iterative\ Tx\ pipeline\ keV$")
py.xlim(0,12)
py.ylim(0,12)
py.title("Redmapper Clusters comparison of Tx pipelines")
ax1 = py.subplot(111)
##Problem starts here after the previous line##
# NOTE(review): the outer loop variable is overwritten immediately by
# p=redshift[i], so the inner loop (one plot call per data point) is simply
# repeated once for every element of redshift. Because every point is its
# own plot call, enabling the commented-out labels yields one legend entry
# per point.
for p in redshift:
    for i in xrange(84):
        p=redshift[i]
        if 0.1<=p<0.2:
            ax1.plot(x[i],y[i],color="b", marker='.', linestyle = " ")#, label = "$z < 0.2$")
            # bare name: evaluates `exit` without calling it
            exit
        if 0.2<=p<0.25:
            ax1.plot(x[i],y[i],color="g", marker='.', linestyle = " ")#, label="$0.2 \leq z < 0.25$")
            exit
        if 0.25<=p<=0.3:
            ax1.plot(x[i],y[i],color="r", marker='.', linestyle = " ")#, label="$z \geq 0.25$")
            exit
##There seems nothing wrong after this point##
py.errorbar(x,y,yerr=[yerr_l,yerr_u],xerr=[xerr_l,xerr_u], fmt= " ",ecolor='magenta', label="Error bars")
# Straight-line least-squares fit through all points.
cof = np.polyfit(x,y,1)
p = np.poly1d(cof)
l = np.linspace(0,12,100)
py.plot(l,p(l),"black",label="Best fit")
py.plot([0,15],[0,15],"black", linestyle="dotted", linewidth=2.0, label="line $y=x$")
py.grid()
box = ax1.get_position()
ax1.set_position([box.x1,box.y1,box.width, box.height])
# Legend is anchored to the right of the axes.
py.legend(loc='center left',bbox_to_anchor=(1,0.5))
py.show()
In the 1st 'for' loop, I have indexed every value 'p' in the list 'redshift' so that bins can be created using 'if' statement. But if I add the labels that are hashed out against each py.plot() inside the 'if' statements, each data point 'i' that gets plotted in the figure as an intersection of (x[i],y[i]) takes the label and my entire legend attains in total 87 labels (including the 3 mentioned in the code at other places)!!!!!!
I essentially need 1 label for each bin...
Please tell me what needs to done after the bins are created and py.plot() commands used...Thanks in advance :-)
Sorry I cannot post my image here due to low reputation!
The data 'appended' for x, y and redshift lists from the csv file are as follows:
x=[5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547]
y=[5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677]
redshift = [0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19]

Working with numerical data like this, you should really consider using a numerical library, like numpy.
The problem in your code arises from processing each record (a coordinate (x,y) and the corresponding value redshift) one at a time. You are calling plot for each point, thereby creating legends for each of those 84 datapoints. You should consider your "bins" as groups of data that belong to the same dataset and process them as such. You could use "logical masks" to distinguish between your "bins", as shown below.
It's also not clear why you call exit after each plotting action.
import numpy as np
import matplotlib.pyplot as plt
x = np.array([5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547])
y = np.array([5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677])
redshift = np.array([0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19])
# Boolean masks, one per redshift bin. Indexing x and y with a mask selects
# all points of that bin at once, so each bin is plotted as a single dataset.
bin1 = np.logical_and(0.1 <= redshift, redshift < 0.2)
bin2 = np.logical_and(0.2 <= redshift, redshift < 0.25)
bin3 = 0.25 <= redshift
plt.ion()
# Raw strings keep the LaTeX backslashes (\leq, \geq) from being parsed as
# Python escape sequences.
labels = (r"$z < 0.2$", r"$0.2 \leq z < 0.25$", r"$z \geq 0.25$")
colors = ('r', 'g', 'b')
# The loop variable is `mask` rather than `bin`, which would shadow the builtin.
for mask, label, co in zip((bin1, bin2, bin3), labels, colors):
    # One plot call per bin, hence one legend entry per bin.
    plt.plot(x[mask], y[mask], color=co, ls='none', marker='o', label=label)
plt.legend()
plt.show()

Plot really big file in python (5GB) with x axis offset

I am trying to plot a very big file (~5 GB) using python and matplotlib. I am able to load the whole file in memory (the total available in the machine is 16 GB) but when I plot it using simple imshow I get a segmentation fault. This is most probably due to the ulimit, which I have set to 15000 but cannot set any higher. I have come to the conclusion that I need to plot my array in batches and therefore wrote some simple code to do that. My main issue is that when I plot a batch of the big array the x coordinates always start from 0, so there is no way I can overlay the images to create a final big one. If you have any suggestions please let me know. Also, I am not able to install new packages like "Image" on this machine because I lack administrative rights. Here is a sample of the code that reads the first 12 lines of my array and makes 3 plots.
import os
import sys
import scipy
import numpy as np
import pylab as pl
import matplotlib as mpl
import matplotlib.cm as cm
from optparse import OptionParser
from scipy import fftpack
from scipy.fftpack import *
from cmath import *
from pylab import *
import pp
import fileinput
import matplotlib.pylab as plt
import pickle
def readalllines(file1, rows, freqs):
    """Read the fifth whitespace-separated field of each line as a float.

    Reads ``rows * freqs`` lines from *file1* and stores field index 4 of
    every line in a flat array, printing a progress percentage as it goes.

    Args:
        file1: Path of the text file to read.
        rows: Together with *freqs*, fixes the number of lines read
            (``rows * freqs``).
        freqs: See *rows*; progress is reported every *freqs* lines.

    Returns:
        A 1-D numpy float array of length ``rows * freqs``.

    Raises:
        IndexError: If a line has fewer than five fields. This includes
            reaching the end of the file early, where the line is empty.
        ValueError: If the fifth field cannot be converted to float.
    """
    total = int(rows * freqs)
    q = np.zeros(total, 'float')
    # The context manager closes the file even when parsing fails part-way.
    with open(file1, 'r') as handle:
        for i in range(total):
            q[i] = float(handle.readline().split()[4])
            if i % freqs == 0:
                # Percentage is relative to the number of lines requested,
                # not to a hard-coded file size.
                percent = int(i * 100.0 / total)
                sys.stdout.write('\r  %d  percent complete' % percent)
                sys.stdout.flush()
    return q
# Command-line options. Explicit `type`s make values given on the command
# line arrive as numbers, like the numeric defaults; without a type,
# optparse stores them as strings.
parser = OptionParser()
parser.add_option('-f', dest="filename", help="Read dynamic spectrum from FILE", metavar="FILE")
parser.add_option('-t', dest="dtime", type="float", help="The time integration used in seconds, default 10", default=10)
parser.add_option('-n', dest="dfreq", type="float", help="The bandwidth of each frequency channel in Hz", default=11.92092896)
parser.add_option('-w', dest="reduce", type="int", help="The chuncker divider in frequency channels, integer default 16", default=16)
(opts, args) = parser.parse_args()
# Fail with a usage message instead of passing None on to open().
if opts.filename is None:
    parser.error("no input file given (use -f FILE)")
rows = 12
freqs = 262144
file1 = opts.filename
s = readalllines(file1, rows, freqs)
# Reshape the flat array to (rows, freqs), then transpose to (freqs, rows).
s = np.reshape(s, (rows, freqs)).T
# Parenthesised form prints the same text under Python 2 and Python 3.
print(s.shape)
#raw_input()
#s_shift = scipy.fftpack.fftshift(s)
#fig = plt.figure()
#fig.patch.set_alpha(0.0)
#axes = plt.axes()
#axes.patch.set_alpha(0.0)
###plt.ylim(0,8)
# Plot the array `s` in batches of 4 columns, opening a new figure per batch.
# NOTE(review): indentation was lost in this listing. The statements from
# `fig = plt.figure()` down to `i += 1` presumably form the loop body, and the
# extent of the `if o == 0:` body is ambiguous -- confirm against the original
# script before running.
# NOTE(review): `print fdf` / `print xticks()` are Python 2 print statements.
plt.ion()
# `i` counts iterations; it is not read anywhere in the visible code.
i = 0
for o in range(0,rows,4):
fig = plt.figure()
#plt.clf()
# Show columns o..o+3 of `s`; no `extent` is passed to imshow here.
plt.imshow(s[:,o:o+4],interpolation='nearest',aspect='auto', cmap=cm.gray_r, origin='lower')
if o == 0:
axis([0,rows,0,freqs])
# Current x tick locations and labels (names come from the pylab star import).
fdf, fdff = xticks()
print fdf
# Shift the tick locations by the batch offset `o`.
xticks(fdf+o)
print xticks()
#axis([o,o+4,0,freqs])
plt.draw()
#w, h = fig.canvas.get_width_height()
#buf = np.fromstring(fig.canvas.tostring_argb(), dtype=np.uint8)
#buf.shape = (w,h,4)
#buf = np.rol(buf, 3, axis=2)
#w,h,_ = buf.shape
#img = Image.fromstring("RGBA", (w,h),buf.tostring())
#if prev:
# prev.paste(img)
# del prev
#prev = img
i += 1
pl.colorbar()
pl.show()

If you plot any array with more than ~2k pixels across, something in your graphics chain will downsample the image in some way to display it on your monitor. I would recommend downsampling in a controlled way, something like
# Placeholder: `convert_raw_data_to_fft` is not defined in this snippet; it
# stands for whatever produces the array to plot.
data = convert_raw_data_to_fft(args) # make sure data is row major
def ds_decimate(row, step=100):
    """Downsample *row* by keeping every *step*-th element.

    Args:
        row: Any sequence that supports extended slicing.
        step: Stride between kept elements (default 100).

    Returns:
        ``row[::step]``, of the same type as *row*.
    """
    return row[::step]
def ds_sum(row, step=100):
    """Downsample *row* by summing consecutive groups of *step* elements.

    Trailing elements that do not fill a complete group are dropped.

    Args:
        row: 1-D array or sequence of numbers.
        step: Number of elements per group (default 100, matching
            ``ds_decimate``).

    Returns:
        A 1-D numpy array with one sum per complete group.
    """
    row = np.asarray(row)
    # Trim to a whole number of groups so the reshape is valid.
    usable = step * (len(row) // step)
    return np.sum(row[:usable].reshape(-1, step), 1)
# as per suggestion from tom10 in comments
def ds_max(row, step=100):
    """Downsample *row* by taking the maximum of each group of *step* elements.

    Trailing elements that do not fill a complete group are dropped.

    Args:
        row: 1-D array or sequence of numbers.
        step: Number of elements per group (default 100, matching
            ``ds_decimate``).

    Returns:
        A 1-D numpy array with one maximum per complete group.
    """
    row = np.asarray(row)
    # Trim to a whole number of groups so the reshape is valid.
    usable = step * (len(row) // step)
    return np.max(row[:usable].reshape(-1, step), 1)
# Downsample every row. The step (group size) is passed explicitly because
# ds_sum requires it; ds_decimate or ds_max can be used the same way.
data_plotable = [ds_sum(d, 100) for d in data] # plug in which ever function you want
or interpolation.

Matplotlib is pretty memory-inefficient when plotting images. It creates several full-resolution intermediate arrays, which is probably why your program is crashing.
One solution is to downsample the image before feeding it into matplotlib, as @tcaswell suggests.
I also wrote some wrapper code to do this downsampling automatically, based on your screen resolution. It's at https://github.com/ChrisBeaumont/mpl-modest-image, if it's useful. It also has the advantage that the image is resampled on the fly, so you can still pan and zoom without sacrificing resolution where you need it.

I think you're just missing the extent=(left, right, bottom, top) keyword argument in plt.imshow.
# Two demo images; the first row of each gets a distinct value so it is
# obvious which way up each image is drawn.
x = np.random.randn(2, 10)
x[0] = 0
y = np.ones((4, 10))
y[0] = -1
# `extent=(left, right, bottom, top)` places each image in data coordinates,
# so the second image is stacked above the first instead of overlapping it.
for image, bounds in ((x, (0, 10, 0, 2)), (y, (0, 10, 2, 6))):
    plt.imshow(image, extent=bounds)
# Required: otherwise the axes are scaled to the last image only.
plt.ylim(0, 6)
plt.colorbar()
plt.show()

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

labeled intervals in matplotlib - python

Related

how to change the color of subplot datafile in matplotlib

Python : Multiple Line\any form of graph by defining Matching column values with large datasets

IndexError: too many indices for array for an array that is definitely as big

Adding a single label to the legend for a series of different data points plotted inside a designated bin in Python using matplotlib.pyplot.plot()

Plot really big file in python (5GB) with x axis offset

Categories

Resources