matplotlib speed when plotting a GPS point on a chart - python
With the following code I'm trying to draw a GPS point over a map image using a Raspberry Pi 3:
from gps import *
import matplotlib.pyplot as plt
import time, inspect, os
# Render the current GPS fix as a red dot on top of a static map image and
# save the result to disk once per TPV report received from gpsd.

# Input map image and output image locations.
logPath = "/home/solergy/IBUZ/LOGS/"
inputMapFilePath = logPath + "srcMap/" + "TBM-min.png"
actualOutMapFilePath = logPath + "Map_GPS"

# Corners of the map image, used as the imshow extent. The longitudes are
# stored with the same (negated) sign that pos_x uses below.
TRX = -12.653  # top right longitude
TRY = 41.8675  # top right latitude
BLX = -12.6332  # bottom left longitude
BLY = 41.8619  # bottom left latitude

gpsd = gps(mode=WATCH_ENABLE | WATCH_NEWSTYLE)

# The background image is read once; only the drawing is repeated per fix.
im = plt.imread(inputMapFilePath)

try:
    while True:
        report = gpsd.next()
        # Only TPV reports are read for time / lat / lon / speed.
        if report['class'] != 'TPV':
            continue

        GPStime = str(getattr(report, 'time', ''))
        lat = str(getattr(report, 'lat', 0.0))
        lon = str(getattr(report, 'lon', 0.0))
        speed = str(getattr(report, 'speed', 'nan'))

        # Create image file. Longitude is negated so that it is expressed
        # with the same sign as BLX / TRX.
        pos_y = float(getattr(report, 'lat', 0.0))
        pos_x = -float(getattr(report, 'lon', 0.0))
        actualTime = time.strftime("%H-%M-%S", time.localtime())

        plt.text(BLX, BLY, actualTime)
        plt.imshow(im, extent=[BLX, TRX, BLY, TRY], zorder=1)
        plt.scatter(x=[pos_x], y=[pos_y], s=3, c='r', zorder=2)

        cur_axes = plt.gca()
        cur_axes.axes.get_xaxis().set_visible(False)
        cur_axes.axes.get_yaxis().set_visible(False)

        # `format` is the savefig keyword that selects the file type
        # (`type` is not a savefig parameter). With an explicit format the
        # file name is used verbatim, so the extension is added here.
        plt.savefig(actualOutMapFilePath + ".png", dpi=150, format="png")
        plt.close('all')
        print(actualTime, GPStime)
except (KeyboardInterrupt, SystemExit):  # when you press ctrl+c
    # No files are opened by this script, so there is nothing to close
    # besides any figure that was being drawn when the interrupt arrived.
    plt.close('all')
My problem is that I need to update this map at least once every second (10 Hz would be better). The problem is not the GPS part but the matplotlib part that produces this image. Any idea how to achieve a higher speed with this code?
Antonio
An obvious optimization would be to avoid clearing and redrawing everything at every iteration, and instead update only the XY values of the dot. See How to update a plot in matplotlib?
I would expect a considerable speedup (especially from avoiding the plt.imshow call), but it is hard to promise anything about performance without access to the actual data (or even hardware).
Related
Matplotlib connects end of data after animation refreshes
I'm having an issue exactly like this post, but slightly more frustrating. I'm using matplotlib to read from a file that is being fed data from another application. For some reason, the ends of the data only connect after the animation (animation.FuncAnimation) has completed its first refresh. Here are some images: This is before the refresh: And this is after the refresh: Any ideas as to why this could be happening? Here is my code: import json import itertools import dateutil.parser import matplotlib.pyplot as plt import matplotlib.animation as animation from matplotlib import style import scipy.signal as sig import numpy as np import pylab as plt sensors = {} data = [] lastLineReadNum = 0 class Sensor: def __init__(self, name, points = 0, lastReading = 0): self.points = points self.lastReading = lastReading self.name = name self.x = [] self.y = [] class ScanResult: def __init__(self, name, id, rssi, macs, time): self.name = name self.id = id self.rssi = rssi self.macs = macs # Is not an integer, but a datetime.datetime self.time = time def readJSONFile(filepath): with open(filepath, "r") as read_file: global lastLineReadNum # Load json results into an object that holds important info for line in itertools.islice(read_file, lastLineReadNum, None): temp = json.loads(line) # Only reads the most recent points... 
data.append(ScanResult(name = temp["dev_id"], id = temp["hardware_serial"], rssi = temp["payload_fields"]["rssis"], macs = temp["payload_fields"]["macs"], time = dateutil.parser.parse(temp["metadata"]["time"]))) lastLineReadNum += 1 return data style.use('fivethirtyeight') fig = plt.figure() ax1 = fig.add_subplot(1, 1, 1) def smooth(y, box_pts): box = np.ones(box_pts)/box_pts y_smooth = np.convolve(y, box, mode='same') return y_smooth def determineClosestSensor(): global sensors #sensors.append(Sensor(time = xs3, rssi = ys3)) def determineXAxisTime(scanresult): return ((scanresult.time.hour * 3600) + (scanresult.time.minute * 60) + (scanresult.time.second)) / 1000.0 def animate(i): data = readJSONFile(filepath = "C:/python_testing/rssi_logging_json.json") for scan in data: sensor = sensors.get(scan.name) # First time seeing the sensor if(sensor == None): sensors[scan.name] = Sensor(scan.name) sensor = sensors.get(scan.name) sensor.name = scan.name sensor.x.append(determineXAxisTime(scan)) sensor.y.append(scan.rssi) else: sensor.x.append(determineXAxisTime(scan)) sensor.y.append(scan.rssi) ax1.clear() #basic smoothing using nearby averages #y_smooth3 = smooth(np.ndarray.flatten(np.asarray(sensors.get("sentrius_sensor_3").y)), 1) for graphItem in sensors.itervalues(): smoothed = smooth(np.ndarray.flatten(np.asarray(graphItem.y)), 1) ax1.plot(graphItem.x, smoothed, label = graphItem.name, linewidth = 2.0) ax1.legend() determineClosestSensor() fig.suptitle("Live RSSI Graph from Sentrius Sensors", fontsize = 14) def main(): ani = animation.FuncAnimation(fig, animate, interval = 15000) plt.show() if __name__ == "__main__": main()
As far as I can tell you are regenerating your data in each animation step by appending to the existing datasets, but then this means that your last x point from the first step is followed by the first x point in the second step, leading to a rewind in the plot. This appears as the line connecting the last datapoint with the first one; the rest of the data is unchanged. The relevant part of animate: def animate(i): data = readJSONFile(filepath = "C:/python_testing/rssi_logging_json.json") for scan in data: sensor = sensors.get(scan.name) # First time seeing the sensor if(sensor is None): # always check for None with `is`! ... # stuff here else: sensor.x.append(determineXAxisTime(scan)) # always append! sensor.y.append(scan.rssi) # always append! ... # rest of the stuff here So, in each animation step you 1. load the same JSON file 2. append the same data to an existing sensor identified by sensors.get(scan.name) 3. plot stuff without ever using i. Firstly, your animate should naturally make use of the index i: you're trying to do something concerning step i. I can't see i being used anywhere. Secondly, your animate should be as lightweight as possible in order to get a smooth animation. Load your data once before plotting, and only handle the drawing differences in animate. This will involve slicing or manipulating your data as a function of i. Of course if the file really does change from step to step, and this is the actual dynamics in the animation (i.e. i is a dummy variable that is never used), all you need to do is zero-initialize all the plotting data in each step. Start with a clean slate. Then you'll stop seeing the lines corresponding to these artificial jumps in the data. 
But again, if you want a lightweight animate, you should look into manipulating the underlying data of existing plots rather than replotting everything all the time (especially since calls to ax1.plot will keep earlier points on the canvas, which is not what you usually want in an animation).
try changing : ani = animation.FuncAnimation(fig, animate, interval = 15000) to : ani = animation.FuncAnimation(fig, animate, interval = 15000, repeat = False)
Create a static (not redrawn everytime I zoom, or move around) plot in matplotlib
I am rather new to python in general, but have found it very useful and much more intuitive than other programming languages. I'm currently trying to plot a spectrum with a lot of data points, and it seems that every time I move around, zoom in our out, matplotlib redraws the figure, which takes some time (5 to 20 seconds every time I move). This makes scanning through the spectrum pretty tedious, and I was thinking if maybe I could find a way to create the figure once and for all, and then just show a part of it, and move around in the static figure, without redrawing it, that would save me a lot of idle time. My question is this : is there a (reasonably easy) way to do this in matplotlib or should I start looking into other plotting software ? I've looked around in the documentation but to be honest, I don't understand most of it. Thanks for the input ! Cheers In case anyone is wondering, here is my code : import numpy as np import matplotlib.pyplot as plt def show_shifted_lines(z, nb_lines, color='red'): # Draws a vertical line with its name at the location # of the line (shifted by (1+z) ) # Writes the name of the line for i in range(nb_lines): plt.text(rest_wl[i]*(1.+z), 200., line_name[i], color=color, rotation=50, rotation_mode='anchor') # Writes the vertical line for j in range(20): plt.text(rest_wl[i]*(1.+z), 190-8*j, '|', color=color) return f = open('spectrum.dat', 'r') ## Plot Spectrum ## wavelength = [] flux = [] for line in f : line = line.strip() columns = line.split() wavelength.append(float(columns[0])) flux.append(float(columns[1])) flux = np.asarray(flux, dtype=float) wavelength = np.asarray(wavelength, dtype=float) plt.plot(wavelength, flux, color='black') plt.xlabel(r'Wavelength (A)') plt.ylabel(r'Flux (erg s$^{-1}$ cm$^{-2}$ A$^{-1}$)') plt.grid(True) f.close() ## Show location of redshifted lines ## f = open('list_of_restframe_lines.txt','r') line_name = [] rest_wl = [] # Redshifts of various absorption-line systems z1 = 3.04976 z2 = 2.27831 
z3 = 1.80335 z4 = 2.218 z5 = 2.2155 z6 = 2.2164 z7 = 2.8913 # Create array for not-shifted lines for line in f : line = line.strip() columns = line.split() line_name.append(columns[0]) rest_wl.append(float(columns[1])) rest_wl = np.asarray(rest_wl, dtype=float) f.close() show_shifted_lines(z1, len(rest_wl)) show_shifted_lines(z2, len(rest_wl), 'magenta') show_shifted_lines(z3, len(rest_wl), 'lightgreen') show_shifted_lines(z4, len(rest_wl), 'green') show_shifted_lines(z5, len(rest_wl), 'darkorange') show_shifted_lines(z6, len(rest_wl), 'orange') show_shifted_lines(z7, len(rest_wl), 'blue') plt.show() Ultimately, my spectrum looks something like this : Example of small part of a spectrum with redshifted absorbing systems shown
How to add significance levels on bar graph using Python's Matplotlib?
I have written some code to graph some data in Python's Matplotlib. The plot currently: The code to produce this plot: groups=['Control','30min','24hour'] cell_lysate_avg=[11887.42595, 4862.429689, 3414.337554] cell_lysate_sd=[1956.212855, 494.8437915, 525.8556207] cell_lysate_avg=[i/1000 for i in cell_lysate_avg] cell_lysate_sd=[i/1000 for i in cell_lysate_sd] media_avg=[14763.71106,8597.475539,6374.732852] media_sd=[240.8983759, 167.005365, 256.1374017] media_avg=[i/1000 for i in media_avg] #to get ng/ml media_sd=[i/1000 for i in media_sd] fig, ax = plt.subplots() index = numpy.arange(len(groups)) #where to put the bars bar_width=0.45 opacity = 0.5 error_config = {'ecolor': '0.3'} cell_lysate_plt=plt.bar(index,cell_lysate_avg,bar_width,alpha=opacity,color='black',yerr=cell_lysate_sd,error_kw=error_config,label='Cell Lysates') media_plt=plt.bar(index+bar_width,media_avg,bar_width,alpha=opacity,color='green',yerr=media_sd,error_kw=error_config,label='Media') plt.xlabel('Groups',fontsize=15) plt.ylabel('ng/ml',fontsize=15) plt.title('\n'.join(wrap('Average Over Biological Repeats for TIMP1 ELISA (n=3)',45)),fontsize=15) plt.xticks(index + bar_width, groups) plt.legend() ax.tick_params(axis='x', labelsize=14) ax.tick_params(axis='y', labelsize=14) I have calculated the various two tailed t tests associated with this data and I want to display using standard scientific journal representation - i.e. a line connecting two bars with a star which represents a significance level of (say) >0.05. Can anybody tell me how to do this?
As far as I know there is no standard scientific journal representation for showing significance. The exact way you draw it is a matter of taste. This is probably the reason why matplotlib has no specific function for significance bars (at least to my knowledge). You could just do it manually. E.g: from matplotlib.markers import TICKDOWN def significance_bar(start,end,height,displaystring,linewidth = 1.2,markersize = 8,boxpad =0.3,fontsize = 15,color = 'k'): # draw a line with downticks at the ends plt.plot([start,end],[height]*2,'-',color = color,lw=linewidth,marker = TICKDOWN,markeredgewidth=linewidth,markersize = markersize) # draw the text with a bounding box covering up the line plt.text(0.5*(start+end),height,displaystring,ha = 'center',va='center',bbox=dict(facecolor='1.', edgecolor='none',boxstyle='Square,pad='+str(boxpad)),size = fontsize) pvals = [0.001,0.1,0.00001] offset =1 for i,p in enumerate(pvals): if p>=0.05: displaystring = r'n.s.' elif p<0.0001: displaystring = r'***' elif p<0.001: displaystring = r'**' else: displaystring = r'*' height = offset + max(cell_lysate_avg[i],media_avg[i]) bar_centers = index[i] + numpy.array([0.5,1.5])*bar_width significance_bar(bar_centers[0],bar_centers[1],height,displaystring) Instead of the stars you could of course also explicitly write p<0.05 or something similar. You can then spend hours fiddling with the parameters until it looks just right.
Making a timelapse over geographical positions using pyplot
I am making a scatter plot of positions marked with latitude and longitude which works all right as it shows all the positions in a time period in a static image. But I was wondering if there is any easy way of utilizing that I have the unixtime to every position - so I can show the movements as a timelapse - kind of looping through the positions and showing an animation of the movement. EDIT: I have set up a dynamicly updating plot that plots all the positions one by one, now I just have to add the basemap in the background. The codes come from this answer. import matplotlib.pyplot as plt import sqlite3 as lite from operator import itemgetter def getData(): con = lite.connect('database.db') with con: cur = con.cursor() cur.execute('SELECT latitude, longitude, unixtime FROM Message WHERE latitude > 50 AND longitude > -30 AND longitude < 40 AND latitude < 80') all_rows = [[int(x[0]), int(x[1]), int(x[2])] for x in cur] all_rows = sorted(all_rows, key=itemgetter(2)) return all_rows plt.ion() class DynamicUpdate(): #Suppose we know the x range min_x = 0 max_x = 10000 def on_launch(self): #Set up plot self.figure, self.ax = plt.subplots() self.lines, = self.ax.plot([],[], 'o') #Autoscale on unknown axis and known lims on the other self.ax.set_autoscaley_on(True) self.ax.set_xlim(-50, 50) self.ax.set_ylim(40, 80) #Other stuff self.ax.grid() def on_running(self, xdata, ydata): #Update data (with the new _and_ the old points) self.lines.set_xdata(xdata) self.lines.set_ydata(ydata) #Need both of these in order to rescale self.ax.relim() self.ax.autoscale_view() #We need to draw *and* flush self.figure.canvas.draw() self.figure.canvas.flush_events() #Example def __call__(self): import numpy as np import time self.on_launch() xdata = [] ydata = [] all_rows = getData() for x in all_rows: a,b,f = zip(x) xdata.append(b) ydata.append(a) self.on_running(xdata, ydata) return xdata, ydata d = DynamicUpdate() d() Old Code: This shows the static data map = Basemap(projection='merc', 
lat_0=59.45, lon_0=10.5, resolution = 'c', area_thresh = 1000, llcrnrlon=-30, llcrnrlat=50, urcrnrlon=40, urcrnrlat=80) map.drawcoastlines() map.fillcontinents(color='black') con = lite.connect('database.db') with con: cur = con.cursor() cur.execute('SELECT latitude, longitude FROM Message WHERE latitude > 50 AND longitude > -30 AND longitude < 40 AND latitude < 80') data = cur.fetchall() y,x = zip(*data) x,y = map(x,y) plt.scatter(x,y, s=0.07, alpha=0.6, color="#e74c3c", edgecolors='none') plt.show()
There are a few ways to do animations in matplotlib; matplotlib.animation provides a framework, but this can be a little involved. Probably the easiest way to do it is using plt.ion(). I don't know how you access your data with cur.execute, but does something like this work: map = Basemap(projection='merc', lat_0=59.45, lon_0=10.5, resolution = 'c', area_thresh = 1000, llcrnrlon=-30, llcrnrlat=50, urcrnrlon=40, urcrnrlat=80) fig, ax = plt.subplots(1,1) plt.ion() plt.show() map.drawcoastlines(ax=ax) map.fillcontinents(color='black',ax=ax) con = lite.connect('database.db') with con: cur = con.cursor() i=0 for unixtime in range(1406851200,1409529600): cur.execute('SELECT latitude, longitude FROM Message WHERE latitude > 50 AND longitude > -30 AND longitude < 40 AND latitude < 80 AND unixtime ==' + str(unixtime)) data = cur.fetchall() y,x = zip(*data) x,y = map(x,y) pts = ax.scatter(x,y, s=0.07, alpha=0.6, color="#e74c3c", edgecolors='none') plt.draw() plt.pause(0.0001) #i += 1 #plt.savefig('./out.{0:07d}.png'.format(i)) pts.remove()
Even though I've gotten an adequate answer over, I found that it was a hassle to get the plot exactly like I wanted, and still animate it - so I did it utilizing some linux tools to make a movie of snapshots instead. This is what I did, for future reference and for others having the same problem: Timelapse animation, the lazy way I simply made a plot over all geographical positions for every hour for the whole timespan. This can be done down to every minute, second etc: con = lite.connect('database/SAISREAL.db') with con: cur = con.cursor() i = 0 for k in range(0,137*24): #This is the timespan - every hour for 137 days map = Basemap(projection='merc', lat_0=59.45, lon_0=10.5, resolution = 'c', area_thresh = 1000, llcrnrlon=-30, llcrnrlat=50, urcrnrlon=40, urcrnrlat=80) map.drawcoastlines() map.fillcontinents(color='#27ae60') start = 0+k*60*60 #This is again the timespan end = 0+(k+1)*60*60 Ok - so now I've established the timespan which I will query data from, as well as drawn the map overlay cur.execute('SELECT distinct userid, latitude, longitude FROM geodata WHERE unixtime > {start} AND unixtime < {end}'.format(start = start, end = end)) data = cur.fetchall() if len(data)>0: #Simply check if there is data available i = i+1 filename = '' if i<10: filename = '0000'+str(i) elif i<100: filename = '000'+str(i) elif i<1000: filename = '00'+str(i) else: filename = '0'+str(i) f,y,x = zip(*data) x,y = map(x,y) The whole filename thing is used later, when I convert the images into a movie - its important that they are named sequentially where everyone has the same number of digits. plt.title( str(datetime.datetime.fromtimestamp(int(end)).strftime('%Y-%m-%d')) + ' kl '+str(datetime.datetime.fromtimestamp(int(end)).strftime('%H'))) plt.scatter(x,y, s=8, alpha=0.7, color="#2980b9", edgecolors='none') Here I just plotted the info with a timestamp as title. plt.savefig('Fisheriesplot/fishplot/'+str(filename)+'.png', format='png') plt.clf() And then, saving the picture. 
This gives some 3000 .png images - this can obviously be done with other file formats. Before I either make them to a GIF or a movie, I want to remove the transparent background - to make them appear nicer (less colour shifting between frames) mkdir batch for file in *.png ; do convert "${file}" -background black -alpha remove -flatten -alpha off "batch/${file}" ; done cd batch If the goal is to make a gif - skip the rest and do this: convert -delay 10 -loop 0 *.png animaion.gif Option1: Make a movie out of .png ffmpeg -y -f image2 -framerate 20 -i %05d.png -vcodec png -b 8000k a20k.avi Just do this is in the folder. Set bitrakte and framerate as you want it- notice that this movie can be quite big. Option2: Convert images to other format, then make movie mogrify -format jpg *.png This is done in terminal in the same folder as the pictures. Then I want to move all the jpg's in their own folder: mkdir jpgfolder mv *.jpg jpgfolder And now, lastly I can make the movie: cd jpgfolder ffmpeg -y -f image2 -framerate 4 -i %05d.jpg -vcodec mpeg4 -b 800k a88k.avi The framerate, which is set to 4 here, should be set to whatever you want. Notice that %05d.jpg says that every file has a leading 0, and has a total of five digits. If its four digits, write 4 etc. Note that this isn't the most streamlined or smart way to do this, but it is a solution for everyone not wanting to change your code, other than putting it in a loop.
Adding a single label to the legend for a series of different data points plotted inside a designated bin in Python using matplotlib.pyplot.plot()
I have a script for plotting astronomical data of redmapping clusters using a csv file. I could get the data points in it and want to plot them using different colors depending on their redshift values: I am binning the dataset into 3 bins (0.1-0.2, 0.2-0.25, 0.25,0.31) based on the redshift. The problem arises with my code after I distinguish to what bin the datapoint belongs: I want to have 3 labels in the legend corresponding to red, green and blue data points, but this is not happening and I don't know why. I am using plot() instead of scatter() as I also had to do the best fit from the data in the same figure. So everything needs to be in 1 figure. import numpy as np import matplotlib.pyplot as py import csv z = open("Sheet4CSV.csv","rU") data = csv.reader(z) x = [] y = [] ylow = [] yupp = [] xlow = [] xupp = [] redshift = [] for r in data: x.append(float(r[2])) y.append(float(r[5])) xlow.append(float(r[3])) xupp.append(float(r[4])) ylow.append(float(r[6])) yupp.append(float(r[7])) redshift.append(float(r[1])) from operator import sub xerr_l = map(sub,x,xlow) xerr_u = map(sub,xupp,x) yerr_l = map(sub,y,ylow) yerr_u = map(sub,yupp,y) py.xlabel("$Original\ Tx\ XCS\ pipeline\ Tx\ keV$") py.ylabel("$Iterative\ Tx\ pipeline\ keV$") py.xlim(0,12) py.ylim(0,12) py.title("Redmapper Clusters comparison of Tx pipelines") ax1 = py.subplot(111) ##Problem starts here after the previous line## for p in redshift: for i in xrange(84): p=redshift[i] if 0.1<=p<0.2: ax1.plot(x[i],y[i],color="b", marker='.', linestyle = " ")#, label = "$z < 0.2$") exit if 0.2<=p<0.25: ax1.plot(x[i],y[i],color="g", marker='.', linestyle = " ")#, label="$0.2 \leq z < 0.25$") exit if 0.25<=p<=0.3: ax1.plot(x[i],y[i],color="r", marker='.', linestyle = " ")#, label="$z \geq 0.25$") exit ##There seems nothing wrong after this point## py.errorbar(x,y,yerr=[yerr_l,yerr_u],xerr=[xerr_l,xerr_u], fmt= " ",ecolor='magenta', label="Error bars") cof = np.polyfit(x,y,1) p = np.poly1d(cof) l = 
np.linspace(0,12,100) py.plot(l,p(l),"black",label="Best fit") py.plot([0,15],[0,15],"black", linestyle="dotted", linewidth=2.0, label="line $y=x$") py.grid() box = ax1.get_position() ax1.set_position([box.x1,box.y1,box.width, box.height]) py.legend(loc='center left',bbox_to_anchor=(1,0.5)) py.show() In the 1st 'for' loop, I have indexed every value 'p' in the list 'redshift' so that bins can be created using 'if' statement. But if I add the labels that are hashed out against each py.plot() inside the 'if' statements, each data point 'i' that gets plotted in the figure as an intersection of (x[i],y[i]) takes the label and my entire legend attains in total 87 labels (including the 3 mentioned in the code at other places)!!!!!! I essentially need 1 label for each bin... Please tell me what needs to done after the bins are created and py.plot() commands used...Thanks in advance :-) Sorry I cannot post my image here due to low reputation! The data 'appended' for x, y and redshift lists from the csv file are as follows: x=[5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547] 
y=[5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677] redshift = [0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19]
Working with numerical data like this, you should really consider using a numerical library, like numpy. The problem in your code arises from processing each record (a coordinate (x,y) and the corresponding value redshift) one at a time. You are calling plot for each point, thereby creating legends for each of those 84 datapoints. You should consider your "bins" as groups of data that belong to the same dataset and process them as such. You could use "logical masks" to distinguish between your "bins", as shown below. It's also not clear why you call exit after each plotting action. import numpy as np import matplotlib.pyplot as plt x = np.array([5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547]) y = np.array([5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677]) redshift = 
np.array([0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19]) bin3 = 0.25 <= redshift bin2 = np.logical_and(0.2 <= redshift, redshift < 0.25) bin1 = np.logical_and(0.1 <= redshift, redshift < 0.2) plt.ion() labels = ("$z < 0.2$", "$0.2 \leq z < 0.25$", "$z \geq 0.25$") colors = ('r', 'g', 'b') for bin, label, co in zip( (bin1, bin2, bin3), labels, colors): plt.plot(x[bin], y[bin], color=co, ls='none', marker='o', label=label) plt.legend() plt.show()