matplotlib widget updates the wrong data - python

I'm making a plot to compare band structure calculations from two different methods. This means plotting multiple lines for each set of data. I want to have a set of widgets that controls each set of data separately. The code below works if I only plot one set of data, but I can't get the widgets to work properly for two sets of data.
#!/usr/bin/env python3
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider, TextBox
#cols = ['blue', 'red', 'green', 'purple']
cols = ['#3f54bf','#c14142','#59bf3f','#b83fbf']
finam = ['wan_band.dat','wan_band.pwx.dat']
#finam = ['wan_band.dat'] # this works
lbot = len(finam)*0.09 + 0.06
fig, ax = plt.subplots()
plt.subplots_adjust(bottom=lbot)
ax.margins(x=0) # lines go to the edge of the horizontal axes
def setlines(lines, txbx1, txbx2):
''' turn lines on/off based on text box values '''
try:
mn = int(txbx1) - 1
mx = int(txbx2) - 1
for ib in range(len(lines)):
if (ib<mn) or (ib>mx):
lines[ib].set_visible(False)
else :
lines[ib].set_visible(True)
plt.draw()
except ValueError as err:
print('Invalid range')
#end def setlines(cnt, lines, txbx1, txbx2):
def alphalines(lines, valin):
''' set lines' opacity '''
maxval = int('ff',16)
maxval = hex(int(valin*maxval))[2:]
for ib in range(bcnt):
lines[ib].set_color(cols[cnt]+maxval)
plt.draw()
#end def alphalines(lines, valtxt):
lines = [0]*len(finam) # 2d list to hold Line2Ds
txbox1 = [0]*len(finam) # list of Lo Band TextBoxes
txbox2 = [0]*len(finam) # lsit of Hi Band TextBoxes
alslid = [0]*len(finam) # list of Line Opacity Sliders
for cnt, fnam in enumerate(finam):
ptcnt = 0 # point count
fid = open(fnam, 'r')
fiit = iter(fid)
for line in fiit:
if line.strip() == '' :
break
ptcnt += 1
fid.close()
bandat_raw = np.loadtxt(fnam)
bcnt = int(np.round((bandat_raw.shape[0] / (ptcnt))))
print(ptcnt)
print(bcnt)
# get views of the raw data that are easier to work with
kbandat = bandat_raw[:ptcnt,0] # k point length along path
ebandat = bandat_raw.reshape((bcnt,ptcnt,2))[:,:,1] # band energy # k-points
lines[cnt] = [0]*bcnt # point this list element to another list
for ib in range(bcnt):
#l, = plt.plot(kbandat, ebandat[ib], c=cols[cnt],lw=1.0)
l, = ax.plot(kbandat, ebandat[ib], c=cols[cnt],lw=1.0)
lines[cnt][ib] = l
y0 = 0.03 + 0.07*cnt
bxht = 0.035
axbox1 = plt.axes([0.03, y0, 0.08, bxht]) # x0, y0, width, height
axbox2 = plt.axes([0.13, y0, 0.08, bxht])
txbox1[cnt] = TextBox(axbox1, '', initial=str(1))
txbox2[cnt] = TextBox(axbox2, '', initial=str(bcnt))
txbox1[cnt].on_submit( lambda x: setlines(lines[cnt], x, txbox2[cnt].text) )
txbox2[cnt].on_submit( lambda x: setlines(lines[cnt], txbox1[cnt].text, x) )
axalpha = plt.axes([0.25, y0, 0.65, bxht])
alslid[cnt] = Slider(axalpha, '', 0.1, 1.0, valinit=1.0)
salpha = alslid[cnt]
alslid[cnt].on_changed( lambda x: alphalines(lines[cnt], x) )
#end for cnt, fnam in enumerate(finam):
plt.text(0.01, 1.2, 'Lo Band', transform=axbox1.transAxes)
plt.text(0.01, 1.2, 'Hi Band', transform=axbox2.transAxes)
plt.text(0.01, 1.2, 'Line Opacity', transform=axalpha.transAxes)
plt.show()
All the widgets only control the last data set plotted instead of the individual data sets I tried to associate with each widget. Here is a sample output:
Here the bottom slider should be changing the blue lines' opacity, but instead it changes the red lines' opacity. Originally the variables txbox1, txbox2, and alslid were not lists. I changed them to lists though to ensure they weren't garbage collected but it didn't change anything.
Here is the test data set1 and set2 I've been using. They should be saved as files 'wan_band.dat' and 'wan_band.pwx.dat' as per the hard coded list finam in the code.

I figured it out, using a lambda to partially execute some functions with an iterator value meant they were always being evaluated with the last value of the iterator. Switching to functools.partial fixed the issue.

Related

`update` function doesn't work correctly for bokeh interactors in python

I have a source code that plots the alphashape of a stock price. There's a slider to update the plot dynamically. But the update function doesn't work as expected.
Here's the source code.
x=[76.84,76.85,76.86,76.87,76.88,76.9,76.91,76.92,76.93,76.94,76.97,76.97,76.98,76.99,77.0,77.03,77.03,77.04,77.05,77.06,77.09,77.09,77.1,77.11,77.12,77.15,77.16,77.16,77.17,77.18,77.21,77.22,77.22,77.23,77.24,77.27,77.28,77.28,77.29,77.3,77.33,77.34,77.35,77.35,77.36,77.39,77.4,77.41,77.41,77.42,77.45,77.46,77.47,77.47,77.48,77.51,77.52,77.53,77.54,77.54,77.57,77.58,77.59,77.6,77.6,77.63,77.64,77.65,77.66,77.66,77.69,77.7,77.71,77.72,77.73,77.75,77.76,77.77,77.78,77.79,77.81,77.82,77.83,77.84,77.85,77.87,77.88,77.89,77.9,77.91,77.93,77.94,77.95,77.96,77.97,77.99,78.0,78.01,78.02,78.03,78.05,78.06,78.07,78.08,78.09,78.13,78.14,78.15,78.17,78.18,78.19,78.2,78.21,78.24,78.24,78.25,78.26,78.27,78.3,78.3,78.31,78.32,78.33,78.36,78.36,78.37,78.38,78.39,78.42,78.43,78.43,78.44,78.45,78.48,78.49,78.49,78.5,78.51,78.54,78.55,78.55,78.56,78.57,78.6,78.61,78.62,78.62,78.63,78.66,78.67,78.68,78.68,78.69,78.72,78.73,78.74,78.74,78.75,78.78,78.79,78.8,78.81,78.81,78.84,78.85,78.86,78.87,78.87,78.91,78.92,78.93,78.94,78.96,78.97,78.98,78.99,79.0,79.02,79.03,79.04,79.05,79.06,79.08,79.09,79.1,79.11,79.12,79.2,79.21,79.22,79.23,79.24,79.26,79.27,79.28,79.29,79.3,79.32,79.33,79.34,79.35,79.36,79.38,79.39,79.4,79.41,79.42,79.44,79.45,79.46,79.47,79.48,79.51,79.51,79.52,79.53,79.54,79.57,79.57,79.58,79.59,79.6,79.63,79.63,79.64,79.65,79.66,79.69,79.7,79.7,79.71,79.72,79.75,79.76,79.76,79.77,79.78,79.81,79.82,79.82,79.83,79.84,79.87,79.88,79.89,79.89,79.9,79.94,79.95,79.95,79.96,79.99,80.0,80.01,80.02,80.02,80.05,80.06,80.07,80.08,80.08,80.11,80.12,80.13,80.14,80.14,80.17,80.18,80.19,80.2,80.21,80.23,80.24,80.25,80.26,80.27,80.29,80.3,80.31,80.32,80.33,80.35,80.36,80.37,80.38,80.39,80.41,80.42,80.43,80.44,80.45,80.47,80.48,80.49,80.5,80.51,80.53,80.54,80.55,80.56,80.57,80.59,80.6,80.61,80.62,80.63,80.65,80.66,80.67,80.68,80.69,80.71,80.72,80.73,80.74,80.75,80.78,80.78,80.79,80.8,80.81,80.84,80.84,80.85,80.86,80.87,80.9,80.9,80.91,80.92,80.93,80.96,80.97,80.97,80.98,80.99,81.02,81.03,81.03,81.04,81.05,81.08,81.09,81.1,81.1,81.11,81.14,81.15,81.16,81.16,81.17,81.2,81.21,81.22,81.22,81.23,81.28,81.29,81.29,81.32,81.33,81.34,81.35,81.35,81.38,81.39,81.4,81.41,81.41,81.44,81.45,81.46,81.47,81.48,81.5,81.51,81.52,81.53,81.54,81.56,81.57,81.58,81.59,81.6,81.62,81.63,81.64,81.65,81.66,81.68,81.69,81.7,81.71,81.72,81.74,81.75,81.76,81.77,81.78,81.8,81.81,81.82,81.83,81.84,81.86,81.87,81.88,81.89,81.9,81.92,81.93,81.94,81.95,81.96,81.98,81.99,82.0,82.01,82.02,82.05,82.06,82.07,82.08,82.11,82.11,82.12,82.13,82.14,82.17,82.18,82.18,82.19,82.2,82.23,82.24,82.24,82.25,82.26,82.29,82.3,82.3,82.31,82.32,82.35,82.36,82.37,82.37,82.38,82.41,82.42,82.43,82.43,82.44,82.59,82.6,82.61,82.62,82.62,82.65,82.66,82.67,82.68,82.68,82.71,82.72,82.73,82.74,82.75,82.77,82.78,82.79,82.8,82.81,82.83,82.84,82.85,82.86,82.87,82.89,82.9,82.91,82.92,82.93,82.95,82.96,82.97,82.98,82.99,83.01,83.02,83.03,83.04,83.05,83.07,83.08,83.1,83.11,83.13,83.14,83.15,83.16,83.17,83.19,83.2,83.21,83.22,83.23,83.26,83.26,83.27,83.28,83.29,83.32,83.32,83.33,83.34,83.35,83.38,83.38,83.39,83.4,83.41,83.44,83.45,83.45,83.46,83.47,83.5,83.51,83.51,83.52,83.53,83.56,83.57,83.57,83.58,83.59,83.62,83.63,83.64,83.64,83.65,83.68,83.69,83.7,83.7,83.71,83.74,83.75,83.76,83.76,83.77,83.8,83.81,83.82,83.83,83.83,83.86,83.87,83.88,83.89,83.89,83.92,83.93,83.94,83.95,83.95,83.98,83.99,84.0,84.01,84.02,84.04,84.05,84.06,84.07,84.08,84.1,84.11,84.12,84.13,84.14,84.16,84.17,84.18,84.19,84.2,84.22,84.23,84.24,84.25,84.26,84.28,84.29,84.3,84.31,84.32,84.34,84.35,84.36,84.37,84.38,84.43,84.44,84.46,84.47,84.48,84.49,84.5,84.53,84.53,84.54,84.55,84.56,84.59,84.59,84.6,84.61,84.62,84.65,84.65,84.66,84.67,84.68,84.71,84.72,84.72,84.73,84.74,84.77,84.78,84.78,84.79,84.8,84.83,84.84,84.84,84.85,84.86,84.89,84.9,84.91,84.91,84.92,84.95,84.96,84.97,84.97,84.98,85.01,85.02,85.03,85.03,85.04,85.07,85.08,85.09,85.1,85.1,85.13,85.14,85.15,85.16,85.16,85.19,85.2,85.22,85.22,85.25,85.26,85.27,85.28,85.29,85.31,85.32,85.33,85.34,85.35,85.37,85.38,85.39,85.4,85.41,85.43,85.44,85.45,85.46,85.47,85.61,85.62,85.63,85.64,85.65,85.67,85.68,85.69,85.7,85.71,85.73,85.74,85.75,85.76,85.77,85.8,85.8,85.81,85.82,85.83,85.86,85.86,85.87,85.88,85.89,85.92,85.92,85.93,85.94,85.95,85.98,85.99,85.99,86.0,86.01,86.04,86.05,86.05,86.06,86.07,86.1,86.11,86.11,86.12,86.13,86.16,86.17,86.18,86.18,86.19,86.22,86.23,86.24,86.28,86.29,86.3,86.3,86.31,86.34,86.35,86.36,86.37,86.37,86.4,86.41,86.42,86.43,86.43,86.46,86.47,86.48,86.49,86.5,86.52,86.53,86.54,86.55,86.56,86.58,86.59,86.6,86.61,86.62,86.64,86.65,86.66,86.67,86.68,86.7,86.71,86.72,86.73,86.74,86.78,86.79,86.8,86.82,86.83,86.84,86.85,86.86,86.88,86.89,86.9,86.91,86.92,86.94,86.95,86.96,86.97,86.98,87.0,87.01,87.02,87.03,87.04,87.07,87.07,87.08,87.09,87.1,87.13,87.13,87.14,87.15,87.16,87.19,87.19,87.2,87.21,87.22,87.25,87.26,87.26,87.27,87.28,87.31,87.32,87.32,87.33,87.34,87.37,87.38,87.38,87.39,87.4,87.43,87.44,87.45,87.45,87.46,87.49,87.5,87.51,87.51,87.52,87.55,87.56,87.61,87.62,87.63,87.64,87.64,87.67,87.68,87.69,87.7,87.7,87.73,87.74,87.75,87.76,87.77,87.79,87.8,87.81,87.82,87.83,87.85,87.86,87.87,87.88,87.89,87.91,87.92,87.93,87.94,87.95,87.97,87.98,87.99,88.0,88.01,88.03,88.04,88.05,88.06,88.07,88.09,88.1,88.11,88.12,88.13,88.15,88.16,88.17,88.18,88.19,88.21,88.22,88.23,88.24,88.25,88.27,88.28,88.29,88.3,88.31,88.34,88.34,88.35,88.4,88.4,88.41,88.42,88.43,88.46,88.46,88.47,88.48,88.49,88.52,88.53,88.53,88.54,88.55,88.7,88.71,88.72,88.72,88.73,88.76,88.77,88.78,88.78,88.79,88.82,88.83,88.84,88.85,88.85,88.88,88.89,88.9,88.91,88.91,88.94,88.95,88.96,88.97,88.97,89.0,89.01,89.02,89.03,89.04,89.06,89.07,89.08,89.09,89.1,89.12,89.13,89.14,89.15,89.16,89.18,89.19,89.2,89.21,89.22,89.24,89.25,89.26,89.27,89.28,89.3,89.31,89.32,89.33,89.34,89.36,89.37,89.38,89.39,89.42,89.43,89.44,89.45,89.46,89.48,89.49,89.5,89.51,89.52,89.54,89.55,89.56,89.57,89.58,89.61]
y=[2.29,2.41,2.4,2.38,2.43,2.42,2.38,2.36,2.4,2.37,2.36,2.37,2.34,2.32,2.31,2.25,2.25,2.21,2.2,2.21,2.21,2.21,2.21,2.19,2.17,2.1,2.08,2.08,2.12,2.15,2.1,2.09,2.1,2.08,2.08,2.01,2.0,1.98,1.98,1.95,1.92,1.92,1.92,1.92,1.92,1.88,1.88,1.91,1.91,1.88,1.89,1.87,1.85,1.84,1.83,1.88,1.93,1.88,1.82,1.82,2.08,2.13,2.35,2.32,2.37,2.34,2.25,2.35,2.33,2.34,2.32,2.34,2.39,2.53,2.49,2.53,2.54,2.55,2.53,2.52,2.52,2.54,2.66,2.71,2.81,2.92,3.09,2.99,3.03,2.98,3.01,2.98,2.93,2.91,2.93,2.91,2.89,2.92,2.9,2.87,2.9,2.9,2.93,2.83,2.78,2.67,2.6,2.66,2.61,2.61,2.61,2.54,2.56,2.51,2.52,2.55,2.6,2.6,2.67,2.63,2.62,2.63,2.61,2.58,2.59,2.59,2.62,2.59,2.58,2.61,2.63,2.6,2.63,2.63,2.61,2.6,2.58,2.58,2.57,2.58,2.58,2.58,2.58,2.57,2.58,2.58,2.58,2.58,2.55,2.52,2.53,2.53,2.51,2.46,2.48,2.45,2.54,2.53,2.49,2.51,2.49,2.48,2.49,2.47,2.48,2.49,2.48,2.5,2.5,2.55,2.53,2.52,2.51,2.49,2.5,2.49,2.49,2.47,2.46,2.48,2.45,2.45,2.43,2.43,2.45,2.45,2.45,2.45,2.45,2.45,2.45,2.45,2.46,2.45,2.44,2.44,2.45,2.45,2.47,2.56,2.52,2.48,2.47,2.5,2.54,2.54,2.58,2.61,2.63,2.63,2.63,2.61,2.59,2.59,2.56,2.57,2.58,2.56,2.57,2.61,2.59,2.6,2.6,2.58,2.6,2.59,2.6,2.61,2.61,2.59,2.6,2.62,2.62,2.6,2.61,2.59,2.59,2.59,2.59,2.61,2.67,2.65,2.63,2.63,2.6,2.56,2.59,2.59,2.59,2.58,2.58,2.57,2.58,2.55,2.55,2.58,2.58,2.57,2.58,2.83,2.88,2.93,2.79,2.82,2.81,2.86,2.86,2.85,2.82,2.82,2.82,2.78,2.78,2.82,2.79,2.8,2.79,2.79,2.78,2.72,2.73,2.71,2.72,2.73,2.73,2.74,2.74,2.72,2.73,2.73,2.71,2.68,2.71,2.75,2.84,2.91,2.89,2.92,2.97,2.96,2.94,2.99,3.04,2.97,2.99,2.97,2.99,2.98,2.99,3.0,3.01,2.99,2.98,2.99,2.99,2.99,3.01,2.96,2.97,3.0,2.98,2.97,2.96,2.96,3.0,3.0,2.99,2.98,2.99,2.99,2.99,2.99,2.99,2.99,2.98,2.98,2.98,2.98,3.02,3.03,3.03,3.05,3.09,3.08,3.1,3.12,3.14,3.13,3.12,3.14,3.15,3.13,3.15,3.14,3.14,3.14,3.14,3.13,3.11,3.08,3.08,3.08,3.08,3.1,3.11,3.11,3.11,3.09,3.13,3.17,3.28,3.43,3.52,3.47,3.45,3.45,3.45,3.44,3.46,3.46,3.45,3.44,3.45,3.45,3.45,3.45,3.45,3.47,3.5,3.54,3.52,3.5,3.5,3.5,3.44,3.45,3.45,3.45,3.43,3.45,3.48,3.48,3.45,3.46,3.43,3.46,3.45,3.43,3.43,3.42,3.42,3.43,3.42,3.41,3.39,3.38,3.38,3.38,3.4,3.39,3.38,3.39,3.37,3.37,3.38,3.38,3.38,3.38,3.38,3.38,3.37,3.36,3.37,3.36,3.36,3.37,3.36,3.41,3.41,3.4,3.39,3.39,3.37,3.37,3.36,3.36,3.36,3.36,3.36,3.37,3.36,3.37,3.39,3.45,3.42,3.39,3.4,3.4,3.39,3.38,3.38,3.38,3.38,3.38,3.38,3.38,3.38,3.38,3.42,3.42,3.41,3.39,3.39,3.39,3.37,3.38,3.4,3.41,3.44,3.43,3.43,3.43,3.43,3.42,3.42,3.42,3.47,3.46,3.47,3.53,3.65,3.59,3.76,3.85,3.77,3.9,3.76,3.75,3.8,3.73,3.7,3.66,3.68,3.66,3.69,3.68,3.69,3.69,3.61,3.61,3.61,3.59,3.59,3.59,3.63,3.61,3.62,3.63,3.62,3.61,3.61,3.62,3.69,3.66,3.69,3.68,3.66,3.65,3.66,3.68,3.78,3.76,3.77,3.74,3.75,3.77,3.75,3.7,3.7,3.73,3.74,3.79,3.83,3.87,3.86,3.8,3.81,3.78,3.8,3.78,3.78,3.84,3.81,3.81,3.82,3.78,3.75,3.76,3.74,3.72,3.71,3.72,3.78,3.78,3.77,3.76,3.74,3.74,3.75,3.75,3.73,3.72,3.71,3.68,3.7,3.67,3.64,3.56,3.57,3.56,3.61,3.62,3.59,3.57,3.59,3.55,3.54,3.53,3.52,3.53,3.53,3.58,3.6,3.57,3.53,3.53,3.54,3.55,3.57,3.57,3.58,3.64,3.63,3.6,3.6,3.6,3.59,3.6,3.6,3.61,3.61,3.62,3.64,3.64,3.64,3.69,3.73,3.71,3.69,3.69,3.69,3.65,3.66,3.66,3.72,3.73,3.7,3.7,3.72,3.74,3.74,3.74,3.79,3.85,3.9,3.88,3.93,3.86,3.94,4.0,4.0,3.97,3.94,3.93,3.91,3.92,3.94,3.94,3.94,3.99,3.98,4.01,3.99,3.92,3.82,3.71,3.81,3.77,3.76,3.81,3.79,3.83,3.83,3.88,3.89,3.84,3.84,3.83,3.79,3.81,3.8,3.81,3.82,3.83,3.8,3.81,3.81,3.83,3.83,3.86,3.92,3.93,3.97,3.97,3.96,3.95,3.94,3.96,3.98,3.88,3.98,4.0,4.02,4.04,4.08,4.09,4.09,4.16,4.22,4.21,4.19,4.19,4.18,4.19,4.2,4.19,4.2,4.21,4.27,4.3,4.29,4.26,4.29,4.29,4.34,4.36,4.35,4.33,4.33,4.36,4.34,4.33,4.34,4.37,4.35,4.36,4.39,4.38,4.41,4.4,4.4,4.39,4.39,4.41,4.42,4.46,4.48,4.53,4.63,4.65,4.71,4.81,4.91,5.0,4.95,5.04,5.01,4.98,4.9,4.95,4.91,4.8,4.9,4.86,4.76,4.77,4.77,4.79,4.8,4.79,4.81,4.89,4.87,4.87,4.87,4.8,4.79,4.75,4.69,4.69,4.71,4.78,4.76,4.74,4.73,4.8,4.81,4.84,4.83,4.83,4.83,4.79,4.75,4.75,4.66,4.69,4.7,4.68,4.7,4.73,4.72,4.75,4.75,4.75,4.71,4.72,4.71,4.69,4.68,4.64,4.65,4.65,4.66,4.66,4.64,4.65,4.64,4.62,4.63,4.6,4.52,4.45,4.53,4.49,4.5,4.48,4.37,4.39,4.4,4.41,4.43,4.47,4.46,4.45,4.42,4.44,4.45,4.45,4.44,4.43,4.41,4.41,4.44,4.41,4.38,4.38,4.37,4.37,4.38,4.32,4.24,4.29,4.31,4.29,4.27,4.28,4.28,4.28,4.32,4.32,4.33,4.33,4.32,4.33,4.39,4.47,4.47,4.53,4.53,4.53,4.52,4.54,4.51,4.53,4.53,4.53,4.54,4.54,4.58,4.56,4.58,4.56,4.55,4.53,4.54,4.54,4.55,4.54,4.53,4.52,4.49,4.45,4.45,4.46,4.46,4.48,4.46,4.47,4.47,4.49,4.47,4.47,4.48,4.51,4.57,4.57,4.59,4.61,4.57,4.57,4.6,4.64,4.64,4.63,4.65,4.65,4.64,4.64,4.66,4.72,4.73,4.76,4.74,4.8,4.78,4.72,4.76,4.86,4.86,4.88,4.86,4.83,4.85,4.85,4.84,4.81,4.82,4.82,4.82,4.81,4.82,4.85,4.85,4.84,4.82,4.81,4.78,4.81,4.79,4.75,4.78,4.8,4.79,4.78,4.76,4.77,4.77,4.77,4.78,4.79,4.79,4.76,4.75,4.74,4.73,4.74,4.75,4.8,4.81,4.84,4.82,4.8,4.81,4.8,4.77,4.81,4.8,4.81,4.84,4.86,4.83,4.82,4.81,4.8,4.78,4.81,4.81,4.82,4.88,4.84,4.84,4.83,4.83,4.85,4.85,4.83,4.81,4.82,4.79,4.8,4.79,4.78,4.8,4.79,4.78,4.77,4.78,4.77,4.76,]
from alphashape import alphashape
from shapely.geometry import mapping
from bokeh.plotting import figure
from ipywidgets import interact
from bokeh.io import output_notebook, show, push_notebook
def alphashape_func(x, y, alpha):
length = range(len(x))
# date count
pnt = [[x[i],y[i]] for i in length]
# return a shapely.polygon/multipolygon
alpha_shape = alphashape(pnt, alpha=alpha)
# convert shapely.polygon/multipolygon to list
map = mapping(alpha_shape)['coordinates']
poly_shp = [i[0] for i in map]
bound_len = len(poly_shp)
# single alpha shape case
if bound_len == 1:
bound_x = [i[0] for i in poly_shp]
bound_y = [i[1] for i in poly_shp]
# multiple alpha shape case
else:
bound_x = [[i[0] for i in poly_shp[j]] for j in range(bound_len)]
bound_y = [[i[1] for i in poly_shp[j]] for j in range(bound_len)]
# return a dict containing 2 lists: x & y.
return {'x':bound_x, 'y':bound_y}
alpha = 5
alpha_high_pnt = alphashape_func(x,y,alpha)
plot = figure(sizing_mode='stretch_width', output_backend="webgl")
# line_pnt(plot, max_processed_xy['x'], max_processed_xy['y'],legend_label ='processed_xy',line_color='yellow', line_width=2)
alpha_shape_plt = plot.multi_line(xs=alpha_high_pnt['x'],ys=alpha_high_pnt['y'], line_color='cyan',legend_label = 'alpha_high_pnt')
# create an update function
def update(alpha=5):
alpha_high_pnt = alphashape_func(x,y,alpha)
alpha_shape_plt.data_source.data['xs'] = alpha_high_pnt['x']
alpha_shape_plt.data_source.data['ys'] = alpha_high_pnt['y']
# push new values to the notebook
push_notebook()
output_notebook()
show(plot)
interact(update, alpha=(0,25,1))
(the dynamic slider only works when you run it in jupyter in a web browser)
When I drag the slider, it shows an error message:
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('xs', 54), ('ys', 99)
I don't see the reason of this error, since when I manually adjust the alpha value, the lengths of xs and ys equal.
Can anyone help?
===================== update ======================
Based on #bigreddot suggestion, I update the code to this, the doesn't match problem is resolved, but the plot doesn't refresh yet.
from alphashape import alphashape
from shapely.geometry import mapping
from bokeh.plotting import figure
from bokeh.io import output_notebook, show, push_notebook
from bokeh.models import ColumnDataSource
from ipywidgets import interact
output_notebook()
def alphashape_func(x, y, alpha):
length = range(len(x))
# date count
pnt = [[x[i],y[i]] for i in length]
# return a shapely.polygon/multipolygon
alpha_shape = alphashape(pnt, alpha=alpha)
# convert shapely.polygon/multipolygon to list
map = mapping(alpha_shape)['coordinates']
poly_shp = [i[0] for i in map]
bound_len = len(poly_shp)
# single alpha shape case
if bound_len == 1:
bound_x = [i[0] for i in poly_shp]
bound_y = [i[1] for i in poly_shp]
# multiple alpha shape case
else:
bound_x = [[i[0] for i in poly_shp[j]] for j in range(bound_len)]
bound_y = [[i[1] for i in poly_shp[j]] for j in range(bound_len)]
# return a dict containing 2 lists: x & y.
return {'x':bound_x, 'y':bound_y}
alpha = 5
plot = figure(sizing_mode='stretch_width', output_backend="webgl")
source = ColumnDataSource(data=alphashape_func(x,y,alpha))
alpha_shape_plt = plot.multi_line(source=source, xs='x',ys='y', line_color='cyan',legend_label = 'alpha_high_pnt')
print
# create an update function
def update(alpha=5):
source.data = alphashape_func(x,y,alpha)
# push new values to the notebook
push_notebook()
interact(update, alpha=(0,25,1))
show(plot)
In between this line:
alpha_shape_plt.data_source.data['xs'] = alpha_high_pnt['x']
and this line:
alpha_shape_plt.data_source.data['ys'] = alpha_high_pnt['y']
the CDS columns are not all the same length. If you need to update with data that has a new length you should collect all the updates up front in a new_data dict and then set
source.data = new_data
to update the CDS "all at once". This is more efficient in any case, as well, since it results in fewer property update change events being sent out.

Matplotlib PathPatch Colors and Legends not Matching

I have a dataset that is a list of lists.
Each list is a category to be plotted as a box plot.
Each list has a list of up to 9 components to be plotted into subplots.
The functions I am using is below was based on this answer. I pulled it out of my work and added some mock data. Should be a minimal example below.
neonDict = {
0:0, 1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8
}
import matplotlib as mpl
import matplotlib.pyplot as plt
def coloredBoxPlot(axis, data,edgeColor,fillColor):
bp = axis.boxplot(data,vert=False,patch_artist=True)
for element in ['boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps']:
plt.setp(bp[element], color=edgeColor)
for patch in bp['boxes']:
patch.set(facecolor=fillColor)
return bp
def plotCalStats(data, prefix='Channel', savedir=None,colors=['#00597c','#a8005c','#00aeea','#007d50','#400080','#e07800'] ):
csize = mpl.rcParams['figure.figsize']
cdpi = mpl.rcParams['figure.dpi']
mpl.rcParams['figure.figsize'] = (12,8)
mpl.rcParams['figure.dpi'] = 1080
pkdata = []
labels = []
lstyles = []
fg, ax = plt.subplots(3,3)
for pk in range(len(neonDict)):
px = pk // 3
py = pk % 3
ax[px,py].set_xlabel('Max Pixel')
ax[px,py].set_ylabel('')
ax[px,py].set_title(str(neonDict[pk]) + ' nm')
pkdata.append([])
for cat in range(len(data)):
bp = ''
for acal in data[cat]:
for apeak in acal.peaks:
pkdata[apeak].append(acal.peaks[apeak][0])
for pk in range(9):
px = pk // 3
py = pk % 3
bp = coloredBoxPlot(ax[px,py], pkdata[pk], colors[cat], '#ffffff')
if len(data[cat]) > 0:
#print(colors[cat])
#print(bp['boxes'][0].get_edgecolor())
labels.append(prefix+' '+str(cat))
lstyles.append(bp['boxes'][0])
fg.legend(lstyles,labels)
fg.suptitle('Calibration Summary by '+prefix)
fg.tight_layout()
if savedir is not None:
plt.savefig(savedir + 'Boxplots.png')
plt.show()
mpl.rcParams['figure.figsize'] = csize
mpl.rcParams['figure.dpi'] = cdpi
return
class acal:
def __init__(self):
self.peaks = {}
for x in range(9):
self.peaks[x] = (np.random.randint(20*x,20*(x+1)),)
mockData = [[acal() for y in range(100)] for x in range(6)]
#Some unused channels
mockData[2] = []
mockData[3] = []
mockData[4] = []
plotCalStats(mockData)
So the issue is that the plot colors do not match the legend. Even if I restrict the data to only add a label if data exists (ensuring thus there is no issue with calling boxplots with an empty data set and not getting an appropriate PathPatch.
The printouts verify the colors are correctly stored in the PathPatch. (I can add my digits -> hex converter) if that is questioned.
Attached is the output. One can see I get a purple box but no purple in the legend. Purple is the 4th category which is empty.
Any ideas why the labels don't match the actual style? Thanks much!
EDITS:
To address question on 'confusing'.
I have six categories of data, each category is coming from a single event. Each event has 9 components. I want to compare all events, for each individual component, for each category on a single plot as shown below.
Each subplot is a individual component comprised from the series of data for each categorical (Channel).
So the link I have provided, (like I said, is adapted from) shows how to create a single box plot on one axis for 2 data sets. I've basically done the same thing for 6 data sets on 9 axis, where 3 data sets are empty (but don't have to be, I did it to illustrate the issue. If I have all 6 data sets there, how can you tell the colors are messed up?????)
Regarding the alpha:
The alphas are always 'ff' when giving only RGB data to matplotlib. If I call get_edgecolors, it will return a tuple (RGBA) where A = 1.0.
See commented out print statement.
EDIT2:
If I restrict it down to a single category, it makes the box plot view less confusing.
Single Example (see how box plot color is orange, figure says it's blue)
All colors off
Feel like this used to work....
Uncertain how the error presented as it did, but the issue has to do with reformatting the data before creating the box plot.
By removing pkdata.append([]) during the creation of the subplots before looping the categories and adding:
pkdata = [[],[],[],[],[],[],[],[],[]] during each iteration of the category loop fixed the issue. The former was sending in all previous channel data...
Output is now better. Full sol attached.
Likely, since the plot uses data from pkdata, the empty channel (data[cat]) plotted previous data (from data[cat-1]) as that was still in pkdata (actually, all previous data[cat] was still in pkdata) which was then plotted. I only check data[cat] for data on each loop to add to the legend. The legend was set up for channels 0,1,5, for example.. but we saw data for channel: 0 as 0, 0+1 as 1, 0+1 as 2, 0+1 as 3, 0+1 as 4, 0+1+5 as 5... thus channel 4 (purple) had data to plot but wasn't added to the legend. Giving the impression of 'misaligned' legends but rather unlegend data...
The single channel data is actually all 6 channels overlapping, the final channel 5 color being orange, overlapping all previous, namely the original channel 0 data to whom the data belongs and was properly added to the legend.
neonDict = {
0:0, 1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8
}
import matplotlib as mpl
import matplotlib.pyplot as plt
def getHex(r,g,b,a=1.0):
colors = [int(r * 255 ),int(g * 255 ),int(b * 255 ),int(a * 255) ]
s = '#'
for x in range(4):
cs = hex(colors[x])
if len(cs) == 3:
cs = cs + '0'
s += cs.replace('0x','')
return s
def getRGB(colstr):
try:
a = ''
r = int(colstr[1:3],16) / 255
g = int(colstr[3:5],16) / 255
b = int(colstr[5:7],16) / 255
if len (colstr) == 7:
a = 1.0
else:
a = int(colstr[7:],16) / 255
return (r,g,b,a)
except Exception as e:
print(e)
raise e
return
def compareHexColors(col1,col2):
try:
## ASSUME #RBG or #RBGA
## If less than 7, append the ff for the colors
if len(col1) < 9:
col1 += 'ff'
if len(col2) < 9:
col2 += 'ff'
return col1.lower() == col2.lower()
except Exception as e:
raise e
return False
def coloredBoxPlot(axis, data,edgeColor,fillColor):
bp = axis.boxplot(data,vert=False,patch_artist=True)
for element in ['boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps']:
plt.setp(bp[element], color=edgeColor)
for patch in bp['boxes']:
patch.set(facecolor=fillColor)
return bp
def plotCalStats(data, prefix='Channel', savedir=None,colors=['#00597c','#a8005c','#00aeea','#007d50','#400080','#e07800'] ):
csize = mpl.rcParams['figure.figsize']
cdpi = mpl.rcParams['figure.dpi']
mpl.rcParams['figure.figsize'] = (12,8)
mpl.rcParams['figure.dpi'] = 1080
pkdata = []
labels = []
lstyles = []
fg, ax = plt.subplots(3,3)
for pk in range(len(neonDict)):
px = pk // 3
py = pk % 3
ax[px,py].set_xlabel('Max Pixel')
ax[px,py].set_ylabel('')
ax[px,py].set_title(str(neonDict[pk]) + ' nm')
for cat in range(len(data)):
bp = ''
pkdata = [[],[],[],[],[],[],[],[],[]]
for acal in data[cat]:
for apeak in acal.peaks:
pkdata[apeak].append(acal.peaks[apeak][0])
for pk in range(9):
px = pk // 3
py = pk % 3
bp = coloredBoxPlot(ax[px,py], pkdata[pk], colors[cat], '#ffffff')
if len(data[cat]) > 0:
print(compareHexColors(colors[cat],getHex(*bp['boxes'][0].get_edgecolor())))
labels.append(prefix+' '+str(cat))
lstyles.append(bp['boxes'][0])
fg.legend(lstyles,labels)
fg.suptitle('Calibration Summary by '+prefix)
fg.tight_layout()
if savedir is not None:
plt.savefig(savedir + 'Boxplots.png')
plt.show()
mpl.rcParams['figure.figsize'] = csize
mpl.rcParams['figure.dpi'] = cdpi
return
class acal:
def __init__(self,center):
self.peaks = {}
for x in range(9):
self.peaks[x] = [10*x + (center) + (np.random.randint(10)-1)/2.0,0,0]
mockData = [[acal(x) for y in range(1000)] for x in range(6)]
#Some unused channels
mockData[2] = []
mockData[3] = []
mockData[4] = []
plotCalStats(mockData)

How to animate a 2D scatter plot given X, Y coordinates and time with appearing and disappearing points?

I have a data frame like the below:
Every row represents a person. They stay at 3 different locations for some time given on the dataframe. The first few people don't stay at location1 but they "born" at location2. The rest of them stay at every locations (3 locations).
I would like to animate every person at the given X, Y coordinates given on the data frame and represent them as dots or any other shape. Here is the flow:
Every person should appear at the first given location (location1) at the given time. Their color should be blue at this state.
Stay at location1 until location2_time and then appear at location2. Their color should be red at this state.
Stay at location2 until location3_time and then appear at location3. Their color should be red at this state.
Stay at location3 for 3 seconds and disappear forever.
There can be several people on the visual at the same time. How can I do that?
There are some good answers on the below links. However, on these solutions, points don't disappear.
How can i make points of a python plot appear over time?
How to animate a scatter plot?
The following is an implementation with python-ffmpeg, pandas, matplotlib, and seaborn. You can find output video on my YouTube channel (link is unlisted).
Each frame with figures is saved directly to memory. New figures are generated only when the state of the population changes (person appears/moves/disappears).
You should definetely separate this code into smaller chunks if you are using this in a Python package:
from numpy.random import RandomState, SeedSequence
from numpy.random import MT19937
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import ffmpeg
RESOLUTION = (12.8, 7.2) # * 100 pixels
NUMBER_OF_FRAMES = 900
class VideoWriter:
# Courtesy of https://github.com/kylemcdonald/python-utils/blob/master/ffmpeg.py
def __init__(
self,
filename,
video_codec="libx265",
fps=15,
in_pix_fmt="rgb24",
out_pix_fmt="yuv420p",
input_args=None,
output_args=None,
):
self.filename = filename
self.process = None
self.input_args = {} if input_args is None else input_args
self.output_args = {} if output_args is None else output_args
self.input_args["r"] = self.input_args["framerate"] = fps
self.input_args["pix_fmt"] = in_pix_fmt
self.output_args["pix_fmt"] = out_pix_fmt
self.output_args["vcodec"] = video_codec
def add(self, frame):
if self.process is None:
height, width = frame.shape[:2]
self.process = (
ffmpeg.input(
"pipe:",
format="rawvideo",
s="{}x{}".format(width, height),
**self.input_args,
)
.filter("crop", "iw-mod(iw,2)", "ih-mod(ih,2)")
.output(self.filename, **self.output_args)
.global_args("-loglevel", "quiet")
.overwrite_output()
.run_async(pipe_stdin=True)
)
conv = frame.astype(np.uint8).tobytes()
self.process.stdin.write(conv)
def close(self):
if self.process is None:
return
self.process.stdin.close()
self.process.wait()
def figure_to_array(figure):
"""adapted from: https://stackoverflow.com/questions/21939658/"""
figure.canvas.draw()
buf = figure.canvas.tostring_rgb()
n_cols, n_rows = figure.canvas.get_width_height()
return np.frombuffer(buf, dtype=np.uint8).reshape(n_rows, n_cols, 3)
# Generate data for the figure
rs1 = RandomState(MT19937(SeedSequence(123456789)))
time_1 = np.round(rs1.rand(232) * NUMBER_OF_FRAMES).astype(np.int16)
time_2 = time_1 + np.round(rs1.rand(232) * (NUMBER_OF_FRAMES - time_1)).astype(np.int16)
time_3 = time_2 + np.round(rs1.rand(232) * (NUMBER_OF_FRAMES - time_2)).astype(np.int16)
loc_1_x, loc_1_y, loc_2_x, loc_2_y, loc_3_x, loc_3_y = np.round(rs1.rand(6, 232) * 100, 1)
df = pd.DataFrame({
"loc_1_time": time_1,
"loc_1_x": loc_1_x,
"loc_1_y": loc_1_y,
"loc_2_time": time_2,
"loc_2_x": loc_2_x,
"loc_2_y": loc_2_y,
"loc_3_time": time_3,
"loc_3_x": loc_3_x,
"loc_3_y": loc_3_y,
})
"""The stack answer starts here"""
# Add extra column for disappear time
df["disappear_time"] = df["loc_3_time"] + 3
all_times = df[["loc_1_time", "loc_2_time", "loc_3_time", "disappear_time"]]
change_times = np.unique(all_times)
# Prepare ticks for plotting the figure across frames
x_values = df[["loc_1_x", "loc_2_x", "loc_3_x"]].values.flatten()
x_ticks = np.array(np.linspace(x_values.min(), x_values.max(), 6), dtype=np.uint8)
y_values = df[["loc_1_y", "loc_2_y", "loc_3_y"]].values.flatten()
y_ticks = np.array(np.round(np.linspace(y_values.min(), y_values.max(), 6)), dtype=np.uint8)
sns.set_theme(style="whitegrid")
video_writer = VideoWriter("endermen.mp4")
if 0 not in change_times:
# Generate empty figure if no person arrive at t=0
fig, ax = plt.subplots(figsize=RESOLUTION)
ax.set_xticklabels(x_ticks)
ax.set_yticklabels(y_ticks)
ax.set_title("People movement. T=0")
video_writer.add(figure_to_array(fig))
loop_range = range(1, NUMBER_OF_FRAMES)
else:
loop_range = range(NUMBER_OF_FRAMES)
palette = sns.color_palette("tab10") # Returns three colors from the palette (we have three groups)
animation_data_df = pd.DataFrame(columns=["x", "y", "location", "index"])
for frame_idx in loop_range:
if frame_idx in change_times:
plt.close("all")
# Get person who appears/moves/disappears
indexes, loc_nums = np.where(all_times == frame_idx)
loc_nums += 1
for i, loc in zip(indexes, loc_nums):
if loc != 4:
x, y = df[[f"loc_{loc}_x", f"loc_{loc}_y"]].iloc[i]
if loc == 1: # location_1
animation_data_df = animation_data_df.append(
{"x": x, "y": y, "location": loc, "index": i},
ignore_index=True
)
else:
data_index = np.where(animation_data_df["index"] == i)[0][0]
if loc in (2, 3): # location_2 or 3
animation_data_df.loc[[data_index], :] = x, y, loc, i
elif loc == 4: # Disappear
animation_data_df.iloc[data_index] = np.nan
current_palette_size = np.sum(~np.isnan(np.unique(animation_data_df["location"])))
fig, ax = plt.subplots(figsize=RESOLUTION)
sns.scatterplot(
x="x", y="y", hue="location", data=animation_data_df, ax=ax, palette=palette[:current_palette_size]
)
ax.set_xticks(x_ticks)
ax.set_xticklabels(x_ticks)
ax.set_yticks(y_ticks)
ax.set_yticklabels(y_ticks)
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
ax.set_title(f"People movement. T={frame_idx}")
video_writer.add(figure_to_array(fig))
video_writer.close()
Edit: There was a bug in which location_3 wasn't removed after 3 seconds. Fixed now.
Modifying the code from this question to only include the positions you want automatically removes the old ones if the old position isn't included in the new ones. This doesn't change if you want to animate by time or iterations or anything else. I have opted to use iterations here since it's easier and I don't know how you are handling your dataset. The code does have one bug though, the last point (or points if they last the same amount of time) remaining won't disappear, this can be solved easily if you don't want to draw anything again, if you do though for exaple in case you there is a gap in the data with no people and then the data resumes I haven't found any workarounds
import math
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
#The t0,t1,t2,t3 are the times (in iterations) that the position changes
#If t0 is None then the person will never be displayed
people = [
# t0 x1 y1 t1 x2 y2 t2 x3 y3 t4
[ 0, 1, 0.1, 1, 2, 0.2, 2, 3, 0.3, 3],
[ 2, None, None, None, 2, 1, 3, 4, 1, 7],
[ 2, float("NaN"), float("NaN"), float("NaN"), 2, 0.8, 4, 4, 0.8, 10],
]
fig = plt.figure()
plt.xlim(0, 5)
plt.ylim(0, 1)
graph = plt.scatter([], [])
def animate(i):
points = []
colors = []
for person in people:
if person[0] is None or math.isnan(person[0]) or i < person[0]:
continue
# Position 1
elif person[3] is not None and not (math.isnan(person[3])) and i <= person[3]:
new_point = [person[1], person[2]]
color = "b"
# Position 2
elif person[6] is not None and not (math.isnan(person[6])) and i <= person[6]:
new_point = [person[4], person[5]]
color = "r"
# Position 3
elif person[9] is not None and not (math.isnan(person[9])) and i <= person[9]:
new_point = [person[7], person[8]]
color = "r"
else:
people.remove(person)
new_point = []
if new_point != []:
points.append(new_point)
colors.append(color)
if points != []:
graph.set_offsets(points)
graph.set_facecolors(colors)
else:
# You can use graph.remove() to fix the last point not disappiring but you won't be able to plot anything after that
# graph.remove()
pass
return graph
ani = FuncAnimation(fig, animate, repeat=False, interval=500)
plt.show()

How to Create a Boxplot / Group Boxplot from [Min ,Q1 ,Q2 ,Q3 ,Max] in Python? [duplicate]

From what I can see, boxplot() method expects a sequence of raw values (numbers) as input, from which it then computes percentiles to draw the boxplot(s).
I would like to have a method by which I could pass in the percentiles and get the corresponding boxplot.
For example:
Assume that I have run several benchmarks and for each benchmark I've measured latencies ( floating point values ). Now additionally, I have precomputed the percentiles for these values.
Hence for each benchmark, I have the 25th, 50th, 75th percentile along with the min and max.
Now given these data, I would like to draw the box plots for the benchmarks.
As of 2020, there is a better method than the one in the accepted answer.
The matplotlib.axes.Axes class provides a bxp method, which can be used to draw the boxes and whiskers based on the percentile values. Raw data is only needed for the outliers, and that is optional.
Example:
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
boxes = [
{
'label' : "Male height",
'whislo': 162.6, # Bottom whisker position
'q1' : 170.2, # First quartile (25th percentile)
'med' : 175.7, # Median (50th percentile)
'q3' : 180.4, # Third quartile (75th percentile)
'whishi': 187.8, # Top whisker position
'fliers': [] # Outliers
}
]
ax.bxp(boxes, showfliers=False)
ax.set_ylabel("cm")
plt.savefig("boxplot.png")
plt.close()
This produces the following image:
To draw the box plot using just the percentile values and the outliers ( if any ) I made a customized_box_plot function that basically modifies attributes in a basic box plot ( generated from a tiny sample data ) to make it fit according to your percentile values.
The customized_box_plot function
def customized_box_plot(percentiles, axes, redraw = True, *args, **kwargs):
"""
Generates a customized boxplot based on the given percentile values
"""
box_plot = axes.boxplot([[-9, -4, 2, 4, 9],]*n_box, *args, **kwargs)
# Creates len(percentiles) no of box plots
min_y, max_y = float('inf'), -float('inf')
for box_no, (q1_start,
q2_start,
q3_start,
q4_start,
q4_end,
fliers_xy) in enumerate(percentiles):
# Lower cap
box_plot['caps'][2*box_no].set_ydata([q1_start, q1_start])
# xdata is determined by the width of the box plot
# Lower whiskers
box_plot['whiskers'][2*box_no].set_ydata([q1_start, q2_start])
# Higher cap
box_plot['caps'][2*box_no + 1].set_ydata([q4_end, q4_end])
# Higher whiskers
box_plot['whiskers'][2*box_no + 1].set_ydata([q4_start, q4_end])
# Box
box_plot['boxes'][box_no].set_ydata([q2_start,
q2_start,
q4_start,
q4_start,
q2_start])
# Median
box_plot['medians'][box_no].set_ydata([q3_start, q3_start])
# Outliers
if fliers_xy is not None and len(fliers_xy[0]) != 0:
# If outliers exist
box_plot['fliers'][box_no].set(xdata = fliers_xy[0],
ydata = fliers_xy[1])
min_y = min(q1_start, min_y, fliers_xy[1].min())
max_y = max(q4_end, max_y, fliers_xy[1].max())
else:
min_y = min(q1_start, min_y)
max_y = max(q4_end, max_y)
# The y axis is rescaled to fit the new box plot completely with 10%
# of the maximum value at both ends
axes.set_ylim([min_y*1.1, max_y*1.1])
# If redraw is set to true, the canvas is updated.
if redraw:
ax.figure.canvas.draw()
return box_plot
USAGE
Using inverse logic ( code at the very end ) I extracted the percentile values from this example
>>> percentiles
(-1.0597368367634488, 0.3977683984966961, 1.0298955252405229, 1.6693981537742526, 3.4951447843464449)
(-0.90494930553559483, 0.36916539612108634, 1.0303658700697103, 1.6874542731392828, 3.4951447843464449)
(0.13744105279440233, 1.3300645202649739, 2.6131540656339483, 4.8763411136047647, 9.5751914834437937)
(0.22786243898199182, 1.4120860286080519, 2.637650402506837, 4.9067126578493259, 9.4660357513550899)
(0.0064696168078617741, 0.30586770128093388, 0.70774153557312702, 1.5241965711101928, 3.3092932063051976)
(0.007009744579241136, 0.28627373934008982, 0.66039691869500572, 1.4772725266672091, 3.221716765477217)
(-2.2621660374110544, 5.1901313713883352, 7.7178532139979357, 11.277744848353247, 20.155971739152388)
(-2.2621660374110544, 5.1884411864079532, 7.3357079047721054, 10.792299385806913, 18.842012119715388)
(2.5417888074435702, 5.885996170695587, 7.7271286220368598, 8.9207423361593179, 10.846938621419374)
(2.5971767318505856, 5.753551925927133, 7.6569980004033464, 8.8161056254143233, 10.846938621419374)
Note that to keep this short I haven't shown the outliers vectors which will be the 6th element of each of the percentile array.
Also note that all usual additional kwargs / args can be used since they are simply passed to the boxplot method inside it :
>>> fig, ax = plt.subplots()
>>> b = customized_box_plot(percentiles, ax, redraw=True, notch=0, sym='+', vert=1, whis=1.5)
>>> plt.show()
EXPLANATION
The boxplot method returns a dictionary mapping the components of the boxplot to the individual matplotlib.lines.Line2D instances that were created.
Quoting from the matplotlib.pyplot.boxplot documentation :
That dictionary has the following keys (assuming vertical boxplots):
boxes: the main body of the boxplot showing the quartiles and the median’s confidence intervals if enabled.
medians: horizonal lines at the median of each box.
whiskers: the vertical lines extending to the most extreme, n-outlier data points. caps: the horizontal lines at the ends of the whiskers.
fliers: points representing data that extend beyond the whiskers (outliers).
means: points or lines representing the means.
For example observe the boxplot of a tiny sample data of [-9, -4, 2, 4, 9]
>>> b = ax.boxplot([[-9, -4, 2, 4, 9],])
>>> b
{'boxes': [<matplotlib.lines.Line2D at 0x7fe1f5b21350>],
'caps': [<matplotlib.lines.Line2D at 0x7fe1f54d4e50>,
<matplotlib.lines.Line2D at 0x7fe1f54d0e50>],
'fliers': [<matplotlib.lines.Line2D at 0x7fe1f5b317d0>],
'means': [],
'medians': [<matplotlib.lines.Line2D at 0x7fe1f63549d0>],
'whiskers': [<matplotlib.lines.Line2D at 0x7fe1f5b22e10>,
<matplotlib.lines.Line2D at 0x7fe20c54a510>]}
>>> plt.show()
The matplotlib.lines.Line2D objects have two methods that I'll be using in my function extensively. set_xdata ( or set_ydata ) and get_xdata ( or get_ydata ).
Using these methods we can alter the position of the constituent lines of the base box plot to conform to your percentile values ( which is what the customized_box_plot function does ). After altering the constituent lines' position, you can redraw the canvas using figure.canvas.draw()
Summarizing the mappings from percentile to the coordinates of the various Line2D objects.
The Y Coordinates :
The max ( q4_end - end of 4th quartile ) corresponds to the top most cap Line2D object.
The min ( q1_start - start of the 1st quartile ) corresponds to the lowermost most cap Line2D object.
The median corresponds to the ( q3_start ) median Line2D object.
The 2 whiskers lie between the ends of the boxes and extreme caps ( q1_start and q2_start - lower whisker; q4_start and q4_end - upper whisker )
The box is actually an interesting n shaped line bounded by a cap at the lower portion. The extremes of the n shaped line correspond to the q2_start and the q4_start.
The X Coordinates :
The Central x coordinates ( for multiple box plots are usually 1, 2, 3... )
The library automatically calculates the bounding x coordinates based on the width specified.
INVERSE FUNCTION TO RETRIEVE THE PERCENTILES FROM THE boxplot DICT:
def get_percentiles_from_box_plots(bp):
percentiles = []
for i in range(len(bp['boxes'])):
percentiles.append((bp['caps'][2*i].get_ydata()[0],
bp['boxes'][i].get_ydata()[0],
bp['medians'][i].get_ydata()[0],
bp['boxes'][i].get_ydata()[2],
bp['caps'][2*i + 1].get_ydata()[0],
(bp['fliers'][i].get_xdata(),
bp['fliers'][i].get_ydata())))
return percentiles
NOTE:
The reason why I did not make a completely custom boxplot method is because, there are many features offered by the inbuilt box plot that cannot be fully reproduced.
Also excuse me if I may have unnecessarily explained something that may have been too obvious.
Here is an updated version of this useful routine. Setting the vertices directly appears to work for both filled boxes (patchArtist=True) and unfilled ones.
def customized_box_plot(percentiles, axes, redraw = True, *args, **kwargs):
"""
Generates a customized boxplot based on the given percentile values
"""
n_box = len(percentiles)
box_plot = axes.boxplot([[-9, -4, 2, 4, 9],]*n_box, *args, **kwargs)
# Creates len(percentiles) no of box plots
min_y, max_y = float('inf'), -float('inf')
for box_no, pdata in enumerate(percentiles):
if len(pdata) == 6:
(q1_start, q2_start, q3_start, q4_start, q4_end, fliers_xy) = pdata
elif len(pdata) == 5:
(q1_start, q2_start, q3_start, q4_start, q4_end) = pdata
fliers_xy = None
else:
raise ValueError("Percentile arrays for customized_box_plot must have either 5 or 6 values")
# Lower cap
box_plot['caps'][2*box_no].set_ydata([q1_start, q1_start])
# xdata is determined by the width of the box plot
# Lower whiskers
box_plot['whiskers'][2*box_no].set_ydata([q1_start, q2_start])
# Higher cap
box_plot['caps'][2*box_no + 1].set_ydata([q4_end, q4_end])
# Higher whiskers
box_plot['whiskers'][2*box_no + 1].set_ydata([q4_start, q4_end])
# Box
path = box_plot['boxes'][box_no].get_path()
path.vertices[0][1] = q2_start
path.vertices[1][1] = q2_start
path.vertices[2][1] = q4_start
path.vertices[3][1] = q4_start
path.vertices[4][1] = q2_start
# Median
box_plot['medians'][box_no].set_ydata([q3_start, q3_start])
# Outliers
if fliers_xy is not None and len(fliers_xy[0]) != 0:
# If outliers exist
box_plot['fliers'][box_no].set(xdata = fliers_xy[0],
ydata = fliers_xy[1])
min_y = min(q1_start, min_y, fliers_xy[1].min())
max_y = max(q4_end, max_y, fliers_xy[1].max())
else:
min_y = min(q1_start, min_y)
max_y = max(q4_end, max_y)
# The y axis is rescaled to fit the new box plot completely with 10%
# of the maximum value at both ends
axes.set_ylim([min_y*1.1, max_y*1.1])
# If redraw is set to true, the canvas is updated.
if redraw:
ax.figure.canvas.draw()
return box_plot
Here is a bottom-up approach where the box_plot is build up using matplotlib's vline, Rectangle, and normal plot functions
def boxplot(df, ax=None, box_width=0.2, whisker_size=20, mean_size=10, median_size = 10 , line_width=1.5, xoffset=0,
color=0):
"""Plots a boxplot from existing percentiles.
Parameters
----------
df: pandas DataFrame
ax: pandas AxesSubplot
if to plot on en existing axes
box_width: float
whisker_size: float
size of the bar at the end of each whisker
mean_size: float
size of the mean symbol
color: int or rgb(list)
If int particular color of property cycler is taken. Example of rgb: [1,0,0] (red)
Returns
-------
f, a, boxes, vlines, whisker_tips, mean, median
"""
if type(color) == int:
color = plt.rcParams['axes.prop_cycle'].by_key()['color'][color]
if ax:
a = ax
f = a.get_figure()
else:
f, a = plt.subplots()
boxes = []
vlines = []
xn = []
for row in df.iterrows():
x = row[0] + xoffset
xn.append(x)
# box
y = row[1][25]
height = row[1][75] - row[1][25]
box = plt.Rectangle((x - box_width / 2, y), box_width, height)
a.add_patch(box)
boxes.append(box)
# whiskers
y = (row[1][95] + row[1][5]) / 2
vl = a.vlines(x, row[1][5], row[1][95])
vlines.append(vl)
for b in boxes:
b.set_linewidth(line_width)
b.set_facecolor([1, 1, 1, 1])
b.set_edgecolor(color)
b.set_zorder(2)
for vl in vlines:
vl.set_color(color)
vl.set_linewidth(line_width)
vl.set_zorder(1)
whisker_tips = []
if whisker_size:
g, = a.plot(xn, df[5], ls='')
whisker_tips.append(g)
g, = a.plot(xn, df[95], ls='')
whisker_tips.append(g)
for wt in whisker_tips:
wt.set_markeredgewidth(line_width)
wt.set_color(color)
wt.set_markersize(whisker_size)
wt.set_marker('_')
mean = None
if mean_size:
g, = a.plot(xn, df['mean'], ls='')
g.set_marker('o')
g.set_markersize(mean_size)
g.set_zorder(20)
g.set_markerfacecolor('None')
g.set_markeredgewidth(line_width)
g.set_markeredgecolor(color)
mean = g
median = None
if median_size:
g, = a.plot(xn, df['median'], ls='')
g.set_marker('_')
g.set_markersize(median_size)
g.set_zorder(20)
g.set_markeredgewidth(line_width)
g.set_markeredgecolor(color)
median = g
a.set_ylim(np.nanmin(df), np.nanmax(df))
return f, a, boxes, vlines, whisker_tips, mean, median
This is how it looks in action:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
nopts = 12
df = pd.DataFrame()
df['mean'] = np.random.random(nopts) + 7
df['median'] = np.random.random(nopts) + 7
df[5] = np.random.random(nopts) + 4
df[25] = np.random.random(nopts) + 6
df[75] = np.random.random(nopts) + 8
df[95] = np.random.random(nopts) + 10
out = boxplot(df)

Adding a single label to the legend for a series of different data points plotted inside a designated bin in Python using matplotlib.pyplot.plot()

I have a script for plotting astronomical data of redmapping clusters using a csv file. I could get the data points in it and want to plot them using different colors depending on their redshift values: I am binning the dataset into 3 bins (0.1-0.2, 0.2-0.25, 0.25,0.31) based on the redshift.
The problem arises with my code after I distinguish to what bin the datapoint belongs: I want to have 3 labels in the legend corresponding to red, green and blue data points, but this is not happening and I don't know why. I am using plot() instead of scatter() as I also had to do the best fit from the data in the same figure. So everything needs to be in 1 figure.
import numpy as np
import matplotlib.pyplot as py
import csv
z = open("Sheet4CSV.csv","rU")
data = csv.reader(z)
x = []
y = []
ylow = []
yupp = []
xlow = []
xupp = []
redshift = []
for r in data:
x.append(float(r[2]))
y.append(float(r[5]))
xlow.append(float(r[3]))
xupp.append(float(r[4]))
ylow.append(float(r[6]))
yupp.append(float(r[7]))
redshift.append(float(r[1]))
from operator import sub
xerr_l = map(sub,x,xlow)
xerr_u = map(sub,xupp,x)
yerr_l = map(sub,y,ylow)
yerr_u = map(sub,yupp,y)
py.xlabel("$Original\ Tx\ XCS\ pipeline\ Tx\ keV$")
py.ylabel("$Iterative\ Tx\ pipeline\ keV$")
py.xlim(0,12)
py.ylim(0,12)
py.title("Redmapper Clusters comparison of Tx pipelines")
ax1 = py.subplot(111)
##Problem starts here after the previous line##
for p in redshift:
for i in xrange(84):
p=redshift[i]
if 0.1<=p<0.2:
ax1.plot(x[i],y[i],color="b", marker='.', linestyle = " ")#, label = "$z < 0.2$")
exit
if 0.2<=p<0.25:
ax1.plot(x[i],y[i],color="g", marker='.', linestyle = " ")#, label="$0.2 \leq z < 0.25$")
exit
if 0.25<=p<=0.3:
ax1.plot(x[i],y[i],color="r", marker='.', linestyle = " ")#, label="$z \geq 0.25$")
exit
##There seems nothing wrong after this point##
py.errorbar(x,y,yerr=[yerr_l,yerr_u],xerr=[xerr_l,xerr_u], fmt= " ",ecolor='magenta', label="Error bars")
cof = np.polyfit(x,y,1)
p = np.poly1d(cof)
l = np.linspace(0,12,100)
py.plot(l,p(l),"black",label="Best fit")
py.plot([0,15],[0,15],"black", linestyle="dotted", linewidth=2.0, label="line $y=x$")
py.grid()
box = ax1.get_position()
ax1.set_position([box.x1,box.y1,box.width, box.height])
py.legend(loc='center left',bbox_to_anchor=(1,0.5))
py.show()
In the 1st 'for' loop, I have indexed every value 'p' in the list 'redshift' so that bins can be created using 'if' statement. But if I add the labels that are hashed out against each py.plot() inside the 'if' statements, each data point 'i' that gets plotted in the figure as an intersection of (x[i],y[i]) takes the label and my entire legend attains in total 87 labels (including the 3 mentioned in the code at other places)!!!!!!
I essentially need 1 label for each bin...
Please tell me what needs to done after the bins are created and py.plot() commands used...Thanks in advance :-)
Sorry I cannot post my image here due to low reputation!
The data 'appended' for x, y and redshift lists from the csv file are as follows:
x=[5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547]
y=[5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677]
redshift = [0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19]
Working with numerical data like this, you should really consider using a numerical library, like numpy.
The problem in your code arises from processing each record (a coordinate (x,y) and the corresponding value redshift) one at a time. You are calling plot for each point, thereby creating legends for each of those 84 datapoints. You should consider your "bins" as groups of data that belong to the same dataset and process them as such. You could use "logical masks" to distinguish between your "bins", as shown below.
It's also not clear why you call exit after each plotting action.
import numpy as np
import matplotlib.pyplot as plt
x = np.array([5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547])
y = np.array([5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677])
redshift = np.array([0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19])
bin3 = 0.25 <= redshift
bin2 = np.logical_and(0.2 <= redshift, redshift < 0.25)
bin1 = np.logical_and(0.1 <= redshift, redshift < 0.2)
plt.ion()
labels = ("$z < 0.2$", "$0.2 \leq z < 0.25$", "$z \geq 0.25$")
colors = ('r', 'g', 'b')
for bin, label, co in zip( (bin1, bin2, bin3), labels, colors):
plt.plot(x[bin], y[bin], color=co, ls='none', marker='o', label=label)
plt.legend()
plt.show()

Categories