Removing data from a line graph interactively in Jupyter notebook - python

I have a NumPy array which contains data from several samples. Some of the samples are outliers and need to be removed via visual inspection. Is there a way to make an interactive line plot in a jupyter notebook where a user can select a line on the plot by clicking it and have that line disappear/be highlighted and the data be marked for removal?
So far the best I have come up with is using Plotly:
import numpy as np
import plotly.graph_objects as go

# Five samples, each sampled at the same 100 x positions.
n_samples, n_points = 5, 100
x = np.linspace(0, 100, n_points)
y = np.random.randint(5, size=(n_samples, n_points))

fig = go.Figure()
for line in range(n_samples):
    # One trace per sample, i.e. per row of y, so x and y have equal length.
    # go.Scatter(mode='lines') is used in place of the deprecated go.Line.
    fig.add_trace(go.Scatter(x=x, y=y[line, :], mode='lines'))

# Wrap the figure in a widget so it can be displayed in the notebook.
f = go.FigureWidget(fig)
f
Plotly output line graph
Using this code I can get a line graph with lines that are selectable by selecting the corresponding label in the figure legend, but this quickly becomes unfeasible with more samples. Is there a way to do this without plotting a legend and having the lines be selectable directly in the graph?
Thanks

You can use click events, which allow you to define a callback that is bound to each trace. Here is an example of a callback function called update_trace that will remove a trace when it's clicked on (the @out.capture decorator isn't necessary, but can be useful for debugging using print statements):
import numpy as np
import plotly.graph_objects as go
from ipywidgets import Output, VBox

np.random.seed(42)  # fixed seed so the demo data is reproducible

# np.linspace defaults to 50 points, matching the 50 columns of y.
x = np.linspace(0, 100)
y = np.random.randint(5, size=(5, 50))

fig = go.Figure()
for line in range(5):
    fig.add_trace(go.Scatter(x=x, y=y[line, :], mode='lines', visible=True, name=f'trace_{line+1}'))

f = go.FigureWidget(fig)
out = Output()


@out.capture(clear_output=False)
def update_trace(trace, points, selector):
    """Hide the trace that was clicked.

    Registered as the click callback of every trace in ``f``. The
    ``out.capture`` decorator sends the ``print`` output below to the
    ``out`` widget shown under the figure.

    Args:
        trace: The trace this callback invocation is bound to.
        points: Click information; an empty ``point_inds`` is treated as
            "this trace was not the one clicked".
        selector: Supplied by Plotly; unused here.
    """
    if not points.point_inds:
        return
    selected_trace_name = trace.name
    for f_trace in f.data:
        # Only act on a trace that is still visible, so a trace is reported once.
        if selected_trace_name == f_trace.name and f_trace.visible is True:
            f_trace.visible = False
            print(f"removing {selected_trace_name}")


# Bind the same callback to every trace of the widget.
for trace in f.data:
    trace.on_click(update_trace)

VBox([f, out])

Related

Dynamically adding and removing Bokeh legends

I'm trying to develop a relatively complex plotting application, which has a huge selection of data to plot. Using dropdowns, the user can select which lines they would like to plot. I've developed a largely simplified version of the code (shown below) to illustrate what my application is like.
import bokeh.plotting.figure as bk_figure
import random
import numpy as np
from bokeh.io import show
from bokeh.layouts import row, column, widgetbox
from bokeh.models import ColumnDataSource, Legend, LegendItem, Line
from bokeh.models.widgets import MultiSelect
from bokeh.io import output_notebook # enables plot interface in J notebook
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
global x, ys  # has no effect at module scope; kept from the original listing
output_notebook()

plot = bk_figure(
    plot_width=950,
    plot_height=800,
    title="Legend Test Plot",
    x_axis_label="X Value",
    y_axis_label="Y Value",
)

# One selectable option per candidate line; only the first starts selected.
lines = ['0', '1', '2']
line_select = MultiSelect(title='Line Select', value=lines[:1], options=lines)

x = np.linspace(0, 10, 10)
# ys[k] is the straight line x * k, one entry per option in `lines`.
ys = [x * slope for slope in range(len(lines))]

# Line 0 is on the plot from the start.
source = ColumnDataSource(data={'x': x, 'y': ys[0]})
glyph = plot.add_glyph(source, Line(x='x', y='y'))


def change_line(attr, old, new):
    """Replace every renderer on ``plot`` with one line per selected option.

    Used as the ``'value'`` change handler of ``line_select``; ``attr``,
    ``old`` and ``new`` are accepted but not read.
    """
    # Work from a snapshot so the list is not mutated while iterating it.
    for stale in tuple(plot.renderers):
        plot.renderers.remove(stale)
    legend_items = []  # never filled in this version of the handler
    for choice in line_select.value:
        data = {'x': x, 'y': ys[int(choice)]}
        plot.add_glyph(ColumnDataSource(data=data), Line(x='x', y='y'))


line_select.on_change('value', change_line)
layout = column(line_select, plot)


def modify_doc(doc):
    """Attach the layout to ``doc`` and set the document title."""
    doc.add_root(row(layout, width=800))
    doc.title = "PlumeDataVis"


handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)
I've decided to dynamically add and remove line glyphs from the plot as they are selected in the MultiSelect. This is because if I simply hide the lines, the performance of the program suffers, given that there are so many line options in the real dataset.
Problem:
I want to add a legend to the plot which only contains entries for the Line glyphs that are currently in the plot (there are far too many line options in the real dataset to have all of them visible in the legend at all times.) I've been having issues finding any resources to help with this: for most applications, something like this is sufficient, but this doesn't work with the way I've defined the lines I'm plotting.
I've been adding legends manually, for example:
# Put line 0 on the plot; add_glyph returns the renderer the legend needs.
source = ColumnDataSource(data={'x': x, 'y': ys[0]})
glyph = plot.add_glyph(source, Line(x='x', y='y'))

# Build the first legend by hand and place it to the right of the plot.
legend_item = [LegendItem(label=lines[0], renderers=[glyph])]
legend = Legend(items=legend_item)
plot.add_layout(legend, place='right')
but I can't figure out how to effectively remove the legend layouts from the plot once I've added them. After reading the source code for add_layout, I realized that you could get a list of layouts in a given location by using something like getattr(plot,'right'). Trying to use this, I replaced the change_line function with the following:
def change_line(attr, old, new):
    """Rebuild the plot's lines and its right-hand legend from the selection.

    Removes every renderer and every object laid out on the right of
    ``plot``, then adds one line and one legend entry per value selected in
    ``line_select``. ``attr``, ``old`` and ``new`` are accepted but not read.
    """
    # Snapshots are taken so neither list is mutated while being iterated.
    for stale in tuple(plot.renderers):
        plot.renderers.remove(stale)
    for panel_item in tuple(plot.right):
        plot.right.remove(panel_item)

    legend_items = []
    for choice in line_select.value:
        data_source = ColumnDataSource(data={'x': x, 'y': ys[int(choice)]})
        renderer = plot.add_glyph(data_source, Line(x='x', y='y'))
        entry = LegendItem(label='line ' + str(choice), renderers=[renderer])
        legend_items.append(entry)

    # A fresh legend holding only the lines drawn above.
    legend = Legend(items=legend_items)
    plot.add_layout(legend, place='right')
Checking the attributes of the plot, this appears to add and remove legends and lines correctly, but it causes the plot to completely stop visually updating.
Does anyone know how to accomplish this behavior? It's possible that I'm not even adding the legend in the correct way, but I couldn't figure out how else to add them when lines are defined as Glyph objects.
Basic glyphs provide much flexibility compared to chart/model classes. A basic line (not Line) glyph can be used here.
In the code below, I am adding basic glyphs to the chart. I am saving the glyphs in a dictionary so they can be acted on later (as the OP said it's a complex application, I am sure this will be useful later). I have commented out the ColumnDataSource creation, as the data will be accessible through data_source.data of the respective glyphs (now saved in the dictionary).
Also, since now we are creating lines one by one, color needs to be provided for different lines. I have used a bokeh.palette function to generate a number of colors. More on this can be read here
import bokeh.plotting.figure as bk_figure
import random
import numpy as np
from bokeh.io import show
from bokeh.layouts import row, column, widgetbox
from bokeh.models import ColumnDataSource, Legend, LegendItem, Line
from bokeh.models.widgets import MultiSelect
from bokeh.io import output_notebook # enables plot interface in J notebook
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
import bokeh.palettes
#change the number as per the max number of glyphs in system
palette = bokeh.palettes.inferno(5)
global x, ys
output_notebook()
plot = bk_figure(plot_width=950, plot_height=800, title="Legend Test Plot"\
, x_axis_label="X Value", y_axis_label="Y Value")
lines = ['0','1','2']
line_select = MultiSelect(title='Line Select', value = [lines[0]],options=lines)
x = np.linspace(0,10,10)
ys = []
#generates three different lines
for i in range(len(lines)):
ys.append(x*i)
linedict = {}
#add line 0 to plot initially
#source = ColumnDataSource(data={'x':x,'y':ys[0]})
#glyph = Line(x='x',y='y')
#glyph = plot.add_glyph(source,glyph)
l1 = plot.line(x = x, y= ys[0], legend=str(0), color = palette[0])
linedict[str(0)] = l1
def change_line(attr, old, new):
    """Redraw ``plot`` so it shows exactly the lines selected in ``line_select``.

    Used as the ``'value'`` change handler of ``line_select``.

    Args:
        attr: Name of the changed property; unused.
        old: Previous selection; unused.
        new: New selection; unused, the widget's current value is read instead.
    """
    # Iterate over a copy: removing from the list being iterated skips entries.
    for renderer in list(plot.renderers):
        plot.renderers.remove(renderer)

    for i, line in enumerate(line_select.value):
        # plot.line takes the data, legend label and colour in one call, so no
        # separate ColumnDataSource / Line glyph has to be built here.
        plot.line(x=x, y=ys[int(line)], legend=line, color=palette[i])
line_select.on_change('value',change_line)
layout = column(line_select,plot)
def modify_doc(doc):
doc.add_root(row(layout,width=800))
doc.title = "PlumeDataVis"
handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)
After much anguish, I finally figured it out (this link was helpful). @Eugene Pakhomov was correct that removing lines and legends in my initial code was the problem. Instead, the key was to initialize a new line only when the user requested to plot a new maximum number of lines. In all other cases, you can simply edit the data_source of existing lines. This allows the program to avoid having all the lines plotted and hidden when the user only wants to plot one or two of the total options.
Instead of deleting and remaking the legend, you can set it to be empty on every update, then add entries as needed.
The following code worked for me in a Jupyter Notebook running bokeh 1.4.0:
import numpy as np
from bokeh.io import show
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, Legend, LegendItem, Line
from bokeh.models.widgets import MultiSelect
from bokeh.io import output_notebook
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
from bokeh.palettes import Category10 as palette
from bokeh.plotting import figure as bk_figure

output_notebook()

plot = bk_figure(plot_width=750, plot_height=600, title="Legend Test Plot",
                 x_axis_label="X Value", y_axis_label="Y Value")

lines = ['0', '1', '2']
line_select = MultiSelect(title='Line Select', value=[lines[0]], options=lines)

x = np.linspace(0, 10, 10)
# Three different lines with slope 0, 1 and 2.
ys = [x * i for i in range(len(lines))]

# Add line 0 to the plot initially.
source = ColumnDataSource(data={'x': x, 'y': ys[0]})
glyph = Line(x='x', y='y')
glyph = plot.add_glyph(source, glyph)

# Initialize the legend with the single starting line.
legend = Legend(items=[LegendItem(label=lines[0], renderers=[glyph])])
plot.add_layout(legend)


def change_line(attr, old, new):
    """Show the selected lines by reusing renderers instead of deleting them.

    The i-th selected option is drawn by the i-th renderer of ``plot``: an
    existing renderer gets new data, and a new renderer is created only when
    more lines are selected than have been drawn before. Renderers beyond
    the selection are emptied rather than removed. The legend is reset and
    refilled with one entry per selected line.

    Args:
        attr: Name of the changed property; unused.
        old: Previous selection; unused.
        new: New selection; unused, ``line_select.value`` is read instead.
    """
    plot.legend.items = []  # reset the legend

    selected = line_select.value
    for i, line in enumerate(selected):
        line_num = int(line)
        if i < len(plot.renderers):
            # Reuse the existing renderer: just swap the data it draws.
            plot.renderers[i].data_source.data = {'x': x, 'y': ys[line_num]}
        else:
            # First time this many lines are requested: create a new one.
            new_source = ColumnDataSource(data={'x': x, 'y': ys[line_num]})
            plot.add_glyph(new_source, Line(x='x', y='y', line_color=palette[10][i]))
        plot.legend.items.append(LegendItem(label=line, renderers=[plot.renderers[i]]))

    # "Remove" surplus lines by emptying their data instead of deleting them.
    # Slicing by len(selected) also covers an empty selection, where the loop
    # above never runs and no loop index exists.
    for surplus in plot.renderers[len(selected):]:
        surplus.data_source.data = {'x': [], 'y': []}


line_select.on_change('value', change_line)
layout = column(line_select, plot)


def modify_doc(doc):
    """Attach the layout to ``doc`` and set the document title."""
    doc.add_root(row(layout, width=800))
    doc.title = "PlumeDataVis"


handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)

interactive scatter highlight in bokeh

I am trying to visualise sensor output in relation to its path.
I plot path as scatter in one figure and some range of signal amplitude in the second figure. I need to visualise (highlight) a path point at which the particular reading was taken.
I started using bokeh as a backend and in general, got very good results with visualisations I need. But I am stuck on this particular interaction.
I would like to have some marker like a vertical line anchored in the middle of the figure. When I move/scroll the amplitude plot (the bottom one), I would like to highlight the point on the path plot where the reading closest to the marker line was taken.
The example code:
(I would like to anchor the marker line and add interaction between the red dot and the vertical line taking an index of the signal, which is not implemented.)
import numpy as np
import pandas as pd
from bokeh.io import output_file
from bokeh.models import ColumnDataSource, HoverTool, Span
from bokeh.plotting import figure, show
from bokeh.layouts import gridplot
output_file('interactive_path_sig.html', title="interactive path")
class InteractivePath():
    """Two stacked Bokeh plots of a synthetic sensor run: path and signal.

    Both plots draw from one ColumnDataSource built from ``self.df``, which
    holds the path coordinates ``x``/``y`` and the sensor reading ``z``.
    """

    def __init__(self):
        x = np.arange(0, 1000, 0.5)
        self.df = pd.DataFrame({"x": x,
                                "y": np.sin(x),
                                "z": np.cos(x)})
        self.source = ColumnDataSource(self.df)

    def plot_path(self):
        """Return the scatter plot of the path with one highlighted point."""
        fig = figure(title="Sensor Path")
        fig.scatter(x="x", y="y", source=self.source,
                    line_color=None, size=6)
        # TODO implement interaction instead of hard coded index
        index = 500  # this is where a working callback is needed
        print("x={}, y={}".format(self.df['x'][index], self.df['y'][index]))
        fig.circle(x=self.df['x'][index], y=self.df['y'][index],
                   fill_color="red", size=15)
        hover = HoverTool()
        # Tooltip fields refer to data-source columns with the "@" prefix.
        hover.tooltips = [("index", "@index"), ("senosr", "@z")]
        fig.add_tools(hover)
        return fig

    def plot_signal(self):
        """Return the line plot of the signal with a vertical marker line."""
        fig = figure(x_range=(450, 550), title="Signal Amplitude")
        fig.line(x="index", y="z", source=self.source, line_color="black", line_width=2)
        # TODO implement interaction instead of hard coded index
        index = 500  # this probably needs to emit a signal to the other plot
        vline = Span(location=index, dimension='height', line_color='red', line_width=3)
        # A Span is an annotation, so it is attached with add_layout rather
        # than appended to the glyph renderer list.
        fig.add_layout(vline)
        return fig

    def get_grid(self):
        """Place both visualisations in a grid, path above signal."""
        grid = gridplot([[self.plot_path()], [self.plot_signal()]],
                        sizing_mode='stretch_both',)
        return grid

    def vis_main(self):
        """Build the grid and display it."""
        show(self.get_grid())


if __name__ == "__main__":
    vis = InteractivePath()
    vis.vis_main()
So a few pointers:
I think you'll want both of those plots in the same method because the columndatasource is common between them, and you can set CustomJS behaviors between them if they're in the same scope.
The index that you're using already exists within your self.df which will be easier to interact with once it's on your plot, since you can handle it with JS plot behavior instead of going back to a python variable and reloading data.
Instead of drawing a new glyph for your 'highlighted' point, consider using the built-in 'hover' or 'selected' functionality. hover_color='red', for example, could replace drawing and moving another class of glyph. If you want the point to stay statically selected, so you can generate a nice report from a screenshot without a mouse, define a callback using the built-in selected property of ColumnDataSource.
I can post some actual code blocks with more specific examples, but if any of these points is a hard stop for your actual use case, it'll drive solution.
Edit:
So I got pretty close using one class method - the issue is being able to edit the second plot from the first method, not the actual change to the ColumnDataSource itself.
def plot_it(self):
    """Build both figures on the shared data source and link them on hover.

    Stores the figures as ``self.plot_signal`` and ``self.plot_path``.
    Hovering a point on the path plot re-centres the x range of the signal
    plot on that point's index, 50 samples to either side.
    """
    # Imported here so the method does not depend on the file-level imports.
    from bokeh.models import CustomJS, HoverTool

    self.plot_signal = figure(x_range=(450, 550), y_range=(-1, 1), title='signal')
    self.plot_signal.line(x='index', y='z', source=self.source)
    self.plot_signal.segment(x0=500, y0=-2, x1=500, y1=2, source=self.source)
    self.plot_path = figure(title='sensor')
    self.plot_path.scatter(x='x', y='y', source=self.source, hover_color='red')

    # NOTE(review): cb_data.index['1d'].indices is the accessor used by the
    # original answer; confirm it against the installed Bokeh version.
    jscode = '''
        var data = source.data;
        var plot_signal = plot_signal;
        var index = cb_data.index['1d'].indices;
        var xmin = 0;
        var xmax = 0;
        if (index.length > 0) {
            xmin = index[0] - 50;
            xmax = index[0] + 50;
            plot_signal.x_range.end = xmax;
            plot_signal.x_range.start = xmin;
            plot_signal.change.emit();
        }
    '''
    hover_callback = CustomJS(args=dict(source=self.source, plot_signal=self.plot_signal), code=jscode)
    # The hover tool is what actually triggers the callback.
    hover = HoverTool(callback=hover_callback)
    hover.tooltips = [('index', '@index'), ('sensor', '@z')]
    self.plot_path.add_tools(hover)
def get_grid(self):
    """Build both figures, then return them stacked: path above signal."""
    self.plot_it()
    rows = [[self.plot_path], [self.plot_signal]]
    return gridplot(rows)
That should do everything but move the line segment. I couldn't find the segment naming convention to add plot_signal.SOMEOBJECT.x0 and .x1 but it would just get added to the if (index > 0) block just like using index[0]. I took some of the style options out because I'm transcribing from another computer.
This question on moving a line segment might give you the syntax on the segment JSON object.

Bokeh: linking a line plot and a scatter plot

I have a line plot and a scatter plot that are conceptually linked by sample IDs, i.e. each dot on the 2D scatter plot corresponds to a line on the line plot.
While I have done linked plotting before using scatter plots, I have not seen examples of this for the situation above - where I select dots and thus selectively view lines.
Is it possible to link dots on a scatter plot to a line on a line plot? If so, is there an example implementation available online?
Searching the web for bokeh link line and scatter plot yields no examples online, as of 14 August 2018.
I know this is a little late - but maybe this snippet of code will help?
import numpy as np
from bokeh.io import output_file, show
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.models import Circle,MultiLine
def play():
    """Show a scatter plot and a multi-line plot that share one selection.

    Row i of the data source supplies both dot i on the left and line i on
    the right, so box-selecting dots highlights the matching lines. The
    result is written to ``play.html`` and opened with ``show``.
    """
    n_samples, n_vertices = 100, 3

    # Random demo data, drawn in the same order as the plots use them.
    x = np.linspace(0, 10, n_samples)
    y = np.random.rand(n_samples)
    xs = np.random.rand(n_samples, n_vertices)
    ys = np.random.normal(size=(n_samples, n_vertices))

    # MultiLine does not like numpy arrays, so pass a list of lists.
    xp = list(map(list, xs))
    yp = list(map(list, ys))

    output_file('play.html')
    source = ColumnDataSource(data=dict(x=x, y=y, xp=xp, yp=yp))

    TOOLS = 'box_select'
    size = dict(plot_width=700, plot_height=700)

    # Left: one dot per sample.
    left = figure(tools=TOOLS, **size)
    c1 = left.circle('x', 'y', source=source)
    c1.nonselection_glyph = Circle(fill_color='gray', fill_alpha=0.4,
                                   line_color=None)
    c1.selection_glyph = Circle(fill_color='orange', line_color=None)

    # Right: one polyline per sample, styled to match the dots.
    right = figure(tools=TOOLS, **size)
    c2 = right.multi_line(xs='xp', ys='yp', source=source)
    c2.nonselection_glyph = MultiLine(line_color='gray', line_alpha=0.2)
    c2.selection_glyph = MultiLine(line_color='orange')

    p = gridplot([[left, right]])
    show(p)
As things turn out, I was able to make this happen by using HoloViews rather than Bokeh. The relevant example for making this work comes from the Selection1d tap stream.
http://holoviews.org/reference/streams/bokeh/Selection1D_tap.html#selection1d-tap
I will do an annotated version of the example below.
First, we begin with imports. (Note: all of this assumes work is being done in the Jupyter notebook.)
import numpy as np
import holoviews as hv
from holoviews.streams import Selection1D
from scipy import stats
hv.extension('bokeh')
First off, we set some styling options for the charts. In my experience, I usually build the chart before styling it, though.
%%opts Scatter [color_index=2 tools=['tap', 'hover'] width=600] {+framewise} (marker='triangle' cmap='Set1' size=10)
%%opts Overlay [toolbar='above' legend_position='right'] Curve (line_color='black') {+framewise}
This function below generates data.
def gen_samples(N, corr=0.8):
    """Draw ``N`` correlated two-dimensional Gaussian samples.

    The mean and spread of each axis come from a fixed pair of bounds: the
    mean is the midpoint of the bounds and the standard deviation is one
    third of the bounds' standard deviation. ``corr`` scales the
    off-diagonal term of the covariance matrix.

    Returns:
        Array of shape ``(N, 2)`` from ``np.random.multivariate_normal``.
    """
    x_bounds = np.array([-0.51, 51.2])
    y_bounds = np.array([0.33, 51.6])
    centre = [x_bounds.mean(), y_bounds.mean()]
    sd_x = x_bounds.std() / 3
    sd_y = y_bounds.std() / 3
    cross = sd_x * sd_y * corr
    covariance = [[sd_x**2, cross],
                  [cross, sd_y**2]]
    return np.random.multivariate_normal(centre, covariance, N)
data = [('Week %d' % (i%10), np.random.rand(), chr(65+np.random.randint(5)), i) for i in range(100)]
sample_data = hv.NdOverlay({i: hv.Points(gen_samples(np.random.randint(1000, 5000), r2))
for _, r2, _, i in data})
The real magic begins here. First off, we set up a scatterplot using the hv.Scatter object.
points = hv.Scatter(data, ['Date', 'r2'], ['block', 'id']).redim.range(r2=(0., 1))
Then, we create a Selection1D stream. It pulls in points from the points object.
stream = Selection1D(source=points)
We then create a function to display the regression plot on the right. There's an empty plot that is the "default", and then there's a callback that hv.DynamicMap calls on.
empty = (hv.Points(np.random.rand(0, 2)) * hv.Curve(np.random.rand(0, 2))).relabel('No selection')
def regression(index):
    """Return the points of the selected sample overlaid with a fitted line.

    ``index`` is the selection from the stream; only its first entry is
    used. With no selection, the module-level ``empty`` placeholder is
    returned.
    """
    if not index:
        return empty
    scatter = sample_data[index[0]]
    slope, intercep, _rval, _pval, _std = stats.linregress(scatter['x'], scatter['y'])
    # Two points spanning the sample's range along its first dimension are
    # enough to draw the straight line.
    lower, upper = scatter.range(0)
    line_x = np.linspace(lower, upper, 2)
    line_y = slope * line_x + intercep
    overlay = scatter * hv.Curve((line_x, line_y))
    return overlay.relabel('r2: %.3f' % slope)
Now, we create the DynamicMap which dynamically loads the regression curve data.
reg = hv.DynamicMap(regression, kdims=[], streams=[stream])
# Ignoring annotation for average - it is not relevant here.
average = hv.Curve(points, 'Date', 'r2').aggregate(function=np.mean)
Finally, we display the plots.
points * average + reg
The most important thing I learned from building this is that the indices for the points have to be lined up with the indices for the regression curves.
I hope this helps others building awesome viz using HoloViews!

Plot doesn't refresh to plot new points when using matplotlib

I'm trying to create a plot that updates when given a set of points ([x,y]) but the figure gets stuck on the first plot points and won't plot the rest of the data. I looped a function call but it gets stuck on the first call. I need to be able to give the function multiple sets of single x and y values, and have them plot in a graph.
This is the code I have so far.
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style
from numpy import *
from time import sleep
import random as rd
class graphUpdater():
    """Collect (x, y) points and redraw them with a fitted straight line."""

    def __init__(self):
        style.use('fivethirtyeight')  # figure style
        self.fig = plt.figure()
        self.ax1 = self.fig.add_subplot(111)
        # Points received so far.
        self.xs = []
        self.ys = []
        # Switch on autoscaling in every available way (possibly overkill).
        axes = self.ax1
        axes.set_autoscalex_on(True)
        axes.set_autoscaley_on(True)
        axes.set_autoscale_on(True)
        axes.autoscale(enable=True, axis='both', tight=False)
        axes.autoscale_view(False, True, True)

    def plotPoint(self):
        """Clear the axes and redraw all points plus a degree-1 fit."""
        axes = self.ax1
        axes.clear()
        xp = linspace(min(self.xs), max(self.xs))  # x range for the fit line
        if len(self.xs) > 1:  # a line cannot be fitted to a single point
            coefficients = polyfit(self.xs, self.ys, 1)
            axes.plot(xp, polyval(coefficients, xp))
        axes.plot(self.xs, self.ys, "+")  # raw data points
        axes.set_xlabel('(L/A)*I')
        axes.set_ylabel('V')
        axes.set_title('DC Potential Drop')

    def appendPlot(self, x, y):
        """Store one new point, redraw, and show the figure without blocking."""
        self.xs.append(float(x))
        self.ys.append(float(y))
        self.plotPoint()
        plt.show(block=False)
# Call the function
plsWork = graphUpdater() # I'm very hopeful
i = 0
while(i < 50):
plsWork.appendPlot(i, rd.randint(0, 20))
i += 1
sleep(0.1)
quit_case = input("Hit 'Enter' to Quit") # Conditional so the plot won't disappear
It doesn't work fully. If you put a breakpoint on the quit_case line and run it on debugger on pycharm it plots the graph "properly".
Don't use plt.show(block=False) and don't use time.sleep. Instead, matplotlib provides an animation module, which can be used to avoid such problems as here.
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style
from numpy import *
from time import sleep
import random as rd
#%matplotlib notebook use in case of running this in a Jupyter notebook
class graphUpdater():
    """Accumulate points and redraw them; driven by an animation callback."""

    def __init__(self):
        # Data to be plotted.
        self.xs, self.ys = [], []
        style.use('fivethirtyeight')
        self.fig = plt.figure()
        self.ax1 = self.fig.add_subplot(111)
        # Make sure the axes rescale to fit all points (might be overkill).
        for enable_autoscale in (self.ax1.set_autoscalex_on,
                                 self.ax1.set_autoscaley_on,
                                 self.ax1.set_autoscale_on):
            enable_autoscale(True)
        self.ax1.autoscale(enable=True, axis='both', tight=False)
        self.ax1.autoscale_view(False, True, True)

    def plotPoint(self):
        """Redraw the stored points and, from two points on, a linear fit."""
        xs, ys = self.xs, self.ys
        self.ax1.clear()
        xp = linspace(min(xs), max(xs))  # x range for the regression line
        if len(xs) > 1:  # one point cannot be linearised
            fit = polyfit(xs, ys, 1)
            self.ax1.plot(xp, polyval(fit, xp))
        self.ax1.plot(xs, ys, "+")
        self.ax1.set_xlabel('(L/A)*I')
        self.ax1.set_ylabel('V')
        self.ax1.set_title('DC Potential Drop')

    def appendPlot(self, x, y):
        """Append one (x, y) pair as floats and redraw the plot."""
        self.xs.append(float(x))
        self.ys.append(float(y))
        self.plotPoint()
# Call the function
plsWork = graphUpdater() # I'm very hopeful
f = lambda i: plsWork.appendPlot(i, rd.randint(0, 20))
ani = animation.FuncAnimation(plsWork.fig, f, frames=50, interval=100, repeat=False)
plt.show()

Plot dynamically changing graph using matplotlib in Jupyter Notebook

I have a M x N 2D array: ith row represents that value of N points at time i.
I want to visualize the points [1 row of the array] in the form of a graph where the values get updated after a small interval. Thus the graph shows 1 row at a time, then update the values to next row, so on and so forth.
I want to do this in a jupyter notebook. Looking for reference codes.
I tried following things but no success:
http://community.plot.ly/t/updating-graph-with-new-data-every-100-ms-or-so/812
https://pythonprogramming.net/live-graphs-matplotlib-tutorial/
Create dynamic updated graph with Python
Update Lines in matplotlib
Here's an alternative, possibly simpler solution:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
m, n = 100, 100
# m rows of n normally distributed values; one row is shown per frame.
matrix = np.random.normal(0, 1, m * n).reshape(m, n)

fig = plt.figure()
ax = fig.add_subplot(111)
plt.ion()

# Show the empty figure once, then redraw it in place for every row.
fig.show()
fig.canvas.draw()

for i in range(100):
    ax.clear()
    ax.plot(matrix[i, :])
    fig.canvas.draw()
I had been particularly looking for a good answer for the scenario where one thread is pumping data and we want Jupyter notebook to keep updating graph without blocking anything. After looking through about dozen or so related answers, here are some of the findings:
Caution
Do not use below magic if you want a live graph. The graph update does not work if the notebook uses below:
%load_ext autoreload
%autoreload 2
You need below magic in your notebook before you import matplotlib:
%matplotlib notebook
Method 1: Using FuncAnimation
This has a disadvantage that graph update occurs even if your data hasn't been updated yet. Below example shows another thread updating data while Jupyter notebook updating graph through FuncAnimation.
%matplotlib notebook
from matplotlib import pyplot as plt
from matplotlib.animation import FuncAnimation
from random import randrange
from threading import Thread
import time
class LiveGraph:
    """Line plot refreshed by FuncAnimation while a thread appends data."""

    def __init__(self):
        self.x_data = []
        self.y_data = []
        self.figure = plt.figure()
        plotted = plt.plot(self.x_data, self.y_data)
        self.line = plotted[0]
        # Redraw once per second, whether or not new data has arrived.
        self.animation = FuncAnimation(self.figure, self.update, interval=1000)
        # Daemon thread that produces the data.
        self.th = Thread(target=self.thread_f, daemon=True)
        self.th.start()

    def update(self, frame):
        """Animation callback: push the current data into the line and rescale."""
        self.line.set_data(self.x_data, self.y_data)
        self.figure.gca().relim()
        self.figure.gca().autoscale_view()
        return (self.line,)

    def show(self):
        """Display the figure."""
        plt.show()

    def thread_f(self):
        """Append one (counter, random value) pair every second, forever."""
        x = 0
        while True:
            self.x_data.append(x)
            x += 1
            self.y_data.append(randrange(0, 100))
            time.sleep(1)
g = LiveGraph()
g.show()
Method 2: Direct Update
The second method is to update the graph as data arrives from another thread. This is risky because matplotlib is not thread safe but it does seem to work as long as there is only one thread doing updates.
%matplotlib notebook
from matplotlib import pyplot as plt
from matplotlib.animation import FuncAnimation
from random import randrange
from threading import Thread
import time
class LiveGraph:
    """Line plot updated directly from the thread that produces the data."""

    def __init__(self):
        self.x_data = []
        self.y_data = []
        self.figure = plt.figure()
        plotted = plt.plot(self.x_data, self.y_data)
        self.line = plotted[0]
        # Daemon thread that both produces data and refreshes the plot.
        self.th = Thread(target=self.thread_f, daemon=True)
        self.th.start()

    def update_graph(self):
        """Push the current data into the line and rescale the axes."""
        self.line.set_data(self.x_data, self.y_data)
        self.figure.gca().relim()
        self.figure.gca().autoscale_view()

    def show(self):
        """Display the figure."""
        plt.show()

    def thread_f(self):
        """Every second: append one point, then refresh the plot."""
        x = 0
        while True:
            self.x_data.append(x)
            x += 1
            self.y_data.append(randrange(0, 100))
            self.update_graph()
            time.sleep(1)
from live_graph import LiveGraph
g = LiveGraph()
g.show()
I explored this and produced the following which is largely self-documenting:
import matplotlib.pyplot as plt
%matplotlib notebook
print('This text appears above the figures')
fig1 = plt.figure(num='DORMANT')
print('This text appears betweeen the figures')
fig2 = plt.figure()
print('This text appears below the figures')

# The window title is set through the figure manager; the equivalent method
# on the canvas has been deprecated by Matplotlib.
fig1.canvas.manager.set_window_title('Canvas active title')
fig1.suptitle('Figure title', fontsize=20)

# Create plots inside the figures
ax1 = fig1.add_subplot(111)
ax1.set_xlabel('x label')
ax2 = fig2.add_subplot(111)

# Loop to update figures
end = 40
for i in range(end):
    ax2.cla()  # Clear only 2nd figure's axes, figure 1 is ADDITIVE
    ax1.set_title('Axes title')  # Reset as removed by cla()
    ax1.plot(range(i, end), (i,) * (end - i))
    ax2.plot(range(i, end), range(i, end), 'rx')
    fig1.canvas.draw()
    fig2.canvas.draw()
Another simple solution, based on the IPython.display functions display and clear_output. I found it here. Here is the code (based on @graham-s's answer):
from IPython.display import display, clear_output
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
m, n = 100, 100
matrix = np.random.normal(0, 1, size=(m, n))

fig = plt.figure()
ax = fig.add_subplot(111)

# One frame per row: redraw, show the figure, and mark the shown output to be
# replaced when the next frame is displayed.
for i, row in enumerate(matrix):
    ax.clear()
    ax.plot(row)
    display(fig)
    clear_output(wait=True)
    plt.pause(0.2)
It uses %matplotlib inline instead of notebook, and does not produce a small image as mentioned by @MasterScrat. It works in both Jupyter Notebook and Jupyter Lab. Sometimes the image blinks, which is not very nice, but it is usable for quick investigations.
If you need to keep axes ranges between different frames, add ax.set_xlim/ax.set_ylim after ax.clear().
With a moderate modification of @Shital Shah's solution, I've created a more general framework which can simply be applied to various scenarios:
import matplotlib
from matplotlib import pyplot as plt
class LiveLine:
    """A line on a LiveGraph that can be updated in place.

    Holds one "instant" line created at construction time, plus any number
    of additional lines added later with ``addtive_plot``. Every change
    triggers ``graph.update_graph()``.
    """

    def __init__(self, graph, fmt=''):
        """Create the instant line on ``graph.ax``.

        Args:
            graph: Owner object providing ``ax`` and ``update_graph()``.
            fmt: Format string forwarded to ``ax.plot``.
        """
        self.graph = graph
        # The instant line starts empty and is filled by update().
        self.line, = self.graph.ax.plot([], [], fmt)
        # Lines added afterwards; the list position is the line's index.
        self.lines = []

    def update(self, x_data, y_data):
        """Replace the data of the instant line and redraw."""
        self.line.set_data(x_data, y_data)
        self.graph.update_graph()

    def addtive_plot(self, x_data, y_data, fmt=''):
        """Add a new line to the same axes and return its index.

        The returned index is the one to pass to ``update_indexed_line``.
        """
        line, = self.graph.ax.plot(x_data, y_data, fmt)
        self.lines.append(line)
        self.graph.update_graph()
        # The line was just appended, so its index is the last position;
        # no search through the list is needed.
        return len(self.lines) - 1

    def update_indexed_line(self, index, x_data, y_data):
        """Replace the data of the added line at ``index`` and redraw."""
        self.lines[index].set_data(x_data, y_data)
        self.graph.update_graph()
class LiveGraph:
    """A figure with one axes that can be redrawn live, usable as a context manager.

    On construction the Matplotlib backend is switched to ``backend`` if it
    differs from the current one; :meth:`close` (called automatically when
    leaving a ``with`` block) switches back to the backend that was active
    before.

    Args:
        backend: name of the backend to draw with.
        figure_arg: keyword arguments forwarded to ``plt.figure``
            (default: none).
        window_title: title for the figure window; left untouched when None.
        suptitle_arg: keyword arguments forwarded to ``Figure.suptitle``
            (default: ``{'t': None}``).
        ax_label: dict with optional ``'x'`` and ``'y'`` entries for the axis
            labels; missing entries default to an empty label.
        ax_title: title of the axes.
    """

    def __init__(self, backend='nbAgg', figure_arg=None, window_title=None,
                 suptitle_arg=None, ax_label=None, ax_title=None):
        # None stands in for the defaults so that no dict is shared between
        # instances.
        figure_arg = {} if figure_arg is None else figure_arg
        suptitle_arg = {'t': None} if suptitle_arg is None else suptitle_arg
        ax_label = {} if ax_label is None else ax_label

        # Remember the current backend so close() can restore it.
        self.origin_backend = matplotlib.get_backend()
        if self.origin_backend != backend:
            print("original backend:", self.origin_backend)
            plt.switch_backend(backend)
            print("switch to backend:", matplotlib.get_backend())

        # Figure-level setup.
        self.figure = plt.figure(**figure_arg)
        if window_title is not None:
            # The window title is owned by the figure manager; the
            # canvas-level set_window_title was deprecated and later removed.
            self.figure.canvas.manager.set_window_title(window_title)
        self.figure.suptitle(**suptitle_arg)

        # Axes-level setup.
        self.ax = self.figure.add_subplot(111)
        self.ax.set_xlabel(ax_label.get('x', ''))
        self.ax.set_ylabel(ax_label.get('y', ''))
        self.ax.set_title(ax_title)

        # LiveLine objects created through add_line().
        self.lines = []

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised inside the block propagates.
        self.close()

    def close(self):
        """Switch back to the backend that was active before construction."""
        if matplotlib.get_backend() != self.origin_backend:
            plt.switch_backend(self.origin_backend)
            print("restore to backend:", matplotlib.get_backend())

    def add_line(self, fmt=''):
        """Create a LiveLine on this graph, remember it, and return it."""
        line = LiveLine(graph=self, fmt=fmt)
        self.lines.append(line)
        return line

    def update_graph(self):
        """Recompute the data limits, rescale the view and redraw the canvas."""
        # Rescale the axes the lines are actually plotted on rather than
        # whatever the figure's current axes happens to be.
        self.ax.relim()
        self.ax.autoscale_view()
        self.figure.canvas.draw()
With the above two classes, you can easily reproduce @Graham S's example:
import numpy as np
# 100 frames of 100 normally distributed samples each.
m = 100
n = 100
matrix = np.random.normal(0, 1, m * n).reshape(m, n)

# Draw one frame after another on a single live line.
with LiveGraph(backend='nbAgg') as h:
    line1 = h.add_line()
    for row in matrix:
        line1.update(range(len(row)), row)
Note that the default backend is nbAgg; you can pass another backend such as qt5Agg. When the with block finishes, your original backend is restored.
and @Tom Hale's example:
# Two live figures: `g` gains one new line per step, `h` redraws a single line.
with LiveGraph(
    figure_arg={'num': 'DORMANT2'},
    window_title='Canvas active title',
    suptitle_arg={'t': 'Figure title', 'fontsize': 20},
    ax_label={'x': 'x label', 'y': ''},
    ax_title='Axes title',
) as g, LiveGraph() as h:
    line1 = g.add_line()
    line2 = h.add_line('rx')
    end = 40
    for i in range(end):
        xs = range(i, end)
        # Additive figure: horizontal segment at height i.
        line1.addtive_plot(xs, (i,) * len(xs))
        # Replaced figure: diagonal of red crosses over the same x range.
        line2.update(xs, xs)
You can also update a particular line in the additive plot of @Tom Hale's example:
import numpy as np
with LiveGraph(
    figure_arg={'num': 'DORMANT3'},
    window_title='Canvas active title',
    suptitle_arg={'t': 'Figure title', 'fontsize': 20},
    ax_label={'x': 'x label', 'y': ''},
    ax_title='Axes title',
) as g:
    line1 = g.add_line()
    end = 40
    # Build the additive plot; afterwards line_index is the index of the
    # last line that was added.
    for i in range(end):
        line_index = line1.addtive_plot(range(i, end), (i,) * (end - i))
    # Re-draw only that line: its start point j oscillates with cos(i).
    for i in range(100):
        j = int(20 * (1 + np.cos(i)))
        xs = range(j, end)
        line1.update_indexed_line(line_index, xs, (line_index,) * len(xs))
Note that the second for loop only updates one particular line, the one with index line_index. You can change that index to any other line's index.
In my case, I use it in a machine learning training loop to progressively update the learning curve.
import numpy as np
import time
# Use LiveGraph as a context manager so that the original backend is
# restored even if the training loop raises.
with LiveGraph() as g:
    # add 2 lines
    line1 = g.add_line()
    line2 = g.add_line()
    # two lists that receive the training results
    list1 = []
    list2 = []
    # training loop
    for i in range(100):
        # stand-in for one training step
        time.sleep(0.1)
        # collect the (here: random) training results
        list1.append(np.random.normal())
        list2.append(np.random.normal())
        # update the learning curves
        line1.update(np.arange(len(list1)), list1)
        line2.update(np.arange(len(list2)), list2)
In addition to @0aslam0's answer, I used code from here. I've just changed the animate function so that each call includes one more row. It draws the animated evolution (M steps) of all N points.
from IPython.display import HTML
import numpy as np
from matplotlib import animation
# N points, each followed over M steps; row i holds all N values at step i.
N = 5
M = 100
points_evo_array = np.random.rand(M, N)
# First set up the figure, the axis, and the plot elements we want to animate
fig = plt.figure()
ax = plt.axes(xlim=(0, M), ylim=(0, np.max(points_evo_array)))
# One initially empty line per point (ax.plot returns a list; keep its only artist).
lines = [ax.plot([], [])[0] for _ in range(N)]
def init():
    """Blank out every animated line and return the list of lines."""
    for artist in lines:
        artist.set_data([], [])
    return lines
def animate(i):
    """Show the first ``i`` steps of every point and return the lines.

    Line ``j`` receives column ``j`` of ``points_evo_array`` truncated to
    ``i`` rows, plotted against the step numbers ``0 .. i - 1``.
    """
    steps = range(i)
    for j, line in enumerate(lines):
        # Pass the column slice directly; wrapping it in a list would turn
        # it into a (1, i) array for i x-values.
        line.set_data(steps, points_evo_array[:i, j])
    return lines
# Build the animation; with blit=True only the parts that have changed are
# re-drawn.
anim = animation.FuncAnimation(
    fig,
    animate,
    frames=np.arange(1, M),
    init_func=init,
    interval=10,
    blit=True,
)
# Render the animation as an HTML5 video for display in the notebook.
HTML(anim.to_html5_video())
Hope it will be useful
Here is a library that deals with real-time plotting/logging of data (joystick), although I am not sure it works with Jupyter. You can install it with the usual pip install joystick.
It is hard to make a working solution without more details on your data. Here is an option:
import joystick as jk
import numpy as np
class test(jk.Joystick):
    """Joystick frame that pushes a fresh row of random data to a graph."""

    # initialize the infinite loop decorator
    _infinite_loop = jk.deco_infinite_loop()

    def _init(self, *args, **kwargs):
        """
        Function called at initialization, see the docs
        """
        # INIT DATA HERE
        self.shape = (10, 4)  # M, N
        self.data = np.random.random(self.shape)
        self.xaxis = range(self.shape[1])
        ############
        # create a graph frame
        self.mygraph = self.add_frame(
            jk.Graph(name="TheName", size=(500, 500), pos=(50, 50),
                     fmt="go-", xnpts=self.shape[1], freq_up=5, bgcol="w",
                     xylim=(0, self.shape[1]-1, None, None)))

    # The decorator is what makes this method loop; without it the method
    # is never called.
    @_infinite_loop(wait_time=0.5)
    def _generate_fake_data(self):  # function looped every 0.5 second
        """
        Loop starting with the simulation start, getting data and
        pushing it to the graph every 0.5 seconds
        """
        # NEW (RANDOM) DATA
        new_data = np.random.random(self.shape[1])
        # concatenate data
        self.data = np.vstack((self.data, new_data))
        # push the newest row to the graph
        self.mygraph.set_xydata(self.xaxis, self.data[-1])
# Instantiate the frame class defined above.
t = test()
# NOTE(review): start/stop/exit are inherited from jk.Joystick; presumably they
# begin the looped data generation, halt it, and close the frames -- confirm
# against the joystick documentation.
t.start()
t.stop()
t.exit()
This code will create a graph that is auto-updating 5 times a second (freq_up=5), while new data is (randomly) generated every 0.5 seconds (wait_time=0.5) and pushed to the graph for display.
If you don't want the Y-axis to wiggle around, type t.mygraph.xylim = (0, t.shape[1]-1, 0, 1).
I don't know much about matplotlib or jupyter. However, Graphs interest me. I just did some googling and came across this post. Seems like you have to render the graph as an HTML video to see a dynamic graph.
I tried that post. This is the notebook, if you wish to try it. Note that the kernel (Python 2) takes some time to build the video. You can read more about it here.
Now, you want to display a graph row by row. I tried this. In that notebook, I have a dump_data with 10 rows. I randomly pick one row at a time, plot it, and display the result as a video.
It was interesting to learn about jupyter. Hope this helps.

Categories