My current goal is to animate two sets of data simultaneously on a single plot with 2 subplots. I previously asked a question here about clearing the axes data with animated seaborn graphs, but rather than clearing the data on the axes, I need to append/update them (i.e.: something like the animated plots here). To make matters more complicated, one of my sets of data is stored in numpy arrays whereas the other set is stored in a pandas dataframe.
I tried using some test data before using my actual data, but that doesn't work, especially when I try to animate the data stored in the dataframe:
import math, os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import animation
def update(i, pass_data, fig, axes, sort_yrs, ani, x_val, y_val):
    """Draw animation frame ``i`` on both subplots.

    Parameters
    ----------
    i : int
        Frame index; also used to index ``sort_yrs``.
    pass_data : pandas.DataFrame
        Table with 'year', 'month' and 'passengers' columns.
    fig : matplotlib.figure.Figure
        Figure whose super-title shows the year of the current frame.
    axes : sequence of matplotlib.axes.Axes
        ``axes[1]`` receives the seaborn lines.
    sort_yrs : sequence
        One year per frame, in the order the frames should appear.
    ani : matplotlib.lines.Line2D
        Line artist that is extended on every frame.
    x_val, y_val : array-like
        Complete x/y series behind ``ani``.
    """
    current_year = sort_yrs[i]
    # Keep every year up to the current one: filtering on a single year
    # leaves one point per month, and a one-point line without a marker
    # is invisible.
    shown = pass_data[pass_data['year'] <= current_year]
    fig.suptitle(r"Year = {}".format(current_year), fontsize=35, y=1.000)
    # hue is 'month', so hue_order must list months (it used to be given the
    # years, which matches no hue level).  ax= pins the drawing to the right
    # subplot instead of relying on whichever axes is "current".
    sns.lineplot(x='year', y='passengers', data=shown, hue='month',
                 hue_order=list(pass_data['month'].unique()),
                 palette='Set1', legend=False, ax=axes[1])
    # Use the artist that was passed in (not a global) and include point i
    # itself so the final frame shows the complete curve.
    ani.set_data(x_val[:i + 1], y_val[:i + 1])
# Read/generate data
passenger_df = sns.load_dataset('flights')
yrs_of_int = ['Jan', 'Mar', 'Jun', 'Jul', 'Dec']  # months to keep (name kept as-is)
fil_pass_df = passenger_df[passenger_df['month'].isin(yrs_of_int)]
sorted_yrs = sorted(fil_pass_df['year'].unique())
# One sample of the sine curve per animation frame (one frame per year).
x = np.linspace(0, 2*np.pi, len(sorted_yrs))
y = np.sin(x)

# Initialize figure and plot initial points
fig, axes = plt.subplots(ncols=2, figsize=(15, 7))
axes[0].plot(x[0], y[0], color='tab:red', label='Initial Point', marker='o', markersize=5)
# ax= makes the target subplot explicit instead of relying on the current axes.
sns.lineplot(x='year', y='passengers',
             data=fil_pass_df[fil_pass_df['year'] == sorted_yrs[0]],
             hue='month', hue_order=yrs_of_int, palette='Set1',
             ax=axes[1])
ani1, = axes[0].plot(x[0], y[0], color='tab:blue', label='Dynamic Response')

# Formatting
axes[0].legend(loc='lower right')
axes[0].set_ylim(-1.1*np.pi, 1.1*np.pi)
axes[0].set_xlim(-0.1, 1.1*2*np.pi)
# The upper limit has to follow the LAST year; min()+1 hid every later year.
axes[1].set_xlim(fil_pass_df['year'].min() - 1, fil_pass_df['year'].max() + 1)
axes[1].set_ylim(100, 700)
axes[0].set_title('Test Plot', fontsize=15, pad=5)
axes[1].set_title('Passengers by Month', fontsize=15, pad=5)
axes[0].set_ylabel(r"$\sin(x)$", fontsize=20, labelpad=10)
axes[0].set_xlabel(r"$x$", fontsize=20, labelpad=10)
axes[1].set_ylabel("Passenger Count", fontsize=20, labelpad=10)
axes[1].set_xlabel("Year", fontsize=20, labelpad=10)

# Create animation and save it.  The result is bound to `anim` so that the
# imported `animation` module is not shadowed.
anim = animation.FuncAnimation(fig, update,
                               fargs=(fil_pass_df, fig, axes, sorted_yrs, ani1, x, y),
                               frames=range(len(sorted_yrs)), interval=0.5,
                               blit=False, repeat=True)
anim.save('test.mp4',
          fps=1, extra_args=['-vcodec', 'libx264'], dpi=200)
The test mp4 file that is generated from here animates the left most plot (numpy array data), but fails to plot the right most plot (seaborn data). I have four theories as to why it doesn't work:
I'm not supposed to initialize the right most figure parameters before the update call. Instead, the figure parameters need to be set in update.
The fact that I'm specifying hue in the update function is somehow messing up with matplotlib's animation.
The fact that my data is either in numpy arrays or a pandas dataframe.
I'm overthinking this and forgot one command in my code that makes this work.
I tried moving the right figure formatting into update but that didn't seem to work, so it might be bullet points #2-4.
Does anyone know why this is happening/how to solve it? For subplot animations, is there a rule as to whether everything should be stored in the same data type?
Related
Last week I asked a question about finding a way to interpolate a surface from multiple curves (data from multiple Excel files) and someone referred me to a question which explains how to use scipy.interpolate.RBFInterpolator (How can I perform two-dimensional interpolation using scipy?).
I tried this method but I am getting a bad surface fit (see the pictures below). Does anyone understand what is wrong with my code? I tried to change the kernel parameter but "linear" seems to be the best. Am I making an error when I use np.meshgrid? Thanks for the help.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from scipy.interpolate import RBFInterpolator
# Read every results file in the folder (here 'Strain_Stress') and stack the
# three columns of interest across all files.
results_dir = r"C:/Users/bdhugu/Desktop/Strain_Stress"
tables = [pd.read_excel(results_dir + "/" + file_name)
          for file_name in os.listdir(results_dir)]
# Series.append was removed in pandas 2.0.  pd.concat keeps each file's own
# index, just like append(ignore_index=False) did.
strain = pd.concat([table["Strain (mm/mm)"] for table in tables])
stress = pd.concat([table["Stress (MPa)"] for table in tables])
strain_rate = pd.concat([table["Strain rate (s^-1)"] for table in tables])

# RBFINTERPOLATOR METHOD
# ----------------------------------------------------------------------------
x_scattered = strain.to_numpy()
y_scattered = strain_rate.to_numpy()
z_scattered = stress.to_numpy()
# NOTE(review): the RBF kernel works on Euclidean distance between
# (strain, strain rate) pairs.  If the two columns differ by orders of
# magnitude, one axis dominates the fit -- consider rescaling both to a
# common range before interpolating.  TODO confirm against the data.
scattered_points = np.stack([x_scattered, y_scattered], -1)
x_dense, y_dense = np.meshgrid(
    np.linspace(strain.min(), strain.max(), 20),
    np.linspace(strain_rate.min(), strain_rate.max(), 21))
dense_points = np.stack([x_dense.ravel(), y_dense.ravel()], -1)
interpolation = RBFInterpolator(scattered_points, z_scattered, smoothing=0,
                                kernel='linear', epsilon=1, degree=0)
z_dense = interpolation(dense_points).reshape(x_dense.shape)

# A single 3D figure: the earlier fig.gca(projection='3d') call only produced
# an extra empty figure, and that keyword is no longer accepted by gca().
fig = plt.figure(figsize=(15, 10), dpi=400)
ax = plt.axes(projection='3d')
ax.plot_surface(x_dense, y_dense, z_dense, cmap='viridis', edgecolor='none')
ax.invert_xaxis()
ax.set_title('Surface plot')
plt.show()
Data to interpolate
Surface fitting with RBFInterpolator
I'm trying to create a plot that updates when given a set of points ([x,y]) but the figure gets stuck on the first plot points and won't plot the rest of the data. I looped a function call but it gets stuck on the first call. I need to be able to give the function multiple sets of single x and y values, and have them plot in a graph.
This is the code I have so far.
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style
from numpy import *
from time import sleep
import random as rd
class graphUpdater():
    """Collect (x, y) points and redraw them together with a linear fit."""

    def __init__(self):
        # Initialize arrays to be plotted
        self.xs = []
        self.ys = []
        style.use('fivethirtyeight')  # Figure Style
        self.fig = plt.figure()  # Initialize figure
        self.ax1 = self.fig.add_subplot(111)  # Create a subplot
        # Ensure the figure auto-scales to fit all points. Might be overkill
        self.ax1.set_autoscalex_on(True)
        self.ax1.set_autoscaley_on(True)
        self.ax1.set_autoscale_on(True)
        self.ax1.autoscale(enable=True, axis='both', tight=False)
        self.ax1.autoscale_view(False, True, True)

    def plotPoint(self):
        """Redraw ``self.xs``/``self.ys`` and, from two points on, a degree-1 fit."""
        self.ax1.clear()  # Remove what was drawn for the previous point set
        if len(self.xs) > 1:  # A line cannot be fitted through a single point
            xp = linspace(min(self.xs), max(self.xs))  # x-range for regression
            p1 = polyfit(self.xs, self.ys, 1)  # Coefficients of the fitted line
            self.ax1.plot(xp, polyval(p1, xp))  # Plot the line
        self.ax1.plot(self.xs, self.ys, "+")  # Plot the raw data points
        self.ax1.set_xlabel('(L/A)*I')  # Axis and title labels
        self.ax1.set_ylabel('V')
        self.ax1.set_title('DC Potential Drop')

    def appendPlot(self, x, y):
        """Append one point (converted to float) and redraw the figure."""
        self.xs.append(float(x))  # Append xs with x value
        self.ys.append(float(y))  # Append ys with y value
        self.plotPoint()  # Call the plotPoint function to plot new array values
        # NOTE(review): a non-blocking show() returns immediately; presumably
        # the GUI never gets a chance to repaint between calls, which would
        # explain the window freezing on the first plot -- confirm against
        # the matplotlib interactive-mode documentation.
        plt.show(block=False)  # Plot and release so graphs can be over written
# Call the function
plsWork = graphUpdater()  # I'm very hopeful
# Feed 50 points, one every 0.1 s, with a random y value for each x.
for i in range(50):
    plsWork.appendPlot(i, rd.randint(0, 20))
    sleep(0.1)
quit_case = input("Hit 'Enter' to Quit")  # Conditional so the plot won't disappear
It doesn't work fully. If you put a breakpoint on the quit_case line and run it on debugger on pycharm it plots the graph "properly".
Don't use plt.show(block=False) and don't use time.sleep. Instead, matplotlib provides an animation module, which can be used to avoid such problems as here.
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style
from numpy import *
from time import sleep
import random as rd
#%matplotlib notebook use in case of running this in a Jupyter notebook
class graphUpdater():
    """Collect (x, y) points and redraw them together with a linear fit.

    The instance only draws; stepping through the data is left to the
    caller (for example an animation callback that calls ``appendPlot``).
    """

    def __init__(self):
        # Initialize arrays to be plotted
        self.xs = []
        self.ys = []
        style.use('fivethirtyeight')  # Figure Style
        self.fig = plt.figure()  # Initialize figure
        self.ax1 = self.fig.add_subplot(111)  # Create a subplot
        # Ensure the figure auto-scales to fit all points. Might be overkill
        self.ax1.set_autoscalex_on(True)
        self.ax1.set_autoscaley_on(True)
        self.ax1.set_autoscale_on(True)
        self.ax1.autoscale(enable=True, axis='both', tight=False)
        self.ax1.autoscale_view(False, True, True)

    def plotPoint(self):
        """Redraw ``self.xs``/``self.ys`` and, from two points on, a degree-1 fit."""
        self.ax1.clear()  # Remove what was drawn for the previous point set
        if len(self.xs) > 1:  # A line cannot be fitted through a single point
            xp = linspace(min(self.xs), max(self.xs))  # x-range for regression
            p1 = polyfit(self.xs, self.ys, 1)  # Coefficients of the fitted line
            self.ax1.plot(xp, polyval(p1, xp))  # Plot the line
        self.ax1.plot(self.xs, self.ys, "+")  # Plot the raw data points
        self.ax1.set_xlabel('(L/A)*I')  # Axis and title labels
        self.ax1.set_ylabel('V')
        self.ax1.set_title('DC Potential Drop')

    def appendPlot(self, x, y):
        """Append one point (converted to float) and redraw the axes."""
        self.xs.append(float(x))  # Append xs with x value
        self.ys.append(float(y))  # Append ys with y value
        self.plotPoint()  # Call the plotPoint function to plot new array values
# Build the plotter, then let FuncAnimation drive it one point per frame.
plsWork = graphUpdater()  # I'm very hopeful


def f(i):
    """Frame callback: add point ``i`` with a random y value in [0, 20]."""
    return plsWork.appendPlot(i, rd.randint(0, 20))


ani = animation.FuncAnimation(
    plsWork.fig,
    f,
    frames=50,
    interval=100,
    repeat=False,
)
plt.show()
I am translating a set of R visualizations to Python. I have the following target R multiple plot histograms:
Using Matplotlib and Seaborn combination and with the help of a kind StackOverflow member (see the link: Python Seaborn Distplot Y value corresponding to a given X value), I was able to create the following Python plot:
I am satisfied with its appearance, except, I don't know how to put the Header information in the plots. Here is my Python code that creates the Python Charts
""" Program to draw the sampling histogram distributions """
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
def main():
    """ Main routine for the sampling histogram program.

    Reads 'lab_samples.csv' and 'hra_lookup.csv', draws one distribution
    subplot per HRA colour with the test markers overlaid on the KDE curve,
    and saves the current figure to 'plots.pdf'.
    """
    # NOTE(review): the nesting below was reconstructed from the statement
    # order (the original indentation was lost) -- confirm it against the
    # intended loop structure.
    sns.set_style('whitegrid')
    markers_list = ["s", "o", "*", "^", "+"]
    # create the data dataframe as df_orig
    df_orig = pd.read_csv('lab_samples.csv')
    df_orig = df_orig.loc[df_orig.hra != -9999]
    hra_list_unique = df_orig.hra.unique().tolist()
    # create and subset df_hra_colors to match the actual hra colors in df_orig
    df_hra_colors = pd.read_csv('hra_lookup.csv')
    df_hra_colors['hex'] = np.vectorize(rgb_to_hex)(df_hra_colors['red'], df_hra_colors['green'], df_hra_colors['blue'])
    df_hra_colors.drop(labels=['red', 'green', 'blue'], axis=1, inplace=True)
    df_hra_colors = df_hra_colors.loc[df_hra_colors['hra'].isin(hra_list_unique)]
    # hard coding the current_component to pc1 here, we will extend it by looping
    # through the list of components
    current_component = 'pc1'
    num_tests = 5
    df_columns = df_orig.columns.tolist()
    start_index = 5
    for _test in range(num_tests):
        current_tests_list = df_columns[start_index:(start_index + num_tests)]
        # now create the sns distplots for each HRA color and overlay the tests
        for i, (_, row) in enumerate(df_hra_colors.iterrows(), start=1):
            plt.subplot(3, 3, i)
            select_columns = ['hra', current_component] + current_tests_list
            df_current_color = df_orig.loc[df_orig['hra'] == row['hra'], select_columns]
            y_data = df_current_color.loc[df_current_color[current_component] != -9999, current_component]
            axs = sns.distplot(y_data, color=row['hex'],
                               hist_kws={"ec": "k"},
                               kde_kws={"color": "k", "lw": 0.5})
            # The first line on the axes is read back so that the markers
            # can be placed on it by interpolation.
            data_x, data_y = axs.lines[0].get_data()
            axs.text(0.0, 1.0, row['hra'], horizontalalignment="left", fontsize='x-small',
                     verticalalignment="top", transform=axs.transAxes)
            for current_test_index, current_test in enumerate(current_tests_list):
                # this_x defines the series of current_component(pc1,pc2,rhob) for this test
                # indicated by 1, corresponding R program calls this test_vector
                x_series = df_current_color.loc[df_current_color[current_test] == 1, current_component].tolist()
                for this_x in x_series:
                    this_y = np.interp(this_x, data_x, data_y)
                    # Each test is shifted down by 0.05 per index so that
                    # markers of different tests do not overlap.
                    axs.plot([this_x], [this_y - current_test_index * 0.05],
                             markers_list[current_test_index], markersize=3, color='black')
            axs.xaxis.label.set_visible(False)
            axs.xaxis.set_tick_params(labelsize=4)
            axs.yaxis.set_tick_params(labelsize=4)
        start_index = start_index + num_tests
    # plt.show()
    # The context manager closes the PDF even if savefig() raises.
    with PdfPages('plots.pdf') as pp:
        pp.savefig()
def rgb_to_hex(red, green, blue):
    """Return color as #rrggbb for the given color values.

    Each channel is written as lower-case hexadecimal, zero-padded to at
    least two digits.  Values are not clamped, so a channel above 255
    produces more than two digits.
    """
    return '#%02x%02x%02x' % (red, green, blue)
# Run the program only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
The Pandas code works fine and it is doing what it is supposed to. It is my lack of knowledge and experience of using 'PdfPages' in Matplotlib that is the bottleneck. How can I show the header information in Python/Matplotlib/Seaborn that I can show in the corresponding R visualization? By the header information, I mean what the R visualization has at the top before the histograms, i.e., 'pc1', MRP, XRD,....
I can get their values easily from my program, e.g., current_component is 'pc1', etc. But I don't know how to format the plots with the Header. Can someone provide some guidance?
You may be looking for a figure title or super title, fig.suptitle:
fig.suptitle('this is the figure title', fontsize=12)
In your case you can easily get the figure with plt.gcf(), so try
plt.gcf().suptitle("pc1")
The rest of the information in the header would be called a legend.
For the following let's suppose all subplots have the same markers. It would then suffice to create a legend for one of the subplots.
To create legend labels, you can pass the label argument to the plot call, i.e.
axs.plot( ... , label="MRP")
When later calling axs.legend() a legend will automatically be generated with the respective labels. Ways to position the legend are detailed e.g. in this answer.
Here, you may want to place the legend in terms of figure coordinates, i.e.
ax.legend(loc="lower center",bbox_to_anchor=(0.5,0.8),bbox_transform=plt.gcf().transFigure)
I would like to sequentially plot a series of x,y coordinates while marking specified coordinates distinctly. It seems that 'markevery' allows users to do this in matplotlib plots, however, when I provide this property in my animation, I receive the error 'ValueError: markevery is iterable but not a valid form of numpy fancy indexing'. Any thoughts?
My actual 'mark_on' array will be much longer, so I think that using a linecollection isn't reasonable here.
frames = 100  # NOTE(review): not referenced by the code shown in this snippet


def update_pos(num, data, line):
    """Show the first ``num`` entries along the last axis of ``data`` on ``line``.

    Returns a one-element tuple holding ``line``, the form a blitting
    animation callback is expected to return.
    """
    line.set_data(data[..., :num])
    return line,
def traj_ani(data):
    """Animate ``data`` point by point and save it as 'AgentTrajectory.mp4'.

    One frame is produced per entry along the second axis of ``data``.
    """
    fig_traj = plt.figure()
    # NOTE(review): the line starts empty, so every index in mark_on is out
    # of range for the first frames -- this is the call the reported
    # "markevery is iterable but not a valid form of numpy fancy indexing"
    # error relates to.
    l, = plt.plot([], [], 'b', markevery=mark_on, marker='*')
    plt.xlim(-90, 90)
    plt.ylim(-90, 90)
    pos_ani = animation.FuncAnimation(fig_traj, update_pos, frames=np.shape(data)[1], fargs=(data, l),
                                      interval=20, blit=True)
    pos_ani.save('AgentTrajectory.mp4')
# Indices of the points that should carry a marker.
mark_on = [20, 50, 100, 300, 600]
# Load the positions (the first row of the file is skipped, no header row is
# parsed) as a plain numpy array and animate them.
data = pd.read_csv('xy_pos.csv', header=None, skiprows=[0]).to_numpy()
traj_ani(data)
Thanks!
Here is a complete, mini example of an animation that works:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import csv
import pandas as pd
import numpy as np
# Look up the ffmpeg writer class and configure an instance of it.
# NOTE(review): `writer` is not passed to pos_ani.save(...) further down, so
# these settings do not appear to affect the saved file -- confirm intent.
Writer = animation.writers['ffmpeg']
writer = Writer(fps=2000, metadata=dict(artist='Me'), bitrate=1800)
def update_pos(num, data, line):
    """Show the first ``num`` entries along the last axis of ``data`` on ``line``.

    Returns a one-element tuple holding ``line``, the form a blitting
    animation callback is expected to return.
    """
    line.set_data(data[..., :num])
    return line,
def traj_ani(data):
    """Animate ``data`` over 25 frames and save it as 'AgentTrajectory.mp4'."""
    fig_traj = plt.figure()
    # Start from an empty line; update_pos fills it in frame by frame.
    l, = plt.plot([], [], 'b')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    pos_ani = animation.FuncAnimation(fig_traj, update_pos, frames=25, fargs=(data, l),
                                      interval=200, blit=True)
    pos_ani.save('AgentTrajectory.mp4')
# Random 2 x 25 array: row 0 is used as x, row 1 as y, one column per frame.
data = np.random.rand(2,25)
traj_ani(data)
In my full code, I would like to specify certain frames whose x-y coordinate should be marked with either a special character or by a different color.
It seems problematic to pass markevery a list of indices that contains indices not present in the plotted array. E.g. if the plotted array has 3 elements but the list set for markevery contains an index 5, a ValueError occurs.
The solution is to set the markevery list in every iteration and make sure it only contains valid indices.
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
# Indices of the points that should carry a marker once they are visible.
mark_on = np.array([2, 5, 6, 13, 17, 24])


def update_pos(num, data, line):
    """Show the first ``num`` points on ``line`` and mark the visible ones.

    Only the entries of ``mark_on`` below ``num`` are passed to
    ``set_markevery``, so the marker list never refers to a point that is
    not part of the line yet.
    """
    line.set_data(data[..., :num])
    visible_marks = mark_on[mark_on < num]
    line.set_markevery(visible_marks.tolist())
    return line,
def traj_ani(data):
    """Animate ``data`` over 25 frames and show it in a window."""
    fig_traj = plt.figure()
    # markevery starts empty because the line has no points yet; update_pos
    # sets the valid marker indices on every frame.
    l, = plt.plot([], [], 'b', markevery=[], marker='*', mfc="red", mec="red", ms=15)
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    # Keep the animation object bound to a name until show() returns.
    pos_ani = animation.FuncAnimation(fig_traj, update_pos, frames=25, fargs=(data, l),
                                      interval=200, blit=True)
    plt.show()
# Random 2 x 25 array: row 0 is used as x, row 1 as y, one column per frame.
data = np.random.rand(2,25)
traj_ani(data)
I have data saved via numpy's savetxt function and am extracting it to plot. When I plot it the script executes without errors but does not show the curves--only empty windows. This is strange because:
The same script makes a fine plot when I import .txt data from another file (also saved using savetxt).
If I create data points inside the script, e.g. with arange, it plots.
The .txt data is getting loaded--I have printed it to the screen.
I checked my backend and it is TkAgg, which the internet agrees it's supposed to be.
My code is
# this script makes the plots of the eigenvalue distributions for the AAS 17-225 paper
# import python modules
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
# set plot options
mpl.rcParams['xtick.major.size'] = 7
mpl.rcParams['xtick.major.width'] = 3.0
mpl.rcParams['ytick.major.size'] = 7
mpl.rcParams['ytick.major.width'] = 3.0
mpl.rcParams['axes.linewidth'] = 3.5
plt.rc('text', usetex=True)
# The preamble is given as one string; current Matplotlib releases no longer
# accept a list here.
mpl.rcParams['text.latex.preamble'] = r"\usepackage{amsmath}"
plt.rc('font', family='serif')
plt.rc('axes', labelsize=24)
plt.rc('xtick', labelsize=24)
plt.rc('ytick', labelsize=24)
plt.rc('font', weight='bold')
plt.rc('axes', titlesize=20)
# plot method arguments
lw = 2  # linewidth
left_adj = 0.055  # left adjustment
right_adj = 0.985  # right adjustment
top_adj = 0.975  # top adjustment
bottom_adj = 0.075  # bottom adjustment
wspace = 0.205  # horizontal space between plots
hspace = 0.2  # vertical space between plots
n_suplot_rows = 2  # number of subplot rows
n_suplot_columns = 3  # number of subplot columns
# load data
dataDir = '/mnt/E0BA55A7BA557B4C/research/independent/recursivequats/paperCode/'
# NOTE(review): the first name uses "0p1" where the others use "0.1" --
# confirm it matches the file on disk.
data_files = [
    'lamda_0p1_0p1.txt',
    'lamda_0.1_0.5.txt',
    'lamda_0.1_1.0.txt',
    'lamda_0.5_0.5.txt',
    'lamda_0.5_1.0.txt',
    'lamda_1.0_1.0.txt',
]
profiles = [np.loadtxt(dataDir + file_name) for file_name in data_files]

# One subplot per profile, with column 1 on the x axis and column 0 on the
# y axis.  Building the axes in a loop gives every profile its own subplot;
# the sixth profile used to be drawn on the fifth axes, leaving the last
# subplot empty.
fig = plt.figure()
axes = []
for position, profile in enumerate(profiles, start=1):
    ax = fig.add_subplot(n_suplot_rows, n_suplot_columns, position)
    ax.plot(profile[:, 1], profile[:, 0], linewidth=lw)
    axes.append(ax)
plt.subplots_adjust(left=left_adj, right=right_adj, top=top_adj, bottom=bottom_adj,
                    wspace=wspace, hspace=hspace)
plt.show()
well, a bit more digging and the problem has been identified. The script is plotting, but the zoom on the plots is so poor that they are obscured by the thick lines on the border. So the problem was a user error.
This is why engineers shouldn't try to be artists...