how to animate a scatterplot of pandas data with matplotlib

how to animate a scatterplot of pandas data with matplotlib - python

I'm currently trying to animate a scatterplot of monthly data saved in a pandas dataframe. So far I made loop, which generates one single plot after another. Now I would like to join them in a single gif (or mp4 I don't care). Is there an easy way to make use of mathplotlibs animation function? I can't get my head around, how to loop sliced data through FuncAnimation. So far I did this:
time = df.monat.unique()
for i in time:
dft = df[(df.monat == i) & (df.xcol < 4000)]
plt.scatter(x=dft['xcol'],
y=dft['ycol'],
s=dft['scol'] / 25,
c=dft['clr'],
linewidth=0,
alpha=0.8)
plt.title('Title ' + str(i), fontsize=10)
plt.xlabel('x label', fontsize=9)
plt.ylabel('y label', fontsize=9)
legend1_line2d = list()
for val in clrdict.values():
legend1_line2d.append(mlines.Line2D([0], [0],
linestyle='none',
marker='o',
alpha=0.6,
markersize=6,
markeredgecolor=None,
markeredgewidth=0,
markerfacecolor=val))
legend1 = plt.legend(legend1_line2d,
names,
frameon=False,
numpoints=1,
fontsize=8,
loc='upper right')
plt.show()

I figured it out by myself:
Generate an empty plot (fig). Like before all unique time-values are stored in a series(time). A simple counter (i) helps generating the correct slice of the data (dft) for each month (df.monat == a value from the series 'time') within the update-function. The update-function is called times the value of the frame-parameter in the anim.FuncAnimation (frames=len(time)).
Hope, this will be helpful for somebody else (most of the explanations for the matplotlib FuncAnimation I've found worked with random numbers - not with specific pandas columns):
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.animation as anim
...
time = df.monat.unique()
fig = plt.figure()
i = 1
def update(i):
plt.clf()
dft = df[(df.monat == time[i]) & (df.xcol < 4000)]
plt.scatter(x=dft['xcol'],
y=dft['ycol'],
s=dft['scol'] / 25,
c=dft['clr'],
linewidth=0,
alpha=0.8)
plt.title('Title ' + str(time[i]), fontsize=10)
plt.xlabel('x label', fontsize=9)
plt.ylabel('y label', fontsize=9)
plt.xlim(0, 900) # fixed dimensions x
plt.ylim(-5, 100) # fixed dimensions y
legend1_line2d = list()
for val in clrdict.values():
legend1_line2d.append(mlines.Line2D([0], [0],
linestyle='none',
marker='o',
alpha=0.6,
markersize=6,
markeredgecolor=None,
markeredgewidth=0,
markerfacecolor=val))
legend1 = plt.legend(legend1_line2d,
names,
frameon=False,
numpoints=1,
fontsize=8,
loc='upper right')
i += 1
ani = anim.FuncAnimation(fig, update, frames=len(time), interval=500)
# plt.show() # this will show the ani over and over
ani.save("test.mp4", dpi=200, fps=1, codec="libx264", bitrate=5000, extra_args=['-pix_fmt', 'yuv420p'])

Related

Plot two legends of own calculated values

I have a problem. I want to show mean and median inside my plot. But unfortunately it overwrites it (see the second code snippet). When I do plt.legend([...,...] the output is not what I want. How can I show the mean and median as linie and plot the legend of them both with the correct color and linestyle?
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from numpy import random
#d = {'distance_km': [1, 100, 12, 14],}
d = {'distance_km': random.randint(100, size=(20)),}
df_calculation = pd.DataFrame(data=d)
sns.set_style("white")
# Calculation of meand and median
mean=df_calculation["distance_km"].mean()
median = df_calculation["distance_km"].median()
plt.figure(figsize=(11,5))
ax = sns.distplot(df_calculation["distance_km"],color='lightblue')
ax.axvline(mean, color='r', linestyle='--')
ax.axvline(median, color='green', linestyle='--')
plt.legend([{f'Mean = {round(df_calculation["distance_km"].mean(), 2)}':mean}, {f'Median = {round(df_calculation["distance_km"].mean(), 2)}':median}])
plt.box(False)
plt.xlabel("\n Km", fontsize = 12)
plt.ylabel("", fontsize = 12)
plt.show()
Here it works bot it would be overwritten but the color is not correct
#Plotting Distribution Drugs Numbers per Condition
#plt.style.use('seaborn-whitegrid')
mean=df_calculation["distance_km"].mean()
median = df_calculation["distance_km"].median()
plt.figure(figsize=(11,5))
sns.set_style("white")
ax = sns.distplot(df_calculation["distance_km"],color='lightblue')
ax.axvline(mean, color='r', linestyle='--')
plt.legend({f'Mean = {round(df_calculation["distance_km"].mean(), 2)}':mean})
ax.axvline(median, color='green', linestyle='--')
plt.legend({f'Median = {round(df_calculation["distance_km"].median(), 2)}':median})
plt.box(False)
plt.xlabel("\n Kilometer", fontsize = 12)
plt.ylabel("", fontsize = 12)
plt.show()

Use the label parameter inside ax.axvline. E.g. in your second code block, instead of:
ax.axvline(mean, color='r', linestyle='--')
plt.legend({f'Mean = {round(df_calculation["distance_km"].mean(), 2)}':mean})
ax.axvline(median, color='green', linestyle='--')
plt.legend({f'Median = {round(df_calculation["distance_km"].median(), 2)}':median})
write:
ax.axvline(mean, color='r', linestyle='--', label=f'mean: {mean}')
ax.axvline(median, color='green', linestyle='--', label=f'median {median}')
plt.legend()
Result:

Create separate distplot from countplot

How can I create distplot from countplot
plt.rcdefaults()
%config InlineBackend.figure_format='retina'
sns.set_style('darkgrid')
ax = sns.countplot(x='Age',hue='Gender',data=df,edgecolor="None")
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
x = rect.get_x() + rect.get_width()/2.
y = rect.get_height()
try:
ax.annotate("{}".format(int(y)), (x,y), ha='center', va='bottom', clip_on=True)
except:
pass
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato',weight='bold')
plt.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.savefig('files\\Countplot_for_Age(Gender).jpg')
I want distplot for 2 Genders either in same plot or separately
Any suggestions or help will be highly appreciable

The x-axis of a countplot is categorical: it puts one bar for each encountered age, skipping bars when there are no rows for a certain age (21 and 23 in the example). Internally the bars are numbered as 0, 1, 2, ...
The y-axis is the count, which is proportional to the number of rows.
For a distplot, the x-axis are the ages themselves, and the y-axis is a probability distribution, which usually are quite small numbers (the area under the curve is normalized to be 1).
So, as both the x-axis and the y-axis are different, it is better to use separate subplots.
A distplot can be generated directly from the given data. Passing the same ax results in two distplots in the same subplot. A distplot is a combination of a histogram and a kdeplot. If the histogram isn't needed, hist=False leaves
it out, or the kdeplot can be called directly. The shade=True option adds shading to the plot.
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
NF = 50
NM = 10
df = pd.DataFrame({'Age': np.concatenate([np.random.randint(13, 20, NF) + np.random.randint(2, 7, NF),
np.random.randint(15, 23, NM)]),
'Gender': np.repeat(['female', 'male'], (NF, NM))})
df['Age'] = df['Age'].where((df['Age'] != 21) & (df['Age'] != 23), 20)
sns.set_style('darkgrid')
fig, axs = plt.subplots(ncols=2, figsize=(12, 4))
ax = sns.countplot(x='Age', hue='Gender', data=df, edgecolor="None", ax=axs[0])
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
x = rect.get_x() + rect.get_width() / 2.
y = rect.get_height()
ax.annotate(f"{y:.0f}", (x, y), ha='center', va='bottom', clip_on=True)
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato', weight='bold')
ax.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
for gender in ('female', 'male'):
# ax2 = sns.kdeplot(df[df['Gender'] == gender]['Age'], shade=True, ax=axs[1], label=gender)
ax2 = sns.distplot(df[df['Gender'] == gender]['Age'], hist=False, kde_kws={'shade': True}, ax=axs[1], label=gender)
ax2.set_axisbelow(True)
ax2.set_xlabel('Age', color='green')
ax2.set_ylabel('probability distribution', color='green')
ax2.set_title('Distplot for Age(Gender)', color='tomato', weight='bold')
ax2.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.show()

Python - Pyplot x-axis not showing on graph

pyplot is not showing the x-axis on the graph:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('sitka_weather_2014.csv')
df['AKST'] = pd.to_datetime(df.AKST)
df['Dates'] = df['AKST'].dt.strftime('%b %d, %Y')
df.set_index("Dates", inplace= True)
# Plot Data
fig = plt.figure(dpi=256, figsize=(14, 7))
plt.title("Daily high and low temperature - 2014")
df['Max TemperatureF'].plot(linewidth=1, c='blue', label="Max Temperature °F")
df['Min TemperatureF'].plot(linewidth=1, c='red', label="Min Temperature °F")
plt.grid(True)
plt.rc('grid', linestyle=":", linewidth=1, color='gray')
plt.legend(loc='upper left')
plt.xlabel('', fontsize=10)
plt.ylabel("Temperature (°F)", fontsize=10)
plt.tick_params(axis='both', which='major', labelsize=10)
fig.autofmt_xdate(rotation=45)
plt.show()
The x-axis should be the index of the Pandas Dataframe (df) containing the dates.

Your code is actually fine. I tried to run it with the necessary sitka_weather_2014.csv file and it works.
The problem is that you can't see the x-axis because the size of the figure is too big, and thus the description of the x-axis dissapears. Try to scale your figure e.g. by making the dpi smaller:
fig = plt.figure(dpi=100, figsize=(14, 7)) #dpi=100 instead of dpi=256
Or make the labelsize smaller:
plt.tick_params(axis='both', which='major', labelsize=5) #labelsize=5 instead of labelsize=10
Whatever works best for you. But the code is fine and the description of the x-axis is showing.

You have your xlabel value set to null:
plt.xlabel('', fontsize=10)

How do I animate this graph so that the dot moves and the green line plots over a different range for each element in loop?

I am trying to make this animated so that the dot and the green line move due to the for loop. This code displays 3 different graphs one below the other. The middle graph has no animation section.
x =lag_range
count = 0
plt.ion()
fig, ax = plt.subplots()
for b in x:
plt.subplot(311)
plt.plot(x,pear_corr, color='b', linewidth=1.5, label ='Pearson')
plt.plot(x,spear_corr, color ='r', linewidth=1.5, label='Spearman')
plt.plot(x[count],pear_corr[count],'yo')
plt.legend()
axes = plt.gca()
plt.ylabel('Correlation coefficients')
plt.xlabel('Lag times /days')
axes.set_xlim([min(lag_list),last])
axes.set_ylim(-1,1)
plt.subplot(312)
plt.plot(x,pear_p_values, color='b', linewidth=1.5)
plt.plot(x,spear_p_values, color ='r', linewidth=1.5)
axes = plt.gca()
plt.ylabel('P values')
plt.xlabel('Lag times /days')
axes.set_xlim([min(lag_list),last])
plt.subplot(313)
ax1 = plt.subplot(313)
x_for_p = range(len(x_prices))
ax1.plot(x_for_p, x_prices, color ='grey', linewidth=1.5)
ax1.set_ylabel('Share price', color ='grey')
ax1.tick_params('y', colors='grey')
ax1.set_xlabel('Days')
axes = plt.gca()
axes.set_xlim([min(lag_list),(2*last)])
ax2 = ax1.twinx()
x_for_den = range(b,(b+len(x_prices)))
ax2.plot(x_for_den, y_planes, color='g', linewidth=1.5)
ax2.set_ylabel('Plane density', color='g')
ax2.tick_params('y', colors='g')
count += 1
plt.pause(2)
plt.draw()
cross_corr2_vis(prices, density_p3)

If you could share a working code or just definitions of variables pear_corr, spear_corr, etc., the following code might have not resulted in this simple animation:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
N_points = 1000
x = np.linspace(0,2.*np.pi,N_points)
y = np.sin(x)
fig, ax = plt.subplots()
ax.set_xlim([0,2.*np.pi])
ax.set_ylim([-1,1])
line, = ax.plot( [],[], lw=2, color='g')
sctr = ax.scatter([],[], s=100, color='r')
def animate(i):
line.set_ydata(y[:i+1]) # update
line.set_xdata(x[:i+1])
sctr.set_offsets((x[i],y[i]))
return line,sctr
ani = animation.FuncAnimation(fig, animate, N_points, interval=5, blit=True)
plt.show()

Matplotlib way to annotate bar plots with lines and figures [duplicate]

This question already has answers here:
Annotate bars with values on Pandas bar plots
(4 answers)
Closed 1 year ago.
I would like to create an annotation to a bar chart that compares the value of the bar to two reference values. An overlay such as shown in the picture, a kind of staff gauge, is possible, but I'm open to more elegant solutions.
The bar chart is generated with the pandas API to matplotlib (e.g. data.plot(kind="bar")), so a plus would be if the solution is playing nicely with that.

You may use smaller bars for the target and benchmark indicators. Pandas cannot annotate bars automatically, but you can simply loop over the values and use matplotlib's pyplot.annotate instead.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
a = np.random.randint(5,15, size=5)
t = (a+np.random.normal(size=len(a))*2).round(2)
b = (a+np.random.normal(size=len(a))*2).round(2)
df = pd.DataFrame({"a":a, "t":t, "b":b})
fig, ax = plt.subplots()
df["a"].plot(kind='bar', ax=ax, legend=True)
df["b"].plot(kind='bar', position=0., width=0.1, color="lightblue",legend=True, ax=ax)
df["t"].plot(kind='bar', position=1., width=0.1, color="purple", legend=True, ax=ax)
for i, rows in df.iterrows():
plt.annotate(rows["a"], xy=(i, rows["a"]), rotation=0, color="C0")
plt.annotate(rows["b"], xy=(i+0.1, rows["b"]), color="lightblue", rotation=+20, ha="left")
plt.annotate(rows["t"], xy=(i-0.1, rows["t"]), color="purple", rotation=-20, ha="right")
ax.set_xlim(-1,len(df))
plt.show()

There's no direct way to annotate a bar plot (as far as I am aware) Some time ago I needed to annotate one so I wrote this, perhaps you can adapt it to your needs.
import matplotlib.pyplot as plt
import numpy as np
ax = plt.subplot(111)
ax.set_xlim(-0.2, 3.2)
ax.grid(b=True, which='major', color='k', linestyle=':', lw=.5, zorder=1)
# x,y data
x = np.arange(4)
y = np.array([5, 12, 3, 7])
# Define upper y limit leaving space for the text above the bars.
up = max(y) * .03
ax.set_ylim(0, max(y) + 3 * up)
ax.bar(x, y, align='center', width=0.2, color='g', zorder=4)
# Add text to bars
for xi, yi, l in zip(*[x, y, list(map(str, y))]):
ax.text(xi - len(l) * .02, yi + up, l,
bbox=dict(facecolor='w', edgecolor='w', alpha=.5))
ax.set_xticks(x)
ax.set_xticklabels(['text1', 'text2', 'text3', 'text4'])
ax.tick_params(axis='x', which='major', labelsize=12)
plt.show()

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

how to animate a scatterplot of pandas data with matplotlib - python

Related

Plot two legends of own calculated values

Create separate distplot from countplot

Python - Pyplot x-axis not showing on graph

How do I animate this graph so that the dot moves and the green line plots over a different range for each element in loop?

Matplotlib way to annotate bar plots with lines and figures [duplicate]

Categories

Resources