I have 50 csv files. I use "for loop" to get the dataframe. Now I want plot these 50 figures seperately. 6 subplots in 1 plot. How can I get this? Thanks a lot.
path = 'E:/XXXX/'
files = os.listdir(path)
files_csv = list(filter(lambda x: x[-4:]=='.csv' , files))
for file1 in files_csv:
tmp1=pd.read_csv(path + file1)
my data is like below:
df = pd.DataFrame({'date': [20121231,20130102, 20130105, 20130106, 20130107, 20130108],'price': [25, 163, 235, 36, 40, 82]})
You can create a figure for each frame and use matplotlib.pyplot.subplot function to plot your 6 different plots. Help yourself with the example bellow. Hope this helps.
from math import pi
import numpy as np
import matplotlib.pyplot as plt
x1 = np.linspace(-2*pi, 2*pi, 50)
y1 = np.cos(x1)
x2 = np.linspace(-pi, pi, 50)
y2 = np.cos(x2)
plt.figure()
plt.grid(True)
plt.title('your title ' )
plt.subplot(121)
plt.plot(x1, y1, 'r', label= 'y1 = cos(x1)')
plt.legend(loc=1)
plt.subplot(122)
plt.plot(x2, y2, 'b', label = 'y2 = cos(x2)')
plt.legend(loc=1)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
x1 = np.linspace(-1, 1, 50)
howmanyrowsyouwant = 1 # how many times 6 subplots you want
for x in range(howmanyrowsyouwant):
_, ax = plt.subplots(nrows=1, ncols=6, figsize=(24,4))
ax[0].set_title('title of first')
ax[0].plot(x1) # plot for first subplot
ax[1].set_title('title of second')
ax[1].plot(x1) # plot for second subplot
ax[2].set_title('title of third')
ax[2].plot(x1) # plot for third subplot
ax[3].set_title('title of fourth')
ax[3].plot(x1) # plot for fourth subplot
ax[4].set_title('title of fifth')
ax[4].plot(x1) # plot for fifth subplot
ax[5].set_title('title of sixth')
ax[5].plot(x1) # plot for sixth subplot
This produces six subplots in a row, as many times as you specify.
Related
I have a Dataframe with 6 rows of data and 4 columns. Is there any way to generate a gif scatterplot (y which are the 4 columns in different color versus x which are the index rows) plot in which in every frame of the gif, first data point of the Column 1 and its first respective row data is plotted in different color versus the shared x axis which are the indexes, at the same time, column 2, 3 and 4 first data points are plotted, and this goes progressively until the last 6th point is plotted for all of the columns? If a gif is not possible at all, is there any other way to generate at least movie so that I can include in my ppt slide? I appreciate any feedback you might have! The error I am getting is generating an empty plot and saying: TypeError: cannot unpack non-iterable AxesSubplot object. But I am not sure if this is preventing the result from the plotting.
This is a sample of my data and code effort:
import pandas as pd
import numpy as np
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import random
from itertools import count
from IPython import display
row_data = np.arange(0, 6)
column_X = np.random.rand(6,)
column_Y = np.random.rand(6,)
column_Z = np.random.rand(6,)
column_K = np.random.rand(6,)
my_df = pd.DataFrame()
my_df['column_X'] = column_X
my_df['column_Y'] = column_Y
my_df['column_Z'] = column_Z
my_df['column_K'] = column_K
my_df.index = row_data
my_df['index'] = row_data
def animate(j):
fig, ax = plt.subplot(sharex= True)
ax[1]=my_df['column_X', color = 'blue']
ax[2]=my_df['column_Y', color = 'red']
ax[3]=my_df['column_Z', color = 'brown']
ax[4]=my_df['column_K', color = 'green']
y=my_df['index']
x.append()
y.append()
plt.xlabel(color = 'blue')
plt.ylabel(color = 'red')
ax.set_ylabel("progressive sales through time")
ax.set_xlabel("progressive time")
plt.plot(x,y)
animation_1 = animation.FuncAnimation(plt.gcf(),animate,interval=1000)
plt.show()
# Inside Jupyter:
video_1 = animation_1.to_html5_video()
html_code_1 = display.HTML(video_1)
display.display(html_code_1)
plt.tight_layout()
plt.show()
Good question! matplotlib animations can be tricky. I struggled a bit with this one, mainly because you want different colors for the different columns. You need 4 different Line2D objects to do this.
# VSCode notebook magic
%matplotlib widget
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
my_df = pd.DataFrame()
my_df["column_X"] = np.random.rand(6)
my_df["column_Y"] = np.random.rand(6)
my_df["column_Z"] = np.random.rand(6)
my_df["column_K"] = np.random.rand(6)
fig, ax = plt.subplots()
# four y-data lists, x-data is shared
xdata, y1, y2, y3, y4 = [], [], [], [], []
# four Line3D objects with different colors
graph1, = ax.plot([], [], 'ro-')
graph2, = ax.plot([], [], 'go-')
graph3, = ax.plot([], [], 'bo-')
graph4, = ax.plot([], [], 'ko-')
# set up the plot
plt.xlim(-1, 6)
plt.xlabel('Time')
plt.ylim(0, 1)
plt.ylabel('Price')
# animation function
def animate(i):
xdata.append(i)
y1.append(my_df.iloc[i,0])
y2.append(my_df.iloc[i,1])
y3.append(my_df.iloc[i,2])
y4.append(my_df.iloc[i,3])
graph1.set_data(xdata, y1)
graph2.set_data(xdata, y2)
graph3.set_data(xdata, y3)
graph4.set_data(xdata, y4)
return (graph1,graph2,graph3,graph4,)
anim = animation.FuncAnimation(fig, animate, frames=6, interval=500, blit=True)
anim.save('test.mp4')
#plt.show()
Here's the resulting .gif (converted from .mp4 using Adobe Express):
Using the answers to this question I could generate the dist plot as I needed. However, when I want to apply the same solution to multiple plots, it doesn't seem to work as expected. I am seeking for proposed solutions. Here is what I am trying to do:
import seaborn as sns, numpy as np
from scipy import stats
import matplotlib.pyplot as plt
plt.figure(figsize=(20,10))
sns.set(); np.random.seed(0)
data01 = np.random.normal(10, 5, 1000)
data02 = np.random.normal(20, 5, 1000)
ax1 = sns.distplot(data01, color = 'blue', kde = True)
x1 = ax1.lines[0].get_xdata()
y1 = ax1.lines[0].get_ydata()
plt.axvline(x1[np.argmax(y1)], color='blue')
ax2 = sns.distplot(data02, color = 'red', kde = True)
x2 = ax2.lines[0].get_xdata()
y2 = ax2.lines[0].get_ydata()
plt.axvline(x2[np.argmax(y2)], color='red')
plt.legend()
Here is what I get, which is not what I expected (two vertical lines, one for each):
You need to use the correct index: Index 0 is for the blue kde, index 1 is for the blue vertical line, index 2 is for the red kde.
Intuitively, as the name suggests ax2.lines gives you the collection of all the lines on the current figure. When you plot distplot with kde=True the second time, you already have 2 lines (previous kde and vertical line) so the index of the second kde is 2 because indexing starts from 0 in python. This is because you are working with the same figure object so the artists plotted from ax1 will also be carried over to ax2. On the contrary, if you were to have individual subplots, then you would have used the same index 0 for both
ax1 = sns.distplot(data01, color = 'blue', kde = True)
x1 = ax1.lines[0].get_xdata()
y1 = ax1.lines[0].get_ydata()
plt.axvline(x1[np.argmax(y1)], color='blue')
ax2 = sns.distplot(data02, color = 'red', kde = True)
x2 = ax2.lines[2].get_xdata() # <--- Use correct index 2 here
y2 = ax2.lines[2].get_ydata() # <--- Use correct index 2 here
plt.axvline(x2[np.argmax(y2)], color='red')
plt.legend()
I'm trying to plot a CDF from multiple simulation runs using Seaborn. I created a very simple code to emulate my results:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df1 = pd.DataFrame({'A':np.random.randint(0, 100, 1000)})
df2 = pd.DataFrame({'A':np.random.randint(0, 100, 1000)})
df3 = pd.DataFrame({'A':np.random.randint(0, 100, 1000)})
f, ax = plt.subplots(figsize=(8, 8))
ax = sns.kdeplot(df1['A'], cumulative=True)
ax = sns.kdeplot(df2['A'], cumulative=True)
ax = sns.kdeplot(df3['A'], cumulative=True)
plt.show()
The code above creates the following plot:
But, since the three lines are results from the same simulation with different seeds, I'd like to "merge" the three lines into one and add a shaded area around the line, representing min and max or the std of the three different runs.
How can this be accomplished in Seaborn?
You may use fill_between to fill between two curves. Now here the problem is that the kde support would be different for the three curves. Obtaining a common kde support will require to calculate the cdf manually. This could be done as follows.
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
def cdf(data, limits="auto", npoints=600):
kde = stats.gaussian_kde(data)
bw = kde.factor
if limits == "auto":
limits = (data.min(), data.max())
limits = (limits[0]-bw*np.diff(limits)[0],
limits[1]+bw*np.diff(limits)[0])
x = np.linspace(limits[0], limits[1], npoints)
y = [kde.integrate_box(x[0],x[i]) for i in range(len(x))]
return x, np.array(y)
d1 = np.random.randint(14, 86, 1000)
d2 = np.random.randint(10, 100, 1000)
d3 = np.random.randint(0, 90, 1000)
mini = np.min((d1.min(), d2.min(), d3.min()))
maxi = np.max((d1.max(), d2.max(), d3.max()))
x1,y1 = cdf(d1, limits=(mini, maxi))
x2,y2 = cdf(d2, limits=(mini, maxi))
x3,y3 = cdf(d3, limits=(mini, maxi))
y = np.column_stack((y1, y2, y3))
ymin = np.min(y, axis=1)
ymax = np.max(y, axis=1)
f, ax = plt.subplots()
ax.plot(x1,y1)
ax.plot(x2,y2)
ax.plot(x3,y3)
ax.fill_between(x1, ymin, ymax, color="turquoise", alpha=0.4, zorder=0)
plt.show()
I am having a set of different times series which can be grouped. E.g. the plot below shows series A, B, C and D. However, A and B are in group G1 and C and D are in group G2.
I would like to reflect that in the plot by adding another axis on the left which goes across groups of turbines and label thes axis accordingly.
I've tried a few thing so far but apparently that one's not so easy.
Does some body know how I can do that?
PS: Since I am using panda's plot(subplots=True) on a data frame which has already columns
| G1 | G2 |
|-------|------|
index | A B | C D |
------|-------|------|
it might be that pandas can do that already for me. That's why I am using the pandas tag.
You can create additional axes in the plot, which span each two plots but only have a left y-axis, no ticks and other decorations. Only a ylabel is set. This will make the whole thing look well aligned.
The good thing is that you can work with your existing pandas plot. The drawback is that is more than 15 lines of code.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
df = pd.DataFrame(np.random.rand(26,4), columns=list("ABCD"))
axes = df.plot(subplots=True)
fig = axes[0].figure
gs = gridspec.GridSpec(4,2)
gs.update(left=0.1, right=0.48, wspace=0.05)
fig.subplots_adjust(left=.2)
for i, ax in enumerate(axes):
ax.set_subplotspec(gs[i,1])
aux1 = fig.add_subplot(gs[:2,0])
aux2 = fig.add_subplot(gs[2:,0])
aux1.set_ylabel("G1")
aux2.set_ylabel("G2")
for ax in [aux1, aux2]:
ax.tick_params(size=0)
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_facecolor("none")
for pos in ["right", "top", "bottom"]:
ax.spines[pos].set_visible(False)
ax.spines["left"].set_linewidth(3)
ax.spines["left"].set_color("crimson")
plt.show()
Here is an example I came up with. Since you did not provide your code, I did it without pandas, because I am not proficient with it.
You basically plot as one would and then create another axis around all your previous ones, remove its axis with ax5.axis('off') and plot the 2 lines and text on it.
from matplotlib import lines
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(0, 4*np.pi, 100)
y1 = np.sin(x)
y2 = np.cos(x)
y3 = np.tan(x)
y4 = np.cos(x)/(x+1)
fig = plt.figure()
fig.subplots_adjust(hspace=.5)
ax1 = plt.subplot(411)
ax1.plot(x, y1)
ax2 = plt.subplot(412)
ax2.plot(x, y2)
ax3 = plt.subplot(413)
ax3.plot(x, y3)
ax4 = plt.subplot(414)
ax4.plot(x, y4)
# new axis around the others with 0-1 limits
ax5 = plt.axes([0, 0, 1, 1])
ax5.axis('off')
line_x1, line_y1 = np.array([[0.05, 0.05], [0.05, 0.5]])
line1 = lines.Line2D(line_x1, line_y1, lw=2., color='k')
ax5.add_line(line1)
line_x2, line_y2 = np.array([[0.05, 0.05], [0.55, 0.9]])
line2 = lines.Line2D(line_x2, line_y2, lw=2., color='k')
ax5.add_line(line2)
ax5.text(0.0, 0.75, "G1")
ax5.text(0.0, 0.25, "G2")
plt.show()
Inspired by How to draw a line outside of an axis in matplotlib (in figure coordinates)?
I would like to generate labels inside the areas of a matplotlib stackplot. I would settle for labeling a line used to bound the area. Consider the example:
import numpy as np
from matplotlib import pyplot as plt
fnx = lambda : np.random.randint(5, 50, 10)
x = np.arange(10)
y1, y2, y3 = fnx(), fnx(), fnx()
areaLabels=['area1','area2','area3']
fig, ax = plt.subplots()
ax.stackplot(x, y1, y2, y3)
plt.show()
This produces:
But I would like to produce something like this:
The matplotlib contour plots have this type of labeling functionality (though the lines are labeled in the case of the contour plot).
Any help (or even redirection to a post I might have missed) is appreciated.
Ah, heuristics. Something like this?:
import numpy as np
from matplotlib import pyplot as plt
length = 10
fnx = lambda : np.random.randint(5, 50, length)
x = np.arange(length)
y1, y2, y3 = fnx(), fnx(), fnx()
areaLabels=['area1','area2','area3']
fig, ax = plt.subplots()
ax.stackplot(x, y1, y2, y3)
loc = y1.argmax()
ax.text(loc, y1[loc]*0.25, areaLabels[0])
loc = y2.argmax()
ax.text(loc, y1[loc] + y2[loc]*0.33, areaLabels[1])
loc = y3.argmax()
ax.text(loc, y1[loc] + y2[loc] + y3[loc]*0.75, areaLabels[2])
plt.show()
which in test runs is okayish:
Finding the best loc could be fancier -- maybe one wants the x_n, x_(n+1) with the highest average value.