I'm creating a chart with matplotlib, here is my code:
fig = plt.figure(facecolor='#131722',dpi=155, figsize=(8, 4))
ax1 = plt.subplot2grid((1,2), (0,0), facecolor='#131722')
Colors = [['#0400ff', '#FF0000'], ['#09ff00', '#ff8c00']]
for x in List:
Index = List.index(x)
rate_buy = []
total_buy = []
for y in x['data']['bids']:
rate_buy.append(y[0])
total_buy.append(y[1])
rBuys = pd.DataFrame({'buy': rate_buy})
tBuys = pd.DataFrame({'total': total_buy})
ax1.plot(rBuys.buy, tBuys.total, color=Colors[Index][0], linewidth=0.5, alpha=0.8)
ax1.fill_between(rBuys.buy, 0, tBuys.total, facecolor=Colors[Index][0], alpha=1)
And here is the output:
The problem with the current output is that the colors of the two areas are "merging": basically the area BELOW the blue line should be blue, but instead it's green. How can i set it to be blue, for example, like in my example?
Example List data:
[[9665, 0.07062500000000001], [9666, 0.943708], [9667, 5.683787000000001], [9668, 9.802289], [9669, 11.763305], [9670, 14.286004], [9671, 16.180122], [9672, 23.316723000000003], [9673, 30.915156000000003], [9674, 33.44226200000001], [9675, 36.14526200000001], [9676, 45.76024100000001], [9677, 51.85294700000001], [9678, 58.79529300000001], [9679, 59.05322900000001], [9680, 60.27704500000001], [9681, 60.743885000000006], [9682, 66.75103700000001], [9683, 71.86412600000001], [9684, 73.659636], [9685, 78.08502800000001], [9686, 78.19614200000001], [9687, 79.98396400000001], [9688, 90.55855800000002]]
I guess the hint of #JohanC is correct, you are plotting in the wrong order and overlay your previous plots with new ones.
I tried to recreate a small example where total_buy1 > total_buy0, so in order to get the desired result you first have to plot total_buy1
and then total_buy0:
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
Colors = [['#0400ff', '#FF0000'],
['#09ff00', '#ff8c00']]
n = 100
rate_buy = np.linspace(0, 1000, 100)
total_buy0 = np.linspace(0, 300, n)[::-1] + np.random.normal(scale=10, size=n)
total_buy1 = np.linspace(0, 600, n)[::-1] + np.random.normal(scale=10, size=n)
ax.plot(rate_buy, total_buy1, color=Colors[1][1], linewidth=0.5, alpha=0.8)
ax.fill_between(rate_buy, 0, total_buy1, facecolor=Colors[1][0], alpha=1)
ax.plot(rate_buy, total_buy0, color=Colors[0][1], linewidth=0.5, alpha=0.8)
ax.fill_between(rate_buy, 0, total_buy0, facecolor=Colors[0][0], alpha=1)
I noticed that you use Colors[Index][0] for both plotting calls, so the line and the area will not have different colors.
Related
I have a scatter plot which I'd like to place by another scatter plot, however they have a dynamic marker size.
The green triangles (x, y) are calculated from the original scatter and they're close but not perfect (just from trial and error).
import pandas as pd
from mplsoccer import Pitch, VerticalPitch
data = [['JA', 35, 60, 2000], ['RN', 20, 47, 1500], ['GG', 10, 32, 1000]]
df = pd.DataFrame(data, columns=['Name', 'x', 'y', 'marker_size'])
#This is calculated from x or y length divided by marker size of biggest marker,
# divide by 2 for the radius, but the marker sizes seem to be non-linear.
df['xDiff'] = df['marker_size'] * ((7.3/2000) / 2)
df['yDiff'] = df['marker_size'] * ((11.3/2000) / 2)
df['leftArrowX'] = df['x'] - df['xDiff']
df['leftArrowY'] = df['y']
df['rightArrowX'] = df['x'] + df['xDiff']
df['rightArrowY'] = df['y']
df['downArrowY'] = df['y'] - df['yDiff']
df['downArrowX'] = df['x']
df['upArrowY'] = df['y'] + df['yDiff']
df['upArrowX'] = df['x']
pitch = Pitch(pitch_type='opta', pitch_color='#202428', line_color='#F2F2F2', linewidth=2)
fig, ax = pitch.draw(figsize=(16, 10))
players = pitch.scatter(df.x, df.y, s=df.marker_size, marker='8', color='orange', edgecolors='black', linewidth=1, alpha=1, ax=ax)
leftArrows = pitch.scatter(df.leftArrowX, df.leftArrowY, s=100, marker='<', color='lightgreen', alpha=1, ax=ax)
rightArrows = pitch.scatter(df.rightArrowX, df.rightArrowY, s=100, marker='>', color='lightgreen', alpha=1, ax=ax)
downArrows = pitch.scatter(df.downArrowX, df.downArrowY, s=100, marker='v', color='lightgreen', alpha=1, ax=ax)
upArrows = pitch.scatter(df.upArrowX, df.upArrowY, s=100, marker='^', color='lightgreen', alpha=1, ax=ax)
Result
How can I calculate the co-ordinates for the triangles more accurately given the original marker co-ordinates & marker size, so that they are placed evenly away at each point.
Or possibly any other solution to my problem.
Note: The pitch has co-ordinates 100x100, done in Jupyter Notebook. Thanks.
I wish to modify the 2D line in my legend to plot as line segments (or another method like patches) that will display the range of my colormap (here viridis_r) instead of a singular color. While the third variable (radius) is included in the colorbar, having it displayed in the legend as well will be informative when I add more complications to the plot. Thanks!
fig, ax = plt.subplots()
radii = [1,2,3,4,5]
angle = np.linspace(0, 2 * np.pi, 150)
cmap = plt.get_cmap('viridis_r')
norm = plt.Normalize(radii[0], radii[-1])
m = plt.cm.ScalarMappable(cmap=cmap)
m.set_array(radii)
for radius in radii:
x = radius * np.cos(angle)
y = radius * np.sin(angle)
ax.plot(x, y, color=cmap(norm(radius)))
radius_2Dline = plt.Line2D((0, 1), (0, 0), color='k', linewidth=2)
ax.legend([radius_2Dline],['Radius'], loc='best')
ax.set_aspect( 1 )
fig.colorbar(m).set_label('Radius', size=15)
plt.show()
The following approach uses the "tuple legend handler". That handler puts a list of legend handles (in this case the circles drawn via ax.plot). Setting ndivide=None will draw one short line for each element in the list. The padding can be set to 0 to avoid gaps between these short lines. The default handlelength might be too small to properly see these special handles; therefore, the example code below increases it a bit.
import matplotlib.pyplot as plt
from matplotlib.legend_handler import HandlerTuple
import numpy as np
fig, ax = plt.subplots()
radii = [1, 2, 3, 4, 5]
angle = np.linspace(0, 2 * np.pi, 150)
cmap = plt.get_cmap('viridis_r')
norm = plt.Normalize(radii[0], radii[-1])
lines = [] # list of lines to be used for the legend
for radius in radii:
x = radius * np.cos(angle)
y = radius * np.sin(angle)
line, = ax.plot(x, y, color=cmap(norm(radius)))
lines.append(line)
ax.legend(handles=[tuple(lines)], labels=['Radius'],
handlelength=3, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)})
ax.set_aspect('equal')
plt.tight_layout()
plt.show()
I am not sure if this is your goal but here is a stab at it. Following this answer, you can make a 'fake' legend with a colormap.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
fig, ax = plt.subplots()
radii = [1, 2, 3, 4, 5]
angle = np.linspace(0, 2 * np.pi, 150)
cmap = plt.get_cmap('viridis_r')
norm = plt.Normalize(radii[0], radii[-1])
m = plt.cm.ScalarMappable(cmap=cmap)
m.set_array(radii)
for radius in radii:
x = radius * np.cos(angle)
y = radius * np.sin(angle)
ax.plot(x, y, color=cmap(norm(radius)))
# Set box that will act as a 'fake' legend, 25% width of the
# x-axis, 15% of y-axis
cbbox = inset_axes(ax, width="25%", height="15%", loc=2)
cbbox.tick_params(
axis = 'both',
left = False,
top = False,
right = False,
bottom = False,
labelleft = False,
labeltop = False,
labelright = False,
labelbottom = False
)
# Semi-transparent like the usual ax.legend()
cbbox.set_facecolor([1, 1, 1, 0.7])
# Colorbar inside the fake legend box, occupying 85% of the
# box width and %5 box height
cbaxes = inset_axes(cbbox, width="85%", height="5%", loc=2)
cbar = fig.colorbar(m, cax=cbaxes, orientation='horizontal',
ticks=[1, 3, 5])
cbar.set_label('Radius', size=9)
cbar.ax.tick_params(labelsize=9)
ax.set_aspect(1)
plt.show()
I was unsuccessful in creating an actual ax.legend() from a LineCollection or a multicolored line - it only plotted one color - so my solution was this 'fake' legend approach. Hope this helps, cheers.
I have a distplot and I would like to plot a mean line that goes from 0 to the y value of the mean frequency. I want to do this, but have the line stop at when the distplot does. Why isn't there a simple parameter that does this? It would be very useful.
I have some code that gets me almost there:
plt.plot([x.mean(),x.mean()], [0, *what here?*])
This code plots a line just as I'd like except for my desired y-value. What would the correct math be to get the y max to stop at the frequency of the mean in the distplot? An example of one of my distplots is below using 0.6 as the y-max. It would be awesome if there was some math to make it stop at the y-value of the mean. I have tried dividing the mean by the count etc.
Update for the latest versions of matplotlib (3.3.4) and seaborn (0.11.1): the kdeplot with shade=True now doesn't create a line object anymore. To get the same outcome as before, setting shade=False will still create the line object. The curve can then be filled with ax.fill_between(). The code below is changed accordingly. (Use the revision history to see the older version.)
ax.lines[0] gets the curve of the kde, of which you can extract the x and y data.
np.interp then can find the height of the curve for a given x-value:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
x = np.random.normal(np.tile(np.random.uniform(10, 30, 5), 50), 3)
ax = sns.kdeplot(x, shade=False, color='crimson')
kdeline = ax.lines[0]
mean = x.mean()
xs = kdeline.get_xdata()
ys = kdeline.get_ydata()
height = np.interp(mean, xs, ys)
ax.vlines(mean, 0, height, color='crimson', ls=':')
ax.fill_between(xs, 0, ys, facecolor='crimson', alpha=0.2)
plt.show()
The same approach can be extended to show the mean together with the standard deviation, or the median and the quartiles:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
x = np.random.normal(np.tile(np.random.uniform(10, 30, 5), 50), 3)
fig, axes = plt.subplots(ncols=2, figsize=(12, 4))
for ax in axes:
sns.kdeplot(x, shade=False, color='crimson', ax=ax)
kdeline = ax.lines[0]
xs = kdeline.get_xdata()
ys = kdeline.get_ydata()
if ax == axes[0]:
middle = x.mean()
sdev = x.std()
left = middle - sdev
right = middle + sdev
ax.set_title('Showing mean and sdev')
else:
left, middle, right = np.percentile(x, [25, 50, 75])
ax.set_title('Showing median and quartiles')
ax.vlines(middle, 0, np.interp(middle, xs, ys), color='crimson', ls=':')
ax.fill_between(xs, 0, ys, facecolor='crimson', alpha=0.2)
ax.fill_between(xs, 0, ys, where=(left <= xs) & (xs <= right), interpolate=True, facecolor='crimson', alpha=0.2)
# ax.set_ylim(ymin=0)
plt.show()
PS: for the mode of the kde:
mode_idx = np.argmax(ys)
ax.vlines(xs[mode_idx], 0, ys[mode_idx], color='lime', ls='--')
With plt.get_ylim() you can get the limits of the current plot: [bottom, top].
So, in your case, you can extract the actual limits and save them in ylim, then draw the line:
fig, ax = plt.subplots()
ylim = ax.get_ylim()
ax.plot([x.mean(),x.mean()], ax.get_ylim())
ax.set_ylim(ylim)
As ax.plot changes the ylims afterwards, you have to re-set them with ax.set_ylim as above.
I created two subplots on a MPL figure, but i'm having an hard time setting the size on them. I want the space to be splitted between the two charts, so each chart needs to have 50% of the total width of the figure, and i want them to have the same height of the figure, here is how i initialized the subplots:
fig = plt.figure(facecolor='#131722',dpi=155, figsize=(10, 3))
ax1 = plt.subplot2grid((3,3), (2,0), facecolor='#131722')
ax2 = plt.subplot2grid((5,3), (2,2), colspan=5, rowspan=4, facecolor='#131722')
Colors = [['#0400ff', '#FF0000'], ['#09ff00', '#ff8c00']]
for x in List:
Index = List.index(x)
rate_buy = []
total_buy = []
rate_sell = []
total_sell = []
for y in x['data']['asks']:
rate_sell.append(y[0])
total_sell.append(y[1])
for y in x['data']['bids']:
rate_buy.append(y[0])
total_buy.append(y[1])
rBuys = pd.DataFrame({'buy': rate_buy})
rSells = pd.DataFrame({'sell': rate_sell})
tBuys = pd.DataFrame({'total': total_buy})
tSells = pd.DataFrame({'total': total_sell})
ax1.plot(rBuys.buy, tBuys.total, color=Colors[Index][0], linewidth=0.5, alpha=1, label='test')
ax2.plot(rSells.sell, tSells.total, color=Colors[Index][1],alpha=0.5, linewidth=1, label=x['exchange'])
ax1.fill_between(rBuys.buy, 0, tBuys.total, facecolor=Colors[Index][0], alpha=0.4)
ax2.fill_between(rSells.sell, 0, tSells.total, facecolor=Colors[Index][1], alpha=0.4)
And this is what i'm getting:
use plt.tight_layout() before calling plt.show().
I have two numeric arrays of equal length, with one array always having the element value >= to the corresponding (same index) element in the second array.
I am trying to visualize in a single graph:
i) difference between the corresponding elements,
ii) values of the corresponding elements in the two arrays.
I have tried plotting the CDF as below:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
arr1 = np.random.uniform(1,20,[25,1])
arr2 = arr1 + np.random.uniform(1,10,[25,1])
df1 = pd.DataFrame(arr1)
df2 = pd.DataFrame(arr2)
fix, ax = plt.subplots()
sns.kdeplot(df1[0], cumulative=True, color='orange', label='arr1')
sns.kdeplot(df2[0], cumulative=True, color='b', label='arr2')
sns.kdeplot(df2[0]-df1[0], cumulative=True, color='r', label='difference')
plt.show()
which gives the following output:
However, it does not capture the difference, and values of the corresponding elements together. For example, suppose the difference between two elements is 3. The two numbers can be 2 and 5, but they can also be 15 and 18, and this can not be determined from the CDF.
Which kind of plotting can visualize both the difference between the elements and the values of the elements?
I do not wish to line plot as below because not much statistical insights can be derived from the visualization.
ax.plot(df1[0])
ax.plot(df2[0])
ax.plot(df2[0]-df1[0])
There are lots of ways to show difference between two values. It really depends on your goal for the chart, how quantitative or qualitative you want to be, or if you want to show the raw data somehow. Here are a few ideas that come to mind that do not involve simple line plots or density functions. I strongly recommend the book Better Data Visualization by Johnathan Schwabish. He discusses interesting considerations regarding data presentation.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import ticker
arr1 = np.random.uniform(1,20, size=25)
arr2 = arr1 + np.random.uniform(1,10, size=25)
df = pd.DataFrame({
'col1' : arr1,
'col2' : arr2
})
df['diff'] = df.col2 - df.col1
df['sum'] = df.col1 + df.col2
fig, axes = plt.subplots(ncols=2, nrows=3, figsize=(15,15))
axes = axes.flatten()
# Pyramid chart
df_sorted = df.sort_values(by='sum', ascending=True)
axes[0].barh(
y = np.arange(1,26),
width = -df_sorted.col1
)
axes[0].barh(
y = np.arange(1,26),
width = df_sorted.col2
)
# Style axes[0]
style_func(axes[0], 'Pyramid Chart')
# Dot Plot
axes[1].scatter(df.col1, np.arange(1, 26), label='col1')
axes[1].scatter(df.col2, np.arange(1, 26), label='col2')
axes[1].hlines(
y = np.arange(1, 26),
xmin = df.col1, xmax = df.col2,
zorder=0, linewidth=1.5, color='k'
)
# Style axes[1]
legend = axes[1].legend(ncol=2, loc='center', bbox_to_anchor=(0.14,1.025), edgecolor='w')
style_func(axes[1], 'Dot Plot')
set_xlim = axes[1].set_xlim(0,25)
# Dot Plot 2
df_sorted = df.sort_values(by=['col1', 'diff'], ascending=False)
axes[2].scatter(df_sorted.col1, np.arange(1, 26), label='col1')
axes[2].scatter(df_sorted.col2, np.arange(1, 26), label='col2')
axes[2].hlines(
y = np.arange(1, 26),
xmin = df_sorted.col1, xmax = df_sorted.col2,
zorder=0, linewidth=1.5, color='k'
)
# Style axes[2]
legend = axes[2].legend(ncol=2, loc='center', bbox_to_anchor=(0.14,1.025), edgecolor='w')
style_func(axes[2], 'Dot Plot')
set_xlim = axes[2].set_xlim(0,25)
# Dot Plot 3
df_sorted = df.sort_values(by='sum', ascending=True)
axes[3].scatter(-df_sorted.col1, np.arange(1, 26), label='col1')
axes[3].scatter(df_sorted.col2, np.arange(1, 26), label='col2')
axes[3].vlines(x=0, ymin=-1, ymax=27, linewidth=2.5, color='k')
axes[3].hlines(
y = np.arange(1, 26),
xmin = -df_sorted.col1, xmax = df_sorted.col2,
zorder=0, linewidth=2
)
# Style axes[3]
legend = axes[3].legend(ncol=2, loc='center', bbox_to_anchor=(0.14,1.025), edgecolor='w')
style_func(axes[3], 'Dot Plot')
# Strip plot
axes[4].scatter(df.col1, [4] * 25)
axes[4].scatter(df.col2, [6] * 25)
axes[4].set_ylim(0, 10)
axes[4].vlines(
x = [df.col1.mean(), df.col2.mean()],
ymin = [3.5, 5.5], ymax=[4.5,6.5],
color='black', linewidth =2
)
# Style axes[4]
axes[4].yaxis.set_major_locator(ticker.FixedLocator([4,6]))
axes[4].yaxis.set_major_formatter(ticker.FixedFormatter(['col1','col2']))
hide_spines = [axes[4].spines[x].set_visible(False) for x in ['left','top','right']]
set_title = axes[4].set_title('Strip Plot', fontweight='bold')
tick_params = axes[4].tick_params(axis='y', left=False)
grid = axes[4].grid(axis='y', dashes=(8,3), alpha=0.3, color='gray')
# Slope chart
for i in range(25):
axes[5].plot([0,1], [df.col1[i], df.col2[i]], color='k')
align = ['left', 'right']
for i in range(1,3):
axes[5].text(x = i - 1, y = 0, s = 'col' + str(i),
fontsize=14, fontweight='bold', ha=align[i-1])
set_title = axes[5].set_title('Slope chart', fontweight='bold')
axes[5].axis('off')
def style_func(ax, title):
hide_spines = [ax.spines[x].set_visible(False) for x in ['left','top','right']]
set_title = ax.set_title(title, fontweight='bold')
set_xlim = ax.set_xlim(-25,25)
x_locator = ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
y_locator = ax.yaxis.set_major_locator(ticker.FixedLocator(np.arange(1,26, 2)))
spine_width = ax.spines['bottom'].set_linewidth(1.5)
x_tick_params = ax.tick_params(axis='x', length=8, width=1.5)
x_tick_params = ax.tick_params(axis='y', left=False)
What about a parallel coordinates plot with plotly? This will allow to see the distinct values of each original array but then also if they converge on the same diffrence?
https://plot.ly/python/parallel-coordinates-plot/