I am working with Matplotlib and trying to plot a combo box with bars and lines. Below you can see my data:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap
from matplotlib.ticker import FormatStrFormatter
# Data
data = {
'Year': ['2010','2011','2012','2013','2014','2015','2016','2017','2018','2019'],
'Rate':[10,10,9,7,5,5,5,5,5,5],
'ChangeRate_1':[7,-50,24,150,8,10,60,5,180,5],
'ChangeRate_2':[7,6,-3,1,8,5,8,5,15,5],
}
df = pd.DataFrame(data, columns = ['Year',
'Rate',
'ChangeRate_1',
'ChangeRate_2'
])
df
Below you can see code :
# Ploting combo plot
fig, ax_1 = plt.subplots(figsize = (8, 5))
ax_2 = ax_1.twinx()
ax_3 = ax_2.twinx() ### <---- Problem is probably here
cmap = get_cmap('tab10')
ax_1.bar(df['Year'], df['Rate'], label = 'Rate', color = cmap(0))
ax_2.plot(df['Year'], df['ChangeRate_1'], label = 'ChangeRate_2', color = cmap(0.1),linewidth = '3.5')
ax_3.plot(df['Year'], df['ChangeRate_2'], label = 'ChangeRate_2', color = cmap(0.2),linewidth = '3.5')
handles_1, labels_1 = ax_1.get_legend_handles_labels()
handles_2, labels_2 = ax_2.get_legend_handles_labels()
handles_3, labels_3 = ax_3.get_legend_handles_labels()
ax_1.set_ylim(0, 16)
ax_2.set_ylim(-50,180)
ax_1.legend(handles = handles_1 + handles_2 + labels_3,
labels = labels_1 + labels_2 + labels_3,
loc = 'upper right',
shadow = True)
ax_1.grid(axis = 'y')
ax_1.set_title('Comparison of revenues',fontsize=11)
ax_1.set_ylabel('Rate')
ax_2.set_ylabel('ChangeRate_1')
ax_3.set_ylabel('ChangeRate_2')
ax_1.xaxis.set_major_formatter(FormatStrFormatter('%.0f'))
plt.savefig('ComparisonOfRevenues.pdf')
plt.show()
The above code produces a plot that is shown below.
As shown in the above plot, values for the y-axis for the left and for the right side overlap with values and are not readable.
For the left side, the scale for the 'Rate' should be in the range of 0 to 16, while for the right side, for ChangeRate_1 and ChangeRate_2, from -50 to 180.
So can anybody help me how to solve this problem ?
The instantiation of the third Axes object with ax_3 = ax_2.twinx() can be circumvented by using just one extra y-axis on the right and plotting ChangeRate_1 and ChangeRate_2 on that axis keeping the (right) y-axis label as ChangeRate and then assigning correct labels to the lines.
Code:
fig, ax_1 = plt.subplots(figsize=(8, 5))
ax_2 = ax_1.twinx()
cmap = get_cmap('tab10')
ax_1.bar(df['Year'], df['Rate'], label='Rate', color=cmap(0))
ax_2.plot(df['Year'], df['ChangeRate_1'], label='ChangeRate_1', color=cmap(0.1), linewidth='3.5')
ax_2.plot(df['Year'], df['ChangeRate_2'], label='ChangeRate_2', color=cmap(0.2), linewidth='3.5')
handles_1, labels_1 = ax_1.get_legend_handles_labels()
handles_2, labels_2 = ax_2.get_legend_handles_labels()
ax_1.set_ylim(0, 16)
ax_2.set_ylim(-50,180)
ax_1.legend(handles=handles_1 + handles_2, labels=labels_1 + labels_2,
loc='upper right', shadow=True)
ax_1.grid(axis='y')
ax_1.set_title('Comparison of revenues',fontsize=11)
ax_1.set_ylabel('Rate')
ax_2.set_ylabel('ChangeRate')
ax_1.xaxis.set_major_formatter(FormatStrFormatter('%.0f'))
plt.show()
Related
I am trying to build this type of chart: a mix between a line chart and a stacked area chart using Matplotlib and seaborn. I just want the white area below to be fully transparent. I tried changing the alpha parameter but it does not make the area transparent, just white at best. I am using the below code:
plt.plot(df.index,"5y Avg",data=df,
color=avg_color,
linestyle="dotted",
label= '5y Avg')
plt.stackplot(df.index,df["5Y Max"],color="#B1B3B6",labels= ['5y Range'])
plt.stackplot(df_test.index,df["5Y Min"],color="white",alpha=1)
You can get the effect you want simply by changing the approach to the problem: in place of making transparent the area of the bottom stackplot, you can color only the portion of the graph you want with matplotlib.axes.Axes.fill_between:
ax.fill_between(x = df.index, y1 = df['5Y Min'], y2 = df['5Y Max'], color = '#B1B3B6', label = '5y Range')
Complete Code
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
df = pd.DataFrame()
df['index'] = np.arange(1, 53 + 1, 1)
df['5y Avg'] = 2000/53*df['index'] + 100*np.random.rand(len(df))
df['5Y Max'] = 3200/53*df['index'] + 100*np.random.rand(len(df))
df['5Y Min'] = 1000/53*df['index'] + 100*np.random.rand(len(df))
avg_color = '#45A1A2'
df = df.set_index('index')
plt.style.use('seaborn-whitegrid')
fig, ax = plt.subplots()
ax.plot(df.index, df['5y Avg'],
color = avg_color,
linestyle = 'dotted',
label = '5y Avg')
ax.fill_between(x = df.index, y1 = df['5Y Min'], y2 = df['5Y Max'], color = '#B1B3B6', label = '5y Range')
ax.legend(frameon = True)
plt.show()
Plot
[UPDATE: Sorry for not providing the piece where the author of the codes create example data. I have updated the codes]
I found an example of a 3D mesh line chart that satisfied what I need (colouring change with level on z dimension). However, instead of line, I want surface plot. How can I change the codes to have the 3d surface plot?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import animation, rc
from matplotlib.cm import get_cmap
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.font_manager import FontProperties
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap
from mpl_toolkits.mplot3d.art3d import Line3DCollection
index_returns = np.random.normal(loc=1e-4, scale=5e-3, size=(783, 9))
index_returns = np.vstack((np.zeros(shape=(1, 9)) + 100, index_returns))
index_prices = np.cumprod(1 + index_returns, axis=0)
window = 261
df = np.zeros(shape=(index_prices.shape[0]-window, 9))
for i in range(window, index_prices.shape[0], 1):
df[i-window] = (index_prices[i]/index_prices[i-window]) - 1
index = pd.date_range('2019-01-01', periods=index_prices.shape[0]-window, freq='B')
columns = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
df = pd.DataFrame(df, index=index, columns=columns)
# create the figure
fig = plt.figure(figsize=(14.4, 9))
ax = fig.add_subplot(111, projection='3d')
fig.patch.set_alpha(1)
# get the cmap to use
cmap = get_cmap('RdYlGn')
# get the slice based on data frame
current_slice = df.values[:261, :]
index_names = df.columns
index_dates = df.index
# list holding the lines
lines = []
# for each index...
for i in range(current_slice.shape[1]):
# get the coordinates
x = np.array(np.arange(current_slice.shape[0]))
y = np.tile(i, current_slice.shape[0])
z = np.array(current_slice[:, i])
# crete points and segments to color
points = np.array([x, y, z]).T.reshape(-1, 1, 3)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
# Create a continuous norm to map from data points to colors
norm = plt.Normalize(-0.19, 0.19)
lc = Line3DCollection(segments, cmap=cmap, norm=norm, zorder=current_slice.shape[1]-i)
# Set the values used for colormapping
lc.set_array(z)
lc.set_linewidth(2)
lc.set_color(cmap(z[-1] * 2.5 + 0.5))
lc.set_label(index_names[i])
lines.append(ax.add_collection(lc))
# add the grids
ax.legend(loc='center right', bbox_to_anchor=(1.1, 0.46), fancybox=True, facecolor=(.95,.95,.95,1), framealpha=1, shadow=False, frameon=True, ncol=1, columnspacing=0, prop={'family': 'DejaVu Sans Mono'})
ax.set_zlabel('Rolling Equity 1Y', labelpad=10)
ax.set_zlim(-0.39, 0.39)
ax.set_zticklabels([' '* 3 + '{:.0%}'.format(val) for val in ax.get_zticks()], fontdict={'verticalalignment': 'center', 'horizontalalignment': 'center'})
ax.set_xlabel('Date', labelpad=30)
ax.set_xlim(0, current_slice.shape[0]-1)
ax.set_xticklabels([index_dates[int(val)].strftime('%m/%y') for val in ax.get_xticks()[:-1]] + [''], rotation=0, fontdict={'verticalalignment': 'top', 'horizontalalignment': 'center'})
ax.set_yticks(np.arange(current_slice.shape[1]))
ax.set_yticklabels([index_names[i] for i in range(current_slice.shape[1])], rotation=-15, fontdict={'verticalalignment': 'center', 'horizontalalignment': 'left'})
# show the plot
plt.show()
I try to produce a plot and want to automatically add text (in this case is percentage) to each circle in correspond to each y axis types. Any help would be very helpful.
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Make some data
index=['Stream flow',
'Soil moisture',
'Water indices',
'Others',
'Temperature',
'Precipitation',
'Vegetative indices']
value=[2.13, 6.38, 10.64, 12.77, 17.73, 21.99, 28.37]
# create dataframe
percentages = pd.Series(value,index=index)
df = pd.DataFrame({'percentage' : percentages})
df = df.sort_values(by='percentage')
# we first need a numeric placeholder for the y axis
my_range=list(range(1,len(df.index)+1))
fig, ax = plt.subplots(figsize=(15,8))
# create for each expense type an horizontal line that starts at x = 0 with the length
plt.hlines(y=my_range, xmin=0, xmax=df['percentage']-0.5, color='black', alpha=0.8, linewidth=1)
# create for each expense type a dot at the level of the expense percentage value
line=plt.plot(df['percentage'], my_range, "o", markersize=30, color='#fd8c00', alpha=0.6, linewidth=0.3)
# set labels
ax.set_xlabel('Percentage', fontsize=15)
ax.set_ylabel('')
# set axis
ax.tick_params(axis='both', which='major', labelsize=14)
plt.yticks(my_range, df.index)
ax.set_xlim(0,30)
You can use matplotlib.axes.Axes.text:
x_space = 0.4
y_space = 0.05
fontsize = 7
for y_i, val in enumerate(value, 1):
ax.text(x = val - x_space, y = y_i - y_space, s = f'{val}%', fontsize = fontsize)
You have to adjust x_space, y_space and fontsize in order to fit properly the text within the circles.
Complete code
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Make some data
index=['Stream flow',
'Soil moisture',
'Water indices',
'Others',
'Temperature',
'Precipitation',
'Vegetative indices']
value=[2.13, 6.38, 10.64, 12.77, 17.73, 21.99, 28.37]
# create dataframe
percentages = pd.Series(value,index=index)
df = pd.DataFrame({'percentage' : percentages})
df = df.sort_values(by='percentage')
# we first need a numeric placeholder for the y axis
my_range=list(range(1,len(df.index)+1))
fig, ax = plt.subplots(figsize=(15,8))
# create for each expense type an horizontal line that starts at x = 0 with the length
plt.hlines(y=my_range, xmin=0, xmax=df['percentage']-0.5, color='black', alpha=0.8, linewidth=1)
# create for each expense type a dot at the level of the expense percentage value
line=plt.plot(df['percentage'], my_range, "o", markersize=30, color='#fd8c00', alpha=0.6, linewidth=0.3)
# set labels
ax.set_xlabel('Percentage', fontsize=15)
ax.set_ylabel('')
# set axis
ax.tick_params(axis='both', which='major', labelsize=14)
plt.yticks(my_range, df.index)
ax.set_xlim(0,30)
x_space = 0.4
y_space = 0.05
for y_i, val in enumerate(value, 1):
ax.text(x = val - x_space, y = y_i - y_space, s = f'{val:>5.2f}%', fontsize = 7)
plt.show()
Same code as above, but with increased circle radius and font, in order to improve readability.
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Make some data
index=['Stream flow',
'Soil moisture',
'Water indices',
'Others',
'Temperature',
'Precipitation',
'Vegetative indices']
value=[2.13, 6.38, 10.64, 12.77, 17.73, 21.99, 28.37]
# create dataframe
percentages = pd.Series(value,index=index)
df = pd.DataFrame({'percentage' : percentages})
df = df.sort_values(by='percentage')
# we first need a numeric placeholder for the y axis
my_range=list(range(1,len(df.index)+1))
fig, ax = plt.subplots(figsize=(15,8))
# create for each expense type an horizontal line that starts at x = 0 with the length
plt.hlines(y=my_range, xmin=0, xmax=df['percentage']-0.85, color='black', alpha=0.8, linewidth=1)
# create for each expense type a dot at the level of the expense percentage value
line=plt.plot(df['percentage'], my_range, "o", markersize=50, color='#fd8c00', alpha=0.6, linewidth=0.3)
# set labels
ax.set_xlabel('Percentage', fontsize=15)
ax.set_ylabel('')
# set axis
ax.tick_params(axis='both', which='major', labelsize=14)
plt.yticks(my_range, df.index)
ax.set_xlim(0,30)
ax.set_ylim(0, len(value) + 1)
x_space = 0.75
y_space = 0.06
fontsize = 12
for y_i, val in enumerate(value, 1):
ax.text(x = val - x_space, y = y_i - y_space, s = f'{val:>5.2f}%', fontsize = fontsize)
plt.show()
Even better, you can use matplotlib.axes.Axes.annotate to get rid of x_space and y_space:
fontsize = 12
for y_i, x_i in enumerate(value, 1):
ax.annotate(f'{x_i:>5.2f}%', xy = (x_i, y_i), xytext = (0, 0), textcoords = 'offset points', ha = 'center', va = 'center', fontsize = fontsize)
You still have to adjust the fontsize to properly fit the radius of the circles.
I have got a matplotlib question about xticks. I wanted to hide all those values that do not occur. I actually did it, but for the second set of values (red chart). I found how to hide for a specific data frame but not for 2 or more.
This is my code:
plt.subplots(figsize=(2, 1), dpi=400)
width = 0.005
xlim = np.arange(0, 1, 0.01)
ylim = np.arange(0, 0.1, 0.001)
plt.xticks(density_2.index.unique(), rotation=90, fontsize=1.5)
plt.yticks(density_2.unique(), fontsize=2)
plt.bar(density_1.index, density_1, width, color='Green', label=condition_1,alpha=0.5)
plt.bar(density_2.index, density_2, width, color='Red', label=condition_2,alpha=0.5)
plt.legend(loc="upper right", fontsize=2)
plt.show()
Link where I saw the solution: show dates in xticks only where value exist in plot chart and hide unnecessary interpolated xtick labels
Thank you very much in advance!
You need to find the intersection of the two lists of density_1's and density_2's ticks, as reported here.
Working example:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
N = 150
values_1 = np.random.randint(low = 5, high = 75, size = N)/100
density_1 = pd.DataFrame({'density_1': values_1})
density_1 = density_1.value_counts().sort_index(ascending = True)
density_1.index = sorted(list(set(values_1)), reverse = False)
values_2 = np.random.randint(low = 35, high = 100, size = N)/100
density_2 = pd.DataFrame({'density_2': values_2})
density_2 = density_2.value_counts().sort_index(ascending = True)
density_2.index = sorted(list(set(values_2)), reverse = False)
width = 0.005
condition_1 = 'Adele'
condition_2 = 'Extremoduro'
fig, ax = plt.subplots(figsize = (10, 5))
ax.bar(density_1.index, density_1, width, color = 'Green', label = condition_1, alpha = 0.5)
ax.bar(density_2.index, density_2, width, color = 'Red', label = condition_2, alpha = 0.5)
ax.legend(loc = 'upper right')
ax.set_xticks(list(set(density_1.index.unique()) & set(density_2.index.unique())), rotation = 90)
plt.show()
In the line:
list(set(density_1.index.unique()) & set(density_2.index.unique()))
you can select ticks which blongs to both density_1 and density_2.
Zoom in:
I'm triying to make a figure where the stem plot has the baseline on the data of dataframe_3_merged['TOTAL'].
import numpy as np
from eurostatapiclient import EurostatAPIClient
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
import pandas as pd
#Set versions and formats, so far only the ones used here are availeable and call client
VERSION = 'v2.1'
FORMAT = 'json'
LANGUAGE = 'en'
client = EurostatAPIClient(VERSION, FORMAT, LANGUAGE)
dataframe_3_query_total = 'ilc_peps01?precision=1&sex=T&geo=AT&geo=BE&geo=BG&geo=CH&geo=CY&geo=CZ&geo=DK&geo=EA19&geo=EE&geo=EL&geo=ES&geo=EU28&geo=FI&geo=FR&geo=HR&geo=HU&geo=IE&geo=IS&geo=IT&geo=LT&geo=LU&geo=LV&geo=ME&geo=MK&geo=MT&geo=NL&geo=NO&geo=PL&geo=PT&geo=RO&geo=RS&geo=SE&geo=SI&geo=SK&geo=TR&geo=UK&unit=PC&unitLabel=label&time=2018&age=TOTAL'
dataframe_3_query_urb = 'ilc_peps13?precision=1°_urb=DEG1°_urb=DEG2°_urb=DEG3&geo=AT&geo=BE&geo=BG&geo=CH&geo=CY&geo=CZ&geo=DE&geo=DK&geo=EA19&geo=EE&geo=EL&geo=ES&geo=EU28&geo=FI&geo=FR&geo=HR&geo=HU&geo=IE&geo=IS&geo=IT&geo=LT&geo=LU&geo=LV&geo=MK&geo=MT&geo=NL&geo=NO&geo=PL&geo=PT&geo=RO&geo=RS&geo=SE&geo=SI&geo=SK&geo=UK&unit=PC&unitLabel=label&time=2018'
dataframe_3_total = client.get_dataset(dataframe_3_query_total).to_dataframe().pivot(index = 'geo',columns = 'age',values = 'values')
dataframe_3_urb =client.get_dataset(dataframe_3_query_urb).to_dataframe().pivot(index = 'geo',columns = 'deg_urb',values = 'values')
dataframe_3_merged = dataframe_3_total.join(dataframe_3_urb).dropna()
fig, ax = plt.subplots(figsize=(15, 4))
plt.ylim(0,51)
x = range(0,32,1)
stem_1 =plt.stem(x,dataframe_3_merged['DEG1'])
stem_2=plt.stem(x, dataframe_3_merged['DEG2'])
stem_3=plt.stem(x, dataframe_3_merged['DEG3'])
plt.setp(stem_2, color = 'r')
plt.setp(stem_3, color = 'g')
scatterplot= sns.scatterplot(x=dataframe_3_merged.index, #We draw the scatterplot and specify the arguments
y = dataframe_3_merged['TOTAL'],
ax=ax ,
s = 100 ,
legend = False,
marker="_",
color = 'b')
The goal is to have a plot similar to this image:
I tried to use the list dataframe_3_merged['TOTAL'] as the parameter in the bottom argument of plt.stem but I have this traceback: ValueError: setting an array element with a sequence.
Thank you for your help!
You could replace each stem plot by a scatter plot and a plot of vertical lines (plt.vlines). Setting the zorder=0 ensures the lines are drawn behind the dots.
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
names = ['hydrogen', 'helium', 'lithium', 'beryllium', 'boron', 'carbon', 'nitrogen', 'oxygen', 'fluorine', 'neon', 'sodium', 'magnesium', 'aluminium', 'silicon', 'phosphorus', 'sulphur', 'chlorine', 'argon', 'potassium', 'calcium', 'scandium', 'titanium', 'vanadium', 'chromium', 'manganese', 'iron', 'cobalt', 'nickel', 'copper', 'zinc', 'gallium', 'germanium', 'arsenic', 'selenium', 'bromine', 'krypton']
N = len(names)
df = pd.DataFrame({'Deg1': 35 + np.random.normal(size=N).cumsum(),
'Deg2': 25 + np.random.normal(size=N).cumsum(),
'Deg3': 15 + np.random.normal(size=N).cumsum()},
index=names)
df['Total'] = df.mean(axis=1)
for deg, color, label in zip(['Deg1', 'Deg2', 'Deg3'], ['tomato', 'orange', 'palegreen'],
['label1', 'label2', 'label3']):
plt.vlines(df.index, df[deg], df['Total'], lw=0.2, color='k', zorder=0)
plt.scatter(df.index, df[deg], marker='o', color=color, label=label)
plt.scatter(df.index, df['Total'], marker='_', color='deepskyblue', s=100)
plt.xticks(rotation='vertical')
plt.ylim(0, 51)
plt.margins(x=0.02)
plt.legend(ncol=3, bbox_to_anchor=(0.5, -0.4), loc='upper center')
plt.grid(True, axis='y')
plt.tick_params(length=0)
for where in ['top', 'left', 'right']:
plt.gca().spines[where].set_visible(False)
plt.tight_layout()
plt.show()