I have three years of data that I am plotting into a bar graph.
How do I separate the bars, space them apart, so that they are not overlapping?
I am struggling with the axis ticks.
I am new to Python.
import matplotlib.pyplot as plt
import numpy as np
# Success rates (in %) and the matching hunting-unit labels, one pair of arrays per year.
# Each units array mixes ints and strings such as '30A'.
success2019 = np.array([30.3,42.3,34.1,36.1,30.5,28,32,51.9])
units2019 = np.array([30,'30A',37,'37A',49,50,51,58])
success2018 = np.array([30.4,34.4,43,47.5])
units2018 = np.array([46,30,'68A','30A'])
success2017 = np.array([33.2,29.1,22,62.2,22.3,36.9])
units2017 = np.array([72,74,78,'30A','32A','37A'])
# Draw one bar series per year through the pyplot interface.
# Some units (e.g. '30A') appear in more than one year's unit array.
pltone = plt.bar(units2019, success2019, color = 'blue', label = '2019')
plttwo = plt.bar(units2018, success2018, color = 'purple', label = '2018')
pltthree = plt.bar(units2017, success2017, color = 'green',label = '2017')
# For each year: obtain an axes via plt.subplot(), put ticks at 0..n-1 and
# label them with that year's units.
# NOTE(review): plt.subplot() is called three times without arguments —
# presumably all three names address the same axes, so the later tick
# settings would replace the earlier ones; confirm.
ax19 = plt.subplot()
ax19.set_xticks(range(len(success2019)))
ax19.set_xticklabels(units2019)
ax18 = plt.subplot()
ax18.set_xticks(range(len(success2018)))
ax18.set_xticklabels(units2018)
ax17 = plt.subplot()
ax17.set_xticks(range(len(success2017)))
ax17.set_xticklabels(units2017)
# Title, axis labels and legend.
plt.title('Hunt unit vs Success Rates 2017-2019')
# NOTE(review): the legend is requested here and again three lines below
# with explicit labels; confirm which of the two calls is intended.
plt.legend(loc="upper right")
plt.ylabel('Success Rate %')
plt.xlabel('Hunting Units')
plt.legend (['2019', '2018', '2017'])
plt.show()
Your code looks OK to me: all bars are separated by spaces and they are automatically ordered.
However, if you like to create separate subplots, use this snippet:
# One subplot per year, laid out side by side in a single row.
fig, axs = plt.subplots(1,3)

# (unit labels, success rates, bar colour, legend label) for each panel,
# listed in the order the panels appear from left to right.
yearly_series = (
    (units2019, success2019, 'blue', '2019'),
    (units2018, success2018, 'purple', '2018'),
    (units2017, success2017, 'green', '2017'),
)
for panel, (units, rates, bar_color, year) in zip(axs, yearly_series):
    panel.bar(units, rates, color=bar_color, label=year)
If you want to place them in groups next to each other in a single axis, go with:
fig, ax = plt.subplots()

# Lay the three years out one after another on a shared x-axis: every year
# gets consecutive positions, starting one step past the last position of
# the year before it.
n2017, n2018, n2019 = len(units2017), len(units2018), len(units2019)
x2017 = np.linspace(0, n2017 - 1, n2017)
x2018 = x2017[-1] + 1 + np.linspace(0, n2018 - 1, n2018)
x2019 = x2018[-1] + 1 + np.linspace(0, n2019 - 1, n2019)

# Draw the years in chronological order.
ax.bar(x2017, success2017, color='green', label='2017')
ax.bar(x2018, success2018, color='purple', label='2018')
ax.bar(x2019, success2019, color='blue', label='2019')

# One tick per bar, labelled with the unit it belongs to.
tick_positions = np.concatenate((x2017, x2018, x2019))
tick_labels = np.concatenate((units2017, units2018, units2019))
ax.set_xticks(tick_positions)
ax.set_xticklabels(tick_labels)
You have full control over the width of the bars through the optional `width` argument:
# Same grouped layout on a single axes, but with a different bar width per year.
fig, ax = plt.subplots()
# 2017 occupies x = 0 .. len(units2017)-1; with a spacing of 1 and width=1
# neighbouring bars touch each other.
x2017 = np.linspace(0,len(units2017)-1,len(units2017))
ax.bar(x2017, success2017, color = 'green',label = '2017', width=1)
# 2018 starts one position after the last 2017 bar.
x2018 = np.linspace(0,len(units2018)-1,len(units2018)) + x2017[-1]+1
ax.bar(x2018, success2018, color = 'purple', label = '2018') # default width=0.8
# 2019 starts one position after the last 2018 bar and uses narrower bars.
x2019 = np.linspace(0,len(units2019)-1,len(units2019)) + x2018[-1]+1
ax.bar(x2019, success2019, color = 'blue', label = '2019', width = 0.4)
# One tick per bar, labelled with the unit names in the same 2017, 2018, 2019 order.
ax.set_xticks( np.concatenate((x2017,x2018,x2019)) )
ax.set_xticklabels( np.concatenate((units2017,units2018,units2019)) )
Combine the data by year and then use a pivot to transform the columns into yearly data. Create a bar chart with a pandas plot of that transformed data.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Success rates (in %) and the matching hunting-unit labels for each year.
success2019 = np.array([30.3,42.3,34.1,36.1,30.5,28,32,51.9])
units2019 = np.array([30,'30A',37,'37A',49,50,51,58])
success2018 = np.array([30.4,34.4,43,47.5])
units2018 = np.array([46,30,'68A','30A'])
success2017 = np.array([33.2,29.1,22,62.2,22.3,36.9])
units2017 = np.array([72,74,78,'30A','32A','37A'])

# One long-format frame per year: unit label, year, success rate.
df19 = pd.DataFrame({'index':units2019,'year':[2019]*len(success2019),'success':success2019})
df18 = pd.DataFrame({'index':units2018,'year':[2018]*len(success2018),'success':success2018})
df17 = pd.DataFrame({'index':units2017,'year':[2017]*len(success2017),'success':success2017})

# Stack the yearly frames into the single frame that gets pivoted
# (this step was missing, so `dfs` was undefined).
dfs = pd.concat([df17, df18, df19], ignore_index=True)

# Wide format: one row per hunting unit, one column per year.  Units that
# were not recorded in a given year become NaN and draw no bar.
# pivot() arguments are passed by keyword because recent pandas versions
# no longer accept them positionally.
ax = dfs.pivot(index='index', columns='year', values='success').plot.bar()
ax.set_title('Hunt unit vs Success Rates 2017-2019')
ax.set_ylabel('Success Rate %')
ax.set_xlabel('Hunting Units')
Related
I am trying to build this type of chart: a mix between a line chart and a stacked area chart using Matplotlib and seaborn. I just want the white area below to be fully transparent. I tried changing the alpha parameter but it does not make the area transparent, just white at best. I am using the below code:
# Dotted line for the "5y Avg" column of df.
plt.plot(df.index,"5y Avg",data=df,
color=avg_color,
linestyle="dotted",
label= '5y Avg')
# Grey filled area for the "5Y Max" column.
plt.stackplot(df.index,df["5Y Max"],color="#B1B3B6",labels= ['5y Range'])
# White, fully opaque filled area for the "5Y Min" column, drawn afterwards.
# NOTE(review): x comes from df_test.index while y comes from df — confirm
# that the two frames share the same index.
plt.stackplot(df_test.index,df["5Y Min"],color="white",alpha=1)
You can get the effect you want simply by changing the approach to the problem: instead of trying to make the area of the bottom stackplot transparent, color only the portion of the graph you want with matplotlib.axes.Axes.fill_between:
# Shade only the band between the '5Y Min' and '5Y Max' columns.
ax.fill_between(x = df.index, y1 = df['5Y Min'], y2 = df['5Y Max'], color = '#B1B3B6', label = '5y Range')
Complete Code
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Dummy data: 53 points with a rising average, maximum and minimum, each
# perturbed by uniform noise in [0, 100).
df = pd.DataFrame()
df['index'] = np.arange(1, 53 + 1, 1)
df['5y Avg'] = 2000/53*df['index'] + 100*np.random.rand(len(df))
df['5Y Max'] = 3200/53*df['index'] + 100*np.random.rand(len(df))
df['5Y Min'] = 1000/53*df['index'] + 100*np.random.rand(len(df))
avg_color = '#45A1A2'
df = df.set_index('index')

# The seaborn style sheets were renamed with a 'seaborn-v0_8-' prefix in
# matplotlib 3.6 and the old names were removed later, so use whichever
# name the installed matplotlib actually provides.
style_name = 'seaborn-v0_8-whitegrid'
if style_name not in plt.style.available:
    style_name = 'seaborn-whitegrid'
plt.style.use(style_name)

fig, ax = plt.subplots()

# Dotted line for the five-year average.
ax.plot(df.index, df['5y Avg'],
        color = avg_color,
        linestyle = 'dotted',
        label = '5y Avg')

# Shade only the band between minimum and maximum, so nothing below the
# minimum needs to be painted over.
ax.fill_between(x = df.index, y1 = df['5Y Min'], y2 = df['5Y Max'], color = '#B1B3B6', label = '5y Range')

ax.legend(frameon = True)

plt.show()
Plot
For matplotlib, I used this code to change a default color cycle setting, so that I could plot multiple lines with colors in this cycle.
# Replace the default colour cycle with n colours sampled evenly from viridis.
n = 24
color = plt.cm.viridis(np.linspace(0, 1,n))
mpl.rcParams['axes.prop_cycle'] = cycler.cycler('color', color)
# One plot call per value of "col1".
# NOTE(review): the loop runs 30 times while the cycle holds 24 colours;
# the loop body's indentation is missing in this listing.
for i in range(30):
plt.plot(x,y,data[data["col1"]==i])
plt.show()
How can I do this for plotly as well?
# Attempted Plotly version: start from a px.line figure, then add one
# scatter trace per value of "col1".
# NOTE(review): `i` is used on the first line before the loop below defines
# it; the loop body's indentation is missing in this listing.
fig = px.line(data[data["col1"]==i],x,y)
for i in range(30):
fig.add_scatter(x,y,data[data["col1"]==i],mode="line")
I often use `from itertools import cycle` to build a cycle over `<list of colors>` and then call `next()` on that cycle for each trace, where `<list of colors>` can be any sequence of colors, such as px.colors.qualitative.Alphabet.
Here's a setup that comes close to what you're looking for:
fig = go.Figure()

# Add one random-walk line per iteration; each line takes the next colour
# from the cycle and is named after that colour.
for _ in range(lines):
    trace_color = next(col_cycle)
    fig.add_scatter(
        x=np.arange(0, rows),
        y=np.random.randint(-5, 6, size=rows).cumsum(),
        mode="lines",
        line_color=trace_color,
        name=trace_color,
    )
Plot
Complete code:
from itertools import cycle

import numpy as np
# These two imports were missing although the code below uses `px` and `go`.
import plotly.express as px
import plotly.graph_objects as go

# Endless iterator over a qualitative palette; it restarts from the first
# colour once the palette is exhausted.
col_cycle = cycle(px.colors.qualitative.Alphabet)

rows = 10    # points per line
lines = 30   # number of lines to draw

fig = go.Figure()
for _ in range(lines):
    color = next(col_cycle)
    # Random walk as dummy data; the trace is named after its colour.
    fig.add_scatter(x = np.arange(0,rows),
                    y = np.random.randint(-5, 6, size=rows).cumsum(),
                    mode="lines",
                    line_color = color,
                    name = color
                    )
fig.show()
I have a matplotlib question about xticks. I want to hide all tick values that do not occur in the data. I managed to do it, but only for the second set of values (the red chart). I found how to do this for one specific data frame, but not for two or more.
This is my code:
plt.subplots(figsize=(2, 1), dpi=400)
width = 0.005
# NOTE(review): xlim and ylim are assigned but not used in the lines shown.
xlim = np.arange(0, 1, 0.01)
ylim = np.arange(0, 0.1, 0.001)
# Tick positions are taken from density_2 only, so values that occur only
# in density_1 get no tick.
plt.xticks(density_2.index.unique(), rotation=90, fontsize=1.5)
plt.yticks(density_2.unique(), fontsize=2)
# Two semi-transparent bar series drawn on the same axes.
plt.bar(density_1.index, density_1, width, color='Green', label=condition_1,alpha=0.5)
plt.bar(density_2.index, density_2, width, color='Red', label=condition_2,alpha=0.5)
plt.legend(loc="upper right", fontsize=2)
plt.show()
Link where I saw the solution: show dates in xticks only where value exist in plot chart and hide unnecessary interpolated xtick labels
Thank you very much in advance!
You need to find the intersection of the two lists of density_1's and density_2's ticks, as reported here.
Working example:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
N = 150  # number of random samples per condition

# Dummy data: how often each distinct value occurs, indexed by that value.
values_1 = np.random.randint(low = 5, high = 75, size = N)/100
density_1 = pd.DataFrame({'density_1': values_1})
density_1 = density_1.value_counts().sort_index(ascending = True)
density_1.index = sorted(list(set(values_1)), reverse = False)

values_2 = np.random.randint(low = 35, high = 100, size = N)/100
density_2 = pd.DataFrame({'density_2': values_2})
density_2 = density_2.value_counts().sort_index(ascending = True)
density_2.index = sorted(list(set(values_2)), reverse = False)

width = 0.005
condition_1 = 'Adele'
condition_2 = 'Extremoduro'

fig, ax = plt.subplots(figsize = (10, 5))
ax.bar(density_1.index, density_1, width, color = 'Green', label = condition_1, alpha = 0.5)
ax.bar(density_2.index, density_2, width, color = 'Red', label = condition_2, alpha = 0.5)
ax.legend(loc = 'upper right')

# Only tick the x positions that occur in both data sets.
ax.set_xticks(list(set(density_1.index.unique()) & set(density_2.index.unique())))
# Rotation is a property of the tick labels: set_xticks() only applies text
# keyword arguments when explicit labels are passed (and otherwise ignores
# or rejects them, depending on the matplotlib version), so rotate the
# labels through tick_params instead.
ax.tick_params(axis = 'x', labelrotation = 90)
plt.show()
In the line:
list(set(density_1.index.unique()) & set(density_2.index.unique()))
you select the ticks which belong to both density_1 and density_2.
Zoom in:
plt.figure(figsize = (12, 8))
sns.set(style = 'dark', palette = 'colorblind', color_codes = True)
# Count plot of the 'Position' column; the column name is passed as the
# first positional argument here.
ax = sns.countplot('Position', data = data, color = 'orange')
# Axis labels and title.
ax.set_xlabel(xlabel = 'Different Positions in Football', fontsize = 16)
ax.set_ylabel(ylabel = 'Number of of Players', fontsize = 16)
ax.set_title(label = 'Comparison of Positions and Players', fontsize = 20)
plt.show()
After executing this code, the labels overlap.
Is there any way to rotate the image to prevent the overlapping?
Instead of using
ax = sns.countplot('Position', data = data, color = 'orange')
where 'Position' is passed as x, try passing 'Position' as y instead, like this:
ax = sns.countplot(y='Position', data = data, color = 'orange')
The rest of the code remains the same
I'm struggling to adjust my plot legend after adding the axline/hline at the 100 level in the graph (screenshot added).
Is there a way to do this correctly so that no information is lost in the legend, and maybe to add another hline and include it in the legend as well?
I'm adding the code here; maybe I'm not writing it properly.
fig, ax1 = plt.subplots(figsize = (9,6),sharex=True)
# 'Spend' bars go on the primary axes ax1.
BundleFc_Outcome['Spend'].plot(kind = 'bar',color = 'blue',width = 0.4, ax = ax1,position = 1)
#
# Make the y-axis label, ticks and tick labels match the line color.
ax1.set_ylabel('SPEND', color='b', size = 18)
ax1.set_xlabel('Bundle FC',color='w',size = 18)
# Second axes created from ax1 with twinx(), used for the 'ROAS' bars.
ax2 = ax1.twinx()
ax2.set_ylabel('ROAS', color='r',size = 18)
ax1.tick_params(axis='x', colors='w',size = 20)
ax2.tick_params(axis = 'y', colors='w',size = 20)
ax1.tick_params(axis = 'y', colors='w',size = 20)
#ax1.text()
#
# Horizontal reference line at 100 on ax2.
ax2.axhline(100)
BundleFc_Outcome['ROAS'].plot(kind = 'bar',color = 'red',width = 0.4, ax = ax2,position = 0.25)
plt.grid()
#ax2.set_ylim(0, 4000)
ax2.set_ylim(0,300)
plt.title('ROAS & SPEND By Bundle FC',color = 'w',size= 20)
# NOTE(review): the handles passed here are the two Axes objects, and the
# list order [ax2, ax1] is paired with labels ['SPEND','ROAS'] although the
# Spend bars were drawn on ax1 and the ROAS bars on ax2 — confirm intent.
plt.legend([ax2,ax1],labels = ['SPEND','ROAS'],loc = 0)
The code gives me the following picture:
After implementing the suggestion in the comments, the picture looks like this (does not solve the problem):
You can use bbox_to_anchor attribute to set legend location manually.
# Two alternative calls; both pin the legend's upper-right corner to the
# point given by bbox_to_anchor.
# NOTE(review): presumably (1.25, 0.70) is in axes coordinates, i.e. to the
# right of the plotting area — confirm against the legend documentation.
ax1.legend([ax1],labels = ['SPEND'],loc='upper right', bbox_to_anchor=(1.25,0.70))
plt.legend([ax2,ax1],labels = ['SPEND','ROAS'],loc='upper right', bbox_to_anchor=(1.25,0.70))
https://matplotlib.org/users/legend_guide.html#legend-location
So I finally figured it out; it was simpler than expected, for some reason.
I even managed to add another threshold, at level 20, for minimum spend.
fig, ax1 = plt.subplots(figsize = (9,6),sharex=True)
# 'Spend' bars go on the primary axes ax1.
BundleFc_Outcome['Spend'].plot(kind = 'bar',color = 'blue',width = 0.4, ax = ax1,position = 1)
#
# Make the y-axis label, ticks and tick labels match the line color.
ax1.set_ylabel('SPEND', color='b', size = 18)
ax1.set_xlabel('Region',color='w',size = 18)
# Second axes created from ax1 with twinx(), used for the 'ROAS' bars.
ax2 = ax1.twinx()
ax2.set_ylabel('ROAS', color='r',size = 18)
ax1.tick_params(axis='x', colors='w',size = 20)
ax2.tick_params(axis = 'y', colors='w',size = 20)
ax1.tick_params(axis = 'y', colors='w',size = 20)
#ax1.text()
#
BundleFc_Outcome['ROAS'].plot(kind = 'bar',color = 'red',width = 0.4, ax = ax2,position = 0.25)
plt.grid()
#ax2.set_ylim(0, 4000)
ax2.set_ylim(0,300)
plt.title('ROAS & SPEND By Region',color = 'w',size= 20)
# Figure-level legend instead of an axes-level one.
# NOTE(review): the handles are the two Axes objects, and the list order
# [ax2, ax1] is paired with labels ['SPEND','ROAS'] although the Spend bars
# were drawn on ax1 and the ROAS bars on ax2 — confirm intent.
fig.legend([ax2,ax1],labels = ['SPEND','ROAS'],loc = 0)
# Two horizontal threshold lines: red at 100 and blue at 20, from x=0 to x=8.
plt.hlines([100,20],xmin = 0,xmax = 8,color= ['r','b'])
I don't recommend using the builtin functions of pandas to do more complex plotting. Also when asking a question it is common courtesy to provide a minimal and verifiable example (see here). I took the liberty to simulate your problem.
Due to the change in axes, we need to generate our own legend. First the results:
Which can be achieved with:
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import numpy as np
import pandas as pd

# generate dummy data.
X = np.random.rand(10, 2)
X[:,1] *= 1000
x = np.arange(X.shape[0]) * 2 # xticks
df = pd.DataFrame(X, columns = 'Spend Roast'.split())
# end dummy data

fig, ax1 = plt.subplots(figsize = (9,6),sharex=True)
ax2 = ax1.twinx()

# One axes per column: 'Spend' on the left axis, 'Roast' on the twin axis.
axes = [ax1, ax2] # setup axes
colors = plt.cm.tab20(x)
width = .5 # bar width

# Legend entries are collected by hand because the bars live on two
# different axes and a single legend should list both.
elements = []

# plot data
for idx, col in enumerate(df.columns):
    tax = axes[idx]
    # Shift the second series by one bar width so the bars sit side by side.
    tax.bar(x + idx * width, df[col], label = col, width = width, color = colors[idx])
    # Proxy artist for the legend.  Line2D is a class in matplotlib.lines,
    # not an attribute of an Axes, so `tax.Line2D` raised AttributeError.
    element = Line2D([0], [0], color = colors[idx], label = col) # setup dummy label
    elements.append(element)

# After the loop `tax` is the last axes (ax2): the horizontal line, the
# labels and the combined legend are attached to it.
# desired hline
tax.axhline(200, color = 'red')
tax.set(xlabel = 'Bundle FC', ylabel = 'ROAST')
axes[0].set_ylabel('SPEND')
tax.legend(handles = elements)