I have a little problem with matplotlib and Python: the lines don't appear in the plot. I am trying to graph a custom function. My code is below:
# Script exactly as posted in the question: the figure is built but no visible lines are drawn.
fig, ax = plt.subplots(figsize=(8,4))
# Define the x axis values:
x = np.linspace(2000,32000)
# Creating the functions that we will plot
# pmgc ignores its argument and always returns the constant 0.853.
def pmgc(x):
return 0.853
# pmec returns -124.84/x plus the constant from pmgc.
def pmec(x):
return (-124.84/(x)) + pmgc(x)
# NOTE(review): this loop rebinds x (the linspace array above) to a plain int
# and discards the values returned by pmgc and pmec.
for x in range(2000,32000):
pmgc(x)
pmec(x)
#Plotting
# NOTE(review): presumably at top level, so x is the last loop value here and
# each plot call receives a single point rather than a curve - confirm.
ax.plot(x,pmgc(x), color = 'blue',linewidth = 3)
ax.plot(x,pmec(x), color = 'red',linewidth = 3)
plt.rcParams["figure.autolayout"] = True
ax.set_xlabel("Renda")
# Legend anchored just outside the right edge of the axes.
plt.legend(labels = ['Propensão Marginal a Cosumir','Propensão Média a Cosumir'],loc = 'upper left', borderaxespad = 0,bbox_to_anchor=(1.02, 1))
plt.title('Gráfico da Questão 6, item c\nFeito por Luiz Mario. Fonte: Autor', loc='center')
Every time I run the code, the graph appears without the lines. Could someone please help me?
Thank you for the attention :)
A few things. You define x as np.linspace(2000,32000), so use a different variable in your for loop (such as i); otherwise the loop overwrites x. Then create empty lists for your pmgc and pmec values and append to them inside the loop. Lastly, don't loop with for x in range(2000,32000): the y lists must have the same length as x, so iterate over the same values, i.e. for i in np.linspace(2000, 32000):. Since x already holds np.linspace(2000, 32000), you can simply write for i in x:. Put it all together, and you get your lines:
# Enable automatic tight layout *before* the figure is created: the setting is
# read when a figure is constructed, so changing it afterwards does not affect
# an existing figure (and the legend placed outside the axes could be cut off).
plt.rcParams["figure.autolayout"] = True
fig, ax = plt.subplots(figsize=(8, 4))

# Define the x axis values:
x = np.linspace(2000, 32000)


# Creating the functions that we will plot
def pmgc(x):
    """Return the constant 0.853; the argument is ignored."""
    return 0.853


def pmec(x):
    """Return -124.84 / x plus the constant from pmgc."""
    return (-124.84 / x) + pmgc(x)


# Evaluate both functions at every x value so each y list has the same length
# as x. The loop variable is i so that the x array is not overwritten.
pmgc_list = []
pmec_list = []
for i in x:
    pmgc_list.append(pmgc(i))
    pmec_list.append(pmec(i))

# Plotting
ax.plot(x, pmgc_list, color='blue', linewidth=3)
ax.plot(x, pmec_list, color='red', linewidth=3)
ax.set_xlabel("Renda")
# Legend anchored just outside the right edge of the axes.
plt.legend(
    labels=['Propensão Marginal a Cosumir', 'Propensão Média a Cosumir'],
    loc='upper left',
    borderaxespad=0,
    bbox_to_anchor=(1.02, 1),
)
plt.title('Gráfico da Questão 6, item c\nFeito por Luiz Mario. Fonte: Autor', loc='center')
Output:
You can build lists holding the x and y values like this:
# Read when a figure is constructed, so it must be set before plt.subplots().
plt.rcParams["figure.autolayout"] = True
# The plotting calls below need an Axes, so create the figure and axes first.
fig, ax = plt.subplots(figsize=(8, 4))

# Creating the functions that we will plot
# create three empty lists: the x values and one list of y values per curve
x_list = []
y_list1 = []
y_list2 = []


def pmgc(x):
    """Return the constant 0.853; the argument is ignored."""
    return 0.853


def pmec(x):
    """Return -124.84 / x plus the constant from pmgc."""
    return (-124.84 / x) + pmgc(x)


# Define the x axis values (every integer from 2000 to 31999) and
# fill in the lists
for value in range(2000, 32000):
    x_list.append(value)
    y_list1.append(pmgc(value))
    y_list2.append(pmec(value))

# Plotting
# add x_list and the matching y_list to each plot call
ax.plot(x_list, y_list1, color='blue', linewidth=3)
ax.plot(x_list, y_list2, color='red', linewidth=3)
ax.set_xlabel("Renda")
plt.legend(
    labels=['Propensão Marginal a Cosumir', 'Propensão Média a Cosumir'],
    loc='upper left',
    borderaxespad=0,
    bbox_to_anchor=(1.02, 1),
)
plt.title('Gráfico da Questão 6, item c\nFeito por Luiz Mario. Fonte: Autor', loc='center')
plt.show()
This might not be the best way to do it, but it will work.
Related
I am trying to create multiple bar charts automatically in a loop using a subplot.
I have created a function to create the parameters for the plot according to how many plots I need like so:
def create_parameters(parameters):
    """Return a do-nothing function whose parameter list is ``parameters``.

    Args:
        parameters: iterable of strings; they are joined with ", " and used
            verbatim as the parameter list of the generated function.

    Returns:
        The generated function, named ``f_create_parameters``. Its body is
        ``pass``, so calling it returns None.

    Raises:
        SyntaxError: if the joined strings do not form a valid parameter list.
    """
    source = "def f_create_parameters({}): pass".format(', '.join(parameters))
    # Execute into an explicit namespace instead of reading the result back
    # from locals(): since Python 3.13 (PEP 667) names created by exec() inside
    # a function no longer appear in a later locals() call.
    namespace = {}
    # NOTE: exec() runs the text it is given; only pass trusted parameter names.
    exec(source, globals(), namespace)
    return namespace['f_create_parameters']
and the code that uses the function:
# Build one placeholder function and two axis names ("ax<odd>", "ax<even>")
# per iteration; there are two iterations for every entry of listOfCategoriesEN.
parList = []
names = []
odd, even = 1, 2
for i in range(0, len(listOfCategoriesEN) * 2):
    odd_name = "ax" + str(odd)
    even_name = "ax" + str(even)
    parList.append(create_parameters([odd_name, even_name]))
    names.extend((odd_name, even_name))
    # Move on to the next odd/even pair.
    odd += 2
    even += 2
Then this is the code where I am trying to create a single figure with multiple plots. All the plots end up overlaid on the last bar graph. Any idea how to fix it?
# Index into names; incremented as categories are processed.
val = 0
# NOTE(review): this rebinds parList to the Axes returned by plt.subplots,
# replacing the list of placeholder functions built earlier.
fig2, (parList) = plt.subplots(len(listOfCategoriesEN)*2,2,figsize=(20,20))
for name,dict_ in categoriesDict.items():
# Build a frame from this category's dict and order it by pageviews, highest first.
df = pd.DataFrame.from_dict(dict_, orient='index', columns=["Title", "Pageviews"])
df = df.sort_values(by=['Pageviews'], ascending=False)
df[ "Pageviews"] = df[ "Pageviews"].astype(int)
#get top 5
df1 = df.head(5)
# Re-sort ascending (presumably so the largest bar ends up at the top of the horizontal chart).
df1 = df1.sort_values(by=['Pageviews'], ascending=True)
# Replace the typographic apostrophe with a plain ASCII one.
df1['Title'] = df1['Title'].str.replace('’','\'')
if(not df1.empty):
x = df1['Title']
y = df1['Pageviews']
# NOTE(review): looks up a variable called "ax<N>" in locals() instead of using
# the Axes held in parList - confirm that such variables actually exist.
locals()[names[val]].barh(x, y, color=colours)
#locals()[names[val]].set_title(name+" TOP 5 PAGES")
val+=1
plt.show()
parList holds all of your subplots: plt.subplots returns them as a 2D array of Axes, indexed as [row, column].
By calling plt.sca(ax) (sca = "set current Axes") you make ax the active Axes, so that subsequent pyplot plotting calls draw on it:
val = 0
for name, dict_ in categoriesDict.items():
    # do your data stuff (build df1 for this category)
    if not df1.empty:
        x = df1['Title']
        y = df1['Pageviews']
        # Pick this category's subplot: row val // 2, column val % 2.
        ax = parList[val // 2, val % 2]  # needs to be changed if you rearrange your plots
        plt.sca(ax)
        # Draw on the Axes that was just made current, rather than looking up
        # an "ax<N>" variable through locals().
        plt.barh(x, y, color=colours)
        # plt.title(name + " TOP 5 PAGES")
        # NOTE(review): assumed to advance only when a chart was drawn, so that
        # empty categories do not leave gaps in the grid - confirm.
        val += 1
Hi, I'm new to Python and would like to label the footballers on my scatterplot with their names if their Goals or npxG are greater than the average values I have calculated.
I wondered whether I could use a for/while loop to go through the data and plot the relevant players names?
I've struggled to figure out the most efficient way for this to be done.
Please see the scatter plot and code below for additional context. Any help would be greatly appreciated, Thanks.
df = pd.read_csv('C:/Users/alexo/Documents/Data/football data/shooting_top5_leagues_21_22.csv', encoding='ISO-8859-1')
# Keep rows whose Pos is 'FW' and whose '90s' column is at least 15.
striker_df = df.loc[(df['Pos'] == 'FW') & (df['90s'] >= 15)]
sns.set_style('darkgrid')
sns.set(rc={'figure.figsize': (15, 8)})
# x and y are passed by keyword: current seaborn releases no longer accept
# the data vectors as positional arguments.
graph = sns.scatterplot(
    x=striker_df.Gls,
    y=striker_df.npxG_p90,
    hue=striker_df.League,
    size=striker_df.npxG_pSh,
    edgecolor='black',
)
# averageline x axis
graph.axvline(9.751677852348994, c='grey', ls='--')
# average line yaxis
graph.axhline(0.34438111920973147, c='grey', ls='--')
#adding label names for specific players
#title
plt.title('Best Strikers across Europes Top 5 leagues 21/22', size=17, c='black')
# add credits
Notes = 'By Alex Orlandini'
CREDIT_1 = "data: statsbomb via fbref"
# The trailing ';' keeps a notebook from echoing the returned Text object.
graph.text(
    36, 0.1, f"{Notes}\n{CREDIT_1}", size=10,
    color="#000000",
    ha="right");
enter image description here
Yes, you can loop through specific players and add the arrow and text.
Just a matter of getting the x, y coordinate of the data point, then deciding where to place the label. I had to pull my own data since you didn't share yours.
I would also avoid hard coding that average. I'd have that as a calculated variable.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#df = pd.read_csv('C:/Users/alexo/Documents/Data/football data/shooting_top5_leagues_21_22.csv',encoding = 'ISO-8859-1')
df = pd.read_html('https://fbref.com/en/comps/Big5/shooting/players/Big-5-European-Leagues-Stats', header=1)[0]
# Drop rows whose 'Rk' cell is the literal text 'Rk' (presumably header rows
# repeated inside the table). copy() makes df an independent frame so the
# column assignments below do not write into a filtered view.
df = df[df['Rk'].ne('Rk')].copy()
for column in ('npxG', '90s', 'npxG/Sh'):
    df[column] = df[column].astype(float)
df['Gls'] = df['Gls'].astype(int)
df['npxG_p90'] = df['npxG'] / df['90s']
# Everything after the first space of 'Comp'. n must be given by keyword:
# pandas 2 made every str.split argument except the pattern keyword-only.
df['League'] = df['Comp'].str.split(' ', n=1, expand=True)[1]
df = df.rename(columns={'npxG/Sh': 'npxG_pSh'})

# Keep rows whose Pos is 'FW' and whose '90s' column is at least 15.
striker_df = df.loc[(df['Pos'] == 'FW') & (df['90s'] >= 15)].copy()

sns.set_style('darkgrid')
sns.set(rc={'figure.figsize': (15, 8)})

x_axis_column = 'Gls'
y_axis_column = 'npxG_p90'

graph = sns.scatterplot(x=striker_df[x_axis_column],
                        y=striker_df[y_axis_column],
                        hue=striker_df.League,
                        size=striker_df.npxG_pSh,
                        edgecolor='black')

# averageline x axis
avgX = striker_df[x_axis_column].mean()
graph.axvline(avgX, c='grey', ls='--')
# average line yaxis
avgY = striker_df[y_axis_column].mean()
graph.axhline(avgY, c='grey', ls='--')

# Label offsets: 10 % of the data range along each axis.
xOffset = (striker_df[x_axis_column].max() - striker_df[x_axis_column].min()) * .10
yOffset = (striker_df[y_axis_column].max() - striker_df[y_axis_column].min()) * .10

#adding label names for specific players
for player in ['Robert Lewandowski', 'Kylian Mbappé', 'Patrik Schick', 'Arnaut Groeneveld']:
    matches = striker_df[striker_df['Player'] == player]
    if matches.empty:
        # The player is not in the filtered data; there is no point to label.
        continue
    # Label coordinate, Custom arrow
    x = matches.iloc[0][x_axis_column]
    y = matches.iloc[0][y_axis_column]
    plt.annotate(player, xy=(x, y), xytext=(x + xOffset, y + yOffset),
                 horizontalalignment="center",
                 arrowprops=dict(arrowstyle='->', lw=2, color='black')
                 )

#title
plt.title('Best Strikers across Europes Top 5 leagues 21/22', size=17, c='black')
# add credits
Notes = 'By Alex Orlandini'
CREDIT_1 = "data: statsbomb via fbref"
# The trailing ';' keeps a notebook from echoing the returned Text object.
graph.text(
    36, 0.1, f"{Notes}\n{CREDIT_1}", size=10,
    color="#000000",
    ha="right");
Output:
Or you can iterate through a dataframe:
#adding label names for specific players
# Rank players by the sum of the two plotted metrics. assign() returns a new
# frame, so no column is written into what may be a filtered view of df.
striker_df = striker_df.assign(calc=striker_df[x_axis_column] + striker_df[y_axis_column])
striker_df = striker_df.sort_values('calc', ascending=False)
top_players = striker_df.head(8)
for _, row in top_players.iterrows():
    # Label coordinate, Custom arrow
    player = row['Player']
    x = row[x_axis_column]
    y = row[y_axis_column]
    # The label sits to the right of the point, at the same height.
    plt.annotate(player, xy=(x, y), xytext=(x + xOffset, y),
                 horizontalalignment="center",
                 arrowprops=dict(arrowstyle='->', lw=2, color='black')
                 )
To get something like this:
I've got a script wherein I have two functions, makeplots() which makes a figure of blank subplots arranged in a particular way (depending on the number of subplots to be drawn), and drawplots() which is called later, drawing the plots (obviously). The functions are posted below.
The script does some analysis of data for a given number of 'targets' (which can number anywhere from one to nine) and creates plots of the linear regression for each target. When there are multiple targets, this works great. But when there's a single target (i.e. a single 'subplot' in the figure), the Y-axis label overlaps the axis itself (this does not happen when there are multiple targets).
Ideally, each subplot would be square, no labels would overlap, and it would work the same for one target as for multiple targets. But when I tried to decrease the size of the y-axis label and shift it over a bit, it appears that the actual axes object was drawn over the previously blank, square plot (whose axes ranged from 0 to 1), and the old tick mark labels are still visible. I'd like to have those old tick marks removed when calling drawplots(). I've tried changing the subplot_kw={} arguments in makeplots, as well as removing ax.set_aspect('auto') from drawplots, both to no avail. Screenshots of the various behaviors are included at the end.
# Create a figure of blank, titled, gridded subplots for the given targets and
# return (fig, axs), with axs flattened to one dimension.
# NOTE(review): the default for active is taken from the outer name actwindow
# once, when this def statement runs; active is only used in the error message.
def makeplots(targets, active=actwindow):
# Rows needed for y subplots at three per row: ceil(y / 3).
def rowcnt(y):
rownumb = y//3 if (y%3 == 0) else y//3+1
return rownumb
# Columns for x subplots: x itself up to 3, 2 for exactly 4, otherwise 3.
def colcnt(x):
if x <= 3: colnumb = x
elif x == 4: colnumb = 2
else: colnumb = 3
return colnumb
numsubs = len(targets)
numrow, numcol = rowcnt(numsubs), colcnt(numsubs)
if numsubs >= 1:
if numsubs == 1:
fig, axs = plt.subplots(num='LOD-95 Plots', nrows=1, ncols=1, figsize = [8,6], subplot_kw={'adjustable': 'box', 'aspect': 1})
# changed 'box' to 'datalim'
# NOTE(review): unless this call sits under an else (the indentation is not
# visible here), it also runs when numsubs == 1 and reuses num='LOD-95 Plots';
# a second set of axes on the same figure would leave the first, blank 0-1
# axes visible underneath. Confirm.
fig, axs = plt.subplots(num='LOD-95 Plots', nrows=numrow, ncols=numcol, figsize = [numcol*6,numrow*6], subplot_kw={'adjustable': 'box', 'aspect': 1})
# Shared y-axis label, placed at the left edge of the figure and rotated vertically.
fig.text(0.02, 0.5, 'Probit score\n $(\sigma + 5)$', va='center', rotation='vertical', size='16')
else:
# Reached when targets is empty.
raise ValueError(f'Error generating plots [call: makeplots({targets},{active}) - invalid numsubs value]')
# Flatten so that callers can index axs with a single integer.
axs = np.ravel(axs)
for i, ax in enumerate(axs):
# NOTE(review): targets[i] raises IndexError when the grid has more axes than
# targets (e.g. 5 targets give a 2x3 grid) - confirm.
ax.set_title(f'Limit of Detection: {targets[i]}', size=11)
ax.grid()
return fig, axs
and
def drawplots(ax, dftables, color1, color2):
    """Fit a probit regression for one target and draw it on ``ax``.

    Two linear regressions of ``dftables.probit`` are computed with
    scipy.stats.linregress, one against ``dftables.qty`` and one against
    ``dftables.log_qty``. The one with the larger r**2 is drawn; a tie goes
    to the untransformed quantity.

    Args:
        ax: Axes to draw on.
        dftables: table exposing ``qty``, ``log_qty`` and ``probit`` as
            attributes.
        color1: colour of the regression line and the data points.
        color2: colour of the LOD marker and its crosshair lines.

    Returns:
        Tuple ``(r2, lod, regeqn, logreg)``: r**2 of the chosen regression,
        the x value at which the fitted line reaches ``y95`` (converted with
        ``10 ** lod`` when the log regression was chosen), the formatted
        regression equation, and whether the log regression was chosen.

    Raises:
        ValueError: if the two r**2 values cannot be ordered (for example
            when one of them is NaN).
    """
    y = dftables.probit
    # Probit level the LOD is solved for.
    # NOTE(review): numerically the 95 % normal quantile (about 1.645) plus 5 - confirm.
    y95 = 6.6448536269514722

    regq = scipy.stats.linregress(dftables.qty, y)
    regl = scipy.stats.linregress(dftables.log_qty, y)
    r2_qty, r2_log = regq.rvalue**2, regl.rvalue**2
    if r2_qty >= r2_log:
        regression, logreg = regq, False
        x_label = 'input quantity'
        x = dftables.qty
    elif r2_qty < r2_log:
        regression, logreg = regl, True
        x_label = '$log_{10}$(input quantity)'
        x = dftables.log_qty
    else:
        # Neither comparison holds when an r value is NaN; fail clearly here
        # instead of with an UnboundLocalError further down.
        raise ValueError('Error raised calling drawplots(): regression r values are not comparable')

    slope, intercept = regression.slope, regression.intercept
    r2 = regression.rvalue**2
    # x at which the fitted line y = slope * x + intercept reaches y95.
    lod = (y95 - intercept) / slope
    # The regression line is drawn from x = 0 to 20 % beyond the LOD.
    xr = [0, lod * 1.2]
    yr = [intercept, (slope * xr[1]) + intercept]
    regeqn = f"y = {slope:.2e}x + {intercept:.3f}"
    # Fixed-point formatting up to 100, scientific notation above that.
    lodname = 'log(LOD)' if logreg else 'LOD'
    lodstr = f'{lodname} = {lod:.2f}' if lod <= 100 else f'{lodname} = {lod:.2e}'

    ax.set_xlabel(x_label, fontweight='bold')
    ax.plot(xr, yr, color=color1, linestyle='--')  # plot regression line
    ax.plot(lod, y95, marker='D', color=color2, markersize=7)  # plot point for LoD
    ax.plot(xr, [y95, y95], color=color2, linestyle=':')  # horizontal crosshair
    ax.plot([lod, lod], [0, 7.1], color=color2, linestyle=':')  # vertical crosshair
    ax.scatter(x, y, s=81, color=color1, marker='.')  # actual data points
    ax.annotate(lodstr, xy=(lod, 0.1),
                xytext=(0.9 * lod, 0.5), fontsize=8,
                arrowprops=dict(facecolor='black', headlength=5, width=2, headwidth=5))
    ax.set_aspect('auto')
    ax.set_xlim(left=0)
    ax.set_ylim(bottom=0)

    # Report the LOD in original units when the fit was done on log10(quantity).
    if logreg:
        lod = 10 ** lod
    return r2, lod, regeqn, logreg
The context they're called in:
# Create the blank subplots that drawplots will later draw on.
fig, axs = makeplots(targets)
wg.SetForegroundWindow(actwindow)
# Append a dated header line to the output file.
with open(outName, 'a+') as f:
print(f"Lower Limit of Detection Analysis on {dt} at {tm}\n", file=f)
for i, tars in enumerate(targets):
data[tars] = stripThousands(data[tars])
# logans = checkyn(f"Analyze {tars} using log10(concentration/quantity)? (y/n): ")
for idx, val in enumerate(qtys):
# Column 2 receives the result of hitrate(); column 3 is norm.ppf of that value plus 5.
tables[i,idx,2] = hitrate(val,data,tars)
tables[i,idx,3] = norm.ppf(tables[i,idx,2])+5
printtables[tars] = pd.DataFrame(tables[i,:,:], columns=["qty","log_qty","probability","probit"])
# construct dataframes from np.arrays and drop
# rows with infinite probit values:
dftables[tars] = pd.DataFrame(tables[i,:,:], columns=["qty","log_qty","probability","probit"])
# NOTE(review): replace(..., inplace=True) is called on the column returned by
# attribute access; whether that updates dftables[tars] depends on the pandas
# version (chained in-place modification) - confirm.
dftables[tars].probit.replace([np.inf,-np.inf],np.nan, inplace=True)
dftables[tars].dropna(inplace=True)
# NOTE(review): cbcolors[i+5] assumes cbcolors has at least len(targets) + 5 entries - confirm.
r2, lod, eqn, logreg = drawplots(axs[i], dftables[tars], cbcolors[i], cbcolors[i+5])
You should clear the axes in each iteration using pyplot.cla().
You posted a lot of code, so I'm not 100% sure of the best location to place it in your code, but the general idea is to clear the axes before each new plot.
Here is a minimal demo without cla():
# Two data series drawn one after the other on the same axes.
x = [[1, 2, 3], [3, 2, 1]]
fig, ax = plt.subplots()
# The index from enumerate() was never used, so iterate over the series directly.
for data in x:
    ax.plot(data)
And with cla():
# The index from enumerate() was never used, so iterate over the series directly.
for data in x:
    # Clear whatever the previous iteration drew before plotting again.
    ax.cla()
    ax.plot(data)
I want to change the colour of the boxplots according to what they represent. They are grouped in pairs, so my question is:
How can i change the colour of the boxplots when they are paired?
Considering that the first boxplot of each pair should be blue and the second one red.
This is the code, sorry if it's messy:
def obtenerBoxplotsAnuales(self, directorioEntrada, directorioSalida):
    """Draw paired monthly boxplots and save one PostScript file per year.

    For every year, and for each of the twelve months, two data triples are
    fetched with getSomeData() and drawn as a pair of adjacent boxplots. The
    figure is then written to ``<directorioSalida>/asdasd<year>.ps``.

    Args:
        directorioEntrada: not used by this method.
        directorioSalida: directory the .ps files are written to.
    """
    meses = ["Enero", "Febrero", "Marzo", "Abril", "Mayo", "Junio", "Julio",
             "Agosto", "Septie.", "Octubre", "Noviem.", "Diciem."]
    anios = ["2011", "2012", "2013"]

    def _as_ints(triple):
        # Reorder a triple as (third, first, second) and truncate each value to int.
        return [int(float(triple[2])), int(float(triple[0])), int(float(triple[1]))]

    fig = plt.figure()
    fig.set_size_inches(14.3, 9)
    ax = plt.axes()
    # No plt.hold(True): that function was removed from matplotlib, and new
    # artists are always added to the current axes.

    # NOTE(review): the figure is created once, outside the year loop, so the
    # boxes drawn for earlier years stay on the axes - confirm this is intended.
    for anio in anios:
        ticks = []
        for mes in range(len(meses)):
            left = 2 * mes
            data = [_as_ints(getSomeData()), _as_ints(getSomeData())]
            plt.boxplot(data, positions=[left, left + 1], widths=0.5)
            # One tick per month, centred between the two boxes of the pair.
            ticks.append(left + 0.5)

        # Invisible proxy lines: they only give the legend a blue and a red handle.
        hB, = plt.plot([1, 1], 'b-')
        hR, = plt.plot([1, 1], 'r-')
        plt.legend((hB, hR), ('Caleta', 'Comodoro'))
        hB.set_visible(False)
        hR.set_visible(False)

        # Tick positions go first: labels are matched to the current tick
        # locations, and boxplot() has put one tick under every box.
        ax.set_xticks(ticks)
        ax.set_xticklabels(meses)
        plt.savefig(directorioSalida+"/asdasd"+str(anio)+".ps", orientation='landscape', papertype='A4')
This is what i get:
I've read that the solution is related to the fact that plt.boxplot(...) returns a dict-like object containing lists of the lines it created, so the way to modify the colour of each boxplot would be to access those lists by index. How would that work in this case?
You can set the colours through the dict returned by boxplot, as follows:
import matplotlib.pyplot as plt
import numpy as np

nboxes = 10

# fake up some data: one column per box
spread = np.random.rand(50, nboxes) * 100
center = np.ones((25, nboxes)) * 50
flier_high = np.random.rand(10, nboxes) * 100 + 100
flier_low = np.random.rand(10, nboxes) * -100
data = np.concatenate((spread, center, flier_high, flier_low), 0)

# plot figure
plt.figure()
bp = plt.boxplot(data)
for i, box in enumerate(bp['boxes']):
    # Boxes come in pairs: the first of each pair (even index) is blue and
    # the second one (odd index) is red.
    box.set_color('blue' if i % 2 == 0 else 'red')
plt.show()
Here you loop through all the boxes in bp['boxes'] and call set_color on each one (you can also call box.set_markerfacecolor and the other standard matplotlib artist setters). The bp dict has the keys ['boxes', 'fliers', 'medians', 'means', 'whiskers', 'caps'], whose artists can be changed in the same way as required.
I know pandas supports a secondary Y axis, but I'm curious if anyone knows a way to put a tertiary Y axis on plots. Currently I am achieving this with numpy+pyplot, but it is slow with large data sets.
This is to plot different measurements with distinct units on the same graph for easy comparison (eg: Relative Humidity/Temperature/ and Electrical Conductivity).
So really just curious if anyone knows if this is possible in pandas without too much work.
[Edit] I doubt that there is a way to do this(without too much overhead) however I hope to be proven wrong, as this may be a limitation of matplotlib.
I think this might work:
import matplotlib.pyplot as plt
import numpy as np
from pandas import DataFrame

df = DataFrame(np.random.randn(5, 3), columns=['A', 'B', 'C'])

fig, ax = plt.subplots()

# Third y axis: a twin of ax whose right spine is pushed outside the plot area.
ax3 = ax.twinx()
ax3.spines['right'].set_position(('axes', 1.15))
ax3.set_frame_on(True)
ax3.patch.set_visible(False)
# Leave room on the right for the extra spine and the legend.
fig.subplots_adjust(right=0.7)

df['A'].plot(ax=ax, style='b-')
# B is also passed ax: pandas creates the secondary (right-hand) axis itself
df['B'].plot(ax=ax, style='r-', secondary_y=True)
df['C'].plot(ax=ax3, style='g-')

# One legend for all three lines; pandas exposes the secondary axis as
# ax.right_ax, which gives access to the line drawn for B.
legend_handles = [
    ax.get_lines()[0],
    ax.right_ax.get_lines()[0],
    ax3.get_lines()[0],
]
ax3.legend(legend_handles, ['A', 'B', 'C'], bbox_to_anchor=(1.5, 0.5))
Output:
A simpler solution without plt:
# First frame on the base axes; the twins below share its x axis.
ax1 = df1.plot()

# Second y axis, with its right spine at the right edge of the plot area.
ax2 = ax1.twinx()
second_spine = ax2.spines['right']
second_spine.set_position(('axes', 1.0))
df2.plot(ax=ax2)

# Third y axis, with its right spine shifted outwards by 10 % of the axes width.
ax3 = ax1.twinx()
third_spine = ax3.spines['right']
third_spine.set_position(('axes', 1.1))
df3.plot(ax=ax3)
....
Using function to achieve this:
def plot_multi(data, cols=None, spacing=.1, **kwargs):
    """Plot several columns of ``data`` against its index, one y axis per column.

    Args:
        data: DataFrame holding the columns to plot.
        cols: columns to plot; all columns of ``data`` when None.
        spacing: distance between successive right-hand spines, in axes
            coordinates.
        **kwargs: passed on to every ``plot`` call.

    Returns:
        The Axes of the first column, or None when there are no columns.
    """
    from pandas.plotting._matplotlib.style import get_standard_colors

    if cols is None:
        cols = data.columns
    if len(cols) == 0:
        return

    # Default pandas colour list - can be replaced by any other list of colours.
    colors = get_standard_colors(num_colors=len(cols))

    # The first column is drawn on the primary axes.
    first = cols[0]
    ax = data.loc[:, first].plot(label=first, color=colors[0], **kwargs)
    ax.set_ylabel(ylabel=first)
    lines, labels = ax.get_legend_handles_labels()

    # Every further column gets a twin y axis, each spine further to the right.
    for n, col in enumerate(cols[1:], start=1):
        ax_new = ax.twinx()
        spine_position = 1 + spacing * (n - 1)
        ax_new.spines['right'].set_position(('axes', spine_position))
        data.loc[:, col].plot(ax=ax_new, label=col, color=colors[n % len(colors)], **kwargs)
        ax_new.set_ylabel(ylabel=col)

        # Collect the handles so that one legend can cover all the axes.
        line, label = ax_new.get_legend_handles_labels()
        lines.extend(line)
        labels.extend(label)

    ax.legend(lines, labels, loc=0)
    return ax
Example:
from random import randrange

# Three columns of 100 random integers drawn from [-bound, bound), with
# bounds of 1000, 100 and 10, so the series differ by orders of magnitude.
columns = {}
for name, bound in (("s1", 1000), ("s2", 100), ("s3", 10)):
    columns[name] = [randrange(-bound, bound) for _ in range(100)]
data = pd.DataFrame(columns)

# Plot the running totals, one y axis per column.
plot_multi(data.cumsum(), figsize=(10, 5))
Output:
I modified the above answer a bit so that it accepts a custom x column, is well documented, and is more flexible.
You can copy this snippet and use it as a function:
from typing import List, Union
import matplotlib.axes
import pandas as pd
def plot_multi(
    data: pd.DataFrame,
    x: Union[str, None] = None,
    y: Union[List[str], None] = None,
    spacing: float = 0.1,
    **kwargs
) -> Union[matplotlib.axes.Axes, None]:
    """Plot multiple Y axes on the same chart with same x axis.

    Args:
        data: dataframe which contains x and y columns
        x: column to use as x axis. If None, use index.
        y: list of columns to use as Y axes. If None, all columns are used
            except x column.
        spacing: spacing between the plots
        **kwargs: keyword arguments to pass to data.plot()

    Returns:
        a matplotlib.axes.Axes object returned from data.plot(), or None when
        there is no y column left to plot.

    Example:
    >>> plot_multi(df, figsize=(22, 10))
    >>> plot_multi(df, x='time', figsize=(22, 10))
    >>> plot_multi(df, y='price qty value'.split(), figsize=(22, 10))
    >>> plot_multi(df, x='time', y='price qty value'.split(), figsize=(22, 10))
    >>> plot_multi(df['time price qty'.split()], x='time', figsize=(22, 10))

    See Also:
        This code is mentioned in https://stackoverflow.com/q/11640243/2593810
    """
    from pandas.plotting._matplotlib.style import get_standard_colors

    if y is None:
        y = list(data.columns)

    # remove x_col from y_cols; compared against None so that a falsy but
    # valid column label is still treated as the x column
    if x is not None:
        y = [col for col in y if col != x]

    if len(y) == 0:
        return None

    # Get default color style from pandas - can be changed to any other color list
    colors = get_standard_colors(num_colors=len(y))

    # prevent multiple legends unless the caller asked for them explicitly
    kwargs.setdefault("legend", False)

    # First axis
    ax = data.plot(x=x, y=y[0], color=colors[0], **kwargs)
    ax.set_ylabel(ylabel=y[0])
    lines, labels = ax.get_legend_handles_labels()

    for i in range(1, len(y)):
        # Multiple y-axes: each twin's right spine is shifted further outwards
        ax_new = ax.twinx()
        ax_new.spines["right"].set_position(("axes", 1 + spacing * (i - 1)))
        data.plot(
            ax=ax_new, x=x, y=y[i], color=colors[i % len(colors)], **kwargs
        )
        ax_new.set_ylabel(ylabel=y[i])

        # Proper legend position: collect the handles of every axis
        line, label = ax_new.get_legend_handles_labels()
        lines += line
        labels += label

    ax.legend(lines, labels, loc=0)
    return ax
Here's one way to use it:
# Plot the price, qty and value columns against the time column, one y axis per column.
plot_multi(df, x='time', y='price qty value'.split(), figsize=(22, 10))