Annotating scatter plot in groupby plot - python

I have a df like so:
ID Prcp NDVI Year
1 1.4 0.1 2000
1 2.3 0.4 2001
1 4.4 0.8 2002
1 0.4 0.1 2003
2 2.1 0.6 2000
2 1.2 0.4 2001
2 3.4 0.7 2002
2 2.8 0.5 2003
and I want to do a scatter plot of Prcp vs. NDVI for each unique ID. I then want to add a data label for Year for each particular point on the plot. I am trying to do it like this:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
df=pd.read_csv(r'H:\my_file.csv')
with PdfPages(r'H:\path_to_out\out.pdf') as pdf:
for i, group in df.groupby('ID'):
plot = group.plot(x=['Prcp'], y=['NDVI'], title='NDVI_' + str(i), kind='scatter').get_figure()
n=df.Year
b=df.Prcp
c=df.NDVI
for i, txt in enumerate(n):
plt.annotate(txt, (b[i],c[i]), fontsize=2.5)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
ax = plt.gca()
ax.set_xlabel('Precipitation')
ax.set_ylabel('Mean NDVI')
pdf.savefig(plot, bbox_inches='tight', pad_inches=0)
plt.close(plot)
but this doesn't work correctly.
To do this for just 1 plot, with a df like this:
ID Prcp NDVI Year
1 1.4 0.1 2000
1 2.3 0.4 2001
1 4.4 0.8 2002
1 0.4 0.1 2003
I would do it like this:
a=df.Prcp
b=df.NDVI
n=df.Year
with PdfPages(r'H:\graph.pdf') as pdf:
plt.title('NDVI')
plt.xlabel('Prcp')
plt.ylabel('NDVI')
plt.scatter(df.Prcp,df.NDVI, facecolors='none', s=20, edgecolors='b')
for i, txt in enumerate(n):
plt.annotate(txt, (a[i],b[i]), fontsize=2.5)
axes=plt.gca()
fig=plt.gcf()
pdf.savefig(fig)
plt.show()
EDIT:
I achieved it using this:
def label_point(Prcp, Mean, Year, ax):
    """Write each point's year next to it on ``ax``.

    Parameters
    ----------
    Prcp : iterable of numbers
        x coordinates of the points.
    Mean : iterable of numbers
        y coordinates of the points (the NDVI values).
    Year : iterable
        Label for each point; converted with ``str`` before drawing.
    ax : object with a ``text(x, y, s)`` method
        Target axes, e.g. a matplotlib ``Axes``.
    """
    # Walk the three columns in lockstep so every value keeps its own dtype.
    # Building a DataFrame and using iterrows() would upcast an integer year
    # to float and label the point "2000.0" instead of "2000".
    for x, y, year in zip(Prcp, Mean, Year):
        ax.text(x, y, str(year))
label_point(group.Prcp, group.NDVI, group.Year, ax)

I achieved it like this:
def label_point(Prcp, NDVI, Year, ax):
    """Write each point's year next to it on ``ax``.

    ``Prcp`` and ``NDVI`` give the x and y coordinates, ``Year`` the label
    for each point, and ``ax`` is the axes the text is drawn on.
    """
    # zip keeps each column's dtype, so an integer year is printed as "2000"
    # rather than the "2000.0" that DataFrame.iterrows() would produce.
    for x, y, year in zip(Prcp, NDVI, Year):
        ax.text(x, y, str(year))


df = pd.read_csv(r'E:\path.csv')
with PdfPages(r'E:\pth.pdf') as pdf:
    # One scatter plot (and one PDF page) per unique ID in the loaded frame.
    for i, group in df.groupby('ID'):
        fig, ax = plt.subplots()
        group.plot(x='Prcp', y='NDVI', title='NDVI_' + str(i),
                   kind='scatter', ax=ax)
        label_point(group.Prcp, group.NDVI, group.Year, ax)
        ax.set_xlabel('Precipitation')
        ax.set_ylabel('Mean NDVI')
        pdf.savefig(fig, bbox_inches='tight', pad_inches=0)
        # Close the figure so figures do not pile up across iterations.
        plt.close(fig)

Related

How to get rid of weird stray lines in scatterplot

I'm dealing with the well-known Gapminder data file (here:
https://www.kaggle.com/datasets/tklimonova/gapminder-datacamp-2007?select=gapminder_full.csv)
df.head():
country year population continent life_exp gdp_cap
0 Afghanistan 2007 31889923 Asia 43.828 974.580338
1 Albania 2007 3600523 Europe 76.423 5937.029526
2 Algeria 2007 33333216 Africa 72.301 6223.367465
3 Angola 2007 12420476 Africa 42.731 4797.231267
4 Argentina 2007 40301927 Americas 75.320 12779.379640
I tried a scatter plot but get confused by the many lines appearing on the plot:
plt.style.use('seaborn')
x = np.array(df['gdp_cap'])
y = np.array(df['life_exp'])
plt.scatter(x, y, marker = 'o', alpha = 1)
coeff = np.polyfit(x, y, 2)
plt.plot(x, coeff[0]*(x**2) + coeff[1]*x + coeff[2])
plt.show()
What am I doing wrong?
Your second plot is drawn on top of the first one. Add another plt.show() between them so that each plot is rendered in its own figure:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

plt.style.use('seaborn')
x = np.array(df['gdp_cap'])
y = np.array(df['life_exp'])

# First figure: the raw observations.
plt.scatter(x, y, marker='o', alpha=1)
plt.show()

# Second figure: the quadratic fit. plt.plot joins the points in the order
# they are given, so the polynomial is evaluated on sorted x values; with the
# rows in their original order the curve is drawn as segments that jump back
# and forth across the axes.
coeff = np.polyfit(x, y, 2)
x_sorted = np.sort(x)
plt.plot(x_sorted, coeff[0]*(x_sorted**2) + coeff[1]*x_sorted + coeff[2])
plt.show()
Output:

How can I plot a secondary y-axis with seaborn's barplot?

I'm trying to plot the data (see below), with company_name on the x-axis, status_mission_2_y on the y-axis and percentage on a secondary y-axis. I have tried using the twinx() function but I can't get it to work.
Please can you help? Thanks in advance!
def twinplot(data):
x_ = data.columns[0]
y_ = data.columns[1]
y_2 = data.columns[2]
data1 = data[[x_, y_]]
data2 = data[[x_, y_2]]
plt.figure(figsize=(15, 8))
ax = sns.barplot(x=x_, y=y_, data=data1)
ax2 = ax.twinx()
g2 = sns.barplot(x=x_, y=y_2, data=data2, ax=ax2)
plt.show()
data = ten_company_missions_failed
twinplot(data)
company_name
percentage
status_mission_2_y
EER
1
1
Ghot
1
1
Trv
1
1
Sandia
1
1
Test
1
1
US Navy
0.823529412
17
Zed
0.8
5
Gov
0.75
4
Knight
0.666666667
3
Had
0.666666667
3
Seaborn plots the two bar plots with the same color and on the same x-positions.
The following example code shrinks the bar widths, moving the bars that belong to ax to the left and the bars of ax2 to the right. To differentiate the right-hand bars, semi-transparency (alpha=0.7) and hatching are used.
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import pandas as pd
import seaborn as sns
from io import StringIO
data_str = '''company_name percentage status_mission_2_y
EER 1 1
Ghot 1 1
Trv 1 1
Sandia 1 1
Test 1 1
"US Navy" 0.823529412 17
Zed 0.8 5
Gov 0.75 4
Knight 0.666666667 3
Had 0.666666667 3'''
# sep=r'\s+' splits on any run of whitespace, exactly like the deprecated
# delim_whitespace=True; the quoted "US Navy" is still read as one field.
data = pd.read_csv(StringIO(data_str), sep=r'\s+')
# The first three columns are, in order: the category shown on the x axis,
# the value for the left y axis, and the value for the right y axis.
x_, y_, y_2 = data.columns[:3]
data1 = data[[x_, y_]]
data2 = data[[x_, y_2]]

# Fraction of the original bar width that each of the paired bars keeps.
width_scale = 0.45

plt.figure(figsize=(15, 8))

# Left axis: narrow every bar in place, so it occupies the left part of its
# slot, and format the tick labels as percentages of 1.
ax = sns.barplot(x=x_, y=y_, data=data1)
for left_bar in ax.containers[0]:
    left_bar.set_width(left_bar.get_width() * width_scale)
ax.yaxis.set_major_formatter(PercentFormatter(1))

# Right axis: same narrowing, but each bar is also shifted so that it ends
# where the full-width bar ended, i.e. it sits in the right part of the slot.
ax2 = ax.twinx()
sns.barplot(x=x_, y=y_2, data=data2, alpha=0.7, hatch='xx', ax=ax2)
for right_bar in ax2.containers[0]:
    full_width = right_bar.get_width()
    right_bar.set_x(right_bar.get_x() + full_width * (1 - width_scale))
    right_bar.set_width(full_width * width_scale)

plt.show()
A simpler alternative could be to combine a barplot on ax and a lineplot on ax2.
plt.figure(figsize=(15, 8))
ax = sns.barplot(x=x_, y=y_, data=data1)
ax.yaxis.set_major_formatter(PercentFormatter(1))
ax2 = ax.twinx()
sns.lineplot(x=x_, y=y_2, data=data2, marker='o', color='crimson', lw=3, ax=ax2)
plt.show()

How to make bar graph of 2 variables based on same DataFrame and I want to choose 2 or until 5 data

I have a DataFrame:
wilayah branch Income Januari 2018 Income Januari 2019 Income Febuari 2018 Income Febuari 2019 Income Jan-Feb 2018 Income Jan-Feb 2019
1 sunarto 1000 1500 2000 3000 3333 4431
1 pemabuk 500 700 3000 3000 4333 5431
1 pemalas 2000 2200 4000 3000 5333 6431
1 hasuntato 9000 1200 6000 3000 2222 2121
1 sibodoh 1000 1500 3434 3000 2233 2121
...
My goal is to create a bar graph where the x axis shows every name in branch (e.g. sunarto, pemabuk, pemalas, etc.) and the y axis shows income.
Let's say I will compare sunarto's income januari 2018 and income januari 2019, pemabuk's income januari 2018 and income januari 2019, and so on (1 name in x axis, 2 values as comparison of two values). Then I will sort values high to low value from Income Jan-Feb 2019 in my bar graph.
I tried:
import matplotlib.pyplot as plt
import pandas as pd
fig, ax = plt.subplots()
ax = df1[["Sunarto","Income Januari 2018", "Income Januari 2019"]].plot(x='branch', kind='bar', color=["g","b"],rot=45)
plt.show()
Consider a groupby aggregation then run DataFrame.plot. Below will line all branches on x-axis with different income columns as color_coded keys in legend.
agg_df = df.groupby('branch').sum()
fig, ax = plt.subplots(figsize=(15,5))
agg_df.plot(kind='bar', edgecolor='w', ax=ax, rot=22, width=0.5, fontsize = 15)
# ADD TITLES AND LABELS
plt.title('Income by Branches, Jan/Feb 2018-2019', weight='bold', size=24)
plt.xlabel('Branch', weight='bold', size=24)
plt.ylabel('Income', weight='bold', size=20)
plt.tight_layout()
plt.show()
plt.clf()
Should you instead want a separate plot for each branch, with its columns ordered by value, iterate over the groupby object:
dfs = df.groupby('branch')
for i,g in dfs:
ord_cols = (pd.melt(g.drop(columns="wilayah"), id_vars = "branch")
.sort_values("value")["variable"].values
)
fig, ax = plt.subplots(figsize=(8,4))
(g.reindex(columns=ord_cols)
.plot(kind='bar', edgecolor='w', ax=ax, rot=0, width=0.5, fontsize = 15)
)
# ADD TITLES AND LABELS
plt.title('Income by {} Branch, Jan/Feb 2018-2019'.format(i),
weight='bold', size=16)
plt.xlabel('Branch', weight='bold', size=16)
plt.ylabel('Income', weight='bold', size=14)
plt.tight_layout()
plt.show()

How create a simple animated graph with matplotlib from a dataframe

Can someone help me correct the code below so that this data is visualized as an animated matplotlib plot?
The datasets for the X and Y axes are described below.
X- Range
mydata.iloc[:,[4]].head(10)
Min_pred
0 1.699189
1 0.439975
2 2.989244
3 2.892075
4 2.221990
5 3.456261
6 2.909323
7 -0.474667
8 -1.629343
9 2.283976
Y - range
dataset_meteo.iloc[:,[2]].head(10)
Out[122]:
Min
0 0.0
1 -1.0
2 2.0
3 -2.0
4 -4.0
5 -4.0
6 -5.0
7 -7.0
8 -3.0
9 -1.0
I've tried the code below,
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
d = pd.read_excel("mydata.xls")
x = np.array(d.index)
y = np.array(d.iloc[:,[2]])
mydata = pd.DataFrame(y,x)
fig = plt.figure(figsize=(10,6))
plt.xlim(1999, 2016)
plt.ylim(np.min(x), np.max(x))
plt.xlabel('Year',fontsize=20)
plt.ylabel(title,fontsize=20)
plt.title('Meteo Paris',fontsize=20)
def animate(i):
data = mydata.iloc[:int(i+1)] #select data range
p = sns.lineplot(x=data.index, y=data[title], data=data, color="r")
p.tick_params(labelsize=17)
plt.setp(p.lines,linewidth=7)
ani = matplotlib.animation.FuncAnimation(fig, animate, frames=17, repeat=True)
The idea is to create a graph in which the predicted values (Y) are animated,
somewhat like the one at the link below.
https://www.courspython.com/animation-matplotlib.html
Thanks if you can help
Is this what you are trying to get?
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import animation

# Demo data: one random value per year from 1999 to 2016 inclusive.
x = np.arange(1999, 2017)
y = np.random.random(size=x.shape)

fig = plt.figure(figsize=(4, 3))
plt.xlim(1999, 2016)
plt.ylim(np.min(y), np.max(y))
plt.xlabel('Year', fontsize=20)
plt.ylabel('Y', fontsize=20)
plt.title('Meteo Paris', fontsize=20)
plt.tick_params(labelsize=17)

# Start with an empty line; each frame only swaps the data of this one artist
# rather than drawing a new line on top of the previous ones.
line, = plt.plot([], [], 'r-', lw=7)


def animate(i):
    """Show the first ``i + 1`` points of the series.

    ``i`` is the frame number passed in by FuncAnimation. Returns a 1-tuple
    holding the updated line artist.
    """
    line.set_data(x[:i + 1], y[:i + 1])
    return line,


# The animation module is imported explicitly above: the name ``matplotlib``
# itself is not bound by ``import matplotlib.pyplot as plt``.
# The result is assigned to ``ani`` so the animation object stays referenced.
ani = animation.FuncAnimation(fig, animate, frames=len(x), repeat=True)

python pandas bar plot another column text

max min mincount maxcount
0 12 10 1 6
1 21 14 1 6
2 34 19 1 6
3 6 20 1 4
4 8 22 1 4
5 41 23 1 4
This is a pandas DataFrame.
I want a plot like the one in this image:
enter image description here
The text labels are very important.
Here is my code:
df = pd.DataFrame({'maxcount': max_count, 'mincount': min_count, 'max': max, 'min': min})
ax = df[['maxcount', 'mincount']].plot(kind='bar')
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Build the example frame from a plain dict of columns.
d = {
    'max': [12, 21, 34, 6, 8, 41],
    'min': [10, 14, 19, 20, 22, 23],
    'mincount': [1, 1, 1, 1, 1, 1],
    'maxcount': [6, 6, 6, 4, 4, 4],
}
df = pd.DataFrame(d)

# Split it in two: ``counts`` is what gets plotted as bars, ``max_min``
# supplies the numbers written as text labels.
counts = df[['maxcount', 'mincount']]
max_min = df[['max', 'min']]
# Draw the counts as grouped bars.
ax = counts.plot(kind='bar', colormap='Paired', figsize=(12, 4))

# Text placement shared by every label.
label_style = dict(va='bottom', ha='center', fontsize=10)

# Each entry: (column position, x offset of the max/min label, x offset of
# the bracketed count label). Column 0 is labelled to the left of the group
# centre, column 1 to the right.
for col, value_dx, count_dx in ((0, -0.2, -0.1), (1, 0.1, 0.2)):
    for row in range(len(counts)):
        value = max_min.iloc[row, col]
        count = counts.iloc[row, col]
        # Both labels sit at the bar's height; the count goes in parentheses.
        ax.annotate('%.d' % value, (row + value_dx, count), **label_style)
        ax.annotate("("'%.d' % count+")", (row + count_dx, count), **label_style)
This is the output:

Categories