I'm scatter plotting values from pandas dataframe. I would like to annotate points only if the value is greater than 100. I have no idea how to go about it.
Here's the code I'm working with (it's terrible but I'm very new to this):
female_data = r'/home/jg/Desktop/hurricanedata_f.csv'
female_df = read_csv(female_data)
male_data = r'/home/jg/Desktop/hurricanedata_m.csv'
male_df = read_csv(male_data)
x = female_df['Year']
y = female_df['alldeaths']
z = female_df['Name']
y_mean = [np.mean(y) for i in x]
a = male_df['Year']
b = male_df['alldeaths']
b_mean = [np.mean(b) for i in b]
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.annotate('Agnes', xy=(1972,117))
ax1 = fig.add_subplot(1,1,1)
ax1.scatter(x,y, label = 'female', color = 'r')
ax2 = fig.add_subplot(1,1,1)
ax2.scatter(a,b, label = 'male')
ax3 = fig.add_subplot(1,1,1)
ax3.plot(x, y_mean, linestyle='--', color = 'r')
ax4 = fig.add_subplot(1,1,1)
ax4.plot(a, b_mean, linestyle='--', color = 'blue')
plt.title('Hurricanes')
plt.xlabel('Year')
plt.ylabel('Deaths')
plt.legend(loc='upper right')
plt.ylim([-5,300])
plt.xlim([1948,2020])
plt.show()
You can loop over all your data points and check if each is greater than 100. Then give those points an annotation.
import matplotlib.pyplot as plt
import numpy as np
import string
# Fake data
x = np.arange(10)
y = 10*np.random.rand(10) + 95
names = string.lowercase[:10] # first 10 lowercase letters
# Plot data
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.scatter(x,y)
# Annonate points with y values greater than 100
for xi, yi, iname in zip(x,y,names): # Loop over x and y values
if yi > 100: # Check if y is greater than 100
ax.annotate(iname, (xi, yi),size = 30) # Add an annoatation.
plt.show()
Related
I try to plot lines in a loop,but its connecting it,i tried many variants,but cand understand and find the answer,maybe the dataframe
im a newbie in Matplotlib
the code of method:
self.testnewnewventnest[Debit] - is a nested dict with the data i need for plotting
def showmainplot(self):
for Debit in self.Debitlist:
self.Presinit = self.VentTable.loc[Debit]
self.Tinit= float(self.Tinit)
self.Presinit=int(float(self.Presinit))
self.Powinit = float(self.Powinit)
x = symbols("x")
for Turatie in self.Tfin:
eqPres = (Turatie/self.Tinit)*(Turatie/self.Tinit)*self.Presinit-x
PresFin = solve(eqPres)
eqDebit = (Turatie/self.Tinit)*int(Debit)
DebitFin = solve(eqDebit)
eqPow = (Turatie/self.Tinit)*(Turatie/self.Tinit)*(Turatie/self.Tinit)*float(self.Powinit)
self.TestnewVentnest['KW'] = float(eqPow)
self.TestnewVentnest['Turatie'] = Turatie
self.TestnewVentnest['Presiune'] = float(PresFin[0])
self.TestnewVent[float(eqDebit)] = dict(self.TestnewVentnest)
self.testnewnewventnest[Debit] = dict(self.TestnewVent)
print(self.testnewnewventnest)
axeslist = []
n=0
fig, ax = plt.subplots(figsize=(5, 5))
ax1 = ax.twinx()
ax1.spines.right.set_position(("axes", 1.06))
ax.set_xlabel("Debit")
for dicts in self.testnewnewventnest:
Ventdataframe = pd.DataFrame(self.testnewnewventnest[dicts])
print(Ventdataframe)
ax2 = plt.subplot()
fig, ax = plt.subplots(figsize=(5, 5))
ax1 = ax.twinx()
ax1.spines.right.set_position(("axes", 1.06))
ax.set_xlabel("Debit")
axeslist.append(plt.subplot())
# print(df.iloc[0])
# ax1.set_ylabel("Turatie")
# ax.set_ylabel("Presiune")
# Ventdataframe.plot(Ventdataframe.loc["Presiune"], color="b",label="Presiune"+str(n),marker = 'o')
Ventdataframe.loc["Presiune"].plot(color="b",label="Presiune"+str(n),marker = 'o')
n+=1
# ax2 = ax.twinx()
# ax2.set_ylabel('KW')
# ax1.plot(Ventdataframe.loc["Turatie"],color='#000000',label="Turatie",marker = 'o')
# ax2.plot(Ventdataframe.loc["KW"], color='r',label="KW",marker = 'o')
# ax1.grid()
# ax2.yaxis.set_major_locator(FixedLocator(Ventdataframe.loc["KW"]))
# ax.yaxis.set_major_locator(FixedLocator(Ventdataframe.loc["Presiune"]))
# ax1.yaxis.set_major_locator(FixedLocator(self.Tfin))
# ax.xaxis.set_major_locator(FixedLocator(Ventdataframe.columns))
# lc = matpl.ticker.NullLocator()
# ax.yaxis.set_major_locator(lc)
plt.show()
and the self.testnewnewventnest look like:
Yes,the problem was in the loop,and in the dictionaries,in every iteration he added all previous dictionaries from iterations
I would put that in a dataframe and do it like this.
uniques = df['ID'].unique()
for i in uniques:
fig, ax = plt.subplots()
fig.set_size_inches(4,3)
df_single = df[df['ID']==i]
sns.lineplot(data=df_single, x='Month', y='Expense')
ax.set(xlabel='Time', ylabel='Total Expense')
plt.xticks(rotation=45)
plt.show()
I'm updating dynamically a plot in a loop:
dat=[0, max(X[:, 0])]
fig = plt.figure()
ax = fig.add_subplot(111)
Ln, = ax.plot(dat)
Ln2, = ax.plot(dat)
plt.ion()
plt.show()
for i in range(1, 40):
ax.set_xlim(int(len(X[:i])*0.8), len(X[:i])) #show last 20% data of X
Ln.set_ydata(X[:i])
Ln.set_xdata(range(len(X[:i])))
Ln2.set_ydata(Y[:i])
Ln2.set_xdata(range(len(Y[:i])))
plt.pause(0.1)
But now I want to update it in a different way: append some values and show them in other colour:
X.append(other_data)
# change colour just to other_data in X
The result should look something like this:
How could I do that?
Have a look at the link I posted. Linesegments can be used to plot colors at a particular location differently. If you want to do it in real-time you can still use line-segments. I leave that up to you.
# adjust from https://stackoverflow.com/questions/38051922/how-to-get-differents-colors-in-a-single-line-in-a-matplotlib-figure
import numpy as np, matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap, BoundaryNorm
# my func
x = np.linspace(-2 * np.pi, 2 * np.pi, 100)
y = 3000 * np.sin(x)
# select how to color
cmap = ListedColormap(['r','b'])
norm = BoundaryNorm([2000,], cmap.N)
# get segments
xy = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.hstack([xy[:-1], xy[1:]])
# control which values have which colors
n = y.shape[0]
c = np.array([plt.cm.RdBu(0) if i < n//2 else plt.cm.RdBu(255) for i in range(n)])
# c = plt.cm.Reds(np.arange(0, n))
# make line collection
lc = LineCollection(segments,
colors = c
# norm = norm,
)
# plot
fig, ax = plt.subplots()
ax.add_collection(lc)
ax.autoscale()
ax.axvline(x[n//2], linestyle = 'dashed')
ax.annotate("Half-point", (x[n//2], y[n//2]), xytext = (4, 1000),
arrowprops = dict(headwidth = 30))
fig.show()
Matplotlib axis with two scales shared origin
I have already tried to implement this the existing stackflow solution and my both x-axes are not aligning to 0.
My Code :
def align_xaxis(ax1, v1, ax2, v2):
"""adjust ax2 xlimit so that v2 in ax2 is aligned to v1 in ax1"""
x1, _ = ax1.transData.transform((v1, 0))
x2, _ = ax2.transData.transform((v2, 0))
inv = ax2.transData.inverted()
dx, _ = inv.transform((0, 0)) - inv.transform((x1-x2, 0))
minx, maxx = ax1.get_xlim()
ax2.set_xlim(minx+dx, maxx+dx)
def unrealized_profit_loss_graph(profit_loss):
plt.style.use('ggplot');
fig = plt.figure()
ax1 = fig.add_subplot(111);
ax2 = ax1.twiny()
profit_loss['total_G/l'].
plot(kind='barh',color=profit_loss.positive.map({True: 'g', False: 'r'}))
profit_loss['gain_loss_perc'].plot(kind='barh',color=profit_loss.positive.map({True: 'b', False: 'y'}))
ax1.set_xlabel('%', fontsize=12)
ax2.set_xlabel('$', fontsize=12);
align_xaxis(ax1,0,ax2,0)
plt.xlim(-5000, 20000)
plt.xticks(rotation=45);
plt.show();
I would like both x axes to align at 0.
Also to show the negative plus the positive of the ax1.
Working example :
def unrealized_profit_loss():
Profit_loss = Path("C:/Users/champ/Documents/Pers/Python/stock_dfs/Profit_loss_tranactions.xlsx")
df = pd.read_excel(Profit_loss, sheet_name='Unrealized')
current_prices_ROTH=price_data_ROTH.loc[price_data_ROTH.index[-1]] current_prices_Personal=price_data_Personal.loc[price_data_Personal.index[-1]]
df2 = pd.DataFrame({'Symbol':current_prices_ROTH.index, 'Prices':current_prices_ROTH.values})
df2 = pd.DataFrame({'Symbol':current_prices_Personal.index, 'Prices':current_prices_Personal.values})
da= pd.merge(df,df2, how='left',on=['Symbol','Symbol'])
da['gain_loss_perc']=round(((da['Prices']-da['Cost/share'])/da['Cost/share'])*100,2)
da['total_G/l']=round((da['Prices']*da['Quantity'])-(da['Cost Basis']),0)
da['Account_symbol'] = str(da['Account'])
da['Account_symbol'] = da.agg(lambda x: f"{x['Symbol']} - {x['Account']}", axis=1)
da = da.sort_values(by=['total_G/l'],ascending=True)
da.index = da['Account_symbol']
da['positive'] = da['total_G/l'] > 0
del da.index.name
return(da)
def unrealized_profit_loss_graph(profit_loss):
# graph the profit and loss
#fig, (ax1,ax2) = plt.subplots(1,2,sharex=False,sharey=True,figsize=(16,8));
#fig, ax1 = plt.subplots(1,1,figsize=(16,8));
plt.style.use('ggplot');
fig = plt.figure()
ax1 = fig.add_subplot(111);
ax1.set_title('Total G/L (UNREALIZED - IN THE MARKET)');
#ax1 = fig.add_subplot() # Create matplotlib axes
ax2 = ax1.twiny()
profit_loss['total_G/l'].plot(kind='barh',color=profit_loss.positive.map({True: 'g', False: 'r'}))
profit_loss['gain_loss_perc'].plot(kind='barh',color=profit_loss.positive.map({True: 'b', False: 'y'}))
ax1.set_xlabel('%', fontsize=12);
ax2.set_xlabel('$', fontsize=12);
plt.xlim(-5000, 20000);
plt.xticks(rotation=45);
align_xaxis(ax1,0,ax2,0);
plt.show();
# Profit and loss
profit_loss = unrealized_profit_loss()
p_l = unrealized_profit_loss_graph(profit_loss)
xls file I read from
You failed to provide a working example. Nevertheless, try the following: Pass the respective axis to the plot function and then try aligning
def unrealized_profit_loss_graph(profit_loss):
plt.style.use('ggplot')
fig = plt.figure()
ax1 = fig.add_subplot(111)
profit_loss['total_G/l'].plot(kind='barh',
color=profit_loss.positive.map({True: 'g', False: 'r'}),
ax=ax1)
ax2 = ax1.twiny()
profit_loss['gain_loss_perc'].plot(kind='barh',
color=profit_loss.positive.map({True: 'b', False: 'y'}),
ax=ax2)
ax1.set_xlabel('%', fontsize=12)
ax2.set_xlabel('$', fontsize=12)
plt.xlim(-5000, 20000)
plt.xticks(rotation=45)
align_xaxis(ax1,0,ax2,0)
plt.show();
I can correctly plot a trendline with price data but the both X Y axis of date formatting is blank. I am not sure what is messing up this plot configuration for the axis. Here is the Python 2.7 code:
y = df['Close']
# calc the trendline http://stackoverflow.com/questions/26447191/how-to-add-trendline-in-python-matplotlib-dot-scatter-graphs
l = []
for t in df['Time']:
datetime_object = datetime.datetime.strptime(str(t), '%H:%M')
print datetime_object.hour
print datetime_object.minute
l.append((3600 * datetime_object.hour + 60 * datetime_object.minute))
x = l
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
fig = plt.figure()
ax = fig.add_subplot(111)
#http://stackoverflow.com/questions/17709823/plotting-timestamps-hour-minute-seconds-with-matplotlib
plt.xticks(rotation=25)
ax = plt.gca()
ax.set_xticks(x)
xfmt = md.DateFormatter('%H:%M')
ax.xaxis.set_major_formatter(xfmt)
ax.plot(x, p(x), 'r--')
ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%3.4f')) #http://stackoverflow.com/questions/29188757/matplotlib-specify-format-of-floats-for-tick-lables
plt.show()
Also, df['Close'] would have value samples of:
114.684
114.679
df['Time'] would contains sample values:
23:20
23:21
Update: I found the source of your problem.
In addition to the below problem you incorrectly copied the answer to the linked question.
You wrote: ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%3.4f'))
You need: ax.yaxis.set_major_formatter(FormatStrFormatter('%3.4f'))
See updated graph:
https://imgur.com/a/RvO4z
In your code, you begin axis changes before you have actually plotted anything.
If you move your ax.plot(x, p(x), 'r--') to just below your add_subplot line this will work:
import numpy as np
from matplotlib import pyplot as plt
import datetime
import matplotlib
from matplotlib.ticker import FormatStrFormatter
df = pandas.DataFrame()
df['Time'] = pandas.Series(['23:2','22:1'])
df['Close'] = pandas.Series([114.,114.])
y = df['Close']
# calc the trendline http://stackoverflow.com/questions/26447191/how-to-add-trendline-in-python-matplotlib-dot-scatter-graphs
l = []
for t in df['Time']:
datetime_object = datetime.datetime.strptime(str(t), '%H:%M')
print datetime_object.hour
print datetime_object.minute
l.append((3600 * datetime_object.hour + 60 * datetime_object.minute))
x = l
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
fig = plt.figure()
ax = fig.add_subplot(111)
#Added:
ax.plot(x, p(x), 'r--')
#http://stackoverflow.com/questions/17709823/plotting-timestamps-hour- minute-seconds-with-matplotlib
plt.xticks(rotation=25)
ax = plt.gca()
ax.set_xticks(x)
xfmt = md.DateFormatter('%H:%M')
ax.xaxis.set_major_formatter(xfmt)
# REMOVED: ax.plot(x, p(x), 'r--')
# Changed: ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%3.4f'))
ax.yaxis.set_major_formatter(FormatStrFormatter('%3.4f'))
#http://stackoverflow.com/questions/29188757/matplotlib-specify-format-of- floats-for-tick-lables
plt.show()
I have a text file with two columns, x and y. I have plotted them using the below program in scipy as shown below.
import matplotlib.pyplot as plt
with open("data.txt") as f:
data = f.read()
data = data.split('\n')
x = [row.split(' ')[0] for row in data]
y = [row.split(' ')[1] for row in data]
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.set_title("Plot B vs H")
ax1.set_xlabel('B')
ax1.set_ylabel('H')
ax1.plot(x,y, c='r', label='the data')
leg = ax1.legend()
plt.show()
Now I would like to know how to interpolate several points between x=1 and x=5 with increment of around 0.1 on the same graph?
You can create a function using scipy.interp1d:
import numpy as np
from scipy import interpolate
data = np.genfromtxt('data.txt')
x = data[:,0] #first column
y = data[:,1] #second column
f = interpolate.interp1d(x, y)
xnew = np.arange(1, 5.1, 0.1) # this could be over the entire range, depending on what your data is
ynew = f(xnew) # use interpolation function returned by `interp1d`
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.set_title("Plot B vs H")
ax1.set_xlabel('B')
ax1.set_ylabel('H')
ax1.plot(x,y, c='r', label='the data')
ax1.plot(xnew, ynew, 'o', label='the interpolation')
leg = ax1.legend()
plt.show()
If you want to smooth your data, you can use the univariatespline, just replace the f = interpolate... line with:
f = interpolate.UnivariateSpline(x, y)
To change how much it smooths, you can fiddle with the s and k options:
f = interpolate.UnivariateSpline(x, y, k=3, s=1)
As described at the documentation