I want Python 3 to wait till the plot was displayed before the print statement is executed. Right now it looks like that:
This is the relevant part of my code:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv(filename)
headers = list(df)
attribute = str(input("Which attribute would you like to analyze?\n"))
attribute_mean = df[attribute].mean()
attribute_std = df[attribute].std()
ax = df.plot.bar(x=headers[0], y=str(attribute), label=("_"+attribute))
ax.axhline(attribute_mean, color="red", label="mean")
ax.axhline((float(attribute_mean) + float(attribute_std)), color="green", label="std")
ax.axhline((float(attribute_mean) - float(attribute_std)), color="green", )
ax.legend(["mean", "+std", "-std"])
plt.ylabel(attribute)
plt.xlabel(headers[0])
plt.margins(x=0, y=0)
fig = plt.gcf()
leg = plt.legend(loc = 'upper right')
fig.show()
print("The presented data showed the following characteristic values:\n")
display(df[attribute].describe())
Is there a way to fix this?
Related
I usually don't ask questions on this platform, but I have a problem that quite bugs me.
Context
I have a function that plots data from a dataframe that has stockdata. It all works perfectly except for the fact that a second, empty window shows next to the actual graph whenever I execute this function. (image)
Here is all the relevant code, I'd be very grateful if some smart people could help me.
def plot(self):
plt.clf()
plt.cla()
colors = Colors()
data = self.getStockData()
if data.empty:
return
data.index = [TimeData.fromTimestamp(x) for x in data.index]
current, previous = data.iloc[-1, 1], data.iloc[0, 1]
percentage = (current / previous - 1) * 100
# Create a table
color = colors.decideColorPct(percentage)
# Create the table
fig = plt.figure(edgecolor=colors.NEUTRAL_COLOR)
fig.patch.set_facecolor(colors.BACKGROUND_COLOR)
plt.plot(data.close, color=color)
plt.title(self.__str2__(), color=colors.NEUTRAL_COLOR)
plt.ylabel("Share price in $", color=colors.NEUTRAL_COLOR)
plt.xlabel("Date", color=colors.NEUTRAL_COLOR)
ax = plt.gca()
ax.xaxis.set_major_formatter(plt_dates.DateFormatter('%Y/%m/%d %H:%M'))
ax.set_xticks([data.index[0], data.index[-1]])
ax.set_facecolor(colors.BACKGROUND_COLOR)
ax.tick_params(color=colors.NEUTRAL_COLOR, labelcolor=colors.NEUTRAL_COLOR)
for spine in ax.spines.values():
spine.set_edgecolor(colors.NEUTRAL_COLOR)
ax.yaxis.grid(True, color=colors.NEUTRAL_COLOR, linestyle=(0, (5, 10)), linewidth=.5)
plt.show()
Some notes:
Matplotlib never gets used in the program before this.
The data is standardized and consists of the following columns: open, low, high, close, volume.
The index of the dataframe exists of timestamps, which gets converted to an index of datetime objects at the following line: data.index = [TimeData.fromTimestamp(x) for x in data.index]
Remove plt.clf() and plt.cla() because it automatically creates window for plot when you don't have this window.
And later fig = plt.figure() creates new window which it uses to display your plot.
Minimal code for test
import matplotlib.pyplot as plt
import pandas as pd
data = pd.DataFrame({'x': [1,2,3], 'y': [2,3,1]})
#plt.clf()
#plt.cla()
fig = plt.figure()
plt.plot(data)
ax = plt.gca()
plt.show()
I am trying to add two custom arrows with labels in a seaborn relplot graph.
I tried using the matplot arrow function which is not working because the seaborne relplot is a "facetgrid". I did not see a specific arrow pointer function in seaborn docu. I want to draw an arrow at a specific x value between the y values of two benchmarks (b1 b2 in example)
Is there an easy way to do this?
I added a simple code example, data and 2 images of what i try to achieve.
CODE:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import gc
import sys
if __name__ == '__main__':
pfad = sys.argv[1]
label = 'test'
df1 = pd.read_csv(pfad, sep=';')
sns_plot = sns.relplot(x="selectivity", ci=None, y="throughput", hue='benchmark', kind="line", data=df1,
dashes=False, markers=True, style="benchmark")
sns_plot.savefig(label + ".png")
plt.savefig(label + ".pdf", bbox_inches='tight')
plt.show()
DATASET (example.csv in same folder)
benchmark;selectivity;throughput
b1;0.01;1426.89
b2;0.01;531.434
b1;0.03;826.89
b2;0.03;531.434
b1;0.05;626.89
b2;0.05;520.434
Currently my Graph looks like this:
This is what I want to achieve:
As mentioned in the comments, I flattened the axes, got the values from the line chart, and added text annotations and arrows respectively.
if __name__ == '__main__':
pfad = sys.argv[1]
label = 'test'
df1 = pd.read_csv(pfad, sep=';')
sns_plot = sns.relplot(x="selectivity", ci=None, y="throughput", hue='benchmark', kind="line", data=df1,
dashes=False, markers=True, style="benchmark")
xydata = []
for ax in sns_plot.axes.flat:
for li in ax.lines:
xydata.append(li.get_xydata())
ax.text(xydata[0][0][0]+0.001, (xydata[0][0][1]+xydata[1][0][1])/2, '2.5X speedup')
ax.text(xydata[1][2][0]+0.001, (xydata[0][2][1]+xydata[1][2][1])/2, '1.3X speedup')
ax.annotate('',
xy=(xydata[0][0][0], xydata[0][0][1]),
xytext=(xydata[0][0][0], xydata[1][0][1]),
xycoords='data',
arrowprops=dict(facecolor='black',width=2.0,headwidth=7.0,headlength=7.0,shrink=0.01))
ax.annotate('',
xy=(xydata[1][2][0], xydata[0][2][1]),
xytext=(xydata[1][2][0], xydata[1][2][1]),
xycoords='data',
arrowprops=dict(facecolor='black',width=2.0,headwidth=7.0,headlength=7.0,shrink=0.01))
#sns_plot.savefig(label + ".png")
#plt.savefig(label + ".pdf", bbox_inches='tight')
plt.show()
In a scatter plot created using px.scatter, how do I mark one data point with a red star?
fig = px.scatter(df, x="sepal_width", y="sepal_length")
# Now set a single data point to color="red", symbol="star".
This isn't really highlighting an already existing data point within a trace you've already produced, but rather adding another one with a different visual appearance. But it does exactly what you're looking for:
fig.add_trace(go.Scatter(x=[3.5], y=[6.5], mode = 'markers',
marker_symbol = 'star',
marker_size = 15))
Plot:
Complete code:
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
df = px.data.iris() # iris is a pandas DataFrame
fig = px.scatter(df, x="sepal_width", y="sepal_length")
fig.add_trace(go.Scatter(x=[3.5], y=[6.5], mode = 'markers',
marker_symbol = 'star',
marker_size = 15))
fig.show()
This directly modifies the Scatter trace's Marker itself:
import plotly.express as px
df = px.data.iris()
fig = px.scatter(df, x="sepal_width", y="sepal_length")
trace = next(fig.select_traces())
# Modify kth point.
n = len(trace.x)
k = 136
color = [trace.marker.color] * n
color[k] = "red"
size = [8] * n
size[k] = 15
symbol = [trace.marker.symbol] * n
symbol[k] = "star"
# Update trace.
trace.marker.color = color
trace.marker.size = size
trace.marker.symbol = symbol
# Alternatively, call:
# fig.update_traces(marker=dict(color=color, size=size, symbol=symbol))
fig.show()
I want to draw multiple ternary graphs and thought to do this using matplotlib's subplot.
I'm just getting empty 'regular' plots though, not the ternary graphs I want in there. I found the usage of
figure, ax = plt.subplots()
tax = ternary.TernaryAxesSubplot(ax=ax)
so this seems to be possible, but can't really find out how to get this working. Any ideas?
Code I'm using:
I'm using a for loop as the data has columns named tria1-a, tria2-a, etc for the different triads
import ternary
import matplotlib.pyplot as plt
import pandas as pd
#configure file to import.
filename = 'somecsv.csv'
filelocation = 'location'
dfTriad = pd.read_csv(filelocation+filename)
# plot the data
scale = 33
figure, ax = plt.subplots()
tax = ternary.TernaryAxesSubplot(ax=ax, scale=scale)
figure.set_size_inches(10, 10)
tax.set_title("Scatter Plot", fontsize=20)
tax.boundary(linewidth=2.0)
tax.gridlines(multiple=1, color="blue")
tax.legend()
tax.ticks(axis='lbr', linewidth=1, multiple=5)
tax.clear_matplotlib_ticks()
#extract the xyz columns for the triads from the full dataset
for i in range(1,6) :
key_x = 'tria'+ str(i) + '-a'
key_y = 'tria' + str(i) + '-b'
key_z = 'tria' + str(i) + '-c'
#construct dataframe from the extracted xyz columns
dfTriad_data = pd.DataFrame(dfTriad[key_x], columns=['X'])
dfTriad_data['Y'] = dfTriad[key_y]
dfTriad_data['Z'] = dfTriad[key_z]
#create list of tuples from the constructed dataframe
triad_data = [tuple(x) for x in dfTriad_data.to_records(index=False)]
plt.subplot(2, 3, i)
tax.scatter(triad_data, marker='D', color='green', label="")
tax.show()
I had the same problem and could solve it by first "going" into the subplot, then creating the ternary figure in there by giving plt.gca() as keyword argument ax:
plt.subplot(2,2,4, frameon = False)
scale = 10
plt.gca().get_xaxis().set_visible(False)
plt.gca().get_yaxis().set_visible(False)
figure, tax = ternary.figure(ax = plt.gca(), scale = scale)
#now you can use ternary normally:
tax.line(scale * np.array((0.5,0.5,0.0)), scale*np.array((0.0, 0.5, 0.5)))
tax.boundary(linewidth=1.0)
#...
I'm trying to duplicate my y axis so that it appears on both the left and the right side of my graph (same scale on each side). I believe the correct way to do this is through the twiny method, but cannot get my head round it. Here is my current code:
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def bar(data_df,
colour_df=None,
method='default',
ret_obj=False):
height = len(data_df.columns)*4
width = len(data_df.index)/4
ind = np.arange(len(data_df.index))
dat = data_df[data_df.columns[0]]
bar_width = 0.85
fig, ax = plt.subplots(figsize=(width,height))
ax1 = ax.bar(ind,dat,bar_width,color='y',log=True)
ax2 = ax1.twiny()
ax.tick_params(bottom='off', top='off', left='on', right='on')
plt.xticks(np.arange(len(data_df.index)) + bar_width,
data_df.index, rotation=67,ha='right')
ylab = 'Region Length (base pairs, log10)'
figname = 'bar' + method + '.png'
if ret_obj==False:
fig.savefig(figname,bbox_inches='tight',dpi=250)
print "Output figure:", figname
plt.close()
if ret_obj==True:
return fig
Which returns the following error when passed a dataframe:
AttributeError: 'BarContainer' object has no attribute 'twiny'
Having looked into it a bit further I believe that using the host/parasite methods would also work, but I'm a bit lost how I could fit it into my current code. Advice would be gratefully appreciated!
You don't have to use twiny in this case. It suffices to draw the labels on all sides:
bars = ax.bar(ind,dat,bar_width,color='y',log=True)
ax.tick_params(axis='both', which='both', labelbottom=True, labeltop=True,
labelleft=True, labelright=True)
I get following result with dummy data:
df = pd.DataFrame({"a": np.logspace(1,10,20)})
bar(df)