Finding average of multiple lines in matplotlib - python

I am trying to create a graph that takes the average of the lines below and plots it on a new graph of the same kind.
# One tall axes holds every trace; each channel is shifted up by 0.2 per
# channel index so the traces stack instead of overlapping.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 12))
yticks = 0.2 * np.arange(nchan)
offsets = yticks[np.newaxis]

# One colour per data set; every call returns one Line2D per channel.
lines_BC = ax.plot(resampled_BC.T + offsets, label="BC", color='black')
lines_CP = ax.plot(resampled_CP.T + offsets, label="CP", color='red')
lines_CR = ax.plot(resampled_CR.T + offsets, label="CR", color='cyan')
lines_DC = ax.plot(resampled_DC.T + offsets, label="DC", color='blue')
lines_JC = ax.plot(resampled_JC.T + offsets, label="JC", color='lime')

# Label each offset with the corresponding muscle name.
ax.set_yticks(yticks)
ax.set_yticklabels(muscles)
ax.set_ylabel('Muscles')
ax.set_xlabel('Time (ms)')

# A single legend entry per data set: use the first line of each group.
legend_handles = [
    lines_BC[0],
    lines_CP[0],
    lines_CR[0],
    lines_DC[0],
    lines_JC[0],
]
leg = ax.legend(legend_handles, ['BC', 'CP', 'CR', 'DC', 'JC'], loc='upper right')
plt.show()
I have tried using np.mean, but with no luck.

Related

Animated/interactive plots for DBSCAN clustering

I have the code below. I'm trying to build an interactive DBSCAN clustering plot. When I run it, I get three plots, but none of them is interactive. Where is the problem in the code, and how can I fix it?
# Load the multishape example data set from Prof. Pyrcz's GitHub.
df_mv = pd.read_csv(r"https://raw.githubusercontent.com/HanaBachi/MachineLearning/main/multishape.csv")
df_mv.head()

# Redirect stdout into an in-memory buffer so incidental print output is
# not shown alongside the dashboard.
text_trap = io.StringIO()
sys.stdout = text_trap

# Dashboard header and the two controls that drive the plot.
l = widgets.Text(value=' DBSCAN, Hana Bachi, The University of Texas at Austin',
                 layout=Layout(width='950px', height='30px'))
style = {'description_width': 'initial'}
# DBSCAN requires eps > 0, so the slider starts at 0.1 rather than 0.
eps = widgets.FloatSlider(min=0.1, max=2, value=0.1, step=0.1, description='eps',
                          orientation='horizontal', style=style, continuous_update=False)
# min_samples is an integer count of at least 1, hence an IntSlider from 1.
minPts = widgets.IntSlider(min=1, max=5, value=1, step=1, description='minPts %',
                           orientation='horizontal', style=style, continuous_update=False)
color = ['blue', 'red', 'green', 'yellow', 'orange', 'white', 'magenta', 'cyan']
ui = widgets.HBox([eps, minPts],)
ui2 = widgets.VBox([l, ui],)


def DBSCAN_plot(eps, minPts):
    """Cluster ``df_mv`` with DBSCAN and draw the result.

    Parameters:
        eps (float): neighbourhood radius passed to DBSCAN.
        minPts (int): minimum number of samples for a core point; it is
            cast to ``int`` before being passed as ``min_samples``.

    The first two columns of ``df_mv`` are used as plot coordinates.
    Points labelled -1 (noise) are overdrawn with black crosses.
    """
    # Use the slider values; hard-coding them here makes the plot static.
    db = DBSCAN(eps=eps, min_samples=int(minPts)).fit(df_mv)
    labels = db.labels_
    x = df_mv.values[:, 0]
    y = df_mv.values[:, 1]
    noise = labels == -1

    plt.figure(figsize=(14, 7))
    clusters = plt.scatter(x, y, c=labels, cmap='tab10', s=50)
    plt.scatter(x[noise], y[noise], c='k', marker='x', s=100)
    plt.title('DBSCAN interactive plot', fontsize=20)
    # Pass the cluster scatter explicitly so the colorbar describes the
    # cluster labels rather than the most recently drawn artist.
    plt.colorbar(clusters)
    # Adjust the layout before showing; after show() it would act on a new,
    # empty figure.
    plt.subplots_adjust(left=0.0, bottom=0.0, right=2.0, top=1.0, wspace=0.2, hspace=0.3)
    plt.show()


# Connect the sliders to the plotting function. This has to happen before
# interactive_plot1 is placed in any container.
interactive_plot1 = widgets.interactive_output(DBSCAN_plot, {'eps': eps, 'minPts': minPts})
interactive_plot1.clear_output(wait=True)

# create dashboard/formatting
uia = widgets.HBox([interactive_plot1],)
uia2 = widgets.VBox([eps, uia],)
uib = widgets.HBox([interactive_plot1],)
uib2 = widgets.VBox([minPts, uib],)

# Render the controls together with the linked output. `display` is the
# function IPython makes available in notebook sessions.
display(ui2, interactive_plot1)
How can I make this plot interactive as a function of eps and minPts?

Bar chart starting out of axis in python

I need to copy the bar chart in the image with python.
bar chart I have to copy
What I have been able to achieve so far is the next image.
bar chart I have achieved
And the code I have used is:
import matplotlib.pyplot as plt
# Input figures: expenses, income and the resulting profit.
ausgaben = 130386
einnahmen = 147233
profit = einnahmen - ausgaben

titles = ["Ausgaben", "Profit", "Einnahmen"]
euros = [ausgaben, profit, einnahmen]
colors = ['#6F8CA7', '#F6BC06', '#59908F']

# Vertical positions of the value labels (just above each bar) and of the
# bar titles (a common height above the tallest bar).
dummysum1 = [amount + 4000 for amount in euros]
dummysum2 = [max(euros) + 15000 for _ in euros]
if euros[1] <= 0:
    # A non-positive profit gets its label at a fixed height.
    dummysum1[1] = 4000

# Percentage change from expenses to income, shown with an explicit sign
# when it is positive.
position1 = (euros[0] + euros[2]) / 2
percentile = (euros[2] - euros[0]) / euros[0] * 100
sign = '+' if percentile > 0 else ''
label0 = '{}{:.1f}%'.format(sign, percentile)

fig, ax = plt.subplots(figsize=(7, 5))
fig.set_facecolor('#D0A210')
fig.patch.set_alpha(0.2)

# One bar per category, all drawn from the zero baseline.
for name, amount, shade in zip(titles, euros, colors):
    ax.bar(name, amount, alpha=0.6, color=shade)

# Grey guide lines at the expenses and income levels.
for level in (euros[0], euros[2]):
    ax.axhline(y=level, color='#BCBCBC')

ax.set_facecolor('#D0A210')
ax.patch.set_alpha(0.02)

# Hide both axes and all four spines.
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
for side in ('right', 'left', 'top', 'bottom'):
    ax.spines[side].set_visible(False)

# Value labels, the percentage label, then the bar titles.
for name, height, amount in zip(titles, dummysum1, euros):
    ax.text(name, height, '{} €'.format(amount), horizontalalignment='center')
ax.text(2.58, position1 - 1000, label0)
for name, height in zip(titles, dummysum2):
    ax.text(name, height, name, horizontalalignment='center')

plt.show()
How can I get the yellow bar to start at y=130386 instead of y=0, and how can I add the yellow arrow on the right-hand side?
(The first question is the most important!)
Thank you all!
For the first question, just add a value for the bottom parameter. I have also added the arrow using annotate:
import matplotlib.pyplot as plt
# Input figures: expenses, income and the resulting profit.
ausgaben = 130386
einnahmen = 147233
profit = einnahmen - ausgaben

titles = ["Ausgaben", "Profit", "Einnahmen"]
euros = [ausgaben, profit, einnahmen]
colors = ['#6F8CA7', '#F6BC06', '#59908F']

# Baseline of each bar: the profit bar starts where the expenses bar ends,
# the other two start at zero.
bar_bottoms = [0, ausgaben, 0]

# Heights for the value labels (relative to each bar's baseline) and for
# the bar titles (a common height above the tallest bar).
dummysum1 = [amount + 4000 for amount in euros]
dummysum2 = [max(euros) + 15000 for _ in euros]
if euros[1] <= 0:
    # A non-positive profit gets its label at a fixed offset.
    dummysum1[1] = 4000

# Percentage change from expenses to income, with an explicit plus sign
# for a positive change.
position1 = (euros[0] + euros[2]) / 2
percentile = (euros[2] - euros[0]) / euros[0] * 100
if percentile > 0:
    label0 = '+' + '{:.1f}%'.format(percentile)
else:
    label0 = '{:.1f}%'.format(percentile)

fig, ax = plt.subplots(figsize=(7, 5))
fig.set_facecolor('#D0A210')
fig.patch.set_alpha(0.2)

for name, amount, shade, base in zip(titles, euros, colors, bar_bottoms):
    ax.bar(name, amount, alpha=0.6, color=shade, bottom=base)

# Grey guide lines at the expenses and income levels.
ax.axhline(y=euros[0], color='#BCBCBC')
ax.axhline(y=euros[2], color='#BCBCBC')

ax.set_facecolor('#D0A210')
ax.patch.set_alpha(0.02)

# Hide both axes and all four spines.
for axis in (ax.xaxis, ax.yaxis):
    axis.set_visible(False)
for side in ('right', 'left', 'top', 'bottom'):
    ax.spines[side].set_visible(False)

# Value labels sit above each bar, so they are shifted by the bar's baseline.
for name, height, base, amount in zip(titles, dummysum1, bar_bottoms, euros):
    ax.text(name, height + base, '{} €'.format(amount), horizontalalignment='center')
ax.text(2.58, position1 - 1000, label0)
for name, height in zip(titles, dummysum2):
    ax.text(name, height, name, horizontalalignment='center')

# Arrow on the right-hand side, from the expenses level up past the income
# level.
ax.annotate("", xy=(2.5, ausgaben + profit * 1.05), xytext=(2.5, ausgaben),
            arrowprops=dict(arrowstyle="->", color="orange", lw=2.0))
plt.show()

Creating a legend, and printing a graph with python

I want to add a legend to the graph below and save it as a PDF. The code I have for the graph is below.
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the three input tables used for the plot.
mta = pd.read_csv('../project/all_mta_data_cleanded.csv') # MTA data cleaned into similar categories
cata = pd.read_csv('../project/mta_catagories_breakdown.csv') # document combining all of the categories
weather = pd.read_csv('../project/New York Tempeture Data.csv')
# Row-wise total over all item-category columns.
# NOTE(review): the target column is assigned twice in one chained
# assignment; the second `mta['Total Lost Items'] =` is redundant.
mta['Total Lost Items'] = mta['Total Lost Items'] = (mta['Accessories']+ mta['Books'] + mta['Bags'] + mta['Cellphones'] + mta['Clothing'] + mta['Money'] + mta['Eletronics'] + mta['Entrainment'] + mta['Glasses'] + mta['Shoes'] + mta['Household Items'] + mta['Indentification'] + mta['Jewlery'] + mta['Keys'] + mta['Medical Equipment'] + mta['Miscellaneous'] + mta['Instruments'] + mta['NYCT Equipment'] + mta['Sports Equipment'] + mta['Tickets'] + mta['Tools'] + mta['Toys'] + mta['Wallets/Purses'])
clear = mta.iloc[[13, 40,68,92,115,138,157,176,200,222,241,245,246,250],:] # selected the last pull of each month
# Outer-join the selected rows with the weather table on the date, then
# drop the duplicate date column coming from the weather table.
compl = pd.merge(clear,weather, left_on='Date',right_on='Time',how='outer').drop(columns=['Time'])
# Temperature goes on the left y-axis, lost items on a twin right y-axis.
fig, ax1 = plt.subplots()
ax1.plot(compl['Date'], compl['Temp'] ,color = 'red',marker='o')
ax2= ax1.twinx()
ax2.plot(compl['Date'], compl['Total Lost Items'],color= 'purple',marker='^')
ax1.set_ylabel('Tempeture in Fahrenheit', )
# Rotate the date tick labels to vertical.
for tick in ax1.get_xticklabels():
tick.set_rotation(90)
ax2.set_ylabel('Number Of Items Lost')
ax1.set_title('Average Weather In New Your City vs Total Items Lost Each Month')
# NOTE(review): these two calls are the failing part of the question.
# pyplot has no `set_legend` function (the function is `legend`), and
# `plt.figure` is a function, so `plt.figure.savefig` does not exist; the
# figure object created above is `fig`.
plt.set_legend()
plt.figure.savefig('Project Figure.pdf')```
To add a legend to your graph, specify the "label" argument in the plot calls, then call plt.legend() and plt.show():
fig, ax1 = plt.subplots()
# Axes.plot returns a list of lines; unpack the single Line2D so it can be
# used directly as a legend handle.
l1, = ax1.plot(compl['Date'], compl['Temp'], color='red', marker='o', label='label_1')
ax2 = ax1.twinx()
l2, = ax2.plot(compl['Date'], compl['Total Lost Items'], color='purple', marker='^', label='label_2')
ax1.set_ylabel('Temperature in Fahrenheit')
# Rotate the date tick labels to vertical.
for tick in ax1.get_xticklabels():
    tick.set_rotation(90)
ax2.set_ylabel('Number Of Items Lost')
ax1.set_title('Average Weather In New Your City vs Total Items Lost Each Month')
# The two lines live on different axes, so build one legend from both
# handles. It is attached to the twin axes, which is drawn on top.
ax2.legend([l1, l2], ['lab1', 'lab2'])
# Save before showing: once show() returns, the figure may already have
# been closed and the saved file would be empty.
fig.savefig('Project Figure.pdf')
plt.show()

Matplotlib: center Y-axis on 0

I've made a function to graph economic performance, but the output is often lopsided on the y-axis.
The below graph shows the problem. The range of y values makes the chart default to the max/min as the range of the y axis.
Is there any way to force the chart to center itself on 0, or do I need to derive the max and min y values within the function?
The function is below. If you'd like me to replace the variables with values to reproduce the chart, let me know; it's a bit of a task.
def recession_comparison(key, variable, dimension):
    '''
    Creates the "scary chart"- proportional growth for a single area/industry. All recessions included in chart.

    Parameters:
        key (str or int): area-fips or industry_code
        variable (str): determines what economic indicator will be used in the timeline. Must be one of ['month3_emplvl' (employment), 'avg_wkly_wage' (wages), 'qtrly_estabs_count'(firms)]
        dimension (str): dimension of data to chart, either 'area' or 'industry'.

    Returns:
        fig (matplotlib plot)

    Raises:
        ValueError: if dimension is neither 'area' nor 'industry'.
    '''
    # Resolve the index column and title first, so an unsupported dimension
    # fails immediately instead of surfacing later as an unbound-name error
    # after a figure has already been created.
    if dimension == 'area':
        index = 'area_fips'
        title = 'Recession Comparison, ' + area_titles[key] + " (" + str(key) + ")"
    elif dimension == 'industry':
        index = 'industry_code'
        title = 'Recession Comparison: ' + industry_titles[key] + " (" + str(key) + ")"
    else:
        raise ValueError("dimension must be 'area' or 'industry', got " + repr(dimension))

    fig, ax = plt.subplots(figsize=(15, 10))

    # One line per recession.
    for recession in recessions_int:
        # NOTE(review): iteration stops at the 'full' entry, so any key
        # after it is not plotted. Presumably 'full' is the last key; confirm.
        if recession == 'full':
            break
        loadpath = filepath(variable=variable, dimension=dimension, charttype='proportional', recession=recession, filetype='json')
        df = pd.read_json(loadpath)
        df.set_index(index, inplace=True)
        # Drop the first and last entries of the row and scale to percent.
        ax.plot(df.loc[key][1:-1] * 100, label=str(recession), linewidth=1.5, alpha=0.8)

    # Reference lines for the event quarter and the zero-growth baseline.
    ax.axvline(x=6, color='black', linewidth=0.8, alpha=0.5, ls=':', label='Event Quarter')
    ax.axhline(y=0, color='black', linewidth=0.8, alpha=0.5, ls='--', label='Pre-Recession baseline')

    ax.set_xlabel('Quarters since start of recession')
    ax.set_ylabel('Growth: ' + var_display[variable])
    ax.set_title(title)
    ax.yaxis.set_major_formatter(mtick.PercentFormatter())
    plt.legend()
    plt.show()
    return fig
edit: full code solution from DapperDuck:
def recession_comparison(key, variable, dimension):
    '''
    Creates the "scary chart"- proportional growth for a single area/industry,
    with the y-axis centered on 0. All recessions included in chart.

    Parameters:
        key (str or int): area-fips or industry_code
        variable (str): economic indicator used in the timeline.
        dimension (str): dimension of data to chart, either 'area' or 'industry'.

    Returns:
        fig (matplotlib plot)

    Raises:
        ValueError: if dimension is neither 'area' nor 'industry'.
    '''
    # Resolve the index column and title first, so an unsupported dimension
    # fails immediately instead of surfacing later as an unbound-name error
    # after a figure has already been created.
    if dimension == 'area':
        index = 'area_fips'
        title = 'Recession Comparison, ' + area_titles[key] + " (" + str(key) + ")"
    elif dimension == 'industry':
        index = 'industry_code'
        title = 'Recession Comparison: ' + industry_titles[key] + " (" + str(key) + ")"
    else:
        raise ValueError("dimension must be 'area' or 'industry', got " + repr(dimension))

    fig, ax = plt.subplots(figsize=(15, 10))

    # One line per recession.
    for recession in recessions_int:
        # NOTE(review): iteration stops at the 'full' entry, so any key
        # after it is not plotted. Presumably 'full' is the last key; confirm.
        if recession == 'full':
            break
        loadpath = filepath(variable=variable, dimension=dimension, charttype='proportional', recession=recession, filetype='json')
        df = pd.read_json(loadpath)
        df.set_index(index, inplace=True)
        # Drop the first and last entries of the row and scale to percent.
        ax.plot(df.loc[key][1:-1] * 100, label=str(recession), linewidth=1.5, alpha=0.8)

    # Reference lines for the event quarter and the zero-growth baseline.
    ax.axvline(x=6, color='black', linewidth=0.8, alpha=0.5, ls=':', label='Event Quarter')
    ax.axhline(y=0, color='black', linewidth=0.8, alpha=0.5, ls='--', label='Pre-Recession baseline')

    # Center the y-axis on 0: take the larger magnitude of the two
    # automatic limits and use it symmetrically.
    yabs_max = abs(max(ax.get_ylim(), key=abs))
    ax.set_ylim(bottom=-yabs_max, top=yabs_max)

    ax.set_xlabel('Quarters since start of recession')
    ax.set_ylabel('Growth: ' + var_display[variable])
    ax.set_title(title)
    ax.yaxis.set_major_formatter(mtick.PercentFormatter())
    plt.legend()
    plt.show()
    return fig
Corrected image:
Add the following code right after ax.axhline(y = 0, color = 'black', linewidth = 0.8, alpha = 0.5, ls = '--', label = 'Pre-Recession baseline'):
# Use the larger magnitude of the current limits as a symmetric range, so
# that 0 ends up in the vertical middle of the axes.
y_low, y_high = ax.get_ylim()
yabs_max = max(abs(y_low), abs(y_high))
ax.set_ylim(bottom=-yabs_max, top=yabs_max)

Pyplot trendline does not appear over scatter plot but to the right of it?

I wrote code to create a scatter plot and got linear trendline equations, but pyplot won't put the trendline directly over the scatter plot.
Here is code (sorry if it is messy!):
# Scatter every accession, coloured by group, and collect the numeric
# points of the two groups for the trendline fits below.
for acc in accdict:
    cc = 'b'
    zz = 50
    # Convert once and plot the numeric values. NOTE(review): the raw items
    # are presumably strings; matplotlib would place strings on a
    # categorical axis, while the trendlines use floats, which would shift
    # the lines away from the points.
    xs = [float(item) for item in accdict[acc][1]]
    ys = [float(item) for item in accdict[acc][2]]
    if str(accdict[acc][0]) in str(alt):
        cc = 'r'
        altx.extend(xs)
        alty.extend(ys)
        # Draw the smaller group on top of the larger one.
        zz = 100 if altcount < refcount else 0
    else:
        refx.extend(xs)
        refy.extend(ys)
    plt.scatter(xs, ys, color=cc, zorder=zz)

# Linear trendline and equation for the alt group. The line is evaluated on
# the sorted unique x values, as for the ref group.
(m, b) = np.polyfit(altx, alty, 1)
p = np.poly1d((m, b))
alt_xs = np.unique(altx)
plt.plot(alt_xs, p(alt_xs), color='r', zorder=zz)
alteq = phen + ' = ' + str(round(m, 4)) + 'x + ' + str(round(b, 4))

# Linear trendline and equation for the ref group.
(m, b) = np.polyfit(refx, refy, 1)
ref_xs = np.unique(refx)
plt.plot(ref_xs, np.poly1d((m, b))(ref_xs), color='b', zorder=zz)
refeq = phen + ' = ' + str(round(m, 4)) + 'x + ' + str(round(b, 4))

# Print both equations below the axes, meeting at the horizontal centre.
plt.annotate(' ' + alteq, xy=(0.5, 0), xytext=(0, 10), xycoords=('axes fraction', 'figure fraction'), textcoords='offset points', size=8, ha='left', va='bottom', color='r')
plt.annotate(refeq + ' ', xy=(0.5, 0), xytext=(0, 10), xycoords=('axes fraction', 'figure fraction'), textcoords='offset points', size=8, ha='right', va='bottom', color='b')

plt.xlabel('Days')
plt.ylabel(phen)
red_patch = mpatches.Patch(color='red', label='Alt')
blue_patch = mpatches.Patch(color='blue', label='Ref')
plt.legend(handles=[red_patch, blue_patch], loc='best')
plt.title('Position:' + sheetname + ' ' + phen + ' Over Time')

# Save into a per-sheet folder; exist_ok avoids the check-then-create race.
newpath = '/Users/elijahsaltzman/' + filename[:-4] + '-scatter/' + sheetname
os.makedirs(newpath, exist_ok=True)
plt.savefig(newpath + '/' + phen + '.png')
plt.close()

Categories