I try to plot 3 plots for each columns ('AFp1','AFp2','F9') in one figure with 'freqs' on the x axis and 'psd' on the y axis. I'm looking for a kind of loop through the variables because at the end I want to plot >50 plots in one figure.
Here I found a code that seems to do what I want but I don't get it to work:
num_plots = 20
colormap = plt.cm.gist_ncar
plt.gca().set_prop_cycle(plt.cycler('color', plt.cm.jet(np.linspace(0, 1, num_plots))))
x = np.arange(10)
labels = []
for i in range(1, num_plots + 1):
plt.plot(x, i * x + 5 * i)
labels.append(r'$y = %ix + %i$' % (i, 5*i))
plt.legend(labels, ncol=4, loc='upper center',
bbox_to_anchor=[0.5, 1.1],
columnspacing=1.0, labelspacing=0.0,
handletextpad=0.0, handlelength=1.5,
fancybox=True, shadow=True)
plt.show()
Here is how I tried to include this code in my for loop:
path = r'C:/M'
for fil in os.listdir(path):
#extract SUBJECT name
r = (fil.split(" ")[0])
#load file in pandas dataframe
data = pd.read_csv(path+f'{r} task.txt',sep=",",usecols= 'AFp1','AFp2','F9'])
data.columns = ['AFp1','AFp2','F9']
num_plots = 3
for columns in data(1, num_plots + 1):
freqs, psd = signal.welch(data[columns], fs=500,
window='hanning',nperseg=1000, noverlap=500, scaling='density', average='mean')
colormap = plt.cm.gist_ncar
plt.gca().set_prop_cycle(plt.cycler('color', plt.cm.jet(np.linspace(0, 1, num_plots))))
plt.plot(freqs, psd)
plt.legend(columns, ncol=4, loc='upper center',
bbox_to_anchor=[0.5, 1.1],
columnspacing=1.0, labelspacing=0.0,
handletextpad=0.0, handlelength=1.5,
fancybox=True, shadow=True)
plt.title(f'PSD for {r}')#, nperseg=1000, noverlap=500
plt.xlabel('Frequency [Hz]')
plt.ylabel('Power [V**2/Hz]')
plt.axis([0,50, -1, 5])
plt.show()
I get the following error:
for columns in data(1, num_plots + 1):
TypeError: 'DataFrame' object is not callable
If anyone could tell me how I can make it work, it would be great :D
Thank you very much,
Angelika
Shoaib's answer finally worked. Thank you very much:
"you should only use plt.show() once, so put it outside of for loop. your error is because data is an array but you used it as a function like data(something). you should see what is dimensions of data and then try to select columns or values using data[ something ] not data( something ). check dimensions of data using codes like print(data) or print(data[0]) or print(len(data)) or print(len(data[0])) etc. it will help you in debugging your code "
here is how you plot three functions in a figure
import matplotlib.pyplot as plt
from math import *
x_lim = 6
n = 1000
X = []
Y1 = []
Y2 = []
Y3 = []
for i in range(n):
x = x_lim * (i/n-1)
y1 = sin(x)
y2 = cos(x)
y3 = x**2
X.append( x )
Y1.append( y1 )
Y2.append( y2 )
Y3.append( y3 )
plt.plot(X,Y1)
plt.plot(X,Y2)
plt.plot(X,Y3)
plt.title("title")
plt.xlabel("x")
plt.ylabel("y")
plt.show()
your question was not reproducible, because the file you are getting your data from is not entailed. so we can not reproduce your error with copy paste your code. but if you have an array namely data with for example 4 columns then you can separate each columns then plot them
for row in data:
x.append( row[0] )
Y1.append( row[1] )
Y2.append( row[2] )
Y3.append( row[3] )
Related
I am attempting to plot multiple line graphs in a graph table itself. However, I run into an error that mentioned:
No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
Not only this happened but my legend tables of the 3 lines don't merge together and my X-axis does not show the months but random numbers from my dataframe. Here is my code and graph result to look through.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
df = pd.read_excel (r'C:\Users\admin\Desktop\Question Folder\Sales of top 30 customers.xlsx')
#Refine and adjust the dataframe for suitable manipulation
df = df.drop('Unnamed: 0', axis = 1)
df = df.iloc[2: , :]
row_detail = df.head(1).values.tolist()
row_detail = row_detail[0]
a = df.iloc[-3:, :].values.tolist()
a = a[0]
df.columns = row_detail
df = df.iloc[1:, :]
print(df) # This is for checking purpose
# This creates a dataframe needed for the practice
df1 = df.iloc[:3]
# This is to plot a line graph from df1
df_chosen = df1
a = 0
# Turning data row of a customer into a list
data_row_1 = df_chosen.iloc[a].values.tolist()
data_row_2 = df_chosen.iloc[a + 1].values.tolist()
data_row_3 = df_chosen.iloc[a + 2].values.tolist()
date = data_row_1[1:]
cus_1 = data_row_1[0]
cus_2 = data_row_2[0]
cus_3 = data_row_3[0]
y1 = data_row_1[1:]
y2 = data_row_2[1:]
y3 = data_row_3[1:]
x = np.arange(len(date)) # the label locations
width = 0.60 # the width of the bars
fig, ax = plt.subplots()
# Increase size of plot in jupyter
plt.rcParams["figure.figsize"] = (20,15)
plt.rcParams.update({'font.size':25})
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_xlabel('Months', fontsize=30)
ax.set_ylabel('Sales', fontsize=30)
ax.set_title('Monthly Sales from ' + cus_1 +", " + cus_2+ " and " + cus_3, fontsize=30)
ax.set_xticks(x, date)
ax.set_ylim(bottom = 0, top = 1000)
legend1 = plt.legend(())
ax.legend(loc='best', fontsize=30)
plt.grid(True)
# set up the 1st line graph
ax.plot(x, y1, "r", label = cus_1, marker='x')
#ax.set_yticks(
ax.grid(True) # turn on grid #1
ax.set_ylim(bottom = 0, top = 1000)
ax.legend(loc='upper left', fontsize=25)
ax2 = ax.twinx()
ax2.plot(x, y2, "b", label= cus_2, marker='x')
ax2.set_yticks([])
ax2.grid(False) # turn off grid #2
ax2.set_ylim(bottom = 0, top = 10000)
ax2.legend(loc='upper left', fontsize=25)
ax3 = ax2.twinx()
ax3.plot(x, y3, "g", label= cus_3, marker='x')
ax3.set_yticks([])
ax3.grid(False) # turn off grid #2
ax3.set_ylim(bottom = 0, top = 10000)
ax3.legend(loc='upper left', fontsize=25)
I just need to understand and know the solutions for the following:
Why is the X-axis not showing the months' names?
Why is the 3 separate legend tables not connected together?
How do I avoid the 'No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.' error warning?
Hope to receive a favorable reply soon. :)
Edit notice: Here is the dataframe used for this problem:
I want to read an Excel file, sum the values for the years 2021, 2020 and 2019 for the locations from the same region (region B) and then create a graph with two lines (for region A and B) which will show how the values for both regions have changed during the years.
I tried with this code:
import matplotlib.pyplot as plt
import pandas as pd
excel_file_path = "Testfile.xlsx"
df = pd.read_excel(excel_file_path)
x = ["2021", "2020", "2019"]
y1 = df_region["Values2021"]
y2 = df_region["Values2020"]
y3 = df_region["Values2019"]
fig = plt.figure(figsize=(20,5))
plt.plot(x, y1, color = 'red', label = "A")
plt.plot(x, y2, color = 'blue', label = "B")
plt.legend(loc='best')
plt.show()
But it isn't working for me - I get the following error:
"Exception has occurred: ValueError
x and y must have same first dimension, but have shapes (3,) and (2,)"
What do I need to do to get the result that I want? Any help would be greatly appreciated.
I think you meant to define your y1 and y2 a little differently.
x = ["2021", "2020", "2019"]
fig = plt.figure(figsize=(20,5))
colors = ['red', 'blue']
for i, region in enumerate(df_region.index):
y = df_region.loc[region, :]
plt.plot(x, y, color = colors[i], label = region)
plt.legend(loc='best')
plt.show()
which plots AND reads the region names from the DataFrame.
I have a scatter plot generated using:
x = list(auto['umap1'])
y = list(auto['umap2'])
final_df2 = pd.DataFrame(list(zip(x,y,communities)), columns =['x', 'y', 'cluster'])
no_clusters = max(communities)
cluster_list = list(range (min(communities), no_clusters+1))
fig2, ax = plt.subplots(figsize = (20,15))
plt.scatter(x,y, c=final_df2['cluster'], cmap=plt.cm.get_cmap('hsv', max(cluster_list)), s = 0.5)
plt.title('Phenograph on UMAP - All Markers (auto)', fontsize=15)
plt.xlabel('umap_1', fontsize=15)
plt.ylabel('umap_2', fontsize=15)
plt.colorbar(extend='both',ticks = range(max(cluster_list)))
plt.show()
I wanted to know how can I add the colorbar labels (numbers from 1-31) to the actual clusters on the graph (as text) that each one corresponds to. This is because it is quite hard to tell this from the colours as they loop back to red.
I tried:
n = list(final_df2['cluster'])
for i, txt in enumerate(n):
ax.annotate(txt, (y[i], x[i]))
But this is giving me no luck.
Your code for the annotations is writing an annotation for each and every dot. This just ends in a sea of numbers.
Somehow, you should find a kind of center for each cluster, for example by averaging all the points that belong to the same cluster.
Then, you use the coordinates of the center to position the text. You can give it a background to make it easier to read.
As I don't have your data, the code below simulates some points already around a center.
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# calculate some random points to serve as cluster centers; run a few steps of a relaxing algorithm to separate them a bit
def random_distibuted_centers():
cx = np.random.uniform(-10, 10, MAX_CLUST + 1)
cy = np.random.uniform(-10, 10, MAX_CLUST + 1)
for _ in range(10):
for i in range(1, MAX_CLUST + 1):
for j in range(1, MAX_CLUST + 1):
if i != j:
dist = np.linalg.norm([cx[i] - cx[j], cy[i] - cy[j]])
if dist < 4:
cx[i] += 0.4 * (cx[i] - cx[j]) / dist
cy[i] += 0.4 * (cy[i] - cy[j]) / dist
return cx, cy
N = 1000
MAX_CLUST = 31
cx, cy = random_distibuted_centers()
# for demonstration purposes, just generate some random points around the centers
x = np.concatenate( [np.random.normal(cx[i], 2, N) for i in range(1,MAX_CLUST+1)])
y = np.concatenate( [np.random.normal(cy[i], 2, N) for i in range(1,MAX_CLUST+1)])
communities = np.repeat(range(1,MAX_CLUST+1), N)
final_df2 = pd.DataFrame({'x':x, 'y':y, 'cluster': communities})
no_clusters = max(communities)
cluster_list = list(range (min(communities), no_clusters+1))
fig2, ax = plt.subplots(figsize = (20,15))
plt.scatter(x,y, c=final_df2['cluster'], cmap=plt.cm.get_cmap('hsv', max(cluster_list)), s=0.5)
plt.title('Phenograph on UMAP - All Markers (auto)', fontsize=15)
plt.xlabel('umap_1', fontsize=15)
plt.ylabel('umap_2', fontsize=15)
plt.colorbar(extend='both',ticks = cluster_list)
bbox_props = dict(boxstyle="circle,pad=0.3", fc="white", ec="black", lw=2, alpha=0.9)
for i in range(1,MAX_CLUST+1):
ax.annotate(i, xy=(cx[i], cy[i]), ha='center', va='center', bbox=bbox_props)
plt.show()
I am not sure as to why this happens. Maybe it is just a simple mistake that I cannot see, but by using this code:
for filename in glob.glob('/Users/jacob/Desktop/MERS/new/NOT COAL/gensets/statistics_per_lgu/per_lgu_files/*.csv'):
base = os.path.basename(filename)
name = os.path.splitext(base)[0]
df = pd.read_csv(filename)
# Show 4 different binwidths
for i, binwidth in enumerate([10, 20, 30, 40]):
# Set up the plot
ax = plt.subplot(2, 2, i + 1)
plt.subplots_adjust( wspace=0.5, hspace=0.5)
# Draw the plot
ax.hist(df['New Capacity based on 0.8 PF'], bins=binwidth,
color='red', edgecolor='black',alpha=0.5)
# Title and labels
ax.set_title('Histogram with Binwidth = %d' % binwidth, size=10)
ax.set_xlabel('Capacity', size=11)
ax.set_ylabel('Frequency count', size=11)
ax.axvline(x=df['New Capacity based on 0.8 PF'].median(), linestyle='dashed', alpha=0.3, color='blue')
min_ylim, max_ylim = plt.ylim()
ax.text(x=df['New Capacity based on 0.8 PF'].median(),y= max_ylim*0.9, s='Median', alpha=0.7, color='blue',fontsize = 12)
ax.axvline(x=df['New Capacity based on 0.8 PF'].mean(), linestyle='dashed', alpha=0.9, color='green')
min_ylim, max_ylim = plt.ylim()
ax.text(x=df['New Capacity based on 0.8 PF'].mean(),y= max_ylim*0.5, s='Mean', alpha=0.9, color='green',fontsize = 12)
plt.tight_layout()
plt.grid(True)
plt.savefig('/Users/jacob/Documents/Gensets_gis/historgrams/per_lgu_files/{}.png'.format(name))
I get all files created like this attached photo here.
Any ideas as to what I've done wrong?
Thanks in advance.
attached photo of one histogram output
My desired result would be something like this.
Desired output
It doesn't create new subplots but it use previous ones and then it draw new plots on old plots so you have to use clear subplot before you draw new histogram.
ax = plt.subplot(2, 2, i + 1)
ax.clear()
Example code. It gives desired output but if you remove `ax.clear() then first image will be OK but you get new plot with old plots on second and third image.
import os
import pandas as pd
import matplotlib.pyplot as plt
import random
for n in range(3):
filename = f'example_data_{n}.csv'
base = os.path.basename(filename)
name = os.path.splitext(base)[0]
df = pd.DataFrame({'New Capacity based on 0.8 PF': random.choices(list(range(1000)), k=100)})
data = df['New Capacity based on 0.8 PF']
median = data.median()
mean = data.mean()
# Show 4 different binwidths
for i, binwidth in enumerate([10, 20, 30, 40]):
# Set up the plot
ax = plt.subplot(2,2,i+1)
ax.clear() # <--- it removes previous histogram
plt.subplots_adjust( wspace=0.5, hspace=0.5)
# Draw the plot
ax.hist(data , bins=binwidth, color='red', edgecolor='black',alpha=0.5)
# Title and labels
ax.set_title('Histogram with Binwidth = %d' % binwidth, size=10)
ax.set_xlabel('Capacity', size=11)
ax.set_ylabel('Frequency count', size=11)
min_ylim, max_ylim = plt.ylim()
ax.axvline(x=median, linestyle='dashed', alpha=0.3, color='blue')
ax.text(x=median, y= max_ylim*0.9, s='Median', alpha=0.7, color='blue',fontsize = 12)
ax.axvline(x=mean, linestyle='dashed', alpha=0.9, color='green')
ax.text(x=mean, y= max_ylim*0.5, s='Mean', alpha=0.9, color='green',fontsize = 12)
plt.tight_layout()
plt.grid(True)
plt.savefig('{}.png'.format(name))
I'm trying to do PCA analysis for our Repertory Grid Tool. I have a matrix which contains all the info I need, however I want to put the names of the alternatives(column names) on the dots in the analysis. My code is something like this:
matrixAlternatives= transpose(matrixAlternatives)
var_grid = np.array(matrixAlternatives)
#improve output readability
np.set_printoptions(precision=2)
np.set_printoptions(suppress=True)
print "var_grid:"
print var_grid
#Create the PCA node and train it
pcan = mdp.nodes.PCANode(output_dim=2, svd=True)
pcar = pcan.execute(var_grid)
print "\npcar"
print pcar
print "\neigenvalues:"
print pcan.d
print "\nexplained variance:"
print pcan.explained_variance
print "\neigenvectors:"
print pcan.v
#Graph results
#pcar[3,0],pcar[3,1] has the projections of alternative3 on the
#first two principal components (0 and 1)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(pcar[:, 0], pcar[:, 1], 'r^')
ax.plot(pcan.v[:,0], pcan.v[:,1], 'ro')
#draw axes
ax.axhline(0, color='black')
ax.axvline(0, color='black')
#annotations each concern
id=0
for xpoint, ypoint in pcan.v:
ax.annotate('C{:.0f}'.format(id), (xpoint, ypoint), ha='center',
va='center', bbox=dict(fc='white',ec='none'))
id+=1
#calculate accounted for variance
var_accounted_PC1 = pcan.d[0] * pcan.explained_variance * 100 /(pcan.d[0] + pcan.d[1])
var_accounted_PC2 = pcan.d[1] * pcan.explained_variance * 100 /(pcan.d[0] + pcan.d[1])
#Show variance accounted for
ax.set_xlabel('Accounted variance on PC1 (%.1f%%)' % (var_accounted_PC1))
ax.set_ylabel('Accounted variance on PC2 (%.1f%%)' % (var_accounted_PC2))
canvas = FigureCanvas(fig)
response = HttpResponse(content_type='image/png')
canvas.print_png(response)
fig.clf()
plt.close()
plt.clf()
del var_grid
gc.collect()
return response
If I understand you correctly you just need to annotate your plots using the column heading. Here is a minimal example:
import matplotlib.pylab as plt
import numpy as np
x = np.linspace(0, 10 ,100)
y = np.sin(x)
plt.plot(x, y , "ro")
plt.annotate(s=" some string", xy=(x[25], y[25]))
You will need to add some formatting I suspect to get the strings in the correct place.