How to name graphs in legend based on folder's name - python

I have plotted a graph for each of the sub-folders inside a directory, and I want to add a legend entry for each graph based on the name of its folder. In other words, each legend label should be taken from the folder's name. The code for plotting is below:
from __future__ import division
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import glob
import seaborn as sns
from scipy import stats
from scipy.stats.kde import gaussian_kde
# NOTE(review): indentation was lost in this listing; the nesting of the loops below has to be inferred.
# Directory whose entries are scanned for sub-folders.
root = r'C:\Users\Hasan\Desktop\output\new our scenario\beta 15\test'
mean_cu=[]
my_list = os.listdir(root)
# Keep only the entries of root that are directories.
my_list = [file for file in my_list if os.path.isdir(os.path.join(root, file))]
for directory in my_list:
# Files in this sub-folder whose names start with 'config'.
CASES = [file for file in os.listdir(os.path.join(root, directory)) if file.startswith('config')]
if len(CASES)==0:
continue
# Largest integer found in the second '_'-separated field of the file names (extension removed).
maxnum = np.max([int(os.path.splitext(f)[0].split('_')[1]) for f in CASES])
# Rebuild the file names for indices 0 .. maxnum-1 (range() excludes maxnum itself).
CASES = ['configuration_%d.out' % i for i in range(maxnum)]
mean_cu=[]
for i, d in enumerate(CASES):
# Loaded table is transposed, so a[4] below indexes along what was the column axis.
a = np.loadtxt(os.path.join(root, directory,d)).T
num = os.path.splitext(d)[0]
local_cu = np.abs(a[4])
# NOTE(review): appends to `mean_curv`, while the list created in this snippet is `mean_cu` - confirm the intended name.
mean_curv.append(np.mean(local_cu))
# Standard normal density evaluated at the values in mean_cu.
pdf = stats.norm.pdf(mean_cu)
Time = np.arange(0,len(pdf))
# Plotted without a label= argument.
plt.plot(Time,pdf)

From what I understand, you have several graphs, and for each of them you want a different legend entry based on the current folder. You can set the legend text by passing label=directory to the plot method and then calling legend(). If you don't want the full path, first extract the current folder name (using split or a similar method); that depends on what your directory variable contains.
Consider following example:
import matplotlib.pyplot as plt
import pandas as pd
legend1 = ["1", "2"]
df = pd.DataFrame({"A": [4, 5], "B": [6, 7]})

# One figure per legend entry: the data is identical, only the legend text differs.
for legend_text in legend1:
    figure, axes = plt.subplots()
    x_values, y_values = df["A"], df["B"]
    axes.plot(x_values, y_values, label=legend_text)
    axes.legend(frameon=False, loc='upper left')
This results in the following two graphs:
As you can see, the only difference is the legend, which is set from the legend1 list. You can use your directory names instead.

Related

execute .py file, call python function in for loop

I have a function 'plot_rdm', which creates a plot and saves it as 'rdm.png'. I want several of these plots to be formed, each using a different .json file - so I have the function plot_rdm saved in 'plotrdm.py'.
In the saverdm.py file - I defined the filepath of the .json file I want to create a plot from and then called the plot_rdm function, looping over all of the files I want to create a plot from:
#import libraries
import numpy as np
import matplotlib
import matplotlib.pyplot
import matplotlib.pyplot as plt
import scipy
import scipy.spatial
import scipy.spatial.distance as sd
from scipy.spatial.distance import squareform
import os
import json
# NOTE(review): indentation was lost in this listing; the nesting of the loops below has to be inferred.
# define fpath
#i.e. fpath[0] will be the first filepath...
path = './RDM_researchproject'
rootdir = path
filepath = []
# Walk rootdir recursively.
for subdir, dirs, files in os.walk(rootdir):
for file in files:
# Only files whose names start with 'Meadows' are collected.
if file.startswith('Meadows'):
count=0 # count default
filepath.append(os.path.join(subdir, file))
fpath = filepath[count]
# Runs plotrdm.py through the shell as a separate process; `fpath` is not part of this command line.
os.system("/home/taran/RDM_researchproject/AVIMA/plotrdm.py")
count +=1
The plotrdm.py file with the plot_rdm function is as follows:
def plot_rdm(fpath):
    """Plot the RDM stored in the JSON file ``fpath`` and save it as 'rdm.png'."""
    import numpy as np
    import matplotlib
    import matplotlib.pyplot
    import matplotlib.pyplot as plt
    import scipy
    import scipy.spatial
    import scipy.spatial.distance as sd
    from scipy.spatial.distance import squareform
    import json

    with open(fpath) as source:
        payload = json.load(source)

    # stimulus names, collected separately for the y axis and the x axis
    stimuli = payload['stimuli']
    y_labels = [entry['name'] for entry in stimuli]
    x_labels = [entry['name'] for entry in stimuli]

    # build the array from the stored RDM and pass it through squareform
    matrix = squareform(np.array(payload['rdm']))

    # draw the matrix and label both axes
    fig, ax = plt.subplots()
    image = ax.imshow(matrix)
    ax.set_xticks(np.arange(len(x_labels)))
    ax.set_yticks(np.arange(len(y_labels)))
    ax.set_xticklabels(x_labels)
    ax.set_yticklabels(y_labels)
    plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")

    plt.plot(matrix)
    plt.imshow(matrix)
    plt.colorbar()
    fig.subplots_adjust(bottom=0.23)
    plt.savefig('rdm.png')
I am able to create the plots individually (i.e. when I don't call the plot_rdm function in a loop over the files, but specify the filepath each time). But when I use the code above, I get an empty plot in the AVIMA folder. I'm not sure what in the saverdm file is making this happen.
https://github.com/Taranks7/RDM_researchproject If I haven't explained what's going on well, this is the project I'm working on.
Thank you
When you want to call a python function from another file, you should not try to run another python process by calling os.system. Just import that function:
from plotrdm import plot_rdm
Instead of using os.walk and a file.startswith check, we can clean up the code a lot by using the nice Python library glob. I throw in an enumerate for good measure.
Your new rdmsave.py (replacing saverdm.py):
import glob
from plotrdm import plot_rdm
basedir = "."

if __name__ == "__main__":
    # Collect every matching JSON file below basedir, in sorted order.
    pattern = f'{basedir}/**/Meadow*.json'
    json_paths = sorted(glob.glob(pattern, recursive=True))

    # Output images are numbered from 00 in processing order.
    for index, path in enumerate(json_paths):
        print(f"processing {path}")
        output_image = f'rdm_{index:02}.png'
        print(f"output image will be {output_image}")
        plot_rdm(path, output_image)

    print(f"processed {len(json_paths)} files")
Note that you may need to change basedir back to your local path.
And your plotrdm.py becomes:
import numpy as np
import matplotlib
import matplotlib.pyplot
import matplotlib.pyplot as plt
import scipy
import scipy.spatial
import scipy.spatial.distance as sd
from scipy.spatial.distance import squareform
import json
import matplotlib.pyplot as plt
def plot_rdm(fpath, output_filename):
    """Render the RDM stored in a JSON file as a labelled heatmap image.

    Args:
        fpath: Path to a JSON file holding a ``'stimuli'`` list (each entry
            has a ``'name'``) and an ``'rdm'`` array, which is passed
            through ``squareform`` to obtain the matrix that is drawn.
        output_filename: Path of the image file to write.
    """
    with open(fpath) as fhandle:
        data = json.load(fhandle)

    # Rows and columns refer to the same stimuli, so one list labels both axes.
    names = [stimulus['name'] for stimulus in data['stimuli']]
    srdm = squareform(np.array(data['rdm']))
    ticks = np.arange(len(names))

    fig, ax = plt.subplots()
    try:
        image = ax.imshow(srdm)
        ax.set_xticks(ticks)
        ax.set_yticks(ticks)
        ax.set_xticklabels(names)
        ax.set_yticklabels(names)
        plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")
        # Tie the colorbar to this image explicitly rather than to pyplot's
        # implicit "current image".
        fig.colorbar(image, ax=ax)
        fig.subplots_adjust(bottom=0.23)
        fig.savefig(output_filename)
    finally:
        # This function is called once per input file; closing the figure
        # keeps pyplot from holding on to every figure created so far.
        plt.close(fig)
I added the second argument output_filename to the plot_rdm function to make it possible to store each image in a new file.
The output on my machine reads
processing ./5/Meadows_avima-image-version1_v_v2_vital-macaw_2_tree.json
output image will be rdm_00.png
processing ./4/Meadows_avima-image-version1_v_v2_quick-louse_2_tree.json
output image will be rdm_01.png
processing ./1/Meadows_avima-image-version1_v_v2_better-hound_2_tree.json
output image will be rdm_02.png
processing ./3/Meadows_avima-image-version1_v_v2_huge-falcon_2_tree.json
output image will be rdm_03.png
processing ./2/Meadows_avima-image-version1_v_v2_guided-koi_2_tree.json
output image will be rdm_04.png
processed 5 files
And 5 png files are created in the current folder.

how to get the line graph for the multiple csv files using python?

Let's say I have multiple CSV files for test suites: UPT_Synergy_graph22.csv, UPT_C7000_graph22.csv, SAT-Synergy-gen2_graph22.csv, and 10 more like these. All of the files have the same columns: build_id and pass percent. I need to plot a line graph for all of those files, where build id is the x-axis and pass percent is the y-axis. I need one line per CSV file (the mean for each test suite).
I was able to get the graph for only one CSV file; I am not able to get the results for all of them.
Please help me resolve this. Below is the code I have used; alternatively, suggest any other module that would fit.
import pandas as pd
import pygal
from pygal.style import Style
# NOTE(review): indentation was lost in this listing; the nesting of the loops below has to be inferred.
# Reads one hard-coded CSV file; the two percentage columns are requested as int.
data_frame=pd.read_csv('C:\\Users\\shivarad\\Documents\\Scale_graph22.csv', dtype={"Pass
Percentage":int,"original_pass_percent":int})
# Accumulators for the two plotted series.
a = []
b = []
line_chart=pygal.Line()
#line_chart.title='Graphical Representation of the pass percentage for a build ID---TEST SUITE: SCALE'
# x-axis labels come from the 'build ID' column.
line_chart.x_labels=data_frame['build ID']
for index,row in data_frame.iterrows():
a.append(row["Pass Percentage"])
b.append(row["original_pass_percent"])
line_chart.add("Pass Percentage",a)
line_chart.add("original_pass_percent",b)
line_chart.render_in_browser()
#bar_chart.title='Graphical Representation of the pass percentage for a build ID---TEST SUITE: SCALE'
# NOTE(review): pygal's stacked bar class is presumably `StackedBar`; confirm the spelling `stackedBar` used here.
bar_chart = pygal.stackedBar(height=360, width=440,explicit_size=True)
bar_chart.title='Graphical Representation of the pass percentage for a build ID---TEST SUITE: SCALE'
bar_chart.x_labels=data_frame['build ID']
# a and b still hold the values appended for the line chart; this loop appends the same rows again.
for index,row in data_frame.iterrows():
a.append(row["Pass Percentage"])
b.append(row["original_pass_percent"])
# adding the appended lists
bar_chart.add('Pass Percentage', a)
bar_chart.add('original_pass_percent', b)
# rendering the file
# NOTE(review): the trailing `}` on the next line is not valid Python.
bar_chart.render_in_browser()}
Answering: I need to plot the line graph for all those files. where build id is x-axis and pass-percent is y-axis.
This code will do the following:
Create a list of the pertinent files
Iterate through each file
Create a dataframe
Plot the two specified columns from the dataframe with a label
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
%matplotlib inline
# graphing parameters
# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in matplotlib 3.6 and
# the old name was later removed; use whichever this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
plt.rcParams['figure.figsize'] = (16.0, 10.0)

p = Path(r'c:\Users\shivarad\Documents')  # path to files
files = list(p.rglob('*graph22.csv'))  # get files

# everything from here down belongs in one Jupyter cell
plt.figure()
for f in files:  # iterate through files
    file_name = f.stem  # file name without extension, used as the legend label
    df = pd.read_csv(f, dtype={'Pass Percentage': int, 'original_pass_percent': int})  # create dataframe
    print(df.head())  # this is here to verify df has data; it can be commented out or removed
    plt.plot('build ID', 'Pass Percentage', data=df, label=file_name)  # plot the data from each file

plt.legend(bbox_to_anchor=(1.04, 0.5), loc='center left')
# The legend is anchored outside the axes; bbox_inches='tight' keeps it from
# being cropped out of the saved file.
plt.savefig('test.jpg', bbox_inches='tight')  # verify there's a plot in the file
plt.show()  # outside the loop

pie chart label overlap

I am having problems generating a graph which doesn't overlap with text both for percentages and country codes, I am using python3 matplotlib, see image below.
Any ways of fixing this issue even if it changes layout are appreciated.
from collections import Counter
import numpy as np
import matplotlib.pyplot as plt
import json
# NOTE(review): indentation was lost in this listing; the nesting of the loops below has to be inferred.
countries = []
import os
path = 'data_used_for_graph'
entries = os.listdir(path)
# Collect the 'countryOrigin' of each entry under 'personalNames' in the JSON content.
for file in entries:
with open(path+"/"+file) as f:
content = json.load(f)
for x in content['personalNames']:
countries.append(x['countryOrigin'])
# Number of occurrences per country.
counts = Counter(countries)
labels, values = zip(*counts.items())
# sort your values in descending order
indSort = np.argsort(values)[::-1]
# rearrange your data
labels = np.array(labels)[indSort]
values = np.array(values)[indSort]
# Data to plot
sizes = values
# Plot
# Every wedge gets both its label and a one-decimal percentage text.
plt.pie(sizes, labels=labels,autopct='%1.1f%%', shadow=True, startangle=140)
plt.show()

Multiple PIL images into one single plot

I have n images with the format:
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=64x64 at 0x7F555F6E3898>
all together in a list.
I can visualize each individually but I want to visualize them all together, one beside another until the row gets too long, and then continue on the next row (for instance, n/4 rows with 4 images in each row). Once this is done, I would like to save it as a jpg as well.
I tried using subplot from matplotlib but it says that the values are unhashable.
My code is:
from os import listdir
from os.path import isfile, join
# NOTE(review): indentation was lost in this listing; the nesting of the loops below has to be inferred.
# `mypath`, `Image`, `np` and `plt` are used below but not defined in this snippet.
files = [f for f in listdir(mypath) if isfile(join(mypath, f))]
files.sort()
# Second sort by name length, applied after the alphabetical sort above.
files.sort(key = len)
im = []
for jpg in files:
im.append(Image.open(mypath+'/'+jpg))
for i in im:
image = np.asarray(i)
# NOTE(review): the third argument of plt.subplot is the subplot index, but the image array is passed here;
# len(image) is the first dimension of that array, not the number of images.
plt.subplot(3,np.floor(len(image)/3),image)
plt.show()
Update your usage of subplot and it should work. Please see the sample code below.
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
# get sample data
from sklearn import datasets
x, y = datasets.load_digits(n_class=10, return_X_y=True)
x = x[:100].reshape(100, 8, 8)  # first 100 samples, each as an 8x8 image

# plot
# Only the first nine images are placed on the 3x3 grid.
for position, image in enumerate(x[:9], start=1):
    plt.subplot(3, 3, position)
    plt.imshow(image, cmap='gray', interpolation='none')
alternatively:
# https://matplotlib.org/mpl_toolkits/axes_grid/users/overview.html
from mpl_toolkits.axes_grid1 import ImageGrid
fig = plt.figure(num=1, figsize=(10, 10))
# 2 rows x 7 columns of axes with a small gap between them
grid = ImageGrid(fig, 111, nrows_ncols=(2, 7), axes_pad=0.1)

# Fill the 14 grid cells with the first 14 images.
for index, image in enumerate(x[:14]):
    grid[index].imshow(image.squeeze(), cmap='gray', interpolation='none')

Plot multiple csv files with Python/matplotlib loop

I have a directory filled with multiple .csv files, each of which has only two columns (date and an integer). I am trying to get this code to loop over each file and plot them individually so that there is a corresponding .png for each .csv. Every time it runs, I end up with the correct number of .png files, but each has exactly the same data. I have already implemented the plt.clf() method to clear it for each loop, but it doesn't work. Here is the code:
import numpy as np
import pylab as pl
import matplotlib.pyplot as plt
import datetime as DT
import matplotlib.dates as mdates
import scipy
import os
import glob
# NOTE(review): indentation was lost in this listing; the nesting of the loops below has to be inferred.
rootdir='/path/to/file'
# NOTE(review): `os.rootdir` is not defined anywhere in this snippet; `os.path.join` looks intended - confirm.
for infile in glob.glob( os.rootdir.join(rootdir, '*.csv.out') ):
output = infile + '.out'
# Each row is read into a record with a 'date' field ('S10') and a 'session' field ('i4').
data= np.loadtxt(infile, delimiter=',',
dtype={'names': ('date', 'session'),'formats': ('S10', 'i4')} )
#Organizes 2-column spreadsheet
dates, sessions = map(list, zip(*data))
# Python 2 print statement.
print dates, sessions
# Dates are parsed as month-day-two-digit-year.
x = [DT.datetime.strptime(date,"%m-%d-%y") for date in dates]
y = [sessions]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.xaxis_date()
ax.grid()
#Fills space under plotted line
ax.fill_between(x, sessions, color='blue')
# slants the x axis
fig.autofmt_xdate()
plt.plot(x,sessions)
plt.xlabel('Date')
plt.ylabel('Sessions')
plt.title('Peak Usage')
fileNameTemplate = r'\path\to\file\Plot{}.png'
# For the files listed by os.walk, save the current pyplot figure under a numbered name and clear it.
for subdir,dirs,files in os.walk(rootdir):
for count, file in enumerate(files):
pl.savefig(fileNameTemplate.format(count), format='png')
pl.clf()
I modeled the enumerator after a solution in this answer but I am still getting an issue.
You need to:
define a function for your plots
call that function from your loop
include plt.close() at the end of said function.
Right now, you're not creating new plots as you walk the directory. The plot command needs to be inside the loop.
def plot():
    """Draw and save one figure.

    Do your plotting in here. If this is called from a loop and the
    variables used here are defined beforehand, the global values are used
    as they exist at the time of the call. The function ends by saving and
    closing the figure.
    """
    fig.savefig(**args)
    plt.close()


for count, file in enumerate(files):
    plot()

Categories