Only last graph is getting pasted in pdf file in python - python

I am reading parameters from several CSV files and creating graphs that compare those parameters across the CSVs. The problem is that only the last graph (the one for the last parameter) ends up in the PDF.
with PdfPages('example.pdf') as pdf:
for arg in sys.argv[1:]:
file_reader= open(arg, "rt", encoding='ascii')
read = csv.reader(file_reader)
for row in read:
if operation_OnDut in row:
column_Result = row[10]
resultOfOperations_OnDut_List.append(column_Result)
buildNumber = row[0]
buildName_List.append(buildNumber)
N = len(resultOfOperations_OnDut_List)
ind = np.arange(N)
#Draw graph for operations performed in that TEST CASE
y = resultOfOperations_OnDut_List
width = .1
fig, ax = plt.subplots()
plt.bar(ind, y, width, label = column_Parameters, color="blue")
plt.xticks(ind, buildName_List)
plt.title("Performance and Scale")
plt.ylabel('Result of Operations')
plt.xlabel('Execution Builds')
plt.legend()
plt.tight_layout()
pdf.savefig()
plt.close()
resultOfOperations_OnDut_List = []
buildName_List = []

You probably got the indentation wrong...
Try
with PdfPages('example.pdf') as pdf:
for arg in sys.argv[1:]:
file_reader= open(arg, "rt", encoding='ascii')
read = csv.reader(file_reader)
for row in read:
if operation_OnDut in row:
column_Result = row[10]
....
# one level deeper
N = len(resultOfOperations_OnDut_List)
ind = np.arange(N)
#Draw graph for operations performed in that TEST CASE
...
Note that the section starting with N = len(resultOfOperations_OnDut_List) has been shifted four spaces to the left to be within the first for loop. If you want it to be within the second for loop add four more spaces.

Related

Skip first several rows when plotting a CSV-file

For a project at work I'm working on a code that reads a csv-file and generates a plot.
My problem is that I work with multiple CSV files, and all of them begin with 10-40 rows of device and sensor information.
I would like my code to detect where the first row of values is and to start reading the values into my arrays from there. Since I have very little experience with Python, I couldn't find a good solution.
If you can recommend specific methods or changes to my code, feel free to comment.
Thanks to everyone taking their time to help me
import matplotlib.pyplot as plt
import csv
a = []
b = []
c = []
d = []
e = []
with open('PATH','r') as csvfile:
lines = csv.reader(csvfile, delimiter=',')
for row in lines:
a.append(float(row [0]))
b.append(float(row [1]))
#c.append(float(row [2]))
#d.append(float(row [3]))
#e.append(float(row [4]))
f = plt.figure()
f.set_figwidth(12)
f.set_figheight(8)
#plt.plot(X-Achse, Y-Achse, linewidth=* , color = ' ', label = " ")
plt.plot(a, b, linewidth=0.35, color = 'b', label = "Sensor 1")
#plt.plot(a, c, linewidth=0.35, color = 'g', label = "Sensor 2")
plt.title('Pressure Report', fontsize = 20)
plt.xlabel('Time(s)')
plt.ylabel('Pressure(bar)')
plt.grid()
plt.legend()
plt.show()
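You can skip the header rows with a conditional on a row counter, as below. Note that the condition must be true only for the data rows (for example `count > 40` when the header is 40 rows long); as written, `count < 10 and count > 40` can never be true, so nothing would be appended: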
count = 1
for row in lines:
if (count < 10 and count > 40):
a.append(float(row [0]))
b.append(float(row [1]))
count += 1
The following changes work for me. It may not be the fastest or best solution, but it does what it's supposed to.
import matplotlib.pyplot as plt
import csv
path = 'PATH'
line_number = 0
list_of_results = []
count = 1
a = []
b = []
c = []
d = []
e = []
with open(path, 'r') as read_obj:
for line in read_obj:
line_number += 1
if "0.000000000" in line:
list_of_results.append((line_number))
firstline = list_of_results[0]-1
with open(path,'r') as csvfile:
lines = csv.reader(csvfile, delimiter=',')
for row in lines:
if (count > firstline):
a.append(float(row [0]))
b.append(float(row [1]))
#c.append(float(row [2]))
#d.append(float(row [3]))
#e.append(float(row [4]))
count += 1
f = plt.figure()
f.set_figwidth(12)
f.set_figheight(8)
#plt.plot(X-Achse, Y-Achse, linewidth=* , color = ' ', label = " ")
plt.plot(a, b, linewidth=0.35, color = 'b', label = "Sensor 1")
#plt.plot(a, c, linewidth=0.35, color = 'g', label = "Sensor 2")
plt.title('Pressure Report', fontsize = 20)
plt.xlabel('Time(s)')
plt.ylabel('Pressure(bar)')
#plt.axis([x_min, x_max, y_min, y_max])
#plt.axis([350, 380, -6, 2.2])
plt.grid()
plt.legend()
plt.show()

Why first figure in the list is not plotted, but at the end there is an empty plot?

I have a problem with matplotlib.
I need to prepare a figure consisting of all the plots from a list in a specified directory. The code below generates it, but it omits the first path...
For example, if I need to prepare an image consisting of 14 subplots, only 13 are copied: the first is omitted, and instead there is an empty plot at the last position.
I have checked that the function reads all paths, including the first one in the list.
If you are able to help and give me a hint about what I'm doing wrong, I will be grateful.
Best regards
def create_combo_plot(path_to_dir, list_of_png_abspath):
name = path_to_dir.replace('_out', '')
title = name
if name.find('/') != -1:
title = name.split('/')[-1]
list_of_png_abspath
how_many_figures = len(list_)
combo_figure = plt.figure(2, figsize=(100,100))
a = 4
b = int(floor(how_many_figures/4.1)) + 1
for i, l in enumerate(list_of_png_abspath):
print l #I`ve checked, path is reached
j = i + 1
img=mpimg.imread(l)
imgplot = plt.imshow(img, interpolation="nearest")
plot = plt.subplot(b, a, j)
combo_figure.suptitle(title, fontsize=100)
combo_figure.savefig(path_to_dir +'/' + title + '.jpeg')
plt.close(combo_figure)
Replace these two lines:
imgplot = plt.imshow(img, interpolation="nearest")
plot = plt.subplot(b, a, j)
with these:
sub = plt.subplot(b, a, j)
sub.imshow(img, interpolation="nearest")
The line:
imgplot = plt.imshow(img, interpolation="nearest")
adds a new plot to the last active subplot. In your case it was created in the previous loop here:
plot = plt.subplot(b, a, j)
Therefore, you start with the second image and the last subplot stays empty.
But if you create the subplot first:
sub = plt.subplot(b, a, j)
and later explicitly plot into it:
sub.imshow(img, interpolation="nearest")
you should see 14 plots.

Python - Outputting two data sets (lists?) to data file as two columns

I am a novice when it comes to Python; I have done most of my programming in C++. I have a program which generates the fast Fourier transform of a data set and plots both the data and the FFT in two windows using matplotlib. Instead of plotting, I want to output the data to a file. This would be a simple task for me in C++, but I can't seem to figure it out in Python. So the question is: how can I output powerx and powery to a data file in which the two data sets are in separate columns? Below is the program:
import matplotlib.pyplot as plt
from fft import fft
from fft import fft_power
from numpy import array
import math
import time
# data downloaded from ftp://ftp.cmdl.noaa.gov/ccg/co2/trends/co2_mm_mlo.txt
print ' C02 Data from Mauna Loa'
data_file_name = 'co2_mm_mlo.txt'
file = open(data_file_name, 'r')
lines = file.readlines()
file.close()
print ' read', len(lines), 'lines from', data_file_name
window = False
yinput = []
xinput = []
for line in lines :
if line[0] != '#' :
try:
words = line.split()
xval = float(words[2])
yval = float( words[4] )
yinput.append( yval )
xinput.append( xval )
except ValueError :
print 'bad data:',line
N = len(yinput)
log2N = math.log(N, 2)
if log2N - int(log2N) > 0.0 :
print 'Padding with zeros!'
pads = [300.0] * (pow(2, int(log2N)+1) - N)
yinput = yinput + pads
N = len(yinput)
print 'Padded : '
print len(yinput)
# Apply a window to reduce ringing from the 2^n cutoff
if window :
for iy in xrange(len(yinput)) :
yinput[iy] = yinput[iy] * (0.5 - 0.5 * math.cos(2*math.pi*iy/float(N-1)))
y = array( yinput )
x = array([ float(i) for i in xrange(len(y)) ] )
Y = fft(y)
powery = fft_power(Y)
powerx = array([ float(i) for i in xrange(len(powery)) ] )
Yre = [math.sqrt(Y[i].real**2+Y[i].imag**2) for i in xrange(len(Y))]
plt.subplot(2, 1, 1)
plt.plot( x, y )
ax = plt.subplot(2, 1, 2)
p1, = plt.plot( powerx, powery )
p2, = plt.plot( x, Yre )
ax.legend( [p1, p2], ["Power", "Magnitude"] )
plt.yscale('log')
plt.show()
You can use a csv.writer() to achieve this task, here is the reference: https://docs.python.org/2.6/library/csv.html
Basic usage:
Zip your lists into rows:
rows=zip(powery,powerx)
Use a csv writer to write the data to a csv file:
with open('test.csv', 'wb') as f:
writer = csv.writer(f)
for row in rows:
writer.writerow(row)
Depending on what you want to use the file for, I'd suggest either the csv module or the json module.
Writing the file as CSV data will give you the ability to open it with a spreadsheet, graph it, edit it, etc.
Writing the file as JSON data will give you the ability to quickly import it into other programming languages, and to inspect it (generally read-only -- if you want to do serious editing, go with CSV).
This is how you can write data from two different lists into a text file in two columns.
# Two random lists
index = [1, 2, 3, 4, 5]
value = [4.5, 5, 7.0, 11, 15.7]
# Opening file for output
file_name = "output.txt"
fwm = open(file_name, 'w')
# Writing data in file
for i in range(len(index)):
fwm.write(str(index[i])+"\t")
fwm.write(str(value[i])+"\n")
# Closing file after writing
fwm.close()
If your lists already contain data in the form of strings, remove 'str' when writing the data to the file.
If you want to save the data in a CSV file, change
fwm.write(str(index[i])+"\t")
WITH
fwm.write(str(index[i])+",")

Turning a raggedly filled array into heatmap in python

I am working with Python and trying to implement a heatmap. I have data that is stored in a 2D array. The array is roughly array[1000][1000], but sometimes the x values are greater than 1000.
I am looking to make a heatmap of this array, and I cannot figure out how to do it. I have no idea how to get my data to conform, since it is a ragged array rather than a standard size.
Here is a rough copy of the code I have been attempting:
sum_data = None
#count = 0
#for in_file in file_values:
print "summing file: data"
data = []
with open('data', 'r') as in_data:
for line in in_data:
line_no_end= line.rstrip('\n')
list_container = []
list_container = line_no_end.split(",")
#print "eachRow: %s" % list_container
data.append(list_container)
if sum_data == None:
sum_data = data
else:
sum_data = [[int(sum_data[y][x]) + int(data[y][x]) for x in range(len(data[y]))] for y in range(len(data))] #makes 2d array from file
and then I was trying to get it to conform to this heatmap example
heatmap, xedges, yedges = np.histogram2d(x,y,bins = 50)
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
plt.clf()
plt.imshow(heatmap,extent=extent)
plt.show()
plt.savefig('output.png')

Can't save all matplotlib plots to pdf - missing some graphs (IPython)

I have a script that I run in IPython. It takes an input .csv file of gene_names and feeds them into this for loop:
with open('C:\Users\Work\Desktop\Book1.csv', 'rU') as f:
reader = csv.reader(f)
with PdfPages('poopyheadjoe04.pdf') as pdf:
for row in reader:
gene_name = row
probe_exclusion_keyword = []
print gene_name
The gene_name values from this list (in the .csv file) are then fed into the branch `if inference_method == "approximate_random":` (in Scripts.py):
with open('C:\Users\Work\Desktop\Book1.csv', 'rU') as f:
reader = csv.reader(f)
with PdfPages('poopyheadjoe04.pdf') as pdf:
for row in reader:
gene_name = row
probe_exclusion_keyword = []
print gene_name
print "Fetching probe ids for gene %s" % gene_name
probes_dict = get_probes_from_genes(gene_name)
print "Found %s probes: %s" % (len(probes_dict), ", ".join(probes_dict.values()))
if probe_exclusion_keyword:
probes_dict = {probe_id: probe_name for (probe_id, probe_name) in probes_dict.iteritems() if not args.probe_exclusion_keyword in probe_name}
print "Probes after applying exclusion cryterion: %s" % (", ".join(probes_dict.values()))
print "Fetching expression values for probes %s" % (", ".join(probes_dict.values()))
expression_values, well_ids, donor_names = get_expression_values_from_probe_ids(
probes_dict.keys())
print "Found data from %s wells sampled across %s donors" % (len(well_ids), len(set(donor_names)))
print "Combining information from selected probes"
combined_expression_values = combine_expression_values(
expression_values, method=probes_reduction_method)
print "Translating locations of the wells to MNI space"
mni_coordinates = get_mni_coordinates_from_wells(well_ids)
print "Checking values of the provided NIFTI file at well locations"
nifti_values = get_values_at_locations(
stat_map, mni_coordinates, mask_file=mask, radius=radius, verbose=True)
# preparing the data frame
names = ["NIFTI values", "%s expression" % gene_name, "donor ID"]
data = pd.DataFrame(np.array(
[nifti_values, combined_expression_values, donor_names]).T, columns=names)
data = data.convert_objects(convert_numeric=True)
len_before = len(data)
data.dropna(axis=0, inplace=True)
nans = len_before - len(data)
if nans > 0:
print "%s wells fall outside of the mask" % nans
if inference_method == "fixed":
print "Performing fixed effect analysis"
fixed_effects(data, ["NIFTI values", "%s expression" % gene_name])
**if inference_method == "approximate_random":**
print "Performing approximate random effect analysis"
approximate_random_effects(
data, ["NIFTI values", "%s expression" % gene_name], "donor ID")
print "poopy"
pdf.savefig()
plt.ion() #should i add ion() here?
if inference_method == "bayesian_random":
print "Fitting Bayesian hierarchical model"
bayesian_random_effects(
data, ["NIFTI values", "%s expression" % gene_name], "donor ID", n_samples, n_burnin)
# if __name__ == '__main__': #What exactly does this do? Start trigger for the script to run?
# main()
That branch triggers approximate_random_effects (in Analysis.py) to plot two graphs, the violinplot and the lmplot:
def approximate_random_effects(data, labels, group):
correlation_per_donor = {}
for donor_id in set(data[group]):
correlation_per_donor[donor_id], _, _, _, _ = linregress(list(data[labels[0]][data[group] == donor_id]),
list(data[labels[1]][data[group] == donor_id]))
average_slope = np.array(correlation_per_donor.values()).mean()
t, p_val = ttest_1samp(correlation_per_donor.values(), 0)
print "Averaged slope across donors = %g (t=%g, p=%g)"%(average_slope, t, p_val)
sns.violinplot([correlation_per_donor.values()], inner="points", names=["donors"])
plt.ylabel("Linear regression slopes between %s and %s"%(labels[0],labels[1]))
plt.axhline(0, color="red")
sns.lmplot(labels[0], labels[1], data, hue=group, col=group, col_wrap=3)
plt.ion()
return average_slope, t, p_val
I'm trying to save both graphs for all the gene_names into a pdf file, by roughly following "Saving multiple figures to one pdf file in matplotlib" and the matplotlib.PdfPages approach.
However, in the pdf file, I am only getting the lmplot for all my gene_names and NOT the violin plot. What do I do to fix this?
Thanks! Help will be much appreciated!
It looks like your code is creating two figures, one for each plot, but you only call pdf.savefig() once, after the second figure is created, so only the second figure is saved.
If you want one figure per page in your pdf, you need to call pdf.savefig() twice: once after creating each plot.
I would recommend that you change the structure of your program a bit, so you can save the pdf after each plot:
def approximate_random_effects(data, labels, group):
correlation_per_donor = {}
for donor_id in set(data[group]):
correlation_per_donor[donor_id], _, _, _, _ = linregress(list(data[labels[0]][data[group] == donor_id]),
list(data[labels[1]][data[group] == donor_id]))
average_slope = np.array(correlation_per_donor.values()).mean()
t, p_val = ttest_1samp(correlation_per_donor.values(), 0)
print "Averaged slope across donors = %g (t=%g, p=%g)"%(average_slope, t, p_val)
with PdfPages('poopyheadjoe04.pdf') as pdf:
fig = plt.figure()
sns.violinplot([correlation_per_donor.values()], inner="points", names=["donors"])
plt.ylabel("Linear regression slopes between %s and %s"%(labels[0],labels[1]))
plt.axhline(0, color="red")
pdf.savefig(fig) ## Saving first figure
fig = plt.figure()
sns.lmplot(labels[0], labels[1], data, hue=group, col=group, col_wrap=3)
pdf.savefig(fig) ## Saving second figure
return average_slope, t, p_val
You then need to delete the following lines from your main program: `with PdfPages('poopyheadjoe04.pdf') as pdf:`, `pdf.savefig()` and `plt.ion()`.
If you need the two plots on the same PDF page, you need to change the violinplot and lmplot in such a way that they use different axes on the same figure.

Categories