Plot multiple csv files with Python/matplotlib loop - python

I have a directory filled with multiple .csv files, each of which has only two columns (a date and an integer). I am trying to get this code to loop over each file and plot them individually so that there is a corresponding .png for each .csv. Every time it runs, I end up with the correct number of .png files, but each one contains exactly the same data. I have already implemented the plt.clf() method to clear the figure on each iteration, but it doesn't work. Here is the code:
import numpy as np
import pylab as pl
import matplotlib.pyplot as plt
import datetime as DT
import matplotlib.dates as mdates
import scipy
import os
import glob
rootdir='/path/to/file'
for infile in glob.glob( os.rootdir.join(rootdir, '*.csv.out') ):
output = infile + '.out'
data= np.loadtxt(infile, delimiter=',',
dtype={'names': ('date', 'session'),'formats': ('S10', 'i4')} )
#Organizes 2-column spreadsheet
dates, sessions = map(list, zip(*data))
print dates, sessions
x = [DT.datetime.strptime(date,"%m-%d-%y") for date in dates]
y = [sessions]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.xaxis_date()
ax.grid()
#Fills space under plotted line
ax.fill_between(x, sessions, color='blue')
# slants the x axis
fig.autofmt_xdate()
plt.plot(x,sessions)
plt.xlabel('Date')
plt.ylabel('Sessions')
plt.title('Peak Usage')
fileNameTemplate = r'\path\to\file\Plot{}.png'
for subdir,dirs,files in os.walk(rootdir):
for count, file in enumerate(files):
pl.savefig(fileNameTemplate.format(count), format='png')
pl.clf()
I modeled the enumerator after a solution in this answer but I am still getting an issue.

You need to:
define a function for your plots
call that function from your loop
include plt.close() at the end of said function.
Right now, you're not creating new plots as you walk the directory. The plot command needs to be inside the loop.
def plot():
#do your plotting in here. If this is being called from a loop and the
#variables used herein are defined before, it will use the
#global values as they exist at the time. You can also end this function with
fig.savefig(**args)
plt.close()
for count, file in enumerate(files):
plot()

Related

Matplotlib: Generating Subplots for Multiple Time Series

I have the following dataset that was randomly generated through a simulation I am building:
https://drive.google.com/drive/folders/1JF5QrliE9s8VPMaGc8Z-mwpFhNWkeYtk?usp=sharing
For debugging purposes, I would like to be able to view this data in a series of small multiples. Like this:
I am attempting to do this using matplotlib and pandas. Here is my code for that:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
def graph_file(f: str):
"""
Graphs a single file of data
and exports it as a pdf of separate charts.
"""
data = pd.read_csv(f)
header = data.columns
fname = f[:-4] + '.pdf'
with PdfPages(fname) as pdf:
n = len(header)
time: str = header[0]
# Multiple charts on one page
fig = plt.figure()
for i in range(1, n):
y: str = header[i]
ax = fig.add_subplot()
data.plot(x=time, y=y)
pdf.savefig(bbox_inches='tight')
When I open up the .csv file and try to run the function using a Jupyter notebook, I get the same deprecation warning over and over again:
<ipython-input-5-0563709f3c08>:24: MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance. In a future version, a new instance will always be created and returned. Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.
ax = fig.add_subplot()
The resulting pdf file does not contain a single page with multiple graphs (which is what I want like in the first image) but just a single page with a single graph:
What exactly am I doing wrong? I greatly appreciate any feedback you can give.
Here is a solution that should meet your needs. It reads the csv file into a dataframe and iterates through the columns of the dataframe to plot corresponding subplots.
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
def graph_file(f: str, ncols: int = 3):
    """Plot each data column of a CSV file as small multiples on one PDF page.

    The first column of the file is used as the shared x-axis and every
    remaining column gets its own subplot. The page is written next to the
    input file, with the file extension replaced by ``.pdf``.

    Args:
        f: Path to the CSV file to plot.
        ncols: Number of subplot columns in the grid; rows are added as
            needed to fit every data column.

    Raises:
        ValueError: If ``ncols`` is smaller than 1.
    """
    import math
    import os

    if ncols < 1:
        raise ValueError("ncols must be at least 1")

    df = pd.read_csv(f)
    x_col = df.columns[0]
    y_cols = df.columns[1:]

    # Size the grid from the data instead of assuming at most nine series.
    nrows = max(1, math.ceil(len(y_cols) / ncols))
    # squeeze=False keeps axs two-dimensional even for a 1x1 grid.
    fig, axs = plt.subplots(nrows=nrows, ncols=ncols, squeeze=False)
    # 20 x 10 inches for three rows; taller pages get the same row height.
    fig.set_size_inches(20, 10 * nrows / 3)
    fig.subplots_adjust(wspace=0.5, hspace=0.5)

    panels = axs.flatten()
    for col, ax in zip(y_cols, panels):
        ax.plot(df[x_col], df[col])
        ax.set(xlabel=x_col, ylabel=col)
        ax.tick_params(axis='x', labelrotation=30)
    # Grid cells without a series would otherwise show up as empty frames.
    for ax in panels[len(y_cols):]:
        ax.set_visible(False)

    fname = os.path.splitext(f)[0] + '.pdf'
    with PdfPages(fname) as pdf:
        pdf.savefig(fig, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

Multiple files, multiple plots saved to a multipage, single pdf file

I am working with >100 csv files, which I open and plot in a loop. My aim is to save each plot on its own pdf page and generate one big pdf file in which each page contains the plot from a single file. I am looking at these examples - (1) and (2). Trying out combinations using matplotlib.backends.backend_pdf, I am unable to get the required result.
Here I re-create my code and the approach I am using:
pdf = PdfPages('alltogther.pdf')
fig, ax = plt.subplots(figsize=(20,10))
for file in glob.glob('path*'):
df_in=pd.read_csv(file)
df_d = df_in.resample('d')
df_m = df_in.resample('m')
y1=df_d['column1']
y2=df_m['column2']
plt.plot(y1,linewidth='2.5')
plt.plot(y2,linewidth='2.5')
pdf.savefig(fig)
With this all the plots are getting superimposed on the same figure and the pdf generated is empty.
You need to move the line
fig, ax = plt.subplots(figsize=(20,10))
Inside the loop, otherwise each iteration will use the same figure instance instead of a new instance. Also note that you need to close the pdf when you are done with it. So the code should be
# The with-block closes (finalises) the PDF even if a file fails to load.
with PdfPages('alltogther.pdf') as pdf:
    for file in glob.glob('path*'):
        # A fresh figure per file keeps the plots from piling up on one canvas.
        fig, ax = plt.subplots(figsize=(20, 10))
        df_in = pd.read_csv(file)
        # NOTE(review): resample() needs a datetime index and normally an
        # aggregation such as .mean(); kept exactly as in the question.
        df_d = df_in.resample('d')
        df_m = df_in.resample('m')
        y1 = df_d['column1']
        y2 = df_m['column2']
        ax.plot(y1, linewidth=2.5)
        ax.plot(y2, linewidth=2.5)
        pdf.savefig(fig)
        # With >100 files, unclosed figures would accumulate in memory.
        plt.close(fig)
Edit
Complete, self-contained example:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np
# The with-block finalises out.pdf once all pages have been written.
with PdfPages('out.pdf') as pdf:
    for i in range(5):
        fig, ax = plt.subplots(figsize=(20, 10))
        # linestyle must be the string 'None' to draw markers only; the
        # Python value None just selects the default (solid) line style.
        ax.plot(np.random.random(10), linestyle='None', marker='.')
        pdf.savefig(fig)
        # Close each figure once its page is saved so they do not pile up.
        plt.close(fig)

How to import and plot every file in folder using Python and Matplotlib

I have a code that reads a .csv file from specified folder and it generates .png with plotted chart. How can I write loop which would read all files from folder one by one and for each plot their chart (.png) respectively.
import os
import sys
import numpy as np
import datetime
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
# csv header
time_format = '%Y-%m-%d %H:%M:%S'
col_names = ["first_action_time","stable", "smooth", "sbase", "prebase", "leastsquares","uplift","base"]
dtypes = ["object", "uint8", "uint8", "uint8", "uint8", "uint8", "uint8", "uint8"]
# read from csv
data = np.genfromtxt('D:\python34\\data_2016-10-09 08_26_28.csv',skip_header=1,usecols = (0,1,2,3,4,5,6,7), names=col_names, delimiter=';', dtype=dtypes)
# x-axis datetimeformat
x = [datetime.strptime(x.decode("utf-8"), time_format) for x in data['first_action_time']]
datemin=min(x)
datemax=max(x)
#plt.title(importame)
fig = plt.figure(figsize=(40,8))
ax = plt.axes()
ax.set_xlim(datemin, datemax)
plt.plot(x,data['stable'],color='purple',label='stable')
plt.plot(x,data['smooth'],color='green',label='smooth')
plt.plot(x,data['sbase'],color='orange',label='sbase')
#plt.plot(x,data['prebase'],color='yellow',label='prebase')
#plt.plot(x,data['leastsquares'],color='red',label='leastsquares')
plt.plot(x,data['uplift'],color='blue',label='uplift',linestyle='dotted')
plt.plot(x,np.array(data['base']),color='red',label='base',linestyle='dashed')
plt.legend()
fig.autofmt_xdate()
plt.savefig('D:\python34\\test.png')
The general and most simple case would be to create the figures in a loop and save them. The only important thing to mind is that the previous figure should be closed before creating a new one.
import matplotlib.pyplot as plt
import numpy as np
import glob
# Collect every csv file in the current working directory.
files = glob.glob("*.csv")
# One figure per file: create, plot, save, close.
for f in files:
    # Read in the data (pass delimiter=... if the file is not whitespace-separated).
    data = np.genfromtxt(f)
    # Create a new figure and do the plotting on its own axes.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(data)
    # Save next to the input; glob guarantees the name ends in ".csv".
    fig.savefig(f[:-4] + ".png")
    # Close exactly this figure so no figure (including the last one) stays
    # open and no unrelated figure is closed by accident.
    plt.close(fig)
See also
https://stackoverflow.com/a/16368570/4124317
Matplotlib and Pyplot.close() not releasing memory? - backend related Qt4Agg
How to speed up matplotlib when plotting and saving lots of figures?
for the case that plotting is too slow or consumes too much memory.

Extract header names from a CSV and use it to plot against each other in Python?

I am pretty new to python and coding in general. I have this code so far.
import numpy as np
import matplotlib.pyplot as plt
data = np.loadtxt('data.csv', delimiter=',', skiprows=1)
mSec = data[:,0]
Airspeed = data[:,10]
AS_Cmd = data[:,25]
airspeed = data[:,3]
plt.rc('xtick', labelsize=25) #increase xaxis tick size
plt.rc('ytick', labelsize=25) #increase yaxis tick size
fig, ax = plt.subplots(figsize=(40,40), edgecolor='b')
ax.patch.set_facecolor('white')
ax.plot(mSec, Airspeed, label='Ground speed [m/s]')
ax.plot(mSec, AS_Cmd, label='Voltage [V]')
plt.legend(loc='best',prop={'size':20})
fig.savefig('trans2.png', dpi=(200), bbox_inches='tight') #borderless on save
However, I don't want to individually read every data column there is. I want to be able to load a csv file, have the program read out all column names, then ask the user which columns to use for the x-axis and y-axis, and plot that graph. The csv file format is:
time(s),speed(mph),heading,bvoltage(v)
20,30,50,10
25,45,50,10
30,50,55,9
Here is my attempt at the code but I am missing a lot of information:
import numpy as np
import matplotlib.pyplot as plt
data = np.loadtxt('data.csv', delimiter=',')
## names = where I will store the column names
white True:
## display names to user
print ('Pick your x-axis')
xaxis = input()
print ('Pick your y-axis')
yaxis1 = input()
print('pick a 2nd y-axis or enter none')
yaxis2 = input()
if input()= 'none'
break;
else continue
#plot xaxis vs yaxis vs 2nd yaxis
I understand the loop is not correct. I don't want anyone to correct me on that; I will figure it out myself. However, I would like a way to access those values from the CSV file so that I can use them in that method.
Using pandas you can do:
import pandas as pd
data = pd.read_csv("yourFile.csv", delimiter=",")
and plot two columns, here named Col1 and Col2, against each other with:
data.plot(x='Col1', y='Col2')
If you have a first line in the csv file with the desired names of the columns, pandas will pick those automatically, otherwise you can play with the header argument of read_csv.
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html
If you don't mind using/installing another module then pandas should do it.

Error messages in Matplotlib

I am trying to chart this data in matplotlib and I am getting the following error message:
raise TypeError('Unrecognized argument type %s to close'%type(arg))
TypeError: Unrecognized argument type <type 'list'> to close
The data I am sending to it is not a string, it is a float as you can see from the code below:
import os
import csv
import glob as g
import pprint as p
import matplotlib.pyplot as plt
os.chdir('F:\\')
def graphWriter():
for file in g.glob('*.TXT'):
for col in csv.DictReader(open(file,'rU')):
set_ = int(col[' Set'])
iriR = float(col[' IRI R e'])
iriL = float(col['IRI LWP '])
rutL = float(col[' RUT L e'])
rutR = float(col[' RUT R e'])
start = float(col['Start-Mi'])
end = float(col[' End-Mi'])
fig = plt.plot(iriR,iriL)
plt.show()
plt.close(fig)
graphWriter()
Though the window comes up to chart the data and the units are correct, there is no line in the chart, which probably stems from the apparent data issue. So the question is: what's causing the error message, and what's causing there to be no data lines in the chart? The two are most likely related. Here is some of the input data, though I am only trying to graph the two datasets on the right side, which would be iriR and iriL as shown above:
(194.449, 194.549, 90.0, 77.9)
(194.549, 194.649, 84.6, 81.5)
(194.649, 194.749, 88.4, 84.1)
(194.749, 194.849, 69.5, 82.9)
(194.849, 194.949, 76.2, 71.0)
The problem is that the function plt.plot returns a list of lines (that were added to the plot), and not a Figure object --- while plt.close only accepts a Figure object. There are several ways to work around this:
First, get the figure object ("get current figure"):
fig = plt.gcf()
plt.close(fig)
Second, call close with no arguments: plt.close() --- this will automatically close the active figure.
Third, close all figures: plt.close('all').
All of these usages are covered in the matplotlib.pyplot.close documentation.
Edit:
The next issue is that you're not storing an array of values to your variables, instead you're just storing a single floating value. You can initialize a list, and store new elements to it.
os.chdir('F:\\')
iriR = [] # Initialize a list
def graphWriter():
for file in g.glob('*.TXT'):
for col in csv.DictReader(open(file,'rU')):
set_ = int(col[' Set'])
iriR.append(float(col[' IRI R e'])) # Append new entry
Do the same thing for the other variables that you want to plot.
Maybe this will work.
import pandas as pd
import matplotlib.pyplot as plt
import glob as g
def graphWriter():
    """Plot the fourth value against the third for every ``*.TXT`` file.

    Each file in the current directory is loaded into a DataFrame stored in
    a dictionary keyed by file name, and all files are drawn on one shared
    axes before the figure is shown and closed.

    NOTE(review): assumes every line looks like ``(a, b, c, d)`` with no
    header row, as in the sample data shown -- confirm against the real files.
    """
    data = {}
    for file in g.glob('*.TXT'):
        # No header row: let pandas number the columns 0..3 so they can be
        # addressed by position.
        frame = pd.read_csv(file, header=None)
        # The last field still carries the closing ')'; strip it (and any
        # surrounding blanks) before converting to float.
        frame[3] = frame[3].str.strip().str.rstrip(')').astype(float)
        data[file] = frame
    fig, ax = plt.subplots()
    # .values() works on both Python 2 and 3, unlike .itervalues().
    for d in data.values():
        # Select whole columns by label; d[:, 2] is not valid on a DataFrame.
        ax.plot(d[2], d[3])
    plt.show()
    plt.close(fig)
graphWriter()
The function obtains a list of the files ending in .TXT and loads them into a dictionary in which the keys are the file names. It then plots them.
Update
Since the OP posted that pandas is unavailable, it is possible to use numpy instead.
import numpy as np
import matplotlib.pyplot as plt
import glob as g
def graphWriter():
    """Plot the fourth value against the third for every ``*.TXT`` file.

    Each file in the current directory is parsed with ``np.fromregex`` into
    a structured array with fields '1'..'4', stored in a dictionary keyed by
    file name, and all files are drawn on one shared axes.

    NOTE(review): assumes every record is four comma-separated decimal
    numbers, as in the sample data shown -- confirm against the real files.
    """
    # One capture group per dtype field, so each match becomes one record.
    # Raw string: '\d' in a normal string is an invalid escape sequence.
    number = r'(\d+\.\d+)'
    row_pattern = r',\s*'.join([number] * 4)
    row_dtype = [('1', float), ('2', float),
                 ('3', float), ('4', float)]
    data = {}
    for file in g.glob('*.TXT'):
        data[file] = np.fromregex(file, row_pattern, dtype=row_dtype)
    fig, ax = plt.subplots()
    # .values() works on both Python 2 and 3, unlike .itervalues().
    for d in data.values():
        ax.plot(d['3'], d['4'])
    plt.show()
    plt.close(fig)
graphWriter()

Categories