Example Input Data:
I am a beginner in Python. I use a for loop to read several CSV files that look like the example above (all of the files have the same format).
So far my code looks like this:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
ax, fig = plt.subplots(4,4,sharex=False, sharey=False, figsize=(22, 10), dpi=70, linewidth=0.5)
ax = np.array(ax)
def loop_directory(directory):
for filename in os.listdir(directory):
if filename.endswith(".csv"):
file_directory = os.path.join(directory, filename)
# print(filename)
df = pd.read_csv(file_directory)
df = df[df['Tavg'].isin([-999]) == False]
df[['Year','Month']] = df[['Year','Month']].astype(int).astype(str)
df["Year&Month"] = df[['Year', 'Month']].agg("/".join,axis=1)
df["Year&Month"] = pd.to_datetime(df["Year&Month"])
x = df["Year&Month"]
y = df["Tavg"]
for axes,col in zip(x, y):
axes.plot(df.index, df[col]) # here is the problem, i dont know how to use for loop to plot in subplots
plt.show()
if __name__ == "__main__":
loop_directory(r"C:\Users\LAB312\Desktop\vietnam\anomaly")
I've tried more than ten times, but it didn't work at all.
I want to know how to use constructs such as ax, zip, etc.
enter image description here
I want to draw a plot in every subplot of a single figure.
Every ax should contain a plot.
Firstly, you have your fig and ax reversed in your call to plt.subplots, it should be:
fig, ax = plt.subplots(4,4,sharex=False, sharey=False, figsize=(22, 10), dpi=70, linewidth=0.5)
You can then access each set of axes to call plot by indexing. You can index the 4 by 4 numpy array to get each axes set in your 4 by 4 grid of plots. i.e. ax[0, 0].plot(...), ax[0, 1].plot(...), etc. up to ax[3, 3].plot(...)
Your question needs a bit more information to clarify how you want the data plotted though! I can see you combine the first two columns so that you have 4 columns, but consider how do you want each sample to be plotted.
EDIT: As you want to plot your files sequentially in ax[0, 0], ax[0, 1], etc., you can flatten the 2D numpy array of axes to get a 1D iterable that you can loop through or index with one value. I don't have your files so I can't test it but here's some demo code that should give you an idea of what to do.
As @sam mentioned in the comments, you should separate your CSV collection logic from your plotting logic.
def loop_directory(directory):
    """Return the paths of the CSV files located directly inside *directory*.

    Parameters
    ----------
    directory : str or os.PathLike
        Folder to scan; sub-folders are not searched.

    Returns
    -------
    list of str
        ``directory`` joined with every file name ending in ``.csv``
        (case-insensitive), sorted by name so that the order in which the
        files are plotted later is reproducible.
    """
    csvs = []
    # os.listdir returns entries in arbitrary order, so sort them explicitly
    for name in sorted(os.listdir(directory)):
        full_path = os.path.join(directory, name)
        # Skip sub-directories that merely happen to be called "*.csv"
        if name.lower().endswith('.csv') and os.path.isfile(full_path):
            csvs.append(full_path)
    return csvs
def plot_csvs(csvs, nrows=4, ncols=4):
    """Plot ``Tavg`` against time for each CSV file, one subplot per file.

    Parameters
    ----------
    csvs : sequence of str
        Paths of the CSV files.  Each file is expected to have the ``Year``,
        ``Month`` and ``Tavg`` columns used in the question.
    nrows, ncols : int, optional
        Shape of the subplot grid (4 x 4 by default).

    Raises
    ------
    ValueError
        If there are more files than subplots in the grid.
    """
    csvs = list(csvs)
    # Fail early instead of hitting an IndexError half-way through the files
    if len(csvs) > nrows * ncols:
        raise ValueError(
            f"{len(csvs)} CSV files do not fit in a {nrows} x {ncols} grid of subplots"
        )

    # squeeze=False always yields a 2-D array, which can then be flattened
    fig, ax = plt.subplots(nrows, ncols, sharex=False, sharey=False, figsize=(22, 10),
                           dpi=70, linewidth=0.5, squeeze=False)
    ax = ax.flatten()  # Flatten to 1D: [0, 0], [0, 1], etc.

    for axes, filename in zip(ax, csvs):
        df = pd.read_csv(filename)
        # Drop the -999 placeholder rows, as done in the question
        df = df[df["Tavg"] != -999]
        # Build a datetime x-axis from the Year/Month columns (day fixed to 1)
        dates = pd.to_datetime(
            df[["Year", "Month"]]
            .astype(int)
            .rename(columns={"Year": "year", "Month": "month"})
            .assign(day=1)
        )
        axes.plot(dates, df["Tavg"])
        axes.set_title(os.path.basename(filename))

    # Hide the subplots that did not receive a file
    for axes in ax[len(csvs):]:
        axes.set_axis_off()
    plt.show()
csv_dir = '/path/to/csv/dir'
csv_paths = loop_directory(csv_dir)
plot_csvs(csv_paths)
Related
Suppose I have 3 directories of .jpg files: dataset 1, dataset 2, dataset 3.
I would like to make a 5 by 3 subplots using matplotlib. For each row, the subplot shows the data from dataset 1, dataset 2 and dataset 3 in order. The expected format is like this:
plot1, plot2, plot3,
plot4.......
plot13, plot14, plot15.
How should I do that?
something like this:
plt.figure(figsize=(10, 10))
for data1, data2, data3 in dataset1, dataset2, dataset3"
....
This example uses Path(...).glob() from pathlib to find all of the image paths in each directory, and unpack them in a list comprehension.
matplotlib.pyplot.imread and matplotlib.axes.Axes.imshow are used to read and show the images, respectively.
import matplotlib.pyplot as plt
from pathlib import Path
# number of rows in the grid; each row shows one image from every dataset
NROWS = 5
# create a list of directories (one subplot column per directory)
dirs = ['../Pictures/dataset1', '../Pictures/dataset2', '../Pictures/dataset3']
# collect the image paths of each directory, sorted so the layout is reproducible
per_dir = [sorted(Path(dir_).glob('*.jpg')) for dir_ in dirs]
# zip(*per_dir) yields (dataset1 image, dataset2 image, dataset3 image) tuples, so
# flattening them gives the row-by-row order requested in the question;
# zip stops at the shortest directory and the slice keeps at most NROWS rows
files = [f for row in list(zip(*per_dir))[:NROWS] for f in row]
# create the figure; squeeze=False always returns a 2-d array of axes
fig, axs = plt.subplots(nrows=NROWS, ncols=len(dirs), figsize=(10, 10), squeeze=False)
# flatten the axes into a 1-d array to make it easier to access each axes
axs = axs.flatten()
# pair each image with its axes
for ax, file in zip(axs, files):
    # read the image in
    pic = plt.imread(file)
    # add the image to the axes
    ax.imshow(pic)
    # add an axes title; .stem is a pathlib attribute holding the filename without suffix
    ax.set(title=file.stem)
# hide the axes that did not receive an image
for ax in axs[len(files):]:
    ax.set_axis_off()
# add a figure title
fig.suptitle('Images from https://www.heroforge.com/', fontsize=18)
I'm trying to create scatter plot from several txt files. All files have the same structure: two columns with data and 'comma' as a separator:
54.1,12
65.7,11
122.2,18
etc
For small number of files i have this code:
import numpy as np
import matplotlib.pyplot as plt
import csv
# Create data
g1=np.loadtxt('214.txt',delimiter=',', unpack=True)
g2=np.loadtxt('228.txt',delimiter=',', unpack=True)
g3=np.loadtxt('491.txt',delimiter=',', unpack=True)
g4=np.loadtxt('647.txt',delimiter=',', unpack=True)
data = (g1, g2, g3,g4)
colors = ("red", "green", "blue", "black")
groups = ("214", "228", "491", "647")
# Create plot
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
for data, color, group in zip(data, colors, groups):
y, x = data
ax.scatter(x, y, alpha=0.8, c=color, edgecolors='none', s=30, label=group)
#Plot settings
plt.title('Matplot scatter plot')
plt.legend(loc=4)
axes = plt.gca()
axes.set_xlim([2,30])
axes.set_ylim([0,3000])
plt.gca().invert_yaxis()
plt.show()
Please advise how to modify it to read multiple (up to 50 - 100) txt files in folder, if number of them is different every time ?
I would search for all files in your current directory and identify which you want to extract data from. This can be done with something like:
from os import listdir, path
files = [f for f in listdir('.') if path.isfile(f)]
file_names = [file for file in files if file.startswith('file_name_identifer')]
This will give you a list of file names which contain the data you're wanting to extract, you can then just load them one by one in a for loop. Using similar loading techniques to what you've used above:
# Load every matching file; pass the loop variable, not the literal string 'file'
data = []
for file in file_names:
    data.append(np.loadtxt(file, delimiter=',', unpack=True))
You could condense this into a list comprehension too:
# `file` is the loop variable here (the original passed the literal string 'file')
data = [np.loadtxt(file, delimiter=',', unpack=True) for file in file_names]
If your files don't start with something which can be used to identify them, you can simply check some other way instead (change if file.startswith('file_name_indentifer') to something else which maybe checks if they're .txt files for instance: if file.endswith('.txt')).
You can get a list of all files in directory using method described in this post
And then do something like this:
# Read each file into its own array (plain ASCII quotes, balanced parentheses)
data = []
for file in filenames:
    data.append(np.loadtxt(file, delimiter=',', unpack=True))
# And do everything else you did with data
Though if your dataset is larger than the available system memory, I would consider adding the data points to the plot as you read the files:
# Plot each file as soon as it is read, so only one file's data is in memory at a time
colors = ["red", "green", "blue", "black"]
for i, file in enumerate(filenames):
    data = np.loadtxt(file, delimiter=',', unpack=True)
    # Legend label: the file name without its extension
    group = file.rsplit('.', 1)[0]
    # Cycle through the palette when there are more files than colours
    color = colors[i % len(colors)]
    # The question unpacks each file as ``y, x = data``, so row 1 is x and row 0 is y
    ax.scatter(data[1], data[0], alpha=0.8, c=color, edgecolors='none', s=30, label=group)
P.S. quotes are typed wrong (both double and single ones) as I’m writing from a mobile device
Thanks for help. Here is what worked for me:
import numpy as np
import matplotlib.pyplot as plt
from os import listdir, path
import logging, sys
import random
# Get the files with extension ".txt", sorted so the legend order is stable between runs
files = [f for f in listdir('.') if path.isfile(f)]
file_names = sorted(file for file in files if file.endswith('.txt'))
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# Create plot: one scatter series per file
for file in file_names:
    data = np.loadtxt(file, delimiter=",", unpack=True)
    # One random "#RRGGBB" colour per file, wrapped in a list as in the original
    color = ["#" + ''.join(random.choice('0123456789ABCDEF') for _ in range(6))]
    # Row 1 of the unpacked data goes on the x-axis and row 0 on the y-axis
    ax.scatter(data[1], data[0], alpha=0.8, c=color, edgecolors="none", s=30, label=file)
# Plot settings
ax.set_title('Matplot scatter plot')
ax.legend(loc=4)
ax.invert_yaxis()
plt.show()
I have two folders with similar number of files: maindirNo and maindirWith. I'm trying to plot each pair of similar files from folders on one plot:
for i in [maindirNo, maindirWith]:
for root, dirs, files in os.walk(i):
for fil in files:
if 'output.rsv' in fil:
df = pd.read_csv(os.path.join(i, fil), skiprows = 9, delimiter = r'\s+', header = None)
df['SIMULATEDm'] = mergedlevels
df['OBSERVEDm'] = df_observed['OBSERVEDm']
df['date'] = pd.date_range('1/1991','12/2040', freq='MS')
if i == maindirNo:
plt.plot(df['date'], df['SIMULATEDm'], 'b', label='No outlet')
if i == maindirWith:
plt.plot(df['date'], df['SIMULATEDm'], 'r', label='With outlet')
plt.legend(loc = 'lower right')
plt.savefig('C:/Users/sgulbin/Desktop/AGU_Conf/plots/%s.jpg' %fil)
plt.close()
The problem is that I either have all datesets plotted on one plot, or one plot for each file (I need two datasets on one plot). I assume I can append output to an empty dataframe and then plot it, but is there a simplest way to plot them through the loop?
P.S. I know there are kind of similar questions to this, but not exactly.
pandas uses matplotlib which gives fig and ax when you create many plots. ie. 5 plots in one column
fig, ax = plt.subplots(5, 1)
and then you can use ax[0], ax[1], etc. to choose the subplot on which each line is drawn.
import matplotlib.pyplot as plt
import pandas as pd
import random
SIZE = 5
# create grid 5x1
fig, ax = plt.subplots(SIZE, 1)
# one pass per folder: 'b' (blue) stands for the first folder, 'r' (red) for the second
for colour in ('b', 'r'):
    for panel in ax:
        # dataframe with random data as example
        df = pd.DataFrame([random.randint(0, 10) for _ in range(10)])
        # draw it on the subplot belonging to this file index
        panel.plot(df, colour)
plt.show()
I have a more than 1000 .csv files (data_1.csv......data1000.csv), each containing X and Y values!
x1 y1 x2 y2
5.0 60 5.5 500
6.0 70 6.5 600
7.0 80 7.5 700
8.0 90 8.5 800
9.0 100 9.5 900
I have made a subplot program in python which can give two plots (plot1 - X1vsY1, Plot2 - X2vsY2) at a time using one file.
I need help in looping all the files, (open a file, read it, plot it, pick another file, open it, read it, plot it, ... until all the files in a folder get plotted)
I have the following code:
import pandas as pd
import matplotlib.pyplot as plt
df1=pd.read_csv("data_csv",header=1,sep=',')
fig = plt.figure()
plt.subplot(2, 1, 1)
plt.plot(df1.iloc[:,[1]],df1.iloc[:,[2]])
plt.subplot(2, 1, 2)
plt.plot(df1.iloc[:,[3]],df1.iloc[:,[4]])
plt.show()
How can this be accomplished more efficiently?
You can generate a list of filenames using glob and then plot them in a for loop.
import glob
import pandas as pd
import matplotlib.pyplot as plt
# Adjust the pattern to match your files; sorted so they are shown in name order
files = sorted(glob.glob('*.csv'))
for file in files:
    df1 = pd.read_csv(file, header=1, sep=',')
    fig = plt.figure()
    # NOTE(review): the column positions 1-4 are copied from the question and
    # presume a leading extra column; confirm against your files
    plt.subplot(2, 1, 1)
    plt.plot(df1.iloc[:, [1]], df1.iloc[:, [2]])
    plt.subplot(2, 1, 2)
    plt.plot(df1.iloc[:, [3]], df1.iloc[:, [4]])
    plt.show()  # this will stop the loop until you close the plot
    # Release the figure; otherwise one figure per file stays in memory
    plt.close(fig)
I used NetCDF(.nc) just in case anyone is interested in using NetCDF data. Also, you could replace it with .txt too, the idea is the same. I used this for a contour plot loop.
# Assumes `import os`, `import xarray as xr` and `import matplotlib.pyplot as plt` were run beforehand
path_to_folder = '#type the path to the files'
fig = plt.figure(figsize=(10, 5))
# Keep only the NetCDF files; sorted so the panel order is reproducible
files = sorted(name for name in os.listdir(path_to_folder) if name.endswith('.nc'))
for count, name in enumerate(files, start=1):
    # os.listdir returns bare file names, so prepend the folder before opening
    with xr.open_dataset(os.path.join(path_to_folder, name)) as data:
        prec = data['tp']
        plt.subplot(1, 2, count)  # change 1 and 2 to the shape you want
        # this draws a contour plot, but you can replace it with any plot command
        prec.groupby('time.month').mean(dim=('time', 'longitude')).T.plot.contourf(cmap='Purples')
print(files)
plt.savefig('try.png', dpi=500, orientation='landscape', format='png')
Here is the basic setup of what I am using at work. This code plots the data from each file, going through the files one at a time. It will work on any number of files as long as the column names remain the same. Just point it at the proper folder.
import os
import csv
def graphWriterIRIandRut():
    """Plot two columns against a third for every CSV file in the current directory.

    For each ``.csv`` file in ``os.getcwd()``, 'Col 2 Name' is drawn against
    'Col 1 Name' in the upper subplot and 'Col 3 Name' against 'Col 1 Name' in
    the lower one.  All files share the same two subplots; the figure is shown
    once after the last file and is then cleared and closed.
    """
    # Imported here because the surrounding snippet only imports os and csv
    import matplotlib.pyplot as plt

    for name in sorted(os.listdir(os.getcwd())):
        # os.listdir also returns directories and unrelated files
        if not name.lower().endswith('.csv'):
            continue
        # Fresh lists for every file, so the points of one file are not
        # appended to (and joined by a line with) those of the previous file
        List1, List2, List3 = [], [], []
        # The 'rU' mode was removed in Python 3.11; newline='' is what csv expects.
        # The with-block closes the file, which the original never did.
        with open(name, newline='') as handle:
            for col in csv.DictReader(handle):
                # DictReader yields strings; assumes these columns are numeric
                List1.append(float(col['Col 1 Name']))
                List2.append(float(col['Col 2 Name']))
                List3.append(float(col['Col 3 Name']))
        plt.subplot(2, 1, 1)
        plt.grid(True)
        # No explicit colour (the original relied on an undefined `n`);
        # matplotlib's colour cycle gives each file its own colour
        plt.plot(List1, List2)
        plt.tick_params(axis='both', which='major', labelsize=8)
        plt.subplot(2, 1, 2)
        plt.grid(True)
        plt.plot(List1, List3)
        plt.tick_params(axis='both', which='major', labelsize=8)
    plt.show()
    plt.gcf().clear()
    plt.close('all')
# plotting all the file data and saving the plots
import os
import csv
import matplotlib.pyplot as plt
def graphWriterIRIandRut():
    """Plot the x1/y1 and x2/y2 pairs of every CSV file in the current directory.

    For each ``.csv`` file in ``os.getcwd()``, ``y1`` is drawn against ``x1``
    in the upper subplot and ``y2`` against ``x2`` in the lower one.  All
    files share the same two subplots; the figure is shown once after the
    last file and is then cleared and closed.
    """
    for name in sorted(os.listdir(os.getcwd())):
        # os.listdir also returns directories and unrelated files
        if not name.lower().endswith('.csv'):
            continue
        # Fresh lists for every file, so the points of one file are not
        # appended to (and joined by a line with) those of the previous file
        List1, List2, List3, List4 = [], [], [], []
        # The 'rU' mode was removed in Python 3.11; newline='' is what csv expects.
        # The with-block closes the file, which the original never did.
        with open(name, newline='') as handle:
            for col in csv.DictReader(handle):
                # DictReader yields strings; convert so the axes are numeric
                List1.append(float(col['x1']))
                List2.append(float(col['y1']))
                List3.append(float(col['x2']))
                List4.append(float(col['y2']))
        plt.subplot(2, 1, 1)
        plt.grid(True)
        # No explicit colour (`colors` was commented out but still referenced);
        # matplotlib's colour cycle gives each file its own colour
        plt.plot(List1, List2)
        plt.tick_params(axis='both', which='major', labelsize=8)
        plt.subplot(2, 1, 2)
        plt.grid(True)
        # Second pair is x2 against y2 (the original plotted x1 against x2)
        plt.plot(List3, List4)
        plt.tick_params(axis='both', which='major', labelsize=8)
    plt.show()
    plt.gcf().clear()
    plt.close('all')
What we want to do is create new empty lists for each iteration (i.e. for each file). The data of that file is plotted, and for the next file fresh empty lists are created, filled and plotted. Once the data from all the files has been plotted, call plt.show() to show all the plots together. Here is a link to a similar problem I was having: Traceback lines on plot of multiple files. Good luck!
import csv
import matplotlib.pyplot as plt
def graphWriter():
    """Plot the x1/y1 and x2/y2 pairs of every CSV file in the current directory.

    For each ``.csv`` file in ``os.getcwd()``, ``y1`` is drawn against ``x1``
    in the upper subplot and ``y2`` against ``x2`` in the lower one.  The
    lists are re-created for every file, all files share the same two
    subplots, and the figure is shown once after the last file.
    """
    # Imported here because the surrounding snippet only imports csv and pyplot
    import os

    for filename in sorted(os.listdir(os.getcwd())):
        # os.listdir also returns directories and unrelated files
        if not filename.lower().endswith('.csv'):
            continue
        # New empty lists for each file keep the files' lines separate
        List1, List2, List3, List4 = [], [], [], []
        # Distinct names for the file name and the handle (the original
        # opened an undefined `filename` and shadowed the loop variable)
        with open(filename, 'r', newline='') as handle:
            for col in csv.DictReader(handle):
                # DictReader yields strings; convert so the axes are numeric
                List1.append(float(col['x1']))
                List2.append(float(col['y1']))
                List3.append(float(col['x2']))
                List4.append(float(col['y2']))
        plt.subplot(2, 1, 1)
        plt.grid(True)
        # No explicit colour: a two-element random array is not a valid colour;
        # matplotlib's colour cycle gives each file its own colour
        plt.plot(List1, List2)
        plt.tick_params(axis='both', which='major', labelsize=8)
        plt.subplot(2, 1, 2)
        plt.grid(True)
        # Second pair is x2 against y2 (the original plotted x1 against x2)
        plt.plot(List3, List4)
        plt.tick_params(axis='both', which='major', labelsize=8)
    plt.show()
    plt.gcf().clear()
    plt.close('all')
If for some reason @Neill Herbst's answer (which I consider the easiest way) doesn't work as expected: I ran into a problem reading the files, so here is the rearranged code that worked for me:
import glob
import pandas as pd
import matplotlib.pyplot as plt
import os  # needed for os.chdir below

# Work inside the data folder so the relative glob pattern finds the files
os.chdir(r'path')
for file in glob.glob("*.csv"):
    df1 = pd.read_csv(file, header=1, sep=',')
    fig = plt.figure()
    plt.subplot(2, 1, 1)
    plt.plot(df1.iloc[:, [1]], df1.iloc[:, [2]])
    plt.subplot(2, 1, 2)
    plt.plot(df1.iloc[:, [3]], df1.iloc[:, [4]])
    plt.show()  # plots one csv; when you close the window, the next one is plotted
    # Release the figure once its window has been closed
    plt.close(fig)
# If you want to see all the plots in different windows instead, remove the
# plt.show() and plt.close(fig) calls inside the loop and call plt.show() once here
Using p = Path(...): p → WindowsPath('so_data/files')
files = p.rglob(...) yields all files matching the pattern
file[0] → WindowsPath('so_data/files/data_1.csv')
p.parent / 'plots' / f'{file.stem}.png' → WindowsPath('so_data/plots/data_1.png')
p.parent → WindowsPath('so_data')
file.stem → data_1
This assumes all directories exist. Directory creation / checking is not included.
This example uses pandas, as does the OP.
Plotted with pandas.DataFrame.plot, which uses matplotlib as the default backend.
Use .iloc to specify the columns, and then x=0 will always be the x-axis data, based on the given example data.
Tested in python 3.8.11, pandas 1.3.2, matplotlib 3.4.3
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
# folder holding the input files
p = Path('so_data/files')
# lazily yields every file below p that matches the pattern
files = p.rglob('data_*.csv')
for file in files:
    # adjust the header row and separator to your files
    df = pd.read_csv(file, header=0, sep=',')
    fig, axes_pair = plt.subplots(2, 1, figsize=(7, 5))
    ax1, ax2 = axes_pair
    # each x/y pair is cut out by position; within the slice the x data is column 0
    first_pair = df.iloc[:, [0, 1]]
    second_pair = df.iloc[:, [2, 3]]
    first_pair.plot(x=0, ax=ax1)
    second_pair.plot(x=0, ax=ax2)
    # the image goes into the sibling "plots" folder, named after the input file
    target = p.parent / 'plots' / f'{file.stem}.png'
    fig.savefig(target)
    # free the figure, otherwise every figure stays in memory
    plt.close(fig)
Sample Data
This is for testing the plotting code
Create a so_data/files directory manually.
df = pd.DataFrame({'x1': [5.0, 6.0, 7.0, 8.0, 9.0], 'y1': [60, 70, 80, 90, 100], 'x2': [5.5, 6.5, 7.5, 8.5, 9.5], 'y2': [500, 600, 700, 800, 900]})
for x in range(1, 1001):
df.to_csv(f'so_data/files/data_{x}.csv', index=False)
Alternate Answer
This answer addresses cases where there are many consecutive pairs of x/y columns
df.columns returns the column labels, which can be chunked into pairs
For consecutive column pairs, this answer works
list(zip(*[iter(df.columns)]*2)) → [('x1', 'y1'), ('x2', 'y2')]
If necessary, use some other pattern to create pairs of columns
Use .loc, since there will be column names, instead of .iloc for column indices.
p = Path('so_data/files')
files = p.rglob('data_*.csv')
for file in files:
    df = pd.read_csv(file, header=0, sep=',')
    col_pair = list(zip(*[iter(df.columns)]*2))  # extract consecutive column pairs
    # squeeze=False always returns a 2-D array of axes; without it a file with a
    # single column pair yields a bare Axes object, which has no .ravel()
    fig, axes = plt.subplots(len(col_pair), 1, squeeze=False)
    axes = axes.ravel()  # flatten to 1-D so it can be zipped with the pairs
    for cols, ax in zip(col_pair, axes):
        # list(cols) selects the two columns by label; x data is at position 0 of the pair
        df.loc[:, list(cols)].plot(x=0, ax=ax)
    fig.savefig(p.parent / 'plots' / f'{file.stem}.png')
    plt.close(fig)  # close each figure, otherwise they stay in memory
I want to create a function which plot on screen a set of figures in a single window. By now I write this code:
import pylab as pl
def plot_figures(figures):
"""Plot a dictionary of figures.
Parameters
----------
figures : <title, figure> dictionary
"""
for title in figures:
pl.figure()
pl.imshow(figures[title])
pl.gray()
pl.title(title)
pl.axis('off')
It works perfectly but I would like to have the option for plotting all the figures in single window. And this code doesn't. I read something about subplot but it looks quite tricky.
You can define a function based on the subplots command (note the s at the end, different from the subplot command pointed by urinieto) of matplotlib.pyplot.
Below is an example of such a function, based on yours, allowing to plot multiples axes in a figure. You can define the number of rows and columns you want in the figure layout.
def plot_figures(figures, nrows = 1, ncols=1):
    """Plot a dictionary of figures.

    Parameters
    ----------
    figures : <title, figure> dictionary
    ncols : number of columns of subplots wanted in the display
    nrows : number of rows of subplots wanted in the figure

    Raises
    ------
    ValueError
        If the grid has fewer subplots than there are figures.
    """
    # Fail before drawing anything instead of with an IndexError part-way through
    if len(figures) > nrows * ncols:
        raise ValueError(
            f"Cannot fit {len(figures)} figures on a {nrows} x {ncols} grid."
        )

    # squeeze=False always returns a 2-D array of axes, so the default 1 x 1
    # grid can be flattened like any other
    fig, axeslist = plt.subplots(ncols=ncols, nrows=nrows, squeeze=False)
    axes = axeslist.ravel()
    for ax, title in zip(axes, figures):
        # Name the colormap explicitly: plt.gray() returns None and only
        # changes the global default as a side effect
        ax.imshow(figures[title], cmap='gray')
        ax.set_title(title)
        ax.set_axis_off()
    # Hide the subplots that did not receive a figure
    for ax in axes[len(figures):]:
        ax.set_axis_off()
    plt.tight_layout() # optional
Basically, the function creates a number of axes in the figures, according to the number of rows (nrows) and columns (ncols) you want, and then iterates over the list of axis to plot your images and adds the title for each of them.
Note that if you only have one image in your dictionary, your previous syntax plot_figures(figures) will work since nrows and ncols are set to 1 by default.
An example of what you can obtain:
import matplotlib.pyplot as plt
import numpy as np
# generation of a dictionary of (title, images)
number_of_im = 6
figures = {'im'+str(i): np.random.randn(100, 100) for i in range(number_of_im)}
# plot of the images in a figure, with 2 rows and 3 columns
plot_figures(figures, 2, 3)
You should use subplot.
In your case, it would be something like this (if you want them one on top of the other):
fig = pl.figure(1)
# Subplot positions are 1-based, hence start=1
for k, title in enumerate(figures, start=1):
    # One row per figure, a single column: the images are stacked vertically
    ax = fig.add_subplot(len(figures), 1, k)
    # Axes objects have no gray() method; select the colormap on imshow instead
    ax.imshow(figures[title], cmap='gray')
    # The Axes method is set_title(); ax.title is an attribute, not a callable
    ax.set_title(title)
    ax.axis('off')
Check out the documentation for other options.
If you want to group multiple figures in one window you can do smth. like this:
import matplotlib.pyplot as plt
import numpy as np
img = plt.imread('C:/.../Download.jpg') # Path to image
img = img[0:150,50:200,0] # Crop a 150 x 150 window of channel 0 --> Or whatever shape you want
fig = plt.figure()
nrows = 10 # Define number of rows of tiles
ncols = 10 # Define number of columns of tiles
# Take the tile size from the cropped image itself so that it cannot get out of sync
image_height, image_width = img.shape
# Repeat the tile nrows times vertically and ncols times horizontally; this
# replaces the nested loop, whose column slice used the height instead of the width
pixels = np.tile(img.astype(float), (nrows, ncols))
plt.imshow(pixels,cmap='jet')
plt.axis('off')
plt.show()
As result you receive:
Building on the answer from: How to display multiple images in one figure correctly?, here is another method:
import math
import numpy as np
import matplotlib.pyplot as plt
def plot_images(np_images, titles=(), columns=5, figure_size=(24, 18)):
    """Show a sequence of images on a grid of subplots in one figure.

    Parameters
    ----------
    np_images : sequence of images
        Indexed along its first axis; each item is passed to ``imshow``.
    titles : sequence, optional
        Titles for the images, matched by position.  Images without a
        corresponding title are left untitled.
    columns : int, optional
        Number of subplot columns; the number of rows is derived from it.
    figure_size : tuple, optional
        Figure size passed to ``plt.figure``.
    """
    # len() instead of .shape[0], so plain lists of images work as well
    count = len(np_images)
    rows = math.ceil(count / columns)
    fig = plt.figure(figsize=figure_size)
    for index in range(count):
        ax = fig.add_subplot(rows, columns, index + 1)
        # Guard by position: a titles sequence shorter than the images used to
        # raise an IndexError
        if index < len(titles):
            ax.set_title(str(titles[index]))
        # Draw on the subplot explicitly instead of relying on the current axes
        ax.imshow(np_images[index])
    plt.show()
You can also do this:
import matplotlib.pyplot as plt
# squeeze=False always returns a 2-D array of axes; without it a single image
# yields a bare Axes object and axarr[i] fails
f, axarr = plt.subplots(1, len(imgs), squeeze=False)
axarr = axarr.ravel()  # back to 1-D so axarr[i] keeps working
for ax, img in zip(axarr, imgs):
    ax.imshow(img)
plt.suptitle("Your title!")
plt.show()
def plot_figures(figures, nrows=None, ncols=None):
    """Show a dictionary of images on a grid of subplots in one figure.

    Parameters
    ----------
    figures : <title, figure> dictionary
    nrows, ncols : int, optional
        Shape of the subplot grid.  With neither given, the figures are placed
        in a single row.  With only one given, the other is derived so that
        every figure fits.

    Raises
    ------
    ValueError
        If both are given and ``nrows * ncols`` is smaller than the number of
        figures.
    """
    count = len(figures)
    if not nrows and not ncols:
        # Plot figures in a single row if grid not specified
        nrows, ncols = 1, count
    elif not ncols:
        # Only nrows given: ceiling division gives just enough columns
        ncols = -(-count // nrows)
    elif not nrows:
        # Only ncols given: ceiling division gives just enough rows
        nrows = -(-count // ncols)
    elif count > nrows * ncols:
        # check minimum grid configured
        raise ValueError(f"Too few subplots ({nrows*ncols}) specified for ({len(figures)}) figures.")

    fig = plt.figure()
    # optional spacing between figures
    fig.subplots_adjust(hspace=0.4, wspace=0.4)
    for index, title in enumerate(figures):
        # Add to this figure explicitly rather than via pyplot's current figure
        ax = fig.add_subplot(nrows, ncols, index + 1)
        ax.set_title(title)
        ax.imshow(figures[title])
    plt.show()
Any grid configuration (or none) can be specified as long as the product of the number of rows and the number of columns is equal to or greater than the number of figures.
For example, for len(figures) == 10, these are acceptable
plot_figures(figures)
plot_figures(figures, 2, 5)
plot_figures(figures, 3, 4)
plot_figures(figures, 4, 3)
plot_figures(figures, 5, 2)
import numpy as np
def save_image(data, ws=0.1, hs=0.1, sn='save_name'):
    """Save the images in *data* as one near-square grid in ``<sn>.png``.

    Parameters
    ----------
    data : numpy.ndarray
        4-D array indexed as ``data[i, 0, :, :]``, e.g. shape (36, 1, 32, 32);
        one image is drawn per index ``i``.
    ws, hs : float, optional
        Horizontal and vertical spacing between the subplots.
    sn : str, optional
        Output file name without the ``.png`` extension.

    Raises
    ------
    ValueError
        If *data* contains no images.
    """
    count = data.shape[0]
    if count == 0:
        raise ValueError("data contains no images to plot")

    import matplotlib.pyplot as plt

    # Smallest near-square grid that holds every image; truncating sqrt(count)
    # left too few subplots whenever count was not a perfect square
    n = int(np.ceil(np.sqrt(count)))  # columns
    m = int(np.ceil(count / n))       # rows
    # figsize is (width, height); squeeze=False keeps a 2-D array even for 1 x 1
    fig, ax = plt.subplots(m, n, figsize=(n*6, m*6), squeeze=False)
    ax = ax.ravel()
    for i in range(count):
        ax[i].matshow(data[i,0,:,:])
        ax[i].set_xticks([])
        ax[i].set_yticks([])
    # Hide the cells of the grid that have no image
    for spare in ax[count:]:
        spare.set_axis_off()
    # tight_layout() is deliberately not called: it recomputes the spacing and
    # would cancel the ws/hs values set here
    fig.subplots_adjust(left=0.1, bottom=0.1, right=0.9,
                        top=0.9, wspace=ws, hspace=hs)
    fig.savefig('{}.png'.format(sn))
    # Release the figure so repeated calls do not accumulate open figures
    plt.close(fig)
data = np.load('img_test.npy')
save_image(data, ws=0.1, hs=0.1, sn='multiple_plot')