For loop to create multiple histogram png files - python

I am not sure as to why this happens. Maybe it is just a simple mistake that I cannot see, but by using this code:
for filename in glob.glob('/Users/jacob/Desktop/MERS/new/NOT COAL/gensets/statistics_per_lgu/per_lgu_files/*.csv'):
base = os.path.basename(filename)
name = os.path.splitext(base)[0]
df = pd.read_csv(filename)
# Show 4 different binwidths
for i, binwidth in enumerate([10, 20, 30, 40]):
# Set up the plot
ax = plt.subplot(2, 2, i + 1)
plt.subplots_adjust( wspace=0.5, hspace=0.5)
# Draw the plot
ax.hist(df['New Capacity based on 0.8 PF'], bins=binwidth,
color='red', edgecolor='black',alpha=0.5)
# Title and labels
ax.set_title('Histogram with Binwidth = %d' % binwidth, size=10)
ax.set_xlabel('Capacity', size=11)
ax.set_ylabel('Frequency count', size=11)
ax.axvline(x=df['New Capacity based on 0.8 PF'].median(), linestyle='dashed', alpha=0.3, color='blue')
min_ylim, max_ylim = plt.ylim()
ax.text(x=df['New Capacity based on 0.8 PF'].median(),y= max_ylim*0.9, s='Median', alpha=0.7, color='blue',fontsize = 12)
ax.axvline(x=df['New Capacity based on 0.8 PF'].mean(), linestyle='dashed', alpha=0.9, color='green')
min_ylim, max_ylim = plt.ylim()
ax.text(x=df['New Capacity based on 0.8 PF'].mean(),y= max_ylim*0.5, s='Mean', alpha=0.9, color='green',fontsize = 12)
plt.tight_layout()
plt.grid(True)
plt.savefig('/Users/jacob/Documents/Gensets_gis/historgrams/per_lgu_files/{}.png'.format(name))
I get all files created like this attached photo here.
Any ideas as to what I've done wrong?
Thanks in advance.
attached photo of one histogram output
My desired result would be something like this.
Desired output

It doesn't create new subplots but it use previous ones and then it draw new plots on old plots so you have to use clear subplot before you draw new histogram.
ax = plt.subplot(2, 2, i + 1)
ax.clear()
Example code. It gives desired output but if you remove `ax.clear() then first image will be OK but you get new plot with old plots on second and third image.
import os
import pandas as pd
import matplotlib.pyplot as plt
import random
for n in range(3):
filename = f'example_data_{n}.csv'
base = os.path.basename(filename)
name = os.path.splitext(base)[0]
df = pd.DataFrame({'New Capacity based on 0.8 PF': random.choices(list(range(1000)), k=100)})
data = df['New Capacity based on 0.8 PF']
median = data.median()
mean = data.mean()
# Show 4 different binwidths
for i, binwidth in enumerate([10, 20, 30, 40]):
# Set up the plot
ax = plt.subplot(2,2,i+1)
ax.clear() # <--- it removes previous histogram
plt.subplots_adjust( wspace=0.5, hspace=0.5)
# Draw the plot
ax.hist(data , bins=binwidth, color='red', edgecolor='black',alpha=0.5)
# Title and labels
ax.set_title('Histogram with Binwidth = %d' % binwidth, size=10)
ax.set_xlabel('Capacity', size=11)
ax.set_ylabel('Frequency count', size=11)
min_ylim, max_ylim = plt.ylim()
ax.axvline(x=median, linestyle='dashed', alpha=0.3, color='blue')
ax.text(x=median, y= max_ylim*0.9, s='Median', alpha=0.7, color='blue',fontsize = 12)
ax.axvline(x=mean, linestyle='dashed', alpha=0.9, color='green')
ax.text(x=mean, y= max_ylim*0.5, s='Mean', alpha=0.9, color='green',fontsize = 12)
plt.tight_layout()
plt.grid(True)
plt.savefig('{}.png'.format(name))

Related

Matplotlib/Pandas - Plot not reflective of data

Trying to work out what is going wrong. I am using pandas to generate dataframes and matplotlib to plot a figure with 5 subplots.
Datasets are large xlsx sheets, all data is relative to depth below ground surface, duplicate depths have been removed, all number as text errors have been removed, although depth is shown between 0 - 60 ft data is not continuous over entire interval.
What I am going for
Current issue, x-axis not reflecting trends of data, line plotted straight through data
#Import libraries
import matplotlib.pyplot as plt
import pandas as pd
#Import Excel Data
df1 = pd.read_excel (r'017_FLD_and_FLS.xlsx')
df2 = pd.read_excel (r'017_SUMD_SUMS.xlsx')
#Sort data for plotting
gam = list(df2['Natural Gamma'])
cal_Pre = list(df2['Caliper (pre-pumping)'])
cal_post = list(df2['Caliper (post-pumping)'])
fc = list(df2['Formation Conductivity'])
fr = list(df2['Formation Resistivity'])
neu = list(df2['Neutron'])
den = list(df2['Spherical Density (long-S)'])
htp = list(df1['Heat Pulse Flow'])
tp = list(df1['Static Fluid Temperature'])
t1 = list(df1['Pumping Temperature Run 1'])
t2 = list(df1['Pumping Temperature Run 2'])
t3 = list(df1['Pumping Temperature Run 3'])
depth = list(df2['Depth'])
depth_t = list(df1['Depth'])
#Test to verify subset
#print(gam)
#print (tp)
#Plot space
fig = plt.figure(figsize=(15,20))
# Caliper
ax01 = plt.subplot(151)
plt.plot(cal_Pre, depth, color="black")
ax01 = plt.gca()
ax01.invert_yaxis()
ax01.set_xlabel('Caliper (Inches)', color="black")
ax01.set_ylabel('Depth (Feet)', color="black")
plt.grid(True)
ax11 = ax01.twiny()
ax11.plot(cal_post, depth, color = 'green')
ax11.set_xlabel('Gamma (counts)', color="green")
ax11.tick_params(axis='x', labelcolor="green")
plt.grid(True, linestyle='--')
# Gamma
ax02 = plt.subplot(152)
plt.plot(cal_Pre, depth, color="black")
ax02 = plt.gca()
ax02.invert_yaxis()
ax02.set_xlabel('Caliper (Inches)', color="black")
plt.grid(True)
# Neutron Density
ax03 = plt.subplot(153, sharey=ax01)
plt.plot(den, depth, color = 'red')
ax03 = plt.gca()
ax03.invert_yaxis()
ax03.set_xlabel('Density (counts)', color = 'red')
ax03.tick_params(axis='x', labelcolor="red")
plt.grid(True)
ax13 = ax03.twiny()
ax13.plot(neu, depth, color = 'blue')
ax13.invert_xaxis()
ax13.set_xlabel('Neutron (counts)', color="blue")
ax13.tick_params(axis='x', labelcolor="blue")
plt.grid(True, linestyle='--')
# Conductivity and Resistivity
ax04 = plt.subplot(154, sharey=ax01)
plt.plot(fc, depth, color="black")
ax04 = plt.gca()
ax04.invert_yaxis()
ax04.set_xlabel('Formation Conductivity(mS/cm)', color="black")
ax04.tick_params(axis='x', labelcolor="black")
plt.grid(True)
ax14 = ax04.twiny()
ax14.plot(fr, depth, color = 'blue')
ax14.invert_xaxis()
ax14.set_xlabel('Formation Resistivity (ohm-m)', color="blue")
ax14.tick_params(axis='x', labelcolor="blue")
plt.grid(True, linestyle='--')
# Temperature and heat pulse
ax05 = plt.subplot(155)
plt.plot(htp, depth_t, color="black")
ax05 = plt.gca()
ax05.invert_yaxis()
ax05.set_xlabel('Heat Pulse Flow (gpm)', color="black")
ax05.set_ylabel('Depth (Feet)', color="black")
plt.grid(True)
ax15 = ax05.twiny()
ax15.plot(tp, depth_t, color = 'blue')
ax15.invert_xaxis()
ax15.set_xlabel('Formation Resistivity (ohm-m)', color="blue")
ax15.tick_params(axis='x', labelcolor="blue")
ax15 = ax05.twiny()
ax15.plot(t3, depth_t, color = 'blue')
ax15.invert_xaxis()
ax15.set_xlabel('Formation Resistivity (ohm-m)', color="blue")
ax15.tick_params(axis='x', labelcolor="blue")
plt.grid(True, linestyle='--')
fig.suptitle('GB017', fontsize=30, x=0.5, y=1.01)
fig.tight_layout()
plt.show()
Open to suggestions on making code more elegant but maintaining ease of figure modification.

subplots, how to set the xlabel and xlim, but removing axis

I'd like to plot EEG data and get this result:
But I am stuck on how to display the x axis label and its xlim.
After reading other questions, which use set_visible(False), I cannot resolve my issue.
I write my code in order to be reproducible:
sfreq = 256
raw_data = np.random.rand(14, 1000 * sfreq)
duration = 10 # duration of the signal
start = 200 * sfreq
final = start + int(sfreq * duration)
channels = list(np.arange(1, len(channels) + 1 ))
fig, ax = plt.subplots(len(channels), 1, sharex=True, figsize=(10, 10))
for idx, node in enumerate(channels):
data = raw_data[idx, start:final]
times = np.arange(1, data.size + 1) / sfreq
ax[idx].plot(times, data, lw=1., ls='-', c='k')
ax[idx].axis('off') # to remove bounding subplot
ax[idx].set_yticks([]) # to remove values from y axis
ax[idx].text(-1, 0, node, fontsize=12) # write text
# plt.axis(True)
# plt.axes().get_xaxis().set_visible(True)
# plt.xlim([200, 220])
plt.xlabel('Time (seconds)', fontsize=12)
plt.tight_layout()
plt.show()
This is my result:
But I'd like this:
Here are some possible changes to the plot:
make the code more python by using zip instead of an index in the for loop
change the visibility of the "spines" (the lines surrounding the subplot) instead of use axis('off')
remove the padding (margins)
use the axes transform to position the text of the y-axis
...
import matplotlib.pyplot as plt
import numpy as np
sfreq = 256
raw_data = np.random.rand(14, 1000 * sfreq)
duration = 10 # duration of the signal
start = 200 * sfreq
final = start + int(sfreq * duration)
channels = np.arange(len(raw_data)) + 1
fig, axs = plt.subplots(len(channels), 1, sharex=True, figsize=(10, 10))
for ax, node, data in zip(axs, channels, raw_data):
data = data[start:final]
times = np.arange(1, data.size + 1) / sfreq
ax.plot(times, data, lw=1., ls='-', c='k')
ax.set_yticks([]) # remove y ticks
for sp in ax.spines:
ax.spines[sp].set_visible(False) # hide the 4 lines surrounding the subplot
ax.text(-0.01, 0.5, node, fontsize=12, ha='right', va='center', transform=ax.transAxes) # write text
ax.margins(x=0) # avoid the empty space left and right
if ax != axs[-1]:
# ax.tick_params(axis='x', length=0) # hide the tick marks
ax.tick_params(bottom=False) # no tick marks at the bottom
axs[-1].set_xlabel('Time (seconds)', fontsize=12, labelpad=-10) # use negative padding to get closer to the xaxis
axs[-1].set_xticks([0, duration])
axs[-1].set_xticklabels([start // sfreq, final // sfreq])
axs[-1].spines['bottom'].set_bounds([0, duration]) # only draw the spine between the two ticks
axs[-1].spines['bottom'].set_visible(True)
axs[-1].spines['bottom'].set_linewidth(2)
plt.tight_layout()
plt.show()

3D Plot of Multiple Time Series in Python

I've seen numerous examples of 3D plots using matplotlib/seaborn in Python but can't seem to get what I'm looking for; I have 50 or so timeseries that I would like to plot cleanly as in the following example below but with the name of the series on the axis; as an example I've marked in Goog, IBM, GE, Pepsi etc. Appreciate any pointers or examples. Thank you,
Example PLOT Click Here Please
Matplotlib has very rich gallery. I found this, you can only plot it once instead of animation. And manually put y-axis legend wherever you want.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
# Fixing random state for reproducibility
np.random.seed(19680801)
# Create new Figure with black background
fig = plt.figure(figsize=(12, 8))
# Add a subplot with no frame
ax = plt.subplot(111, frameon=False)
# Generate random data
data = np.random.uniform(0, 1, (64, 75))
X = np.linspace(-1, 1, data.shape[-1])
G = 1.5 * np.exp(-4 * X ** 2)
# Generate line plots
lines = []
for i in range(len(data)):
# Small reduction of the X extents to get a cheap perspective effect
xscale = 1 - i / 200.
# Same for linewidth (thicker strokes on bottom)
lw = 1.5 - i / 100.0
line, = ax.plot(xscale * X, i + G * data[i], color="b", lw=lw)
lines.append(line)
# Set y limit (or first line is cropped because of thickness)
ax.set_ylim(-1, 70)
# No ticks
ax.set_xticks([])
ax.set_yticks([])
# 2 part titles to get different font weights
ax.text(0.5, 1.0, "MATPLOTLIB ", transform=ax.transAxes,
ha="right", va="bottom", color="k",
family="sans-serif", fontweight="light", fontsize=16)
ax.text(0.5, 1.0, "UNCHAINED", transform=ax.transAxes,
ha="left", va="bottom", color="k",
family="sans-serif", fontweight="bold", fontsize=16)
def update(*args):
# Shift all data to the right
data[:, 1:] = data[:, :-1]
# Fill-in new values
data[:, 0] = np.random.uniform(0, 1, len(data))
# Update data
for i in range(len(data)):
lines[i].set_ydata(i + G * data[i])
# Return modified artists
return lines
# Construct the animation, using the update function as the animation director.
anim = animation.FuncAnimation(fig, update, interval=10)
plt.show()

Plot less points (line plot) for matplot lib

Currently I have a plot with too many points, I want to avoid overlapping. Want to know how to reduce the amount of points in order to have a smoother line.
Plot Code
fig = plt.figure(1, figsize = (18,10)) # Figure size in inches (size_x, size_y)
ax = plt.axes()
min_val = prediction_intervals2[:, 0]
max_val = prediction_intervals2[:, 1]
true_values = y_test
predicted_values = PLS_Model1.predict(X_test)
plt.plot(min_val, label = "Min", color='blue')
plt.plot(max_val, label = "Max", color='red')
plt.plot(true_values, label = "y", color = "black")
plt.plot(predicted_values, label = "y\u0302", marker='o')
plt.title('Conformal Predictor Final Predictions')
plt.legend()
plt.show()
Current Plot
Desired Plot
Plot that I want
I was able to revise my code properly and came to the desired output by just selecting less data points, quite simple. Posted the answer just in case.
min_val_normal = plot_normalized_table[['Min']]
max_val_normal = plot_normalized_table[['Max']]
original_normal = plot_normalized_table[['Original Label']]
interval_normal = plot_normalized_table[['Interval Size']]
normal_predicted = predicted_values[0:50]
fig = plt.figure(1, figsize = (18,10)) # Figure size in inches (size_x, size_y)
ax = plt.axes()
#predicted_values = PLS_Model1.predict(X_test) #Predictions from test data (run at least once for the plot to work)
plt.plot(min_val_normal, label = "Min", color='blue')
plt.plot(max_val_normal, label = "Max", color='red')
plt.plot(original_normal, label = "y", color = "black")
plt.plot(normal_predicted, label = "y\u0302", marker='o', )
plt.title('Normalized Final Conformal Predictions')
plt.xlim([-1, 51])
plt.ylim([-1, 2])
plt.legend()
plt.show()

Loop to create subplot /Python

i have a little problem to create a subplot loop.
The following code show my result for one plot.... So it starts with a dayloop than with a hour loop (8 timesteps).
If i run the code i get a nice QUiver plot with the colorbar.
for dd in range(1,15):
day=str(dd)
readfile=fns[files_indizes[dd]]
if dd < 10:
nc_u_comp = NetCDFFile(ROOT+u_comp1+'0'+day+comp)
nc_v_comp = NetCDFFile(ROOT+v_comp1+'0'+day+comp)
else:
nc_u_comp = NetCDFFile(ROOT+u_comp1+day+comp)
nc_v_comp = NetCDFFile(ROOT+v_comp1+day+comp)
time = nc_u_comp.variables['time'][:]
index=readfile.find(comp)
index=index+len(comp)
date=readfile[index-14:index-6]
plt.clf()
for tt in range(0,len(time)):
if tt < 10:
h =str(0)+str(tt)
else:
h=str(tt)
varU=nc_u_comp.variables['u10'][tt,:,:]
varV=nc_v_comp.variables['v10'][tt,:,:]
lat = nc_u_comp.variables['latitude'][:]
lon = nc_u_comp.variables['longitude'][:]
plt.rcParams["figure.figsize"] = [10,10]
#plane projection of the world
#map with box size (defintion on the top)
box = sgeom.box(minx=llcrnrlon, maxx=urcrnrlon, miny=llcrnrlat, maxy=urcrnrlat)
x0, y0, x1, y1 = box.bounds
#Map plot. The middel of the map is central_longitude
#proj = ccrs.PlateCarree(central_longitude=0)
proj=ccrs.PlateCarree()
#Change middelpoint of the map
box_proj = ccrs.PlateCarree(central_longitude=0)
ax2 = plt.axes(projection=proj)
ax2.set_extent([x0, x1, y0, y1], box_proj)
ax2.add_feature(cartopy.feature.BORDERS, linestyle='-', alpha=.5)
ax2.coastlines(resolution='50m')
#Definition of the scale_bar
gl = ax2.gridlines(ccrs.PlateCarree(), \
linestyle='--', alpha=1, linewidth=0.5, draw_labels=True)
gl.xlabels_top = False
gl.ylabels_right = False
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
magnitude = (varU ** 2 + varV ** 2) ** 0.5
strm =plt.streamplot(lon , lat , varU, varV, linewidth=2, density=2, color=magnitude)
cbar= plt.colorbar()
cbar.set_label('$m/s$')
name='Wind in 10 m '+ date + h+' UTC'
ax2.set_aspect('auto')
plt.title(name, y=1)
Now i want to create an 2x4 Subplot array with a colorbar allocate to the complete Subplot array.
I find some infromation in the internet, but it doesn't run with my code. Maybe someone can help me?
This shows how to plot an array of simple Cartopy maps in 4 rows 2 columns. Also shows how to plot a colorbar to accompany the maps array. Hope it helps.
import numpy as np
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import matplotlib as mpl
# create figure with figsize big enough to accomodate all maps, labels, etc.
fig = plt.figure(figsize=(8, 10), tight_layout=False)
# define plot array's arrangement
columns = 2
rows = 4
# set projection to use
projex = ccrs.PlateCarree()
# set the colormap and norm for
# the colorbar to use
cmap1 = mpl.cm.magma
norm1 = mpl.colors.Normalize(vmin=0, vmax=100)
def plotmymap(axs):
# your plot specs of each map should replace this
img = np.random.randint(100, size=(15, 30)) # 2d array of random values (1-100)
# render image on current axis
plims = plt.imshow(img, extent=[-180,180,-90,90], alpha=0.5, cmap=cmap1, norm=norm1)
axs.set_global()
axs.coastlines()
# add title to the map
axs.set_title("Map_"+str(i))
return plims # for use by colorbar
for i in range(1, columns*rows +1):
# add a subplot into the array of plots
ax = fig.add_subplot(rows, columns, i, projection=projex)
plims = plotmymap(ax) # a simple maps is created on subplot
# add a subplot for vertical colorbar
bottom, top = 0.1, 0.9
left, right = 0.1, 0.8
fig.subplots_adjust(top=top, bottom=bottom, left=left, right=right, hspace=0.15, wspace=0.25)
cbar_ax = fig.add_axes([0.85, bottom, 0.05, top-bottom])
fig.colorbar(plims, cax=cbar_ax) # plot colorbar
plt.show() # this plot all the maps
The resulting plots:

Categories