Matplotlib/Pandas - Plot not reflective of data

Matplotlib/Pandas - Plot not reflective of data - python

Trying to work out what is going wrong. I am using pandas to generate dataframes and matplotlib to plot a figure with 5 subplots.
Datasets are large xlsx sheets, all data is relative to depth below ground surface, duplicate depths have been removed, all number as text errors have been removed, although depth is shown between 0 - 60 ft data is not continuous over entire interval.
What I am going for
Current issue, x-axis not reflecting trends of data, line plotted straight through data
#Import libraries
import matplotlib.pyplot as plt
import pandas as pd
#Import Excel Data
df1 = pd.read_excel (r'017_FLD_and_FLS.xlsx')
df2 = pd.read_excel (r'017_SUMD_SUMS.xlsx')
#Sort data for plotting
gam = list(df2['Natural Gamma'])
cal_Pre = list(df2['Caliper (pre-pumping)'])
cal_post = list(df2['Caliper (post-pumping)'])
fc = list(df2['Formation Conductivity'])
fr = list(df2['Formation Resistivity'])
neu = list(df2['Neutron'])
den = list(df2['Spherical Density (long-S)'])
htp = list(df1['Heat Pulse Flow'])
tp = list(df1['Static Fluid Temperature'])
t1 = list(df1['Pumping Temperature Run 1'])
t2 = list(df1['Pumping Temperature Run 2'])
t3 = list(df1['Pumping Temperature Run 3'])
depth = list(df2['Depth'])
depth_t = list(df1['Depth'])
#Test to verify subset
#print(gam)
#print (tp)
#Plot space
fig = plt.figure(figsize=(15,20))
# Caliper
ax01 = plt.subplot(151)
plt.plot(cal_Pre, depth, color="black")
ax01 = plt.gca()
ax01.invert_yaxis()
ax01.set_xlabel('Caliper (Inches)', color="black")
ax01.set_ylabel('Depth (Feet)', color="black")
plt.grid(True)
ax11 = ax01.twiny()
ax11.plot(cal_post, depth, color = 'green')
ax11.set_xlabel('Gamma (counts)', color="green")
ax11.tick_params(axis='x', labelcolor="green")
plt.grid(True, linestyle='--')
# Gamma
ax02 = plt.subplot(152)
plt.plot(cal_Pre, depth, color="black")
ax02 = plt.gca()
ax02.invert_yaxis()
ax02.set_xlabel('Caliper (Inches)', color="black")
plt.grid(True)
# Neutron Density
ax03 = plt.subplot(153, sharey=ax01)
plt.plot(den, depth, color = 'red')
ax03 = plt.gca()
ax03.invert_yaxis()
ax03.set_xlabel('Density (counts)', color = 'red')
ax03.tick_params(axis='x', labelcolor="red")
plt.grid(True)
ax13 = ax03.twiny()
ax13.plot(neu, depth, color = 'blue')
ax13.invert_xaxis()
ax13.set_xlabel('Neutron (counts)', color="blue")
ax13.tick_params(axis='x', labelcolor="blue")
plt.grid(True, linestyle='--')
# Conductivity and Resistivity
ax04 = plt.subplot(154, sharey=ax01)
plt.plot(fc, depth, color="black")
ax04 = plt.gca()
ax04.invert_yaxis()
ax04.set_xlabel('Formation Conductivity(mS/cm)', color="black")
ax04.tick_params(axis='x', labelcolor="black")
plt.grid(True)
ax14 = ax04.twiny()
ax14.plot(fr, depth, color = 'blue')
ax14.invert_xaxis()
ax14.set_xlabel('Formation Resistivity (ohm-m)', color="blue")
ax14.tick_params(axis='x', labelcolor="blue")
plt.grid(True, linestyle='--')
# Temperature and heat pulse
ax05 = plt.subplot(155)
plt.plot(htp, depth_t, color="black")
ax05 = plt.gca()
ax05.invert_yaxis()
ax05.set_xlabel('Heat Pulse Flow (gpm)', color="black")
ax05.set_ylabel('Depth (Feet)', color="black")
plt.grid(True)
ax15 = ax05.twiny()
ax15.plot(tp, depth_t, color = 'blue')
ax15.invert_xaxis()
ax15.set_xlabel('Formation Resistivity (ohm-m)', color="blue")
ax15.tick_params(axis='x', labelcolor="blue")
ax15 = ax05.twiny()
ax15.plot(t3, depth_t, color = 'blue')
ax15.invert_xaxis()
ax15.set_xlabel('Formation Resistivity (ohm-m)', color="blue")
ax15.tick_params(axis='x', labelcolor="blue")
plt.grid(True, linestyle='--')
fig.suptitle('GB017', fontsize=30, x=0.5, y=1.01)
fig.tight_layout()
plt.show()
Open to suggestions on making code more elegant but maintaining ease of figure modification.

Related

Automatic add text to matplotlib plot in Python

I try to produce a plot and want to automatically add text (in this case is percentage) to each circle in correspond to each y axis types. Any help would be very helpful.
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Make some data
index=['Stream flow',
'Soil moisture',
'Water indices',
'Others',
'Temperature',
'Precipitation',
'Vegetative indices']
value=[2.13, 6.38, 10.64, 12.77, 17.73, 21.99, 28.37]
# create dataframe
percentages = pd.Series(value,index=index)
df = pd.DataFrame({'percentage' : percentages})
df = df.sort_values(by='percentage')
# we first need a numeric placeholder for the y axis
my_range=list(range(1,len(df.index)+1))
fig, ax = plt.subplots(figsize=(15,8))
# create for each expense type an horizontal line that starts at x = 0 with the length
plt.hlines(y=my_range, xmin=0, xmax=df['percentage']-0.5, color='black', alpha=0.8, linewidth=1)
# create for each expense type a dot at the level of the expense percentage value
line=plt.plot(df['percentage'], my_range, "o", markersize=30, color='#fd8c00', alpha=0.6, linewidth=0.3)
# set labels
ax.set_xlabel('Percentage', fontsize=15)
ax.set_ylabel('')
# set axis
ax.tick_params(axis='both', which='major', labelsize=14)
plt.yticks(my_range, df.index)
ax.set_xlim(0,30)

You can use matplotlib.axes.Axes.text:
x_space = 0.4
y_space = 0.05
fontsize = 7
for y_i, val in enumerate(value, 1):
ax.text(x = val - x_space, y = y_i - y_space, s = f'{val}%', fontsize = fontsize)
You have to adjust x_space, y_space and fontsize in order to fit properly the text within the circles.
Complete code
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Make some data
index=['Stream flow',
'Soil moisture',
'Water indices',
'Others',
'Temperature',
'Precipitation',
'Vegetative indices']
value=[2.13, 6.38, 10.64, 12.77, 17.73, 21.99, 28.37]
# create dataframe
percentages = pd.Series(value,index=index)
df = pd.DataFrame({'percentage' : percentages})
df = df.sort_values(by='percentage')
# we first need a numeric placeholder for the y axis
my_range=list(range(1,len(df.index)+1))
fig, ax = plt.subplots(figsize=(15,8))
# create for each expense type an horizontal line that starts at x = 0 with the length
plt.hlines(y=my_range, xmin=0, xmax=df['percentage']-0.5, color='black', alpha=0.8, linewidth=1)
# create for each expense type a dot at the level of the expense percentage value
line=plt.plot(df['percentage'], my_range, "o", markersize=30, color='#fd8c00', alpha=0.6, linewidth=0.3)
# set labels
ax.set_xlabel('Percentage', fontsize=15)
ax.set_ylabel('')
# set axis
ax.tick_params(axis='both', which='major', labelsize=14)
plt.yticks(my_range, df.index)
ax.set_xlim(0,30)
x_space = 0.4
y_space = 0.05
for y_i, val in enumerate(value, 1):
ax.text(x = val - x_space, y = y_i - y_space, s = f'{val:>5.2f}%', fontsize = 7)
plt.show()
Same code as above, but with increased circle radius and font, in order to improve readability.
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Make some data
index=['Stream flow',
'Soil moisture',
'Water indices',
'Others',
'Temperature',
'Precipitation',
'Vegetative indices']
value=[2.13, 6.38, 10.64, 12.77, 17.73, 21.99, 28.37]
# create dataframe
percentages = pd.Series(value,index=index)
df = pd.DataFrame({'percentage' : percentages})
df = df.sort_values(by='percentage')
# we first need a numeric placeholder for the y axis
my_range=list(range(1,len(df.index)+1))
fig, ax = plt.subplots(figsize=(15,8))
# create for each expense type an horizontal line that starts at x = 0 with the length
plt.hlines(y=my_range, xmin=0, xmax=df['percentage']-0.85, color='black', alpha=0.8, linewidth=1)
# create for each expense type a dot at the level of the expense percentage value
line=plt.plot(df['percentage'], my_range, "o", markersize=50, color='#fd8c00', alpha=0.6, linewidth=0.3)
# set labels
ax.set_xlabel('Percentage', fontsize=15)
ax.set_ylabel('')
# set axis
ax.tick_params(axis='both', which='major', labelsize=14)
plt.yticks(my_range, df.index)
ax.set_xlim(0,30)
ax.set_ylim(0, len(value) + 1)
x_space = 0.75
y_space = 0.06
fontsize = 12
for y_i, val in enumerate(value, 1):
ax.text(x = val - x_space, y = y_i - y_space, s = f'{val:>5.2f}%', fontsize = fontsize)
plt.show()
Even better, you can use matplotlib.axes.Axes.annotate to get rid of x_space and y_space:
fontsize = 12
for y_i, x_i in enumerate(value, 1):
ax.annotate(f'{x_i:>5.2f}%', xy = (x_i, y_i), xytext = (0, 0), textcoords = 'offset points', ha = 'center', va = 'center', fontsize = fontsize)
You still have to adjust the fontsize to properly fit the radius of the circles.

Gradient fill from zero till a curve

I have been using Is it possible to get color gradients under curve in matplotlib? as a reference (you can see the similarities, however i cant for the life of me figure out how to push the shading all the way down to 0 on the Y AXIS, for some reason which i cant find out, it has an upward sloping straight line cutting off the shading, i cant find anything in my data to suggest why its doing this.
for context the y axis can show positive and negative and i want to fill the scale the whole way so using gradient colour to fill from 0 to the line (positive) then fill from 0 to the negative line (see my blue example from a previous chart -same data-)
Here is my code
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.patches import Polygon
# Variables
AUM = df['#AHD_AUM'].head(104)
MM = df['#AHD_Managed_Money_Net'].head(104)
PRICE = df['#AHD_Price'].head(104)
DATES = df['DATES'].head(104)
# Date Friendly Variables for Plot
List_AUM = df['#AHD_AUM'].head(104).to_list()
List_MM = df['#AHD_Managed_Money_Net'].head(104).to_list()
List_DATES = df['DATES'].head(104).to_list()
X = 0 * df['#AHD_AUM'].head(104)
# Make a date list changing dates with numbers to avoid the issue with the plot
interpreting dates
for i in range(len(df['DATES'].head(104))):
count = i
df['count'][i] = 120 - i
# X and Y data variables changed to arrays as when i had these set as dates
matplotlib hates it
x = df['count'].head(104).to_numpy()
y = df['#AHD_Managed_Money_Net'].head(104).to_numpy()
#DD = AUM.to_numpy()
#MMM = MM.to_numpy()
def main():
for _ in range(len(DD)):
gradient_fill(x,y)
plt.show()
def gradient_fill(x,y, fill_color=None, ax=None, **kwargs):
"""
"""
if ax is None:
ax = plt.gca()
line, = ax.plot(x, y, **kwargs)
if fill_color is None:
fill_color = line.get_color()
zorder = line.get_zorder()
alpha = line.get_alpha()
alpha = 1.0 if alpha is None else alpha
z = np.empty((100, 1, 4), dtype=float)
rgb = mcolors.colorConverter.to_rgb(fill_color)
z[:,:,:3] = rgb
z[:,:,-1] = np.linspace(0, alpha, 100)[:,None]
xmin, xmax, ymin, ymax = x.min(), x.max(), y.min(), y.max()
im = ax.imshow(z, aspect='auto', extent=[xmin, xmax, ymin, ymax],
origin='lower', zorder=zorder)
xy = np.column_stack([x, y])
# xy = np.vstack([[xmin, ymin], xy, [xmax, ymin], [xmin, ymin]]) ### i dont
need this so i have just commented it out
clip_path = Polygon(xy, facecolor='none', edgecolor='none', closed=True)
ax.add_patch(clip_path)
im.set_clip_path(clip_path)
ax.autoscale(True)
return line, im
main()
this is my current output

An easier way to clip the gradient by the curve, is to use a polygon obtained from fill_between.
Here is some example code to get you started.
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(123)
x = np.linspace(0, 10, 200)
y = np.random.normal(0.01, 1, 200).cumsum()
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(x, y)
ylim = ax.get_ylim()
grad1 = ax.imshow(np.linspace(0, 1, 256).reshape(-1, 1), cmap='Blues', vmin=-0.5, aspect='auto',
extent=[x.min(), x.max(), 0, y.max()], origin='lower')
poly_pos = ax.fill_between(x, y.min(), y, alpha=0.1)
grad1.set_clip_path(poly_pos.get_paths()[0], transform=ax.transData)
poly_pos.remove()
grad2 = ax.imshow(np.linspace(0, 1, 256).reshape(-1, 1), cmap='Reds', vmin=-0.5, aspect='auto',
extent=[x.min(), x.max(), y.min(), 0], origin='upper')
poly_neg = ax.fill_between(x, y, y.max(), alpha=0.1)
grad2.set_clip_path(poly_neg.get_paths()[0], transform=ax.transData)
poly_neg.remove()
ax.set_ylim(ylim)
ax.axhline(0, color='black') # show a line at x=0
plt.show()
PS: vmin in imshow can be used to remove the color range where it's very light:
grad1 = ax.imshow(np.linspace(0, 1, 256).reshape(-1, 1), cmap='Blues', vmin=-0.5, aspect='auto',
extent=[x.min(), x.max(), 0, y.max()], origin='lower')
grad2 = ax.imshow(np.linspace(0, 1, 256).reshape(-1, 1), cmap='Reds', vmin=-0.5, aspect='auto',
extent=[x.min(), x.max(), y.min(), 0], origin='upper')

import pandas as pd # For data handling
import seaborn as sns # For plotting
import numpy as np
import matplotlib.pyplot as plt # For plotting
import matplotlib
#some preferred user settings
plt.rcParams['figure.figsize'] = (18.0, 12.0)
pd.set_option('display.max_columns', None)
%matplotlib inline
import warnings
warnings.filterwarnings(action='ignore')
from mpl_toolkits.axisartist.parasite_axes import HostAxes, ParasiteAxes
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import datetime as dt
import matplotlib.dates as mdates
import pandas
Metal = CAD
# Variables
AUM = Metal.iloc[:,[7]].head(104)
MM = Metal.iloc[:,[0]].head(104)
PRICE = Metal.iloc[:,[8]].head(104)
#Last_Report = Metal.iloc[:,[9]].head(1).dt.strftime('%d %b %Y').to_list()
DATES = Metal.iloc[:,[10]].head(104)
# Dataframe for Net Position High
Net_High = Metal[Metal.iloc[:,[0]] == Metal.iloc[:,[0]].max()]
# Variables for Chart Annotation for Net Position High
Pos_High_Date = Net_High.iloc[:, [0]]
Pos_High_AUM = Net_High.iloc[:, [7]][0]/[1000000000]
Pos_High_Price = Net_High.iloc[:, [8]].to_numpy()[0].round().astype('int')
Pos_High = Net_High.iloc[:, [0]][0].astype('int')
Str_Date = mdates.num2date(Pos_High_Date)
Str_Date = pd.to_datetime(Str_Date[0]).strftime("%d %b %y")[0]
# Dataframe for Net Position Low
Net_Low = df[df['#CAD_Managed_Money_Net'] == df['#CAD_Managed_Money_Net'].head(104).min()]
# Variables for Chart Annotation for Net Position High
Pos_Low_Date = Net_Low.iloc[:, [55]].to_numpy()
Pos_Low_AUM = Net_Low.iloc[:, [26]].to_numpy()[0].round()/[1000000000]
Pos_Low_Price = Net_Low.iloc[:, [27]].to_numpy()[0].round().astype('int')
Pos_Low = Net_Low['#CAD_Managed_Money_Net'][0].astype('int')
Str_Date_Low = mdates.num2date(Pos_Low_Date)
Str_Date_Low = pd.to_datetime(Str_Date_Low[0]).strftime("%d %b %y")[0]
# C Brand Colour Scheme
C = ['deepskyblue', '#003399', 'slategray', '#027608','#cc0000']
def make_patch_spines_invisible(ax):
ax.set_frame_on(True)
ax.patch.set_visible(False)
for sp in ax.spines.values():
sp.set_visible(False)
fig, host = plt.subplots(figsize=(25,15))
fig.subplots_adjust(right=0.8)
#twinx() creates another axes sharing the x axis we do this twice
par1 = host.twinx()
par2 = host.twinx()
# Offset the right spine of par2 the ticks
par2.spines["right"].set_position(("axes",1.08))
#because par2 was created by twinx the frame is off so we need to use the method created above
make_patch_spines_invisible(par2)
# second, show the right spine
par2.spines["right"].set_visible(True)
######### Colouring in Plots
x = DATES
y = MM
ylim = host.get_ylim()
Long = host.imshow(np.linspace(0, 1, 256).reshape(-1, 1), cmap= 'Blues', vmin=-0.5, aspect='auto',
extent=[x.min(), x.max(), 0, y.max()], origin='lower')
poly_pos = host.fill_between(x, y.min(), y, alpha=0.1)
Long.set_clip_path(poly_pos.get_paths()[0], transform=host.transData)
poly_pos.remove()
Short = host.imshow(np.linspace(0, 1, 256).reshape(-1, 1), cmap='OrRd', vmin=-0.5, aspect='auto',
extent=[x.min(), x.max(), y.min(), 0], origin='upper')
poly_neg = host.fill_between(x, y, y.max(), alpha=0.1)
Short.set_clip_path(poly_neg.get_paths()[0], transform=host.transData)
poly_neg.remove()
##########
#plot data
p1, = host.plot(DATES, MM, label="Managed Money Net Position", linewidth=0.0,color = Citi[1], alpha = 0.8)
p2, = par1.plot(DATES, AUM, label="AUM",linewidth=1, marker = '$A$',mew = 1,mfc = 'w', color = Citi[0], alpha = 0.8)
p3, = par2.plot(DATES, PRICE, label="3M Price",linewidth=1, marker = '$p$', color = Citi[2], alpha = 0.8)
#Automatically scale and format
host_labels = ['{:,.0f}'.format(x) + 'K Lots' for x in host.get_yticks()/1000]
host.set_yticklabels(host_labels)
par1_labels = ['{:,.1f}'.format(x) + ' $Billion' for x in par1.get_yticks()/1000000000]
par1.set_yticklabels(par1_labels)
par2_labels = ['{:,.0f}'.format(x) + ' $' for x in par2.get_yticks()]
par2.set_yticklabels(par2_labels)
# x Axis formatting (date)
formatter = matplotlib.dates.DateFormatter('%b- %Y')
host.xaxis.set_major_formatter(formatter)
# Rotates and right-aligns the x labels so they don't crowd each other.
for label in host.get_xticklabels(which='major'):
label.set(rotation=30, horizontalalignment='right')
# Axis Labels
host.set_xlabel("Date")
host.set_ylabel("Managed Money Net Position")
par1.set_ylabel("AUM")
par2.set_ylabel("3M Price")
# Tick Parameters
tkw = dict(size=10, width=2.5)
# Set tick colours
host.tick_params(axis = 'y', colors = Citi[1], **tkw)
par1.tick_params(axis = 'y', colors = Citi[0], **tkw)
par2.tick_params(axis = 'y', colors = Citi[2], **tkw)
#host.tick_params(which='major',axis = 'x',direction='out', colors = Citi[2], **tkw)
#plt.xticks(x, rotation='vertical')
#host.xaxis.set_major_locator(AutoMajorLocator())
host.xaxis.set_major_locator(MultipleLocator(24))
host.tick_params('x',which='major', length=7)
#Label colours taken from plot
host.yaxis.label.set_color(p1.get_color())
par1.yaxis.label.set_color(p2.get_color())
par2.yaxis.label.set_color(p3.get_color())
# Map Title
host.set_title('Aluminium Managed Money Net Positioning as of %s'% Last_Report[0],fontsize='large')
#Colour Spines cant figure out how to do it for the host
par1.spines["right"].set_edgecolor(p2.get_color())
par2.spines["right"].set_edgecolor(p3.get_color())
###### Annotation Tests ##########
## Net Position High Box
host.annotate(f' Net Position High | {Pos_High} \n Date | {Str_Date} \n AUM | ${Pos_High_AUM[0].round(1)} Billion\n 3M Price | ${Pos_High_Price[0]}$',
xy=(Pos_High_Date, Pos_High), xycoords='data',
xytext=(0.02, .85), textcoords='axes fraction',
horizontalalignment='left',
verticalalignment='bottom',
color='white',
bbox=dict(boxstyle="round", fc= Citi[1],edgecolor='white'),
arrowprops=dict(
facecolor='black',
arrowstyle= '->'))
## Net Position Low Box
host.annotate(f' Net Position Low | {Pos_Low} \n Date | {Str_Date_Low} \n AUM | ${Pos_Low_AUM[0].round(1)} Billion\n 3M Price | ${Pos_Low_Price[0]}$',
xy=(Pos_Low_Date, Pos_Low), xycoords='data',
xytext=(0.02, .80), textcoords='axes fraction',
horizontalalignment='left',
verticalalignment='top',
color='white',
bbox=dict(boxstyle="round", fc= Citi[4],edgecolor='white'),
arrowprops=dict(
facecolor='black',
arrowstyle= '->'))
################
# Legend - a little complicated as we have to take from multiple axis
lines = [p1, p2, p3]
########## Plot text and line on chart if you want to
# host.axvline(x = DATES[52] , linestyle='dotted', color='black') ###Dotted Line when Needed
# host.text(2020.3, 10, 'Managed Money \n Aluminium')
# host.text(2020.5, 92, r'Ali',color='black')
# host.text(2020.8,15, r'some event', rotation=90)
host.legend(lines,[l.get_label() for l in lines],loc=2, fontsize=12,frameon=False)
plt.savefig('multiple_axes.png', dpi=300, bbox_inches='tight')

Different shading under Seaborn Distplot

I'm trying to create plot with shadings which are based on this MIC(1) line.
Different shading above than beneath.
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
def createSkewDist(mean, sd, skew, size):
# calculate the degrees of freedom 1 required to obtain the specific skewness statistic, derived from simulations
loglog_slope=-2.211897875506251
loglog_intercept=1.002555437670879
df2=500
df1 = 10**(loglog_slope*np.log10(abs(skew)) + loglog_intercept)
# sample from F distribution
fsample = np.sort(stats.f(df1, df2).rvs(size=size))
# adjust the variance by scaling the distance from each point to the distribution mean by a constant, derived from simulations
k1_slope = 0.5670830069364579
k1_intercept = -0.09239985798819927
k2_slope = 0.5823114978219056
k2_intercept = -0.11748300123471256
scaling_slope = abs(skew)*k1_slope + k1_intercept
scaling_intercept = abs(skew)*k2_slope + k2_intercept
scale_factor = (sd - scaling_intercept)/scaling_slope
new_dist = (fsample - np.mean(fsample))*scale_factor + fsample
# flip the distribution if specified skew is negative
if skew < 0:
new_dist = np.mean(new_dist) - new_dist
# adjust the distribution mean to the specified value
final_dist = new_dist + (mean - np.mean(new_dist))
return final_dist
desired_mean = 30
desired_skew = 1.5
desired_sd = 20
final_dist = createSkewDist(mean=desired_mean, sd=desired_sd, skew=desired_skew, size=1000000)
# inspect the plots & moments, try random sample
fig, ax = plt.subplots(figsize=(12,7))
sns.distplot(final_dist,
hist=False,
ax=ax,
color='darkred',
kde_kws=dict(linewidth=4))
l1 = ax.lines[0]
# Get the xy data from the lines so that we can shade
x1 = l1.get_xydata()[:,0]
x1[0] = 0
y1 = l1.get_xydata()[:,1]
y1[0] = 0
ax.fill_between(x1,y1, color="lemonchiffon", alpha=0.3)
ax.set_ylim(0.0001,0.03)
ax.axhline(0.002, ls="--")
ax.set_xlim(1.5, 200)
ax.set_yticklabels([])
ax.set_xticklabels([])
trans = transforms.blended_transform_factory(
ax.get_yticklabels()[0].get_transform(), ax.transData)
ax.text(0,0.0025, "{}".format("MIC(1) = 1"), color="blue", transform=trans,
ha="right", va="top", fontsize = 12)
trans_2 = transforms.blended_transform_factory(
ax.get_xticklabels()[0].get_transform(), ax.transData)
ax.text(84,0, "{}".format("\n84"), color="darkred", transform=trans_2,
ha="center", va="top", fontsize = 12)
ax.text(1.5,0, "{}".format("\n0"), color="darkred", transform=trans_2,
ha="center", va="top", fontsize = 12)
ax.axvline(x = 84, ymin = 0, ymax = 0.03, ls = '--', color = 'darkred' )
ax.set_yticks([])
ax.set_xticks([])
ax.spines['top'].set_color(None)
ax.spines['right'].set_color(None)
ax.spines['left'].set_linewidth(2)
ax.spines['bottom'].set_linewidth(2)
ax.set_ylabel("Concentration [mg/L]", labelpad = 80, fontsize = 15)
ax.set_xlabel("Time [h]", labelpad = 80, fontsize = 15)
ax.set_title("AUC/MIC", fontsize = 20, pad = 30)
plt.annotate("AUC/MIC",
xy=(18, 0.02),
xytext=(18, 0.03),
arrowprops=dict(arrowstyle="->"), fontsize = 12);
;
That's what I have:
And that's what I'd like to have (it's done in paint, so forgive me :) ):
I was experimenting with fill_between and fill_betweenx. However, without any satisfying results. Definitely, run out of ideas. I'd really appreciate any help on this. Best wishes!

Your fill_between works as expected. The problem is that color="lemonchiffon" with alpha=0.3 is barely visible. Try to use a brighter color and/or a higher value for alpha.
So, this colors the part of the graph between zero and the kde curve.
Now, to create a different coloring above and below the horizontal line, where= and np.minimum can be used in fill_between:
pos_hline = 0.002
ax.fill_between(x1, pos_hline, y1, color="yellow", alpha=0.3, where=y1 > pos_hline)
ax.fill_between(x1, 0, np.minimum(y1, pos_hline), color="blue", alpha=0.3)
Without where=y1 > pos_hline, fill_between would also color the region above the curve where the curve falls below that horizontal line.
PS: Note that sns.histplot has been deprecated since Seaborn version 0.11. To only plot the kde curve, you can use sns.kdeplot:
sns.kdeplot(final_dist, ax=ax, color='darkred', linewidth=4)

For loop to create multiple histogram png files

I am not sure as to why this happens. Maybe it is just a simple mistake that I cannot see, but by using this code:
for filename in glob.glob('/Users/jacob/Desktop/MERS/new/NOT COAL/gensets/statistics_per_lgu/per_lgu_files/*.csv'):
base = os.path.basename(filename)
name = os.path.splitext(base)[0]
df = pd.read_csv(filename)
# Show 4 different binwidths
for i, binwidth in enumerate([10, 20, 30, 40]):
# Set up the plot
ax = plt.subplot(2, 2, i + 1)
plt.subplots_adjust( wspace=0.5, hspace=0.5)
# Draw the plot
ax.hist(df['New Capacity based on 0.8 PF'], bins=binwidth,
color='red', edgecolor='black',alpha=0.5)
# Title and labels
ax.set_title('Histogram with Binwidth = %d' % binwidth, size=10)
ax.set_xlabel('Capacity', size=11)
ax.set_ylabel('Frequency count', size=11)
ax.axvline(x=df['New Capacity based on 0.8 PF'].median(), linestyle='dashed', alpha=0.3, color='blue')
min_ylim, max_ylim = plt.ylim()
ax.text(x=df['New Capacity based on 0.8 PF'].median(),y= max_ylim*0.9, s='Median', alpha=0.7, color='blue',fontsize = 12)
ax.axvline(x=df['New Capacity based on 0.8 PF'].mean(), linestyle='dashed', alpha=0.9, color='green')
min_ylim, max_ylim = plt.ylim()
ax.text(x=df['New Capacity based on 0.8 PF'].mean(),y= max_ylim*0.5, s='Mean', alpha=0.9, color='green',fontsize = 12)
plt.tight_layout()
plt.grid(True)
plt.savefig('/Users/jacob/Documents/Gensets_gis/historgrams/per_lgu_files/{}.png'.format(name))
I get all files created like this attached photo here.
Any ideas as to what I've done wrong?
Thanks in advance.
attached photo of one histogram output
My desired result would be something like this.
Desired output

It doesn't create new subplots but it use previous ones and then it draw new plots on old plots so you have to use clear subplot before you draw new histogram.
ax = plt.subplot(2, 2, i + 1)
ax.clear()
Example code. It gives desired output but if you remove `ax.clear() then first image will be OK but you get new plot with old plots on second and third image.
import os
import pandas as pd
import matplotlib.pyplot as plt
import random
for n in range(3):
filename = f'example_data_{n}.csv'
base = os.path.basename(filename)
name = os.path.splitext(base)[0]
df = pd.DataFrame({'New Capacity based on 0.8 PF': random.choices(list(range(1000)), k=100)})
data = df['New Capacity based on 0.8 PF']
median = data.median()
mean = data.mean()
# Show 4 different binwidths
for i, binwidth in enumerate([10, 20, 30, 40]):
# Set up the plot
ax = plt.subplot(2,2,i+1)
ax.clear() # <--- it removes previous histogram
plt.subplots_adjust( wspace=0.5, hspace=0.5)
# Draw the plot
ax.hist(data , bins=binwidth, color='red', edgecolor='black',alpha=0.5)
# Title and labels
ax.set_title('Histogram with Binwidth = %d' % binwidth, size=10)
ax.set_xlabel('Capacity', size=11)
ax.set_ylabel('Frequency count', size=11)
min_ylim, max_ylim = plt.ylim()
ax.axvline(x=median, linestyle='dashed', alpha=0.3, color='blue')
ax.text(x=median, y= max_ylim*0.9, s='Median', alpha=0.7, color='blue',fontsize = 12)
ax.axvline(x=mean, linestyle='dashed', alpha=0.9, color='green')
ax.text(x=mean, y= max_ylim*0.5, s='Mean', alpha=0.9, color='green',fontsize = 12)
plt.tight_layout()
plt.grid(True)
plt.savefig('{}.png'.format(name))

Does anyone know how to get rid of the black 'y' axis to the left in Matplotlib plot?

After moving all of my 'y' axes to subplots I get an unwanted axis. It's the black one on the left. Does anyone know how to get rid of it? I'm sure it's getting plotted when I call the figure, however I'm not sure how to get rid of it.
def mpl_plot(self, plot_page, replot = 0): #Data stored in lists
if plot_page == 1: #Plot 1st Page
#plt0 = self.mplwidget.axes
fig = self.mplwidget.figure #Add a figure
if plot_page == 2: #Plot 2nd Page
#plt0 = self.mplwidget_2.axes
fig = self.mplwidget_2.figure #Add a figure
if plot_page == 3: #Plot 3rd Page
#plt0 = self.mplwidget_3.axes
fig = self.mplwidget_3.figure #Add a figure
#Clears Figure if data is roplotted
if replot == 1:
fig.clf()
par0 = fig.add_subplot(111)
par1 = fig.add_subplot(111)
par2 = fig.add_subplot(111)
#Add Axes
plt = par0.twinx()
ax1 = par1.twinx()
ax2 = par2.twinx()
impeller = str(self.comboBox_impellers.currentText()) #Get Impeller
fac_curves = self.mpl_factory_specs(impeller)
fac_lift = fac_curves[0]
fac_power = fac_curves[1]
fac_flow = fac_curves[2]
fac_eff = fac_curves[3]
fac_max_eff = fac_curves[4]
fac_max_eff_bpd = fac_curves[5]
fac_ranges = self.mpl_factory_ranges()
min_range = fac_ranges[0]
max_range = fac_ranges[1]
#Plot Chart
plt.hold(True)
plt.plot(fac_flow, fac_lift, 'b', linestyle = "dashed", linewidth = 1)
ax1.plot(fac_flow, fac_power, 'r', linestyle = "dashed", linewidth = 1)
ax2.plot(fac_flow, fac_eff, 'g', linestyle = "dashed", linewidth = 1)
#Move spines
ax2.spines["right"].set_position(("outward", 25))
self.make_patch_spines_invisible(ax2)
ax2.spines["right"].set_visible(True)
#Plot x axis minor tick marks
minorLocatorx = AutoMinorLocator()
ax1.xaxis.set_minor_locator(minorLocatorx)
ax1.tick_params(which='both', width= 0.5)
ax1.tick_params(which='major', length=7)
ax1.tick_params(which='minor', length=4, color='k')
#Plot y axis minor tick marks
minorLocatory = AutoMinorLocator()
plt.yaxis.set_minor_locator(minorLocatory)
plt.tick_params(which='both', width= 0.5)
plt.tick_params(which='major', length=7)
plt.tick_params(which='minor', length=4, color='k')
#Make Border of Chart White
fig.set_facecolor('white')
#Plot Grid
plt.grid(b=True, which='both', color='k', linestyle='-')
#set shaded Area
plt.axvspan(min_range, max_range, facecolor='#9BE2FA', alpha=0.5) #Yellow rectangular shaded area
#Set Vertical Lines
plt.axvline(fac_max_eff_bpd, color = '#69767A')
#BEP MARKER *** Can change marker style if needed
bep = fac_max_eff * 0.90 #bep is 90% of maximum efficiency point
bep_corrected = bep * 0.90 # We knock off another 10% to place the arrow correctly on chart
ax2.annotate('BEP', xy=(fac_max_eff_bpd, bep_corrected), xycoords='data', #Subtract 2.5 shows up correctly on chart
xytext=(-50, 30), textcoords='offset points',
bbox=dict(boxstyle="round", fc="0.8"),
arrowprops=dict(arrowstyle="-|>",
shrinkA=0, shrinkB=10,
connectionstyle="angle,angleA=0,angleB=90,rad=10"),
)
#Set Scales
plt.set_ylim(0,max(fac_lift) + (max(fac_lift) * 0.40)) #Pressure
#plt.set_xlim(0,max(fac_flow))
ax1.set_ylim(0,max(fac_power) + (max(fac_power) * 0.40)) #Power
ax2.set_ylim(0,max(fac_eff) + (max(fac_eff) * 0.40)) #Effiency
plt.yaxis.tick_left()
# Set Axes Colors
plt.tick_params(axis='y', colors='b')
ax1.tick_params(axis='y', colors='r')
ax2.tick_params(axis='y', colors='g')
# Set Chart Labels
plt.yaxis.set_label_position("left")
plt.set_xlabel("BPD")
plt.set_ylabel("Feet" , color = 'b')
#ax1.set_ylabel("BHP", color = 'r')
#ax1.set_ylabel("Effiency", color = 'g')
# Set tight layout
fig.set_tight_layout
# Since we moved Feet Axis to subplot, extra unneeded axis was created. This Removes it
# Refresh
fig.canvas.update()
fig.canvas.draw()

Well it looks like you have three y-axes, referencing the one you want to not be shown, you could try adding:
ax.yaxis.set_tick_params(labelsize=0, length=0, which='major')
to just make invisible the labels and ticks. I think it's ax2 you want gone?

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Matplotlib/Pandas - Plot not reflective of data - python

Related

Automatic add text to matplotlib plot in Python

Gradient fill from zero till a curve

Different shading under Seaborn Distplot

For loop to create multiple histogram png files

Does anyone know how to get rid of the black 'y' axis to the left in Matplotlib plot?

Categories

Resources