Plot less points (line plot) for matplot lib - python

Currently I have a plot with too many points, I want to avoid overlapping. Want to know how to reduce the amount of points in order to have a smoother line.
Plot Code
fig = plt.figure(1, figsize = (18,10)) # Figure size in inches (size_x, size_y)
ax = plt.axes()
min_val = prediction_intervals2[:, 0]
max_val = prediction_intervals2[:, 1]
true_values = y_test
predicted_values = PLS_Model1.predict(X_test)
plt.plot(min_val, label = "Min", color='blue')
plt.plot(max_val, label = "Max", color='red')
plt.plot(true_values, label = "y", color = "black")
plt.plot(predicted_values, label = "y\u0302", marker='o')
plt.title('Conformal Predictor Final Predictions')
plt.legend()
plt.show()
Current Plot
Desired Plot
Plot that I want

I was able to revise my code properly and came to the desired output by just selecting less data points, quite simple. Posted the answer just in case.
min_val_normal = plot_normalized_table[['Min']]
max_val_normal = plot_normalized_table[['Max']]
original_normal = plot_normalized_table[['Original Label']]
interval_normal = plot_normalized_table[['Interval Size']]
normal_predicted = predicted_values[0:50]
fig = plt.figure(1, figsize = (18,10)) # Figure size in inches (size_x, size_y)
ax = plt.axes()
#predicted_values = PLS_Model1.predict(X_test) #Predictions from test data (run at least once for the plot to work)
plt.plot(min_val_normal, label = "Min", color='blue')
plt.plot(max_val_normal, label = "Max", color='red')
plt.plot(original_normal, label = "y", color = "black")
plt.plot(normal_predicted, label = "y\u0302", marker='o', )
plt.title('Normalized Final Conformal Predictions')
plt.xlim([-1, 51])
plt.ylim([-1, 2])
plt.legend()
plt.show()

Related

How to plot 2 maps in 1 plot using Matplotlib? (Python)

I was trying to overlay 1 map on another, but I could not.
input_file = "slicedSmoothedStokesPlanck/Smoothed_Sliced_PSI_MAP.fits"
i_file = "slicedSmoothedStokesPlanck/Smoothed_Sliced_I_MAP.fits"
pl_b = fits.getdata(input_file, ext=0)
i_file_data = fits.getdata(i_file, ext=0)
fig = plt.figure()
ax = fig.add_subplot(111) #, projection=wcs
im = ax.imshow(texture, alpha=0.5) #, cmap='RdYlBu_r'
ax.imshow(i_file_data)
plt.title("TEST")
plt.show()
The above code, only shows last ax.imshow(i_file_data)
The idea is that I have a map1, and map2. I want to overlay map2 with alpha = 0.5 on map1, and plot it.
If I am getting you correctly then this should work. I assume your bottom layer has color and top layer has just boundaries.
fig, ax = plt.subplots(1,1, figsize(8, 8))
pl_b.plot(ax = ax, cmap = "RdYlBu_r", edgecolor = "black", lw = 0.1) #, this is your map 1
i_file_data.plot(ax = ax, facecolor = "none", edgecolor = "black", lw = 0.1, alpha = 0.5) #, this is your map 2
plt.title("TEST")
plt.show()
You can tweak the top layer and change the opacity accordingly.
If you want your map 2 be just the outline on top of map 1 then set facecolor = "none" in the second layer

Add a Right Yticks To a Plot

I am trying to make a plot of sort, this is my code and the output:
ticks = [3500, 5000]
labels = ["\u0332P", "P\u0305"]
plt.title("Nilai Premi Optimal \n dengan Batasan")
plt.xlabel("$\it{Bargaining Power}$ \u03BB")
plt.plot(xlamda, PsiBLamda, color = "red",linestyle='dashed',label = "$\u03C8_{B} (I^*(X))$")
plt.plot(xlamda, PsiSLamda, color = "blue",linestyle='dashed', label = "$\u03C8_{S} (I^*(X))$")
plt.legend(loc="upper left")
plt.plot(xlamda, PLamda, color = "black")
plt.xlim([0, 1])
plt.ylim([3500, 7000])
plt.show()
The plot output is correct, however I want to add a tick on the right y axis at the 5000 point with the label P. Here is an example:
How do I code that? Thank you
Check out secondary axes:
ticks = [3500, 5000]
labels = ["\u0332P", "P\u0305"]
fig, ax = plt.subplots() # need the axis object
plt.title("Nilai Premi Optimal \n dengan Batasan")
plt.xlabel("$\it{Bargaining Power}$ \u03BB")
plt.plot(xlamda, PsiBLamda, color = "red",linestyle='dashed',label = "$\u03C8_{B} (I^*(X))$")
plt.plot(xlamda, PsiSLamda, color = "blue",linestyle='dashed', label = "$\u03C8_{S} (I^*(X))$")
plt.legend(loc="upper left")
plt.plot(xlamda, PLamda, color = "black")
plt.xlim([0, 1])
plt.ylim([3500, 7000])
rightax = ax.secondary_yaxis('right') # create secondary axis on the right
rightax.set_yticks(ticks) # set tick locations
rightax.set_yticklabels(labels) # set tick labels
plt.show()

Render y-axis properly when overlaying pandas KDE and histogram

Similar questions to this have been asked before but not using these exact two plotting functions together so here we are:
I have a column from a pandas DataFrame that I am plotting both a histogram and the KDE. However, when I plot them, the y-axis is using the raw data value range instead of discrete number of samples/bin (what I want). How can I fix this? The actual plot is perfect, but the y-axis is wrong.
Data:
t2 = [140547476703.0, 113395471484.0, 158360225172.0, 105497674121.0, 186457736557.0, 153705359063.0, 36826568371.0, 200653068740.0, 190761317478.0, 126529980843.0, 98776029557.0, 132773701862.0, 14780432449.0, 167507656251.0, 121353262386.0, 136377019007.0, 134190768743.0, 218619462126.0, 07912778721.0, 215628911255.0, 147024833865.0, 94136343562.0, 135685803096.0, 165901502129.0, 45476074790.0, 125195690010.0, 113910844263.0, 123134290987.0, 112028565305.0, 93448218430.0, 07341012378.0, 93146854494.0, 132958913610.0, 102326700019.0, 196826471714.0, 122045354980.0, 76591131961.0, 134694468251.0, 120212625727.0, 108456858852.0, 106363042112.0, 193367024628.0, 39578667378.0, 178075400604.0, 155513974664.0, 132834624567.0, 137336282646.0, 125379267464.0]
Code:
fig = plt.figure()
# plot hist + kde
t2[t2.columns[0]].plot.kde(color = "maroon", label = "_nolegend_")
t2[t2.columns[0]].plot.hist(density = True, edgecolor = "grey", color = "tomato", title = t2.columns[0])
# plot mean/stdev
m = t2[t2.columns[0]].mean()
stdev = t2[t2.columns[0]].std()
plt.axvline(m, color = "black", ymax = 0.05, label = "mean")
plt.axvline(m-2*stdev, color = "black", ymax = 0.05, linestyle = ":", label = "+/- 2*Stdev")
plt.axvline(m+2*stdev, color = "black", ymax = 0.05, linestyle = ":")
plt.legend()
What it looks like now:
If you want the real counts, the you'll need to scale the KDE up by the width of the bins multiplied by the number of observations. The trickiest part is accessing the data pandas uses to plot the KDE. (I've removed parts related to the legend to simplify the problem at hand).
import matplotlib.pyplot as plt
import numpy as np
# Calculate KDE, get data
axis = t2[t2.columns[0]].plot.kde(color = "maroon", label = "_nolegend_")
xdata = axis.get_children()[0]._x
ydata = axis.get_children()[0]._y
plt.clf()
# Real figure
fig, ax = plt.subplots(figsize=(7,5))
# Plot Histogram, no density.
x = ax.hist(t2[t2.columns[0]], edgecolor = "grey", color = "tomato")
# size of the bins * N obs
scale = np.diff(x[1])[0]*len(t2)
# Plot scaled KDE
ax.plot(xdata, ydata*scale, color='blue')
ax.set_ylabel('N observations')
plt.show()

Set size of matplotlib subplots

I created two subplots on a MPL figure, but i'm having an hard time setting the size on them. I want the space to be splitted between the two charts, so each chart needs to have 50% of the total width of the figure, and i want them to have the same height of the figure, here is how i initialized the subplots:
fig = plt.figure(facecolor='#131722',dpi=155, figsize=(10, 3))
ax1 = plt.subplot2grid((3,3), (2,0), facecolor='#131722')
ax2 = plt.subplot2grid((5,3), (2,2), colspan=5, rowspan=4, facecolor='#131722')
Colors = [['#0400ff', '#FF0000'], ['#09ff00', '#ff8c00']]
for x in List:
Index = List.index(x)
rate_buy = []
total_buy = []
rate_sell = []
total_sell = []
for y in x['data']['asks']:
rate_sell.append(y[0])
total_sell.append(y[1])
for y in x['data']['bids']:
rate_buy.append(y[0])
total_buy.append(y[1])
rBuys = pd.DataFrame({'buy': rate_buy})
rSells = pd.DataFrame({'sell': rate_sell})
tBuys = pd.DataFrame({'total': total_buy})
tSells = pd.DataFrame({'total': total_sell})
ax1.plot(rBuys.buy, tBuys.total, color=Colors[Index][0], linewidth=0.5, alpha=1, label='test')
ax2.plot(rSells.sell, tSells.total, color=Colors[Index][1],alpha=0.5, linewidth=1, label=x['exchange'])
ax1.fill_between(rBuys.buy, 0, tBuys.total, facecolor=Colors[Index][0], alpha=0.4)
ax2.fill_between(rSells.sell, 0, tSells.total, facecolor=Colors[Index][1], alpha=0.4)
And this is what i'm getting:
use plt.tight_layout() before calling plt.show().

For loop to create multiple histogram png files

I am not sure as to why this happens. Maybe it is just a simple mistake that I cannot see, but by using this code:
for filename in glob.glob('/Users/jacob/Desktop/MERS/new/NOT COAL/gensets/statistics_per_lgu/per_lgu_files/*.csv'):
base = os.path.basename(filename)
name = os.path.splitext(base)[0]
df = pd.read_csv(filename)
# Show 4 different binwidths
for i, binwidth in enumerate([10, 20, 30, 40]):
# Set up the plot
ax = plt.subplot(2, 2, i + 1)
plt.subplots_adjust( wspace=0.5, hspace=0.5)
# Draw the plot
ax.hist(df['New Capacity based on 0.8 PF'], bins=binwidth,
color='red', edgecolor='black',alpha=0.5)
# Title and labels
ax.set_title('Histogram with Binwidth = %d' % binwidth, size=10)
ax.set_xlabel('Capacity', size=11)
ax.set_ylabel('Frequency count', size=11)
ax.axvline(x=df['New Capacity based on 0.8 PF'].median(), linestyle='dashed', alpha=0.3, color='blue')
min_ylim, max_ylim = plt.ylim()
ax.text(x=df['New Capacity based on 0.8 PF'].median(),y= max_ylim*0.9, s='Median', alpha=0.7, color='blue',fontsize = 12)
ax.axvline(x=df['New Capacity based on 0.8 PF'].mean(), linestyle='dashed', alpha=0.9, color='green')
min_ylim, max_ylim = plt.ylim()
ax.text(x=df['New Capacity based on 0.8 PF'].mean(),y= max_ylim*0.5, s='Mean', alpha=0.9, color='green',fontsize = 12)
plt.tight_layout()
plt.grid(True)
plt.savefig('/Users/jacob/Documents/Gensets_gis/historgrams/per_lgu_files/{}.png'.format(name))
I get all files created like this attached photo here.
Any ideas as to what I've done wrong?
Thanks in advance.
attached photo of one histogram output
My desired result would be something like this.
Desired output
It doesn't create new subplots but it use previous ones and then it draw new plots on old plots so you have to use clear subplot before you draw new histogram.
ax = plt.subplot(2, 2, i + 1)
ax.clear()
Example code. It gives desired output but if you remove `ax.clear() then first image will be OK but you get new plot with old plots on second and third image.
import os
import pandas as pd
import matplotlib.pyplot as plt
import random
for n in range(3):
filename = f'example_data_{n}.csv'
base = os.path.basename(filename)
name = os.path.splitext(base)[0]
df = pd.DataFrame({'New Capacity based on 0.8 PF': random.choices(list(range(1000)), k=100)})
data = df['New Capacity based on 0.8 PF']
median = data.median()
mean = data.mean()
# Show 4 different binwidths
for i, binwidth in enumerate([10, 20, 30, 40]):
# Set up the plot
ax = plt.subplot(2,2,i+1)
ax.clear() # <--- it removes previous histogram
plt.subplots_adjust( wspace=0.5, hspace=0.5)
# Draw the plot
ax.hist(data , bins=binwidth, color='red', edgecolor='black',alpha=0.5)
# Title and labels
ax.set_title('Histogram with Binwidth = %d' % binwidth, size=10)
ax.set_xlabel('Capacity', size=11)
ax.set_ylabel('Frequency count', size=11)
min_ylim, max_ylim = plt.ylim()
ax.axvline(x=median, linestyle='dashed', alpha=0.3, color='blue')
ax.text(x=median, y= max_ylim*0.9, s='Median', alpha=0.7, color='blue',fontsize = 12)
ax.axvline(x=mean, linestyle='dashed', alpha=0.9, color='green')
ax.text(x=mean, y= max_ylim*0.5, s='Mean', alpha=0.9, color='green',fontsize = 12)
plt.tight_layout()
plt.grid(True)
plt.savefig('{}.png'.format(name))

Categories