Plotting Results from For Iteration - python

I am new to python and I want to ask how to plot a figure from for loop iteration?
Here is the code!
import numpy as np #numerical python
import matplotlib.pyplot as plt #python plotting
from math import exp #exponential math directory
T_initial = 293
T_reference = range(298,340,2)
R1_initial = 57500
R2_initial = 13300
R3_initial = 18000
R4_initial = 5600
Beta = 4150
Vin = 2.8
for i in T_reference:
R1_refe = R1_initial*exp(Beta*((1/i)-(1/T_initial)))
Rs = (R2_initial/(R2_initial+ R1_refe)) - (R4_initial/(R3_initial+R4_initial))
Vo = Vin*Rs
Vo_round = round(Vo, 3)
print(i,Vo_round)

You can plot the data like this:
for i in T_reference:
R1_refe = R1_initial*exp(Beta*((1/i)-(1/T_initial)))
Rs = (R2_initial/(R2_initial+ R1_refe)) - (R4_initial/(R3_initial+R4_initial))
Vo = Vin*Rs
Vo_round = round(Vo, 3)
plt.scatter(i, Vo_round)
plt.show()
Is this what you were looking for?

Put the values of the items you want to plot into two different arrays using the 'append' method (one for the 'x' axis and one for the 'y' axis).
Then just plot the graph with the matplotlib
It should be something like the below:
is1 = list()
vos = list()
for i in T_reference:
R1_refe = R1_initial*exp(Beta*((1/i)-(1/T_initial)))
Rs = (R2_initial/(R2_initial+ R1_refe)) - (R4_initial/(R3_initial+R4_initial))
Vo = Vin*Rs
Vo_round = round(Vo, 3)
print(i,Vo_round)
is1.append(i)
vos.append(Vo_round)
plt.plot(is1,vos)
Here is a reference for plotting

Two options without a for-loop
Create a function
def v_o(T_reference):
T_initial = 293
R1_initial = 57500
R2_initial = 13300
R3_initial = 18000
R4_initial = 5600
Beta = 4150
Vin = 2.8
R1_refe = R1_initial*exp(Beta*((1/T_reference)-(1/T_initial)))
Rs = (R2_initial/(R2_initial + R1_refe)) - (R4_initial/(R3_initial+R4_initial))
Vo = Vin*Rs
Vo_round = round(Vo, 3)
return Vo_round
Option 1: Use a pandas dataframe
import pandas as pd
import matplotlib.pyplot as plt
# create the dataframe with T_reference
df = pd.DataFrame({'t_ref': [*range(298, 340,2)]})
# Call the function to calculate v_o
df['v_o'] = df.t_ref.apply(v_o)
# plot
df.plot('t_ref', 'v_o', legend=False)
plt.show()
Option 2: use map
T_reference = [*range(298, 340,2)]
v_o = list(map(v_o, T_reference))
plt.plot(T_reference, v_o)
plt.show()
Plot
The plot from both options looks like the following

Related

Reverse Array in a dataframe

Hi I am trying to extract data from a netCDF file, but the data is upside down. How can I reverse the database:
The data I want to extract is the height data from the (netcdf) at the points I have in the CSV file. my Data:
import numpy as np
from netCDF4 import Dataset
import matplotlib.pyplot as plt
import pandas as pd
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Path, PathPatch
csv_data = np.loadtxt('CSV with target coordinates',skiprows=1,delimiter=',')
num_el = csv_data[:,0]
lat = csv_data[:,1]
lon = csv_data[:,2]
value = csv_data[:,3]
data = Dataset("elevation Data",'r')
lon_range = data.variables['x_range'][:]
lat_range = data.variables['y_range'][:]
topo_range = data.variables['z_range'][:]
spacing = data.variables['spacing'][:]
dimension = data.variables['dimension'][:]
z = data.variables['z'][:]
lon_num = dimension[0]
lat_num = dimension[1]
etopo_lon = np.linspace(lon_range[0],lon_range[1],dimension[0])
etopo_lat = np.linspace(lat_range[0],lat_range[1],dimension[1])
topo = np.reshape(z, (lat_num, lon_num))
height = np.empty_like(num_el)
desired_lat_idx = np.empty_like(num_el)
desired_lon_idx = np.empty_like(num_el)
for i in range(len(num_el)):
tmp_lat = np.abs(etopo_lat - lat[i]).argmin()
tmp_lon = np.abs(etopo_lon - lon[i]).argmin()
desired_lat_idx[i] = tmp_lat
desired_lon_idx[i] = tmp_lon
height[i] = topo[tmp_lat,tmp_lon]
height[height<-10]=0
print(len(desired_lat_idx))
print(len(desired_lon_idx))
print(len(height))
dfl= pd.DataFrame({
'Latitude' : lat.reshape(-1),
'Longitude': lon.reshape(-1),
'Altitude': height.reshape(-1)
});
print(dfl)
# but the Lat should not be changed here (the dfl must be correct)
df =dfl
lat=np.array(df['Latitude'])
lon=np.array(df['Longitude'])
val=np.array(df['Altitude'])
m = basemap.Basemap(projection='robin', lon_0=0, lat_0=0, resolution='l',area_thresh=1000)
m.drawcoastlines(color = 'black')
x,y = m(lon,lat)
colormesh= m.contourf(x,y,val,100, tri=True, cmap = 'terrain')
plt.colorbar(location='bottom',pad=0.04,fraction=0.06)
plt.show()
I have already tried:
lat = csv_data[:,1]
lat= lat*(-1)
But this didnĀ“t work
It's a plotting artifact().
Just do:
colormesh= m.contourf(x,y[::-1],val,100, tri=True, cmap = 'terrain')
y[::-1] will reverse the order of the y latitude elements (as opposed to the land-mass outlines; and while keeping the x longitude coordinates the same) and hence flip them.
I've often had this problem with plotting numpy image data in the past.
Your raw CSV data are unlikely to be flipped themselves (why would they be?). You should try sanity-checking them [I am not a domain expert I'm afraid]! Overlaying an actual coordinate grid can help with this.
Another way to do it is given here: Reverse Y-Axis in PyPlot
You could also therefore just do
ax = plt.gca()
ax.invert_yaxis()

boxplot structure disappears when pandas contains nan [duplicate]

I am using matplotlib to plot a box figure but there are some missing values (NaN). Then I found it doesn't display the box figure within the columns having NaN values.
Do you know how to solve this problem?
Here are the codes.
import numpy as np
import matplotlib.pyplot as plt
#==============================================================================
# open data
#==============================================================================
filename='C:\\Users\\liren\\OneDrive\\Data\\DATA in the first field-final\\ks.csv'
AllData=np.genfromtxt(filename,delimiter=";",skip_header=0,dtype='str')
TreatmentCode = AllData[1:,0]
RepCode = AllData[1:,1]
KsData= AllData[1:,2:].astype('float')
DepthHeader = AllData[0,2:].astype('float')
TreatmentUnique = np.unique(TreatmentCode)[[3,1,4,2,8,6,9,7,0,5,10],]
nT = TreatmentUnique.size#nT=number of treatments
#nD=number of deepth;nR=numbers of replications;nT=number of treatments;iT=iterms of treatments
nD = 5
nR = 6
KsData_3D = np.zeros((nT,nD,nR))
for iT in range(nT):
Treatment = TreatmentUnique[iT]
TreatmentFilter = TreatmentCode == Treatment
KsData_Filtered = KsData[TreatmentFilter,:]
KsData_3D[iT,:,:] = KsData_Filtered.transpose()iD = 4
fig=plt.figure()
ax = fig.add_subplot(111)
plt.boxplot(KsData_3D[:,iD,:].transpose())
ax.set_xticks(range(1,nT+1))
ax.set_xticklabels(TreatmentUnique)
ax.set_title(DepthHeader[iD])
Here is the final figure and some of the treatments are missing in the box.
You can remove the NaNs from the data first, then plot the filtered data.
To do that, you can first find the NaNs using np.isnan(data), then perform the bitwise inversion of that Boolean array using the ~: bitwise inversion operator. Use that to index the data array, and you filter out the NaNs.
filtered_data = data[~np.isnan(data)]
In a complete example (adapted from here)
Tested in python 3.10, matplotlib 3.5.1, seaborn 0.11.2, numpy 1.21.5, pandas 1.4.2
For 1D data:
import matplotlib.pyplot as plt
import numpy as np
# fake up some data
np.random.seed(2022) # so the same data is created each time
spread = np.random.rand(50) * 100
center = np.ones(25) * 50
flier_high = np.random.rand(10) * 100 + 100
flier_low = np.random.rand(10) * -100
data = np.concatenate((spread, center, flier_high, flier_low), 0)
# Add a NaN
data[40] = np.NaN
# Filter data using np.isnan
filtered_data = data[~np.isnan(data)]
# basic plot
plt.boxplot(filtered_data)
plt.show()
For 2D data:
For 2D data, you can't simply use the mask above, since then each column of the data array would have a different length. Instead, we can create a list, with each item in the list being the filtered data for each column of the data array.
A list comprehension can do this in one line: [d[m] for d, m in zip(data.T, mask.T)]
import matplotlib.pyplot as plt
import numpy as np
# fake up some data
np.random.seed(2022) # so the same data is created each time
spread = np.random.rand(50) * 100
center = np.ones(25) * 50
flier_high = np.random.rand(10) * 100 + 100
flier_low = np.random.rand(10) * -100
data = np.concatenate((spread, center, flier_high, flier_low), 0)
data = np.column_stack((data, data * 2., data + 20.))
# Add a NaN
data[30, 0] = np.NaN
data[20, 1] = np.NaN
# Filter data using np.isnan
mask = ~np.isnan(data)
filtered_data = [d[m] for d, m in zip(data.T, mask.T)]
# basic plot
plt.boxplot(filtered_data)
plt.show()
I'll leave it as an exercise to the reader to extend this to 3 or more dimensions, but you get the idea.
Use seaborn, which is a high-level API for matplotlib
seaborn.boxplot filters NaN under the hood
import seaborn as sns
sns.boxplot(data=data)
1D
2D
NaN is also ignored if plotting from df.plot(kind='box') for pandas, which uses matplotlib as the default plotting backend.
import pandas as pd
df = pd.DataFrame(data)
df.plot(kind='box')
1D
2D

How to create a dashboard with widgets (selector) and interactivity (tap stream) between plots in HoloViews/Bokeh?

I'm trying to create a dashboard that consists of two plots (heatmap and line graph) and one widget (selector):
When you select an option from widget both plots get updated;
When you tap on the first plot the second plot is updated based on tap info.
Currently I'm trying to do it in HoloViews. It seems that this should be very easy to do but I somehow can't wrap my head around it.
The code below shows how it should look like. However, the selector is not connected in any way to the dashboard since I don't know how to do it.
import pandas as pd
import numpy as np
import panel as pn
import holoviews as hv
hv.extension('bokeh')
def create_test_df(k_features, n_tickers=5, m_windows=5):
start_date = pd.Timestamp('01-01-2020')
window_len = pd.Timedelta(days=1)
cols = ['window_dt', 'ticker'] + [f'feature_{i}' for i in range(k_features)]
data = {c: [] for c in cols}
for w in range(m_windows):
window_dt = start_date + w*window_len
for t in range(n_tickers):
ticker = f'ticker_{t}'
data['window_dt'].append(window_dt)
data['ticker'].append(ticker)
for f in range(k_features):
data[f'feature_{f}'].append(np.random.rand())
return pd.DataFrame(data)
k_features = 3
features = [f'feature_{i}' for i in range(k_features)]
df = create_test_df(k_features)
selector = pn.widgets.Select(options=features)
heatmap = hv.HeatMap(df[['window_dt', 'ticker', f'{selector.value}']])
posxy = hv.streams.Tap(source=heatmap, x='01-01-2020', y='ticker_4')
def tap_heatmap(x, y):
scalar = np.random.randn()
x = np.linspace(-2*np.pi, 2*np.pi, 100)
data = list(zip(x, np.sin(x*scalar)))
return hv.Curve(data)
pn.Row(heatmap, hv.DynamicMap(tap_heatmap, streams=[posxy]), selector)
Ok I finally got it. It turned out to be simple (just as expected) but not quite intuitive. Basically, different approach for implementing selector (dropdown menu) should be used. Working code for such example is below:
import pandas as pd
import numpy as np
import panel as pn
import holoviews as hv
hv.extension('bokeh')
def create_test_df(k_features, n_tickers=5, m_windows=5):
start_date = pd.Timestamp('01-01-2020')
window_len = pd.Timedelta(days=1)
cols = ['window_dt', 'ticker'] + [f'feature_{i}' for i in range(k_features)]
data = {c: [] for c in cols}
for w in range(m_windows):
window_dt = start_date + w*window_len
for t in range(n_tickers):
ticker = f'ticker_{t}'
data['window_dt'].append(window_dt)
data['ticker'].append(ticker)
for f in range(k_features):
data[f'feature_{f}'].append(np.random.rand())
return pd.DataFrame(data)
def load_heatmap(feature):
return hv.HeatMap(df[['window_dt', 'ticker', f'{feature}']])
def tap_heatmap(x, y):
scalar = np.random.randn()
x = np.linspace(-2*np.pi, 2*np.pi, 100)
data = list(zip(x, np.sin(x*scalar)))
return hv.Curve(data)
k_features = 3
features = [f'feature_{i}' for i in range(k_features)]
df = create_test_df(k_features)
heatmap_dmap = hv.DynamicMap(load_heatmap, kdims='Feature').redim.values(Feature=features)
posxy = hv.streams.Tap(source=heatmap_dmap, x='01-01-2020', y='ticker_0')
sidegraph_dmap = hv.DynamicMap(tap_heatmap, streams=[posxy])
pn.Row(heatmap_dmap, sidegraph_dmap)

Visualizing SOM and adding labels to the map

I have been trying to apply SOM on my dataframe, my dataframe has 25 columns where each column represents a house, each house has a values for power consumption for two years, and I want to cluster the data with number of clusters = 3.
I have done the following:
import sys
sys.path.insert(0, '../')
%load_ext autoreload
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pylab import plot,axis,show,pcolor,colorbar,bone
from matplotlib.patches import Patch
%matplotlib inline
from minisom import MiniSom
from sklearn.preprocessing import minmax_scale, scale
%autoreload 2
data1 = pd.read_excel(r"C:\Users\user\Desktop\Thesis\Tarek\Consumption.xlsx")
data1['h1'] = data1['h1'].str.split(';').str[2].astype('float')
data1['h2'] = data1['h2'].str.split(';').str[2].astype('float')
data1['h3'] = data1['h3'].str.split(';').str[2].astype('float')
data1['h4'] = data1['h4'].str.split(';').str[2].astype('float')
data1['h5'] = data1['h5'].str.split(';').str[2].astype('float')
data1['h6'] = data1['h6'].str.split(';').str[2].astype('float')
data1['h7'] = data1['h7'].str.split(';').str[2].astype('float')
data1['h8'] = data1['h8'].str.split(';').str[2].astype('float')
data1['h9'] = data1['h9'].str.split(';').str[2].astype('float')
data1['h10'] = data1['h10'].str.split(';').str[2].astype('float')
data1['h11'] = data1['h11'].str.split(';').str[2].astype('float')
data1['h12'] = data1['h12'].str.split(';').str[2].astype('float')
data1['h13'] = data1['h13'].str.split(';').str[2].astype('float')
data1['h14'] = data1['h14'].str.split(';').str[2].astype('float')
data1['h15'] = data1['h15'].str.split(';').str[2].astype('float')
data1['h16'] = data1['h16'].str.split(';').str[2].astype('float')
data1['h17'] = data1['h17'].str.split(';').str[2].astype('float')
data1['h18'] = data1['h18'].str.split(';').str[2].astype('float')
data1['h19'] = data1['h19'].str.split(';').str[2].astype('float')
data1['h20'] = data1['h20'].str.split(';').str[2].astype('float')
data1['h21'] = data1['h21'].str.split(';').str[2].astype('float')
data1['h22'] = data1['h22'].str.split(';').str[2].astype('float')
data1['h23'] = data1['h23'].str.split(';').str[2].astype('float')
data1['h24'] = data1['h24'].str.split(';').str[2].astype('float')
data1['h25'] = data1['h25'].str.split(';').str[2].astype('float')
data1.fillna(0,inplace=True)
data1=data1.round(decimals=2)
X=data1.values
som =MiniSom(x=3,y=3,input_len=25,sigma=1.0, learning_rate=0.5)
som.random_weights_init(X)
som.train_batch(data=X ,num_iteration=1000,verbose=True)
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['o' , 's','v']
colors = ['r', 'g','y']
for i, x in enumerate(X):
w = som.winner(x)
plot(w[0] + 0.5,
w[1] + 0.5,
markers[i],
markeredgecolor = colors[i],
markerfacecolor = 'None',
markersize = 10,
markeredgewidth = 2)
show()
when I am running the code, I am getting this error:
IndexError: list index out of range
please any tips to add the markers and colors in the right way without having any problems, and I would be glad if any one can help, I am a bit new to Python and tried to find a solution but I couldn`t find any.
The problem seems to be that the length of your X=data1.values is around 25 but the length of your markers and colors is only 3. So in the following for loop, when i is 3, you are trying to access markers[3] and colors[3] which throws an IndexError because both markers and colors goes up to index 2 (indexing starts from 0 in python)
for i, x in enumerate(X):
One solution is to define custom list of 25 markers and 25 colors. While you might want to define your own markers, you can leave the colors out and let the code choose automatic colors for the markeredgecolor

Obtaining values used in boxplot, using python and matplotlib

I can draw a boxplot from data:
import numpy as np
import matplotlib.pyplot as plt
data = np.random.rand(100)
plt.boxplot(data)
Then, the box will range from the 25th-percentile to 75th-percentile, and the whisker will range from the smallest value to the largest value between (25th-percentile - 1.5*IQR, 75th-percentile + 1.5*IQR), where the IQR denotes the inter-quartile range. (Of course, the value 1.5 is customizable).
Now I want to know the values used in the boxplot, i.e. the median, upper and lower quartile, the upper whisker end point and the lower whisker end point. While the former three are easy to obtain by using np.median() and np.percentile(), the end point of the whiskers will require some verbose coding:
median = np.median(data)
upper_quartile = np.percentile(data, 75)
lower_quartile = np.percentile(data, 25)
iqr = upper_quartile - lower_quartile
upper_whisker = data[data<=upper_quartile+1.5*iqr].max()
lower_whisker = data[data>=lower_quartile-1.5*iqr].min()
I was wondering, while this is acceptable, would there be a neater way to do this? It seems that the values should be ready to pull-out from the boxplot, as it's already drawn.
Why do you want to do so? what you are doing is already pretty direct.
Yeah, if you want to fetch them for the plot, when the plot is already made, simply use the get_ydata() method.
B = plt.boxplot(data)
[item.get_ydata() for item in B['whiskers']]
It returns an array of the shape (2,) for each whiskers, the second element is the value we want:
[item.get_ydata()[1] for item in B['whiskers']]
I've had this recently and have written a function to extract the boxplot values from the boxplot as a pandas dataframe.
The function is:
def get_box_plot_data(labels, bp):
rows_list = []
for i in range(len(labels)):
dict1 = {}
dict1['label'] = labels[i]
dict1['lower_whisker'] = bp['whiskers'][i*2].get_ydata()[1]
dict1['lower_quartile'] = bp['boxes'][i].get_ydata()[1]
dict1['median'] = bp['medians'][i].get_ydata()[1]
dict1['upper_quartile'] = bp['boxes'][i].get_ydata()[2]
dict1['upper_whisker'] = bp['whiskers'][(i*2)+1].get_ydata()[1]
rows_list.append(dict1)
return pd.DataFrame(rows_list)
And is called by passing an array of labels (the ones that you would pass to the boxplot plotting function) and the data returned by the boxplot function itself.
For example:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
def get_box_plot_data(labels, bp):
rows_list = []
for i in range(len(labels)):
dict1 = {}
dict1['label'] = labels[i]
dict1['lower_whisker'] = bp['whiskers'][i*2].get_ydata()[1]
dict1['lower_quartile'] = bp['boxes'][i].get_ydata()[1]
dict1['median'] = bp['medians'][i].get_ydata()[1]
dict1['upper_quartile'] = bp['boxes'][i].get_ydata()[2]
dict1['upper_whisker'] = bp['whiskers'][(i*2)+1].get_ydata()[1]
rows_list.append(dict1)
return pd.DataFrame(rows_list)
data1 = np.random.normal(loc = 0, scale = 1, size = 1000)
data2 = np.random.normal(loc = 5, scale = 1, size = 1000)
data3 = np.random.normal(loc = 10, scale = 1, size = 1000)
labels = ['data1', 'data2', 'data3']
bp = plt.boxplot([data1, data2, data3], labels=labels)
print(get_box_plot_data(labels, bp))
plt.show()
Outputs the following from get_box_plot_data:
label lower_whisker lower_quartile median upper_quartile upper_whisker
0 data1 -2.491652 -0.587869 0.047543 0.696750 2.559301
1 data2 2.351567 4.310068 4.984103 5.665910 7.489808
2 data3 7.227794 9.278931 9.947674 10.661581 12.733275
And produces the following plot:
upper_whisker = data[data<=upper_quartile+1.5*iqr].max()
lower_whisker = data[data>=lower_quartile-1.5*iqr].min()
equal to
upper_whisker = data.max()
lower_whisker = data.min()
if you just want to get the real data points in the dataset. But statistically speaking, the whisker values are upper_quantile+1.5IQR and lower_quantile-1.5IQR

Categories