I have the code below. I'm trying to build an interactive DBSCAN clustering plot. When I run it, I get three plots, but they are not interactive. Where is the problem in the code, and how can I fix it?
df_mv = pd.read_csv(r"https://raw.githubusercontent.com/HanaBachi/MachineLearning/main/multishape.csv") # load from Prof. Pyrcz's GitHub
df_mv.head()
text_trap = io.StringIO()
sys.stdout = text_trap
l = widgets.Text(value=' DBSCAN, Hana Bachi, The University of Texas at Austin',
layout=Layout(width='950px', height='30px'))
eps = widgets.FloatSlider(min=0, max = 2, value=0.1, step = 0.1, description = 'eps',orientation='horizontal', style = {'description_width': 'initial'}, continuous_update=False)
minPts = widgets.FloatSlider(min=0, max = 5, value=1, step = 1, description = 'minPts %',orientation='horizontal',style = {'description_width': 'initial'}, continuous_update=False)
color = ['blue','red','green','yellow','orange','white','magenta','cyan']
style = {'description_width': 'initial'}
ui = widgets.HBox([eps,minPts],)
ui2 = widgets.VBox([l,ui],)
# Plot callback: re-run DBSCAN with the current slider values and draw the clusters
def DBSCAN_plot(eps, minPts):
    """Cluster ``df_mv`` with DBSCAN and draw the labelled points.

    ``widgets.interactive_output`` calls this function every time a slider
    changes, so the clustering has to be driven by the arguments; with
    hard-coded parameters the plot never reacts to the sliders.

    Args:
        eps: Neighbourhood radius forwarded to ``DBSCAN(eps=...)``.
        minPts: Minimum number of samples for a core point. It arrives as a
            float from the ``FloatSlider`` and is converted to ``int`` for
            ``DBSCAN(min_samples=...)``.
    """
    # NOTE(review): scikit-learn rejects eps <= 0 and min_samples < 1, yet both
    # sliders start at 0 - consider raising the slider minimums.
    db = DBSCAN(eps=eps, min_samples=int(minPts)).fit(df_mv)
    labels = db.labels_

    # assumes the first two columns of df_mv are the plot coordinates - TODO confirm
    x = df_mv.values[:, 0]
    y = df_mv.values[:, 1]
    noise = labels == -1  # DBSCAN marks noise points with the label -1

    plt.figure(figsize=(14, 7))
    clusters = plt.scatter(x, y, c=labels, cmap='tab10', s=50)
    plt.scatter(x[noise], y[noise], c='k', marker='x', s=100)
    plt.title('DBSCAN interactive plot', fontsize=20)
    # Tie the colorbar to the cluster scatter; the noise scatter carries no label data.
    plt.colorbar(clusters)
    # Show exactly once: adjusting/showing again after the figure is closed
    # only emits an extra, empty figure.
    plt.show()
# create dashboard/formatting
# The output widget must exist before any layout box references it; building
# the boxes first raises NameError on interactive_plot1.
interactive_plot1 = widgets.interactive_output(DBSCAN_plot, {'eps': eps, 'minPts': minPts})
interactive_plot1.clear_output(wait=True)  # replace the previous plot instead of stacking new ones

# NOTE(review): uia/uib wrap the same output widget, so displaying both
# uia2 and uib2 would render the plot twice - display a single container.
uia = widgets.HBox([interactive_plot1],)
uia2 = widgets.VBox([eps, uia],)
uib = widgets.HBox([interactive_plot1],)
uib2 = widgets.VBox([minPts, uib],)
How can I make this plot interactive as a function of eps and minPts?
Related
Hello everyone, how can I make a legend for three different bar colors in a subplot?
Data frame:
This is my code:
fig,axs = plt.subplots(2,3, figsize=(30,20))
axs[0,1].bar(x = df_7_churn_tenure['Kategori'],height = df_7_churn_tenure['Jumlah Churn'],color = ['lightsalmon','maroon','darkorange'])
axs[0,1].legend(['Low Tenure = x<24','Medium Tenure = 24<= x <=48','High Tenure = x >=48'],loc='best',fontsize=12)
plt.show()
And the resulting bar plot legend shows only one label, like this:
Is there any way to show all of my legend entries?
Try this:
fig, axs = plt.subplots(2, 3, figsize=(30, 20))
ax = axs[0, 1]
# Keep the BarContainer: its rectangles are the legend handles, so every
# legend entry shows the colour of the bar it describes.
bars = ax.bar(x=df_7_churn_tenure['Kategori'],
              height=df_7_churn_tenure['Jumlah Churn'],
              color=['lightsalmon', 'maroon', 'darkorange'])
# One label per bar, in the same order as the bars
# (assumes the frame holds exactly three categories - confirm).
labs = ['Low Tenure = x<24', 'Medium Tenure = 24<= x <=48', 'High Tenure = x >=48']
ax.legend(list(bars), labs, loc=0)
plt.show()
I want to keep the labels when you hover, but hide the labels from just appearing over the Sankey as text.
Here is my code:
labels = df_mapping['Name'].to_numpy().tolist() + labels
count_dict = {}
source = []
target = []
value = df_subset['Stuff'].to_numpy().tolist()
index = 0
for x in unique_broad:
count_dict[x] = len(df_mapping.loc[df_mapping['Stuff'] == x])
for key in count_dict:
for i in range(count_dict[key]):
source.append(index)
index += 1
for key in count_dict:
for i in range(count_dict[key]):
target.append(index)
index += 1
number_of_colors = len(source)
color_link = ["#"+''.join([random.choice('0123456789ABCDEF') for j in range(6)])
for i in range(number_of_colors)]
link = dict(source=source, target=target, value=value, color=color_link)
node = dict(label=labels, pad=35, thickness=10)
data = go.Sankey(link=link, node=node)
fig = go.Figure(data)
fig.update_layout(
hovermode = 'x',
title="Sankey for Stuff",
font=dict(size=8, color='white'),
paper_bgcolor='#51504f'
)
return fig
You can make the labels invisible by setting the color of the labels to rgba(0,0,0,0). This ensures that the label will remain in the hovertemplate, but not show up on the nodes.
To do this you can pass textfont=dict(color="rgba(0,0,0,0)", size=1) to go.Sankey such as in the example you used from the Plotly sankey diagram documentation:
import plotly.graph_objects as go
import urllib.request, json
url = 'https://raw.githubusercontent.com/plotly/plotly.js/master/test/image/mocks/sankey_energy.json'
# Use the response as a context manager so the connection is closed once
# the JSON document has been parsed.
with urllib.request.urlopen(url) as response:
    data = json.load(response)

# The mock file holds a single trace; name its parts once instead of
# repeating the nested lookups.
trace = data['data'][0]
node_spec = trace['node']
link_spec = trace['link']

# override gray link colors with 'source' colors
opacity = 0.4
# change 'magenta' to its 'rgba' value to add opacity
node_spec['color'] = ['rgba(255,0,255, 0.8)' if color == "magenta" else color
                      for color in node_spec['color']]
link_spec['color'] = [node_spec['color'][src].replace("0.8", str(opacity))
                      for src in link_spec['source']]

fig = go.Figure(data=[go.Sankey(
    # Fully transparent label text: the labels stay available on hover but
    # are not drawn on top of the nodes.
    textfont=dict(color="rgba(0,0,0,0)", size=1),
    valueformat=".0f",
    valuesuffix="TWh",
    # Define nodes
    node=dict(
        pad=15,
        thickness=15,
        line=dict(color="black", width=0.5),
        label=node_spec['label'],
        color=node_spec['color']
    ),
    # Add links
    link=dict(
        source=link_spec['source'],
        target=link_spec['target'],
        value=link_spec['value'],
        label=link_spec['label'],
        color=link_spec['color']
    ))])

fig.update_layout(title_text="Energy forecast for 2050<br>Source: Department of Energy & Climate Change, Tom Counsell via <a href='https://bost.ocks.org/mike/sankey/'>Mike Bostock</a>",
                  font_size=10)
fig.show()
You get the following:
I'm trying to create subplots in one figure, including a confusion matrix (heatmap), a ROC curve and feature importances. I want to display an annotated heatmap, but there is an error about trace1 when I run the code. Can someone help me figure out what's wrong?
The error message is
ValueError:
Invalid element(s) received for the 'data' property of
Invalid elements include: ......
#threshold_plot - if True returns threshold plot for model
def coupon_use_prediction(algorithm, training_x, testing_x,
                          training_y, testing_y, cols, cf, threshold_plot):
    """Fit ``algorithm``, print its evaluation metrics and plot a summary figure.

    The figure has three panels: an annotated confusion matrix, the ROC
    curve, and a bar chart of the coefficients / feature importances.

    Args:
        algorithm: Estimator exposing ``fit``, ``predict`` and
            ``predict_proba``, plus ``coef_`` or ``feature_importances_``
            depending on ``cf``.
        training_x: Features passed to ``fit``.
        testing_x: Features used for the predictions.
        training_y: Targets passed to ``fit``.
        testing_y: True targets the predictions are scored against.
        cols: Feature names, one per coefficient (presumably in the same
            order as the training columns - confirm against the caller).
        cf: ``"coefficients"`` to read ``algorithm.coef_`` or ``"features"``
            to read ``algorithm.feature_importances_``.
        threshold_plot: If true, also draw a ``DiscriminationThreshold`` plot.

    Raises:
        ValueError: If ``cf`` is neither ``"coefficients"`` nor ``"features"``.
    """
    # Fail before the (possibly slow) fit instead of hitting an unbound
    # ``coefficients`` name later.
    if cf not in ("coefficients", "features"):
        raise ValueError('cf must be "coefficients" or "features", got %r' % (cf,))

    #model
    algorithm.fit(training_x, training_y)
    predictions = algorithm.predict(testing_x)
    probabilities = algorithm.predict_proba(testing_x)

    #coeffs
    if cf == "coefficients":
        coefficients = pd.DataFrame(algorithm.coef_.ravel())
    else:
        coefficients = pd.DataFrame(algorithm.feature_importances_)

    column_df = pd.DataFrame(cols)
    coef_sumry = pd.merge(coefficients, column_df, left_index=True,
                          right_index=True, how="left")
    coef_sumry.columns = ["coefficients", "features"]
    coef_sumry = coef_sumry.sort_values(by="coefficients", ascending=False)

    print(algorithm)
    print("\n Classification report : \n", classification_report(testing_y, predictions))
    print("Accuracy Score : ", accuracy_score(testing_y, predictions))

    #confusion matrix
    conf_matrix = confusion_matrix(testing_y, predictions)

    #roc_auc_score
    # Score the positive-class probabilities - the same values the ROC curve
    # below is built from - rather than the hard 0/1 predictions.
    positive_scores = probabilities[:, 1]
    model_roc_auc = roc_auc_score(testing_y, positive_scores)
    print("Area under curve : ", model_roc_auc, "\n")
    fpr, tpr, _ = roc_curve(testing_y, positive_scores)

    #plot confusion matrix(x = predicted, y = actual)
    # create_annotated_heatmap returns a whole Figure, which cannot be added
    # as a trace: take its heatmap trace here and copy its annotations into
    # the subplot layout further down.
    heatmap_fig = ff.create_annotated_heatmap(z=conf_matrix,
                                              x=["Not use", "Use"],
                                              y=["Not use", "Use"],
                                              showscale=False, name="matrix")
    trace1 = heatmap_fig['data'][0]

    #plot roc curve
    trace2 = go.Scatter(x=fpr, y=tpr,
                        name="Roc : " + str(model_roc_auc),
                        line=dict(color=('rgb(22, 96, 167)'), width=2))
    trace3 = go.Scatter(x=[0, 1], y=[0, 1],
                        line=dict(color=('rgb(205, 12, 24)'), width=2,
                                  dash='dot'))

    #plot coeffs
    trace4 = go.Bar(x=coef_sumry["features"], y=coef_sumry["coefficients"],
                    name="coefficients",
                    marker=dict(color=coef_sumry["coefficients"],
                                colorscale="Picnic",
                                line=dict(width=.6, color="black")))

    #subplots
    fig = tls.make_subplots(rows=2, cols=2, specs=[[{}, {}], [{'colspan': 2}, None]],
                            subplot_titles=('Confusion Matrix',
                                            'Receiver operating characteristic',
                                            'Feature Importances'))

    fig.append_trace(trace1, 1, 1)
    fig.append_trace(trace2, 1, 2)
    fig.append_trace(trace3, 1, 2)
    fig.append_trace(trace4, 2, 1)

    # The subplot titles are annotations too, so extend the list rather than
    # overwrite it. NOTE(review): the heatmap annotations reference the first
    # x/y axes, which is where subplot (1, 1) is placed - confirm for the
    # installed plotly version.
    fig['layout']['annotations'] = (list(fig['layout']['annotations'])
                                    + list(heatmap_fig['layout']['annotations']))

    fig['layout'].update(showlegend=False, title="Model performance",
                         autosize=False, height=900, width=800,
                         plot_bgcolor='rgba(240,240,240, 0.95)',
                         paper_bgcolor='rgba(240,240,240, 0.95)',
                         margin=dict(b=195))
    fig["layout"]["xaxis2"].update(dict(title="false positive rate"))
    fig["layout"]["yaxis2"].update(dict(title="true positive rate"))
    fig["layout"]["xaxis3"].update(dict(showgrid=True, tickfont=dict(size=10),
                                        tickangle=90))
    py.iplot(fig)

    if threshold_plot:
        visualizer = DiscriminationThreshold(algorithm)
        visualizer.fit(training_x, training_y)
        visualizer.poof()
logit = LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
verbose=0, warm_start=False)
coupon_use_prediction(logit,train_X,test_X,train_Y,test_Y,
cols,"coefficients",threshold_plot = True)
trace1 is the output of ff.create_annotated_heatmap, which returns an object of type plotly.graph_objs._figure.Figure rather than a single trace. The error is raised by the line fig.append_trace(trace1,1,1). You can't add a whole figure as a trace, only the data part of it, something like:
fig.add_trace(go.Heatmap(trace1.data[0]))
The annotations are stored in the figure's layout, so you will have to carry those over separately as well.
I'm trying to animate a scatterplot but get the following error. I had it working previously, but it's now returning this error repeatedly.
ValueError: 'vertices' must be a 2D list or array with shape Nx2
I'll attach the animation code below. I had it working before so know it works.
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
import matplotlib.transforms as transforms
XA = np.random.randint(80, size=(1000, 15))
YA = np.random.randint(80, size=(1000, 15))
XB = np.random.randint(80, size=(1000, 15))
YB = np.random.randint(80, size=(1000, 15))
XC = np.random.randint(80, size=(1000, 1))
YC = np.random.randint(80, size=(1000, 1))
fig, ax = plt.subplots(figsize = (10,6))
ax.axis('equal')
'''' Scatter Plot '''
scatter_A = ax.scatter(XA[0], YA[0], c=['blue'], alpha = 0.7, s = 20, edgecolor = 'black', zorder = 2)
scatter_B = ax.scatter(XB[0], YB[0], c=['white'], alpha = 0.7, s = 20, edgecolor = 'black', zorder = 2)
offset = lambda p: transforms.ScaledTranslation(p/82.,0, plt.gcf().dpi_scale_trans)
trans = plt.gca().transData
scatter_C = ax.scatter(XC[0], YC[0], c=['red'], marker = 'o', alpha = 0.7, s = 10, edgecolor = 'black', zorder = 2,transform=trans+offset(+2))
'''Animate Function '''
def animate(i) :
scatter_A.set_offsets([[[[[[[[[[[[[[[XA[0+i][0], YA[0+i][0]], [XA[0+i][1], YA[0+i][1]], [XA[0+i][2], YA[0+i][2]], [XA[0+i][3], YA[0+i][3]], [XA[0+i][4], YA[0+i][4]],[XA[0+i][5], YA[0+i][5]], [XA[0+i][6], YA[0+i][6]], [XA[0+i][7], YA[0+i][7]], [XA[0+i][8], YA[0+i][8]], [XA[0+i][9], YA[0+i][9]], [XA[0+i][10], YA[0+i][10]], [XA[0+i][11], YA[0+i][11]], [XA[0+i][12], YA[0+i][12]], [XA[0+i][13], YA[0+i][13]], [XA[0+i][14], YA[0+i][14]]]]]]]]]]]]]]]])
scatter_B.set_offsets([[[[[[[[[[[[[[[XB[0+i][0], YB[0+i][0]], [XB[0+i][1], YB[0+i][1]], [XB[0+i][2], YB[0+i][2]], [XB[0+i][3], YB[0+i][3]], [XB[0+i][4], YB[0+i][4]],[XB[0+i][5], YB[0+i][5]], [XB[0+i][6], YB[0+i][6]], [XB[0+i][7], YB[0+i][7]], [XB[0+i][8], YB[0+i][8]], [XB[0+i][9], YB[0+i][9]], [XB[0+i][10], YB[0+i][10]], [XB[0+i][11], YB[0+i][11]], [XB[0+i][12], YB[0+i][12]], [XB[0+i][13], YB[0+i][13]], [XB[0+i][14], YB[0+i][14]]]]]]]]]]]]]]]])
scatter_C.set_offsets([[XC[0+i][0], YC[0+i][0]]])
ani = animation.FuncAnimation(fig, animate, np.arange(0,1000),
interval = 100, blit = False)
Writer = animation.writers['ffmpeg']
writer = Writer(fps = 10, bitrate = 8000)
ax.autoscale()
plt.draw()
I am running Spyder 3.1.2 through Anaconda 1.6.4, Python 3.5, IPython 5.1.0
The error message should give you all the hints you need. Removing the redundant brackets in your set_offsets() calls does the trick:
def animate(i):
    """Move every scatter collection to its positions for frame ``i``.

    Args:
        i: Frame number, used as the row index into the coordinate arrays.
    """
    # set_offsets expects an (N, 2) array. column_stack pairs row i of the
    # x array with row i of the y array without any extra nesting, and works
    # for any number of points per frame.
    scatter_A.set_offsets(np.column_stack((XA[i], YA[i])))
    scatter_B.set_offsets(np.column_stack((XB[i], YB[i])))
    scatter_C.set_offsets(np.column_stack((XC[i], YC[i])))
I'm surprised that your code worked before. Note that I'm not running exactly the same setup, I'm on macosx 10.13.5 with Python 3.6 installed through macports.
I am getting a very strange error using basemap. No error appears, yet my 3rd plot has no data plotted when data does indeed exist. Below is my code. When run, you will see that both modis and seawifs data is plotted, but viirs is not. I can't figure out why.
import numpy as np
import urllib
import urllib2
import netCDF4
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from datetime import datetime, date, time, timedelta
import json
import math
def indexsearch(datebroken, year, month, day):
    """Return the index of the first row matching ``year``/``month``/``day``.

    Args:
        datebroken: 2-D array indexed as ``datebroken[row, col]`` whose
            columns 0, 1 and 2 are compared with ``year``, ``month`` and
            ``day`` respectively.
        year: Value to match against column 0.
        month: Value to match against column 1.
        day: Value to match against column 2.

    Returns:
        The index of the first matching row, or ``None`` when no row matches.
        Callers should check for ``None`` before indexing with the result:
        NumPy treats a ``None`` index as "insert a new axis", not as an error.
    """
    wanted = (year, month, day)
    for i in range(len(datebroken)):
        if (datebroken[i, 0], datebroken[i, 1], datebroken[i, 2]) == wanted:
            return i
    return None
url = 'http://coastwatch.pfeg.noaa.gov/erddap/griddap/erdMWchlamday.nc?chlorophyll' +\
'[(2002-07-16T12:00:00Z):1:(2015-04-16T00:00:00Z)][(0.0):1:(0.0)][(36):1:(39)][(235):1:(240)]'
file = 'erdMWchlamday.nc'
urllib.urlretrieve(url, file)
ncfilemod = netCDF4.Dataset(file)
ncv1 = ncfilemod.variables
print ncv1.keys()
time1=ncv1['time'][:]
inceptiondate = datetime(1970, 1, 1, 0, 0, 0)
timenew1=[]
for i in time1[:]:
newdate = inceptiondate + timedelta(seconds=i)
timenew1.append(newdate.strftime('%Y%m%d%H'))
datebroken1 = np.zeros((len(timenew1),4),dtype=int)
for i in range(0,len(timenew1)):
datebroken1[i,0] = int(timenew1[i][0:4])
datebroken1[i,1] = int(timenew1[i][4:6])
datebroken1[i,2] = int(timenew1[i][6:8])
datebroken1[i,3] = int(timenew1[i][8:10])
lon1= ncv1['longitude'][:]
lat1 = ncv1['latitude'][:]
lons1, lats1 = np.meshgrid(lon1,lat1)
chla1 = ncv1['chlorophyll'][:,0,:,:]
url = 'http://coastwatch.pfeg.noaa.gov/erddap/griddap/erdSWchlamday.nc?chlorophyll' +\
'[(1997-09-16):1:(2010-12-16T12:00:00Z)][(0.0):1:(0.0)][(36):1:(39)][(235):1:(240)]'
file = 'erdSWchlamday.nc'
urllib.urlretrieve(url, file)
#Ncfile 2
ncfilewif = netCDF4.Dataset(file)
ncv2 = ncfilewif.variables
print ncv2.keys()
time2=ncv2['time'][:]
inceptiondate = datetime(1970, 1, 1, 0, 0, 0)
timenew2=[]
for i in time2[:]:
newdate = inceptiondate + timedelta(seconds=i)
timenew2.append(newdate.strftime('%Y%m%d%H'))
datebroken2 = np.zeros((len(timenew2),4),dtype=int)
for i in range(0,len(timenew2)):
datebroken2[i,0] = int(timenew2[i][0:4])
datebroken2[i,1] = int(timenew2[i][4:6])
datebroken2[i,2] = int(timenew2[i][6:8])
datebroken2[i,3] = int(timenew2[i][8:10])
lon2= ncv2['longitude'][:]
lat2 = ncv2['latitude'][:]
lons2, lats2 = np.meshgrid(lon2,lat2)
chla2 = ncv2['chlorophyll'][:,0,:,:]
url = 'http://coastwatch.pfeg.noaa.gov/erddap/griddap/erdVH2chlamday.nc?chla' +\
'[(2012-01-15):1:(2015-05-15T00:00:00Z)][(39):1:(36)][(-125):1:(-120)]'
file = 'erdVH2chlamday.nc'
urllib.urlretrieve(url, file)
ncfileviir = netCDF4.Dataset(file)
ncv3 = ncfileviir.variables
print ncv3.keys()
time3=ncv3['time'][:]
inceptiondate = datetime(1970, 1, 1, 0, 0, 0)
timenew3=[]
for i in time3[:]:
newdate = inceptiondate + timedelta(seconds=i)
timenew3.append(newdate.strftime('%Y%m%d%H'))
datebroken3 = np.zeros((len(timenew3),4),dtype=int)
for i in range(0,len(timenew3)):
datebroken3[i,0] = int(timenew3[i][0:4])
datebroken3[i,1] = int(timenew3[i][4:6])
datebroken3[i,2] = int(timenew3[i][6:8])
datebroken3[i,3] = int(timenew3[i][8:10])
lon3= ncv3['longitude'][:]
lat3 = ncv3['latitude'][:]
lons3, lats3 = np.meshgrid(lon3,lat3)
chla3 = ncv3['chla'][:,:,:]
i1=indexsearch(datebroken1,2012,6,16)
print i1
i2=indexsearch(datebroken2,2010,6,16)
print i2
i3=indexsearch(datebroken3,2012,6,15)
print i3
chla1plot = chla1[i1,:,:]
chla2plot = chla2[i2,:,:]
chla3plot = chla3[i3,:,:]
ncfileviir.close()
ncfilemod.close()
ncfilewif.close()
Important code is below here. All code above is just pulling the data into python to plot.
minlat = 36
maxlat = 39
minlon = 235
maxlon = 240
# Create map
fig = plt.figure()
#####################################################################################################################
#plot figure 1
ax1 = fig.add_subplot(221)
m = Basemap(projection='merc', llcrnrlat=minlat,urcrnrlat=maxlat,llcrnrlon=minlon, urcrnrlon=maxlon,resolution='h')
cs1 = m.pcolormesh(lons1,lats1,chla1plot,cmap=plt.cm.jet,latlon=True)
m.drawcoastlines()
m.drawmapboundary()
m.fillcontinents()
m.drawcountries()
m.drawstates()
m.drawrivers()
#Sets up parallels and meridians.
parallels = np.arange(36.,39,1.)
# labels = [left,right,top,bottom]
m.drawparallels(parallels,labels=[False,True,True,False])
meridians = np.arange(235.,240.,1.)
m.drawmeridians(meridians,labels=[True,False,False,True])
ax1.set_title('Modis')
#####################################################################################################################
#plot figure 2
ax2 = fig.add_subplot(222)
cs2 = m.pcolormesh(lons2,lats2,chla2plot,cmap=plt.cm.jet,latlon=True)
m.drawcoastlines()
m.drawmapboundary()
m.fillcontinents()
m.drawcountries()
m.drawstates()
m.drawrivers()
#Sets up parallels and meridians.
parallels = np.arange(36.,39,1.)
# labels = [left,right,top,bottom]
m.drawparallels(parallels,labels=[False,True,True,False])
meridians = np.arange(235.,240.,1.)
m.drawmeridians(meridians,labels=[True,False,False,True])
ax2.set_title('SeaWIFS')
#####################################################################################################################
#plot figure 3
ax3 = fig.add_subplot(223)
cs3 = m.pcolormesh(lons3,np.flipud(lats3),np.flipud(chla3plot),cmap=plt.cm.jet,latlon=True)
m.drawcoastlines()
m.drawmapboundary()
m.fillcontinents()
m.drawcountries()
m.drawstates()
m.drawrivers()
#Sets up parallels and meridians.
parallels = np.arange(36.,39,1.)
# labels = [left,right,top,bottom]
m.drawparallels(parallels,labels=[False,True,True,False])
meridians = np.arange(235.,240.,1.)
m.drawmeridians(meridians,labels=[True,False,False,True])
ax3.set_title('VIIRS')
# Save figure (without 'white' borders)
#plt.savefig('SSTtest.png', bbox_inches='tight')
plt.show()
My results are shown here!
![results]: http://i.stack.imgur.com/dRjkU.png
The issue that I found was that I had
minlat = 36
maxlat = 39
minlon = 235
maxlon = 240
m = Basemap(projection='merc', llcrnrlat=minlat,urcrnrlat=maxlat,llcrnrlon=minlon, urcrnrlon=maxlon,resolution='h')
The data for the final plot spans longitudes -125 to -120. Basemap does not automatically reconcile these with the 235 to 240 range used to define the map, so the plot was placed in an area where I did not have data. I added a new m = Basemap(...) statement for the third graph and changed its meridians, using -125 to -120 as the longitude range, and the graph plotted just fine.