I'm trying to create subplots on one image, including confusion matrix(heatmap), ROC and feature importance. I want to display an annotated heatmap. But there is an error about trace1 when I ran the code. Can someone help me to figure out what's wrong?
The error massage is
ValueError:
Invalid element(s) received for the 'data' property of
Invalid elements include: ......
#threshold_plot - if True returns threshold plot for model
def coupon_use_prediction(algorithm,training_x,testing_x,
training_y,testing_y,cols,cf,threshold_plot) :
#model
algorithm.fit(training_x,training_y)
predictions = algorithm.predict(testing_x)
probabilities = algorithm.predict_proba(testing_x)
#coeffs
if cf == "coefficients" :
coefficients = pd.DataFrame(algorithm.coef_.ravel())
elif cf == "features" :
coefficients = pd.DataFrame(algorithm.feature_importances_)
column_df = pd.DataFrame(cols)
coef_sumry = (pd.merge(coefficients,column_df,left_index= True,
right_index= True, how = "left"))
coef_sumry.columns = ["coefficients","features"]
coef_sumry = coef_sumry.sort_values(by = "coefficients",ascending = False)
print (algorithm)
print ("\n Classification report : \n",classification_report(testing_y,predictions))
print ("Accuracy Score : ",accuracy_score(testing_y,predictions))
#confusion matrix
conf_matrix = confusion_matrix(testing_y,predictions)
#roc_auc_score
model_roc_auc = roc_auc_score(testing_y,predictions)
print ("Area under curve : ",model_roc_auc,"\n")
fpr,tpr,thresholds = roc_curve(testing_y,probabilities[:,1])
#plot confusion matrix(x = predicted, y = actual)
trace1 = ff.create_annotated_heatmap(z = conf_matrix,
x = ["Not use","Use"],
y = ["Not use","Use"],
showscale = False,name = "matrix")
#plot roc curve
trace2 = go.Scatter(x = fpr,y = tpr,
name = "Roc : " + str(model_roc_auc),
line = dict(color = ('rgb(22, 96, 167)'),width = 2))
trace3 = go.Scatter(x = [0,1],y=[0,1],
line = dict(color = ('rgb(205, 12, 24)'),width = 2,
dash = 'dot'))
#plot coeffs
trace4 = go.Bar(x = coef_sumry["features"],y = coef_sumry["coefficients"],
name = "coefficients",
marker = dict(color = coef_sumry["coefficients"],
colorscale = "Picnic",
line = dict(width = .6,color = "black")))
#subplots
fig = tls.make_subplots(rows=2, cols=2, specs=[[{}, {}], [{'colspan': 2}, None]],
subplot_titles=('Confusion Matrix',
'Receiver operating characteristic',
'Feature Importances'))
fig.append_trace(trace1,1,1)
fig.append_trace(trace2,1,2)
fig.append_trace(trace3,1,2)
fig.append_trace(trace4,2,1)
fig['layout'].update(showlegend=False, title="Model performance" ,
autosize = False,height = 900,width = 800,
plot_bgcolor = 'rgba(240,240,240, 0.95)',
paper_bgcolor = 'rgba(240,240,240, 0.95)',
margin = dict(b = 195))
fig["layout"]["xaxis2"].update(dict(title = "false positive rate"))
fig["layout"]["yaxis2"].update(dict(title = "true positive rate"))
fig["layout"]["xaxis3"].update(dict(showgrid = True,tickfont = dict(size = 10),
tickangle = 90))
py.iplot(fig)
if threshold_plot == True :
visualizer = DiscriminationThreshold(algorithm)
visualizer.fit(training_x,training_y)
visualizer.poof()
logit = LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
verbose=0, warm_start=False)
coupon_use_prediction(logit,train_X,test_X,train_Y,test_Y,
cols,"coefficients",threshold_plot = True)
trace1 is the output of ff.create_annotated_heatmap, which returns an object of type plotly.graph_objs._figure.Figure. Here it is the line of code where it happens. You can't add a whole figure as a trace, only the data part of it, something like:
fig.add_trace(go.Heatmap(trace1.data[0]))
The annotations are stored in the layout so you will have to modify those out too somehow.
Related
I have the code below. I'm trying to build an interactive DBSCAN clustering plot. When I run it, I get three plots but not interactive. Where is the problem in the code and how to fix it.
df_mv = pd.read_csv(r"https://raw.githubusercontent.com/HanaBachi/MachineLearning/main/multishape.csv") # load from Prof. Pyrcz's GitHub
df_mv.head()
text_trap = io.StringIO()
sys.stdout = text_trap
l = widgets.Text(value=' DBSCAN, Hana Bachi, The University of Texas at Austin',
layout=Layout(width='950px', height='30px'))
eps = widgets.FloatSlider(min=0, max = 2, value=0.1, step = 0.1, description = 'eps',orientation='horizontal', style = {'description_width': 'initial'}, continuous_update=False)
minPts = widgets.FloatSlider(min=0, max = 5, value=1, step = 1, description = 'minPts %',orientation='horizontal',style = {'description_width': 'initial'}, continuous_update=False)
color = ['blue','red','green','yellow','orange','white','magenta','cyan']
style = {'description_width': 'initial'}
ui = widgets.HBox([eps,minPts],)
ui2 = widgets.VBox([l,ui],)
# create activation function plots
def DBSCAN_plot(eps, minPts):
db = DBSCAN(eps=0.155, min_samples=5).fit(df_mv)
labels = db.labels_
# n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
# x = df_mv.values[:,0]
# y = df_mv.values[:,1]
# cmap = plt.cm.rainbow
#norm = mc.BoundaryNorm(labels, cmap.N)
plt.figure(figsize=(14,7))
plt.scatter(x, y, c=labels, cmap='tab10', s=50)
plt.scatter(x[np.where(labels==-1)], y[np.where(labels==-1)], c='k', marker='x', s=100)
plt.title('DBSCAN interactive plot', fontsize = 20)
plt.colorbar()
plt.show()
plt.subplots_adjust(left=0.0, bottom=0.0, right=2.0, top=1.0, wspace=0.2, hspace=0.3)
plt.show()
# create dashboard/formatting
uia = widgets.HBox([interactive_plot1],)
uia2 = widgets.VBox([eps, uia],)
uib = widgets.HBox([interactive_plot1],)
uib2 = widgets.VBox([minPts, uib],)
interactive_plot1 = widgets.interactive_output(DBSCAN_plot, {'eps': eps,'minPts':minPts})
interactive_plot1.clear_output(wait = True) #
How can I make this plot interactive in function of eps and minPts
I want to plot a Pandas series with Matplotlib, but I get the error message:
index type not supported.
Here's my code:
inputX = values_dataset[:,:-horizon]
inputX = inputX.reshape(inputX.shape[0], time_steps, dim)
yhat = model.predict(inputX)
yhat = scalery.inverse_transform(yhat)
series_actual = series.loc[Syhat.index[0]:Syhat.index[-1]]
Syhat_native = {}
if get_transform_status(target_var) == 1:
for i in range(horizon):
Syhat_horizoni = pd.Series(yhat[:,i], index= train_val_test_time).iloc[i:]
series_shift = series_actual.shift(i).dropna()
Syhat_native[i] = inv_diff(series_shift, Syhat_horizoni, 12)
else:
for i in range(horizon):
# The code goes here
Syhat_horizoni = pd.Series(yhat[:,i], index= train_val_test_time)
Syhat_native[i] = Syhat_horizoni.shift(i)
Here's the code when I want have a plot:
#now we plot the
if horizon>1:
fig, ax = plt.subplots(1, horizon, figsize=[horizon*10,5])
for i in range(horizon):
Syactual_horizoni= series_actual.shift(i)
Syactual_horizoni.plot(ax=ax[i], color='b',label = 'actual')
# This line below where the error occured
Syhat_native[i].plot(ax=ax[i], color='r', label = 'pred')
ax[i].set_title('horizon-'+str(i))
ax[i].legend()
ax[i].axvline(train_time[-1])
# fig.savefig("./visualization/LSTM-bidirectional-Covid-h14/LSTM-Bidirectional-Covid-Transformed-1.png")
fig.suptitle('TRAIN+TEST SET inversed',fontsize=20 )
Is there something I missed?
i already got the answer, the Syhat_native series index is not in datetime format. so i did this
inputX = values_dataset[:,:-horizon]
inputX = inputX.reshape(inputX.shape[0], time_steps, dim)
yhat = model.predict(inputX)
yhat = scalery.inverse_transform(yhat)
series_actual = series.loc[Syhat.index[0]:Syhat.index[-1]]
Syhat_native = {}
if get_transform_status(target_var) == 1:
for i in range(horizon):
Syhat_horizoni = pd.Series(yhat[:,i], index= train_val_test_time).iloc[i:]
series_shift = series_actual.shift(i).dropna()
Syhat_native[i] = inv_diff(series_shift, Syhat_horizoni, 12)
else:
for i in range(horizon):
# The code goes here
Syhat_horizoni = pd.Series(yhat[:,i], index= train_val_test_time)
Syhat_native[i] = Syhat_horizoni.shift(i)
Syhat_native[i].index = pd.to_datetime(Syhat_native[i].index)
Encountering error with plotly:
from plotly import graph_objs as go
import numpy as np
prediction = model.predict_generator(test_generator)
close_train = close_train.reshape((-1))
close_test = close_test.reshape((-1))
prediction = prediction.reshape((-1))
trace1 = go.Scatter(
x = date_train,
y = close_train,
mode = 'lines',
name = 'Data'
)
trace2 = go.Scatter(
x = date_test,
y = prediction,
mode = 'lines',
name = 'Prediction'
)
trace3 = go.Scatter(
x = date_test,
y = close_test,
mode='lines',
name = 'Ground Truth'
)
layout = go.Layout(
title = "Google Stock",
xaxis = {'title' : "Date"},
yaxis = {'title' : "Close"}
)
fig = go.Figure(data=[trace1], layout=layout)
fig.show()
Can someone help in resolving this? I tried upgrading plotly to the latest version but even that did not help.
With Bokeh, how do I get a handle to the Renderer (or GlyphRenderer) for an Annotation? Is this possible?
I would like to be able to toggle a Band (which is an Annotation) on and off with an interactive legend, so I need to be able to pass a list of Renderers to the LegendItem constructor.
This code:
maxline = fig.line(x='Date', y=stn_max, line_width=0.5, legend=stn_max, name="{}_line".format(stn_max), color=stn_color, alpha=0.75, source=source)
minline = fig.line(x='Date', y=stn_min, line_width=0.5, legend=stn_min, name="{}_line".format(stn_min), color=stn_color, alpha=0.75, source=source)
band = bkm.Band(base='Date', lower=stn_min, upper=stn_max, fill_alpha=0.50, line_width=0.5, fill_color=stn_color, source=source)
bkm.LegendItem(label=stn, renderers=[maxline, minline, band])
Produces this error
...
ValueError: expected an element of List(Instance(GlyphRenderer)), got seq with invalid items [Band(id='1091', ...)]
For LegendItem only instances of GlyphRenderer can be passed to its renderers attribute and Band is not based on GlyphRenderer so it gives error. In the code below the Band visibility is being toggled by means of a callback:
from bokeh.plotting import figure, show
from bokeh.models import Band, ColumnDataSource, Legend, LegendItem, CustomJS
import pandas as pd
import numpy as np
x = np.random.random(2500) * 140 - 20
y = np.random.normal(size = 2500) * 2 + 5
df = pd.DataFrame(data = dict(x = x, y = y)).sort_values(by = "x")
sem = lambda x: x.std() / np.sqrt(x.size)
df2 = df.y.rolling(window = 100).agg({"y_mean": np.mean, "y_std": np.std, "y_sem": sem})
df2 = df2.fillna(method = 'bfill')
df = pd.concat([df, df2], axis = 1)
df['lower'] = df.y_mean - df.y_std
df['upper'] = df.y_mean + df.y_std
source = ColumnDataSource(df.reset_index())
p = figure(tools = "pan,wheel_zoom,box_zoom,reset,save")
scatter = p.scatter(x = 'x', y = 'y', line_color = None, fill_alpha = 0.3, size = 5, source = source)
band = Band(base = 'x', lower = 'lower', upper = 'upper', source = source)
p.add_layout(band)
p.title.text = "Rolling Standard Deviation"
p.xaxis.axis_label = 'X'
p.yaxis.axis_label = 'Y'
callback = CustomJS(args = dict(band = band), code = """
if (band.visible == false)
band.visible = true;
else
band.visible = false; """)
legend = Legend(items = [ LegendItem(label = "x", renderers = [scatter, band.source.selection_policy]) ])
legend.click_policy = 'hide'
scatter.js_on_change('visible', callback)
p.add_layout(legend)
show(p)
Result:
As far as I'm aware, I've copied the documentation exactly. I basically used the documentation code and tweaked it for my purposes. But when I run this bit of code, no hover feature with text appears on my plot.
#Initialize df
aviation_data = pd.DataFrame(columns=["Latitude","Longitude","Fatalities"])
aviation_data["Latitude"] = [40.53666,60.94444]
aviation_data["Longitude"] = [-81.955833,-159.620834]
aviation_data["Fatalities"] = [True,False]
#Initialize colorscale
scl = [[0,"rgb(216,15,15)"],[1,"rgb(5,10,172)"]]
#Initialize text data
text_df = "Fatal: " + aviation_data["Fatalities"].apply(lambda x: str(np.bool(x))) + '<br>' + \
"Latitude: " + aviation_data["Latitude"].apply(lambda x: str(x)) + '<br>' + \
"Longitude" + aviation_data["Longitude"].apply(lambda x: str(x))
#Initialize data
data = [ dict(
type = 'scattergeo',
locationmode = 'USA-states',
lon = aviation_data["Longitude"],
lat = aviation_data["Latitude"],
text = text_df,
mode = 'markers',
marker = dict(
size = 5,
opacity = 0.5,
reversescale=True,
autocolorscale=False,
symbol = 'circle',
line = dict(
width=1,
color='rgba(102, 102, 102)'
),
colorscale = scl,
cmin = 0,
color = aviation_data["Fatalities"].astype(int),
cmax = 1
))]
#Initialize layout
layout = dict(
title ='Aviation Incidents for the Years 2014-2016<br>\
(red indicates fatal incident, blue indicates non-fatal)',
geo = dict(
scope='usa',
projection=dict(type='albers usa'),
showland = True,
landcolor = "rgb(206, 206, 206)",
countrywidth = 0.5,
subunitwidth = 0.5
),
)
#Plot
fig = dict(data=data,layout=layout)
iplot(fig,validate=False)
Anyone know why my hover text isn't showing up?
In the last line of code you need to call this:
plotly.offline.plot(fig, validate=False)
Instead of:
iplot(fig, validate=False)
Also do not forget import plotly:
import plotly
Hope this will help