I wish to learn how to use Plotly with Python for data analysis.
I have been using this website as reference.
My current code looks like this:
from plotly import tools
import plotly as py
import plotly.graph_objs as go
# Initialise Plotly's offline notebook mode before any figure is drawn.
py.offline.init_notebook_mode(connected=True)

# Linear regression model, fitted on the training sets.
regr = linear_model.LinearRegression()
regr.fit(X_train, y_train)

# Test observations drawn as black markers.
p1 = go.Scatter(x=X_test, y=y_test, mode='markers', marker={'color': 'black'})

# Model predictions for the same inputs drawn as a thick blue line.
p2 = go.Scatter(
    x=X_test,
    y=regr.predict(X_test),
    mode='lines',
    line={'color': 'blue', 'width': 3},
)

# Both axes get the same bare look: no tick marks, no tick labels, no zero line.
layout = go.Layout(
    xaxis={'ticks': '', 'showticklabels': False, 'zeroline': False},
    yaxis={'ticks': '', 'showticklabels': False, 'zeroline': False},
    showlegend=False,
    hovermode='closest',
)

fig = go.Figure(data=[p1, p2], layout=layout)
py.offline.iplot(fig)
However, my output looks like
If I were to follow the website by every line, I would get this:
from plotly import tools
import plotly as py
import plotly.graph_objs as go
# Set up Plotly's offline notebook mode before any iplot call.
py.offline.init_notebook_mode(connected=True)
# Create linear regression object
regr = linear_model.LinearRegression()
# Train the model using the training sets
regr.fit(X_train, y_train)
def data_to_plotly(x):
    """Return the first element of every row of ``x`` as a flat list.

    Args:
        x: A 2-D, row-indexable collection (list of rows, NumPy array) or a
            pandas DataFrame.

    Returns:
        list: ``row[0]`` for each row of ``x``, in order.
    """
    # On a DataFrame, x[i] is a column-label lookup rather than a row lookup,
    # which is what raised KeyError: 0. Walk the underlying row array instead.
    rows = x.values if hasattr(x, "iloc") else x
    return [row[0] for row in rows]
# Test observations as black markers; x is flattened through data_to_plotly.
p1 = go.Scatter(x=data_to_plotly(X_test),
y=y_test,
mode='markers',
marker=dict(color='black')
)
# Model predictions over the same x values as a blue line.
p2 = go.Scatter(x=data_to_plotly(X_test),
y=regr.predict(X_test),
mode='lines',
line=dict(color='blue', width=3)
)
# Hide ticks, tick labels and the zero line on both axes; no legend.
layout = go.Layout(xaxis=dict(ticks='', showticklabels=False,
zeroline=False),
yaxis=dict(ticks='', showticklabels=False,
zeroline=False),
showlegend=False, hovermode='closest')
# Combine both traces into one figure and render it inline.
fig = go.Figure(data=[p1, p2], layout=layout)
py.offline.iplot(fig)
But it would generate the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-84-5895927e91e2> in <module>
21 return k
22
---> 23 p1 = go.Scatter(x=data_to_plotly(X_test),
24 y=y_test,
25 mode='markers',
<ipython-input-84-5895927e91e2> in data_to_plotly(x)
17
18 for i in range(0, len(x)):
---> 19 k.append(x[i][0])
20
21 return k
~\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
I'm new to Plotly. How do I fix this?
EDIT:
my X_test looks like this:
X_test
I see that X_train is a DataFrame. Plotly is actually pretty friendly with pandas (there are several examples in Plotly's pandas example gallery), so you don't have to deal with in-between functions like data_to_plotly; that tutorial sadly looks quite outdated. In this case the scatters should look something like this:
# Select the single feature column once so both traces receive the same x values.
gdp = X_test['Explained by: GDP per capita']

p1 = go.Scatter(
    x=gdp,
    # Assuming y_test is a numpy array or pandas series;
    # if it is also a dataframe you have to specify the column.
    y=y_test,
    mode='markers',
    marker={'color': 'black'},
)
p2 = go.Scatter(
    x=gdp,
    y=regr.predict(X_test),
    mode='lines',
    line={'color': 'blue', 'width': 3},
)
Related
edited to address the comments
added lines at the beginning where the data was imported from MNIST
added the full error message from jupyter notebook as text
I am trying to implement a very simple code in python (jupyter notebook, if it matters):
from sklearn.datasets import fetch_openml
# Fetch MNIST from OpenML as a (features, labels) pair, using ./data/ as data_home.
x, y = fetch_openml('mnist_784', version=1, return_X_y=True, data_home='./data/')
y = y.astype(int)
# 2 x 4 grid of axes, one per randomly drawn sample.
fig, ax = plt.subplots(2, 4, figsize=(20, 8))
for a in ax.ravel():
    j = np.random.choice(len(y))
    # NOTE: the traceback below shows x[j] being handled by pandas
    # DataFrame.__getitem__, i.e. as a column-label lookup; that is what
    # raises KeyError for the random integer j.
    sns.heatmap(x[j].reshape(28,28), ax=a, cbar=False, cmap='gray_r')
    a.set_title(f'Label: {y[j]}')
    a.set_xticks([])
    a.set_yticks([])
and I get the following error shown in the screenshot. I don't think this is a code problem, as this was taken directly from the lecturer's notes. Could anyone help me troubleshoot and enlighten me, please?
See error message below:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in
pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in
pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in
pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 46220
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-6-02155e9f4730> in <module>
2 for a in ax.ravel():
3 j = np.random.choice(len(y))
----> 4 sns.heatmap(x[j].reshape(28,28), ax=a, cbar=False, cmap='gray_r')
5 a.set_title(f'Label: {y[j]}')
6 a.set_xticks([])
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 46220
KeyError: 46220
I suppose that with the line below you were trying to access the row j of the pandas DataFrame x:
sns.heatmap(x[j].reshape(28,28), ax=a, cbar=False, cmap='gray_r')
However, in order to access the values of a row by its integer position you should use x.iloc[j].values instead. Lots of examples can be found here.
The complete code is:
from sklearn.datasets import fetch_openml
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Fetch MNIST pixels and labels (data_home is ./data/) and cast the labels to int.
x, y = fetch_openml('mnist_784', version=1, return_X_y=True, data_home='./data/')
y = y.astype(int)

# A 2 x 4 grid showing eight randomly chosen digits.
fig, ax = plt.subplots(2, 4, figsize=(20, 8))
for a in ax.ravel():
    j = np.random.choice(len(y))
    # .iloc picks the row by position; .values yields the ndarray that reshape needs.
    pixels = x.iloc[j].values.reshape(28, 28)
    sns.heatmap(pixels, ax=a, cbar=False, cmap='gray_r')
    a.set_title(f'Label: {y[j]}')
    a.set_xticks([])
    a.set_yticks([])
The result produced:
I am relatively new to Python and programming, and have been trying to make some initial plots of precipitation data for the Indian subcontinent, specifically for the Indian summer monsoon over the period June, July, August and September. I have managed to understand some of the code in a tutorial well enough to obtain the plot for JJA shown below, but I am failing to modify it to show the season as JJAS instead of JJA. Simply substituting JJAS in place of JJA of course yielded the error
KeyError: 'JJAS'
I have seen one solution to this on the same forum but I am unable to adapt it to my code. I would be extremely grateful if I could receive any advice on this. Thank you !
Below is the code
import xarray as xr
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import numpy as np
import cmocean
accesscm2_pr_file = r'C:\Users\uSER\Desktop\DissTrack1\ESGF data files\pr_Amon_CAMS-CSM1-0_historical_r1i1p1f1_gn_185001-201412.nc'
dset = xr.open_dataset(accesscm2_pr_file)
# Seasonal climatology: mean of 'pr' over time within each 'time.season' group.
clim = dset['pr'].groupby('time.season').mean('time', keep_attrs=True)
# 86400 seconds per day; presumably converts a per-second rate to the mm/day
# unit set on the next line. TODO confirm the source units.
clim.data = clim.data * 86400
clim.attrs['units'] = 'mm/day'
fig = plt.figure(figsize=[12,5])
ax = fig.add_subplot(111, projection=ccrs.PlateCarree(central_longitude=180))
# NOTE: the KeyError in the traceback below is raised by this .sel call;
# 'JJAS' is not one of the season labels present in clim.
clim.sel(season='JJAS').plot.contourf(ax=ax,
levels=np.arange(0, 13.5, 1.5),
extend='max',
transform=ccrs.PlateCarree(),
cbar_kwargs={'label': clim.units},
cmap=cmocean.cm.haline_r)
ax.coastlines()
plt.show()
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'JJAS'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_16124/3658430410.py in <module>
15 fig = plt.figure(figsize=[12,5])
16 ax = fig.add_subplot(111, projection=ccrs.PlateCarree(central_longitude=180))
---> 17 clim.sel(season='JJAS').plot.contourf(ax=ax,
18 levels=np.arange(0, 13.5, 1.5),
19 extend='max',
~\anaconda3\lib\site-packages\xarray\core\dataarray.py in sel(self, indexers, method, tolerance, drop, **indexers_kwargs)
1269 Dimensions without coordinates: points
1270 """
-> 1271 ds = self._to_temp_dataset().sel(
1272 indexers=indexers,
1273 drop=drop,
~\anaconda3\lib\site-packages\xarray\core\dataset.py in sel(self, indexers, method, tolerance, drop, **indexers_kwargs)
2363 """
2364 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "sel")
-> 2365 pos_indexers, new_indexes = remap_label_indexers(
2366 self, indexers=indexers, method=method, tolerance=tolerance
2367 )
~\anaconda3\lib\site-packages\xarray\core\coordinates.py in remap_label_indexers(obj, indexers, method, tolerance, **indexers_kwargs)
419 }
420
--> 421 pos_indexers, new_indexes = indexing.remap_label_indexers(
422 obj, v_indexers, method=method, tolerance=tolerance
423 )
~\anaconda3\lib\site-packages\xarray\core\indexing.py in remap_label_indexers(data_obj, indexers, method, tolerance)
272 coords_dtype = data_obj.coords[dim].dtype
273 label = maybe_cast_to_coords_dtype(label, coords_dtype)
--> 274 idxr, new_idx = convert_label_indexer(index, label, dim, method, tolerance)
275 pos_indexers[dim] = idxr
276 if new_idx is not None:
~\anaconda3\lib\site-packages\xarray\core\indexing.py in convert_label_indexer(index, label, index_name, method, tolerance)
189 indexer = index.get_loc(label_value)
190 else:
--> 191 indexer = index.get_loc(label_value, method=method, tolerance=tolerance)
192 elif label.dtype.kind == "b":
193 indexer = label
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 'JJAS'
Indeed, grouping by "time.season" will only split your data into "DJF", "MAM", "JJA", and "SON". For other combinations of months you will need to define your own mask(s) to apply when taking a mean. For "JJAS" I often use something like this:
# Boolean mask over the time axis: True for months 6 to 9 (June to September).
jjas = dset['time'].dt.month.isin([6, 7, 8, 9])
# Keep only the masked time steps and average them over time.
clim = dset.sel(time=jjas).mean(dim="time")
I am trying to run this code:
(this will download the MNIST dataset to %HOME directory!)
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1)
mnist.keys()
# Split the fetched bunch into features and labels.
X, y = mnist["data"], mnist["target"]
import matplotlib as mpl
import matplotlib.pyplot as plt
# NOTE: per the traceback below, X is a pandas DataFrame here, so X[0] is
# looked up as a column label and raises KeyError: 0.
some_digit = X[0] # **ERROR LINE** <---------
some_digit_image = some_digit.reshape(28, 28)
# Draw the 28 x 28 array with the binary colormap and hide the axes.
plt.imshow(some_digit_image, cmap = mpl.cm.binary, interpolation="nearest")
plt.axis("off")
plt.show()
I have this error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-45-d5d685fca2de> in <module>
2 import matplotlib.pyplot as plt
3 import numpy as np
----> 4 some_digit = X[0]
5 some_digit_image = some_digit.reshape(28, 28)
6 plt.imshow(some_digit_image, cmap = mpl.cm.binary, interpolation="nearest")
~/.local/lib/python3.8/site-packages/pandas/core/frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 0
Code example is from this book: Hands-on Machine Learning with Scikit-Learn, Keras, and TensorFlow
I tried X.iloc[0] but it's also not working.
From your dataframe pic, there is no column header named 0. If you want to access column by index, you can use .iloc which is primarily integer position based:
# Positional selection: every row (:) of the column at position 0.
df.iloc[:, 0]
Or access by column header list
# Look up the first column's label, then select that column by label.
df[df.columns[0]]
I am trying to show the image at a specific index using matplotlib, but it raises an error that I do not understand. I am trying to take index 0 of the MNIST data, reshape it to 28 by 28 pixels and then display it with the plt.show() function.
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# NOTE: per the traceback below, X[0] is dispatched to pandas
# DataFrame.__getitem__ (a column-label lookup) and raises KeyError: 0.
some_digit = X[0]
some_digit_image = some_digit.reshape(28, 28)
# Draw the 28 x 28 array with the binary colormap and hide the axes.
plt.imshow(
some_digit_image,
cmap = matplotlib.cm.binary,
interpolation="nearest")
plt.axis("off")
plt.show()
KeyError Traceback (most recent call last)
~/Machinelearning/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-35-246778f0802e> in <module>
3 import matplotlib.pyplot as plt
4
----> 5 some_digit = X[0]
6 some_digit_image = some_digit.reshape(28, 28)
7
~/Machinelearning/lib/python3.8/site-packages/pandas/core/frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
~/Machinelearning/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 0
Change
import numpy as np
# NOTE(review): presumably this stands for the original lookup x[36000];
# as written it assigns a one-element list. Confirm against the source post.
x = [36000]
to
# .iloc[36000] takes the row at integer position 36000; np.array turns it into an ndarray.
np.array(x.iloc[36000])
Could you please tell me where I am going wrong in the code below? I am new to Python. I even import from the pandas core library. When I call the function at the end, I get an error.
import pandas as pd
import numpy as np
from scipy.optimize import Bounds
from scipy.optimize import minimize
from google.colab import files
from pandas._libs.index import IndexEngine
from pandas._libs.hashtable import PyObjectHashTable
# Upload the workbook through Colab's file dialog, then load it with the
# 'date' column as a parsed datetime index.
f = files.upload()
df = pd.read_excel(r'symbol.xlsx', index_col='date', parse_dates=True)
df.head()
dir(df)
# Value passed as m to get_edges and get_p below.
m = 3
def get_edges(df, m):
    """Build ``m + 1`` bin edges per column, spaced one standard deviation apart.

    The edges start as ``std * (-m / 2 + k)`` for ``k = 0..m``. If the data
    extends past the outermost edge on either side, that edge is replaced by
    the column minimum or maximum respectively.

    Args:
        df: DataFrame with one numeric column per series.
        m: Number of bins.

    Returns:
        dict: Maps each column name to its list of bin edges.
    """
    edges = {}
    for name in df.columns:
        column = df[name]
        sigma = column.std()
        lowest, highest = column.min(), column.max()
        bounds = [sigma * (-m / 2 + step) for step in range(m + 1)]
        # Swap an outer edge for the data extreme only when data lies beyond it.
        if bounds[0] > lowest:
            bounds = [lowest] + bounds[1:]
        if bounds[-1] < highest:
            bounds = bounds[:-1] + [highest]
        edges[name] = bounds
    return edges
# Compute the bin edges per series, then store each observation's bin number.
edges = get_edges(df, m)
g = pd.DataFrame()
for key, value in edges.items():
    g[key] = pd.cut(df[key], bins=value, labels=False, include_lowest=True)
def get_p(g, m):
    """Estimate one-step transition probabilities between binned series.

    For every ordered pair ``(beta, alpha)`` of columns in ``g``, count how
    often bin ``i`` in ``beta`` at one row is followed by bin ``j`` in
    ``alpha`` at the next row, then normalise the counts within each ``i``.

    Args:
        g: DataFrame of integer bin labels, one column per series. Labels are
            expected to lie in ``range(m)``.
        m: Number of bins per series.

    Returns:
        DataFrame indexed by ``(i, j)`` with one column ``p{beta}_{alpha}``
        per ordered column pair. A state ``i`` that never occurs gives NaN
        for its rows (0 / 0).
    """
    index_col = ['i', 'j']
    # Every possible (i, j) pair, so unseen transitions still appear with frequency 0.
    p_index = pd.DataFrame(
        [[i, j] for i in range(m) for j in range(m)], columns=index_col
    )

    tables = []
    for beta in g.columns:
        for alpha in g.columns:
            # Pair each row of beta with the next row of alpha. The last row has
            # no successor (NaN after the shift) and is dropped before the int cast.
            p = pd.concat([g[beta], g[alpha].shift(-1)], axis=1)[:-1].astype(dtype='int')
            p.columns = index_col
            p = p.groupby(index_col).size().reset_index(name='freq')
            p = p.merge(p_index, on=index_col, how='right').fillna(0)
            # Group by the 'i' column. The previous p[i] used the leftover integer
            # loop variable (m - 1) as a column key and raised KeyError.
            p[f'p{beta}_{alpha}'] = (
                p['freq'].groupby(p['i']).transform(lambda x: x / x.sum())
            )
            p = p.drop('freq', axis=1).set_index(index_col)
            tables.append(p)
    return pd.concat(tables, axis=1)
# Build the transition-probability table and show it.
ge = get_p(g, m)
print(ge)
My dataset looks like. It is time series based dataset. Bid and fut are the log price of two markets.
bid fut
date
2020-05-04 09:15:01 9.810001 9.811235
2020-05-04 09:15:02 9.806426 9.807802
2020-05-04 09:15:03 9.803253 9.802976
2020-05-04 09:15:04 9.804358 9.805047
2020-05-04 09:15:05 9.805047 9.80794
Here is the error part. It is showing an unusual error even after giving the right input.
KeyError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2645 try:
-> 2646 return self._engine.get_loc(key)
2647 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 2
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
3 frames
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 2