During handling of the above exception, another exception occurred Pandas - python

Could you please tell me where I am going wrong in the code below? I am new to Python. I even imported the pandas core libraries directly. When I call the function at the end, I get an error.
import pandas as pd
import numpy as np
from scipy.optimize import Bounds
from scipy.optimize import minimize
from google.colab import files
from pandas._libs.index import IndexEngine
from pandas._libs.hashtable import PyObjectHashTable
f= files.upload()
df=pd.read_excel(r'symbol.xlsx',index_col='date',parse_dates=True)
df.head()
dir(df)
m=3
def get_edges(df, m):
    """Build the bin edges used to discretise every column of ``df``.

    Candidate edges are spaced one standard deviation apart and centred on
    zero: ``std * (-m / 2 + k)`` for ``k = 0 .. m``.  If the lowest candidate
    lies above the column minimum it is replaced by that minimum; if the
    highest candidate lies below the column maximum it is replaced by that
    maximum.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric columns to compute edges for.
    m : int
        Number of bins; each column gets ``m + 1`` edges.

    Returns
    -------
    dict
        Maps each column label to its list of ``m + 1`` edges.
    """
    edges = {}
    for name in df.columns:
        column = df[name]
        sigma = column.std()
        bins = [sigma * (-m / 2 + step) for step in range(m + 1)]
        lowest = column.min()
        if bins[0] > lowest:
            # Stretch the first bin down so it covers the smallest value.
            bins = [lowest] + bins[1:]
        highest = column.max()
        if bins[-1] < highest:
            # Stretch the last bin up so it covers the largest value.
            bins = bins[:-1] + [highest]
        edges[name] = bins
    return edges
edges = get_edges(df,m)
g = pd.DataFrame()
for key, value in edges.items():
g[key] = pd.cut(df[key],value,labels=False, include_lowest=True)
def get_p(g, m):
    """Estimate one-step transition probabilities between binned series.

    For every ordered pair of columns ``(beta, alpha)`` in ``g`` this counts
    how often bin ``i`` of ``beta`` in one row is followed by bin ``j`` of
    ``alpha`` in the next row, then divides each count by the total count of
    its starting bin ``i``.

    Parameters
    ----------
    g : pandas.DataFrame
        Bin labels, one column per series.  Labels are expected to be
        integers in ``range(m)``; other labels are dropped by the merge.
    m : int
        Number of bins per series.

    Returns
    -------
    pandas.DataFrame
        Indexed by every ``(i, j)`` pair of ``range(m) x range(m)``, with one
        column named ``p{beta}_{alpha}`` per ordered pair of series.  Rows
        whose starting bin ``i`` was never observed are NaN (0 / 0).
    """
    index_col = ['i', 'j']
    # Every possible (i, j) transition, so unobserved ones appear with 0.
    p_index = pd.DataFrame(
        [[i, j] for i in range(m) for j in range(m)],
        columns=index_col,
    )

    def normalise(freq):
        # Turn the counts of one starting bin into conditional probabilities.
        return freq / freq.sum()

    tables = []
    for beta in g.columns:
        for alpha in g.columns:
            # Pair each row of beta with the following row of alpha; the last
            # row has no successor (NaN after the shift) and is dropped.
            p = pd.concat([g[beta], g[alpha].shift(-1)], axis=1)[:-1].astype(dtype='int')
            p.columns = index_col
            p = p.groupby(index_col).size().reset_index(name='freq')
            p = p.merge(p_index, on=index_col, how='right').fillna(0)
            # Group by the column labelled 'i'.  Writing p[i] with the bare
            # name picks up a leftover loop variable (m - 1) and raises
            # KeyError because no column carries that integer label.
            p[f'p{beta}_{alpha}'] = p['freq'].groupby(p['i']).transform(normalise)
            p = p.drop('freq', axis=1).set_index(index_col)
            tables.append(p)
    return pd.concat(tables, axis=1)
ge=get_p(g,m)
print(ge)
My dataset looks like this. It is a time-series dataset; bid and fut are the log prices of two markets.
bid fut
date
2020-05-04 09:15:01 9.810001 9.811235
2020-05-04 09:15:02 9.806426 9.807802
2020-05-04 09:15:03 9.803253 9.802976
2020-05-04 09:15:04 9.804358 9.805047
2020-05-04 09:15:05 9.805047 9.80794
Here is the error. It shows an unusual error even though I am giving the right input.
KeyError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2645 try:
-> 2646 return self._engine.get_loc(key)
2647 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 2
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
3 frames
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 2

Related

Pandas KeyError, accessing column

I am trying to run this code:
(this will download the MNIST dataset to %HOME directory!)
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1)
mnist.keys()
X, y = mnist["data"], mnist["target"]
import matplotlib as mpl
import matplotlib.pyplot as plt
some_digit = X[0] # **ERROR LINE** <---------
some_digit_image = some_digit.reshape(28, 28)
plt.imshow(some_digit_image, cmap = mpl.cm.binary, interpolation="nearest")
plt.axis("off")
plt.show()
I have this error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-45-d5d685fca2de> in <module>
2 import matplotlib.pyplot as plt
3 import numpy as np
----> 4 some_digit = X[0]
5 some_digit_image = some_digit.reshape(28, 28)
6 plt.imshow(some_digit_image, cmap = mpl.cm.binary, interpolation="nearest")
~/.local/lib/python3.8/site-packages/pandas/core/frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
~/.local/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 0
Code example is from this book: Hands-on Machine Learning with Scikit-Learn, Keras, and TensorFlow
I tried X.iloc[0], but it's also not working.
From your dataframe pic, there is no column header named 0. If you want to access column by index, you can use .iloc which is primarily integer position based:
df.iloc[:, 0]
Or access by column header list
df[df.columns[0]]

Key Error: 1 During handling of the above exception, another exception occurred

data1 = pd.read_csv("1-success 1.txt")
record = False
stop = False
distss = []
index = 1
while stop == False:
dists = float(data1[index])
index = index + 1
if dists > 5:
record = True
distss.append(dists)
if record == True and dists <5:
stop = True
len(distss)
This code is supposed to read a txt file and then, in the next cell, plot the points contained in that file. The problem seems to be with reading the txt file, as the plotting works perfectly fine. The following error is given:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
D:\Lubertus\Apps\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method,
tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 1
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-3-646e69692594> in <module>
6 index = 1
7 while stop == False:
----> 8 dists = float(data1[index])
9 index = index + 1
10 if dists > 5:
D:\Lubertus\Apps\Anaconda\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
D:\Lubertus\Apps\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method,
tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 1
Any explanation and assistance would be much appreciated. This only happens when I run certain text files, and doesn't when I run others. At first I thought the problem might be with:
dists = float(data1.title[index])
Because it gave the error that data1 doesn't have a title attribute, taking this away gave the above error.
Use iloc function instead of direct index of the dataframe:
data1 = pd.read_csv("1-success 1.txt")
record = False
stop = False
distss = []
index = 1
while stop == False:
dists = float(data1.iloc[index])
index = index + 1
if dists > 5:
record = True
distss.append(dists)
if record == True and dists <5:
stop = True
len(distss)

Key Error : 'image' while adding new column to dataframe

I am doing a CNN project on google colab and I have uploaded the image dataset in google drive. After reading csv file for labels I have created a dataframe whose first five elements are as below:
image level
0 10_left 0
1 10_right 0
2 13_left 0
3 13_right 0
4 15_left 1
Now I need to create a column 'path' which contains path of each image.
base_image_dir = 'My Drive/Fist500'
import os
df['path'] = df['image'].map(lambda x: os.path.join(base_image_dir,'{}.jpg'.format(x)))
However on running this I get following error:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2645 try:
-> 2646 return self._engine.get_loc(key)
2647 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'image'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
2 frames
<ipython-input-72-cacf7c6ca99e> in <module>()
1 base_image_dir = 'My Drive/Fist500'
2 import os
----> 3 df['path'] = df['image'].map(lambda x: os.path.join(base_image_dir,'{}.jpg'.format(x)))
4
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __getitem__(self, key)
2798 if self.columns.nlevels > 1:
2799 return self._getitem_multilevel(key)
-> 2800 indexer = self.columns.get_loc(key)
2801 if is_integer(indexer):
2802 indexer = [indexer]
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'image'

Dict 'Key Error' when formatting dictionary in Python

I have 30 csv files where each file has its own DataFrame (due to the requirements, I cannot merge the DataFrames). I want to have a dictionary where the key is the name of the csv file and the value is the DataFrame itself. This is what I have for that:
import pandas as pd
import glob
import os
# Build the pattern with os.path.join rather than 'data\*.csv': the backslash
# form only works on Windows, and '\*' is an invalid escape sequence.
files = glob.glob(os.path.join('data', '*.csv'))
# Map each file's base name (the text before its first '.') to its DataFrame.
roster = {os.path.basename(fp).split('.')[0]: pd.read_csv(fp) for fp in files}
The CSV files have a column called 'Season' whose values look like '2018-19' or '2017-18', and these values vary from file to file. I only want to keep the rows for seasons after 1980. With the help of jazrael on a previous question, I was able to use his suggestion. However, I am running into a KeyError. From my understanding, that means I am using the wrong column name or the wrong key, but as far as I can tell both of those are correct. This is what my friend jazrael suggested:
dfs_dict = {k:v[v['Season'].str.extract('(\d{4})', expand=False).astype(float) > 1980]
for k, v in dfs_dict.items()}
And this is my error:
KeyError Traceback (most recent call last)
C:\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Season'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-2-8f59bae477f8> in <module>
1 league = {k:v[v['Season'].str.extract('(\d{4})', expand=False).astype(float) > 1980]
----> 2 for k, v in league.items()}
3
4
5 #BOS[BOS['Season'].str.split('-').str[0].astype(int) < 2017
<ipython-input-2-8f59bae477f8> in <dictcomp>(.0)
1 league = {k:v[v['Season'].str.extract('(\d{4})', expand=False).astype(float) > 1980]
----> 2 for k, v in league.items()}
3
4
5 #BOS[BOS['Season'].str.split('-').str[0].astype(int) < 2017
C:\Anaconda\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
C:\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Season'
I am quite new to Python, would appreciate it if anyone can explain what I am doing wrong :)

Unable to transform my input series and window-size into a set of input/output pairs for the RNN model

I am currently building a recurrent neural network model, and I am stuck at the point of transforming my input data into a set of input/output pairs for the RNN model.
I have tried the window_transform_series function, which takes the series, window_size and step_size as inputs, but I keep getting a KeyError.
cutting our time series into sequences
The function below transforms the input series and window-size into a set #of input/output pairs for our RNN model.
def window_transform_series(series, window_size, step_size):
    """Cut a sequence into sliding-window input/output pairs.

    A window of ``window_size`` consecutive items is taken every
    ``step_size`` positions; the item immediately after each window is its
    target output.

    Parameters
    ----------
    series : sequence, numpy array, pandas Series or pandas DataFrame
        The data to cut up; items (rows, for pandas objects) are addressed
        by position.
    window_size : int
        Number of items in each input window.
    step_size : int
        Distance between the starts of consecutive windows.

    Returns
    -------
    tuple
        ``(inputs, outputs)``: a list of windows and a list of the items that
        follow them.  Both are empty when ``len(series) <= window_size``.
    """
    # On pandas objects plain ``series[i]`` is a label lookup (a column
    # lookup on a DataFrame) and raises KeyError for a positional integer,
    # so index by position through .iloc whenever it is available.
    rows = getattr(series, "iloc", series)
    inputs = []
    outputs = []
    for end in range(window_size, len(series), step_size):
        inputs.append(rows[end - window_size:end])
        outputs.append(rows[end])
    return inputs, outputs
window_size = 7
step_size = 5
inputs, outputs = window_transform_series(carbon_persil,window_size,step_size)
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 7
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-45-9810d786d8b5> in <module>
2 window_size = 7
3 step_size = 5
----> 4 inputs, outputs = window_transform_series(carbon_persil,window_size,step_size)
<ipython-input-41-82e8b484e9e9> in window_transform_series(series, window_size, step_size)
9 for i in range(window_size, len(series), step_size):
10 inputs.append(series[ctr:i])
---> 11 outputs.append(series[i])
12 ctr = ctr + step_size
13 return inputs,outputs
~\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 7
Your series is not long enough. See the following example snippet.
import numpy as np
import pandas as pd
data = np.array(['a','b','c','d'])
s = pd.Series(data) # create dummy series
Now, print (s[2]) would print 'c' as the output.
But if you try to print something out of range, it gives the KeyError.
So, print (s[5]) here gives KeyError: 5. In your case, you start the for loop with window_size=7 and since the length of your series is less than 7, it gives KeyError: 7 on line outputs.append(series[i]).
Interestingly, this error doesn't happen when you try to slice the series with an out of range index.
E.g. if you try to do print (s[1:5]) in the example above, it would just print the following instead of the KeyError.
1 b
2 c
3 d
Therefore, the KeyError is bypassed in your inputs.append(series[ctr:i]) line.

Categories