Getting KeyError with pandas corr matrix - python

I read a CSV file using pandas. I can read all the columns from the data frame, but I am getting a tricky KeyError when I try to compute the correlation matrix for some features.
def read_csv(path):
return pd.read_csv(path, skipinitialspace = True)
data = read_csv(r'C:\Users\SAM\Documents\PYTHON\MACHINE\dataset\data.csv')
data['Release Clause'][0]
>>> '€226.5M' #works fine
corr_matrix = data.corr()
corr_matrix['Release Clause'].sort_values(ascending=False)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\ProgramData\Miniconda3\envs\machine\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3078 return self._engine.get_loc(key)
3079 except KeyError:
-> 3080 return self._engine.get_loc(self._maybe_cast_indexer(key))
3081
3082 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Release Clause'

Related

KeyError in pandas to_datetime

I am loading a CSV file which has a datetime column. When I try to convert it using df['times'] = pd.to_datetime(df['times'], format='%Y-%m-%d %H:%M:%S'), it gives a KeyError. Please also see the Jupyter notebook screenshot.
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/Software/Anytrader/venv/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'times'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-6-4f05a9476911> in <module>
----> 1 df['times'] = pd.to_datetime(df['times'], format='%Y-%m-%d %H:%M:%S')
2 df.dtypes
~/Software/Anytrader/venv/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
~/Software/Anytrader/venv/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'times'
From your screenshot, it looks like your column names include quotes. Try this:
df["'times'"] = pd.to_datetime(df["'times'"], format='%Y-%m-%d %H:%M:%S')
Alternatively (and probably better), you could strip the quotes from your column names right after loading the data from the file.
df.columns = df.columns.str.strip("'")

How to select a specific column in excel using python?

I am new to machine learning. I have downloaded a breast cancer data set to try to make a classification on it.
When I try to print the head function (data.head()) I get this
id;diagnosis;radius_mean;texture_mean;perimeter_mean;area_mean;smoothness_mean;compactness_mean;concavity_mean;concave points_mean;symmetry_mean;fractal_dimension_mean;radius_se;texture_se;perimeter_se;area_se;smoothness_se;compactness_se;concavity_se;concave points_se;symmetry_se;fractal_dimension_se;radius_worst;texture_worst;perimeter_worst;area_worst;smoothness_worst;compactness_worst;concavity_worst;concave points_worst;symmetry_worst;fractal_dimension_worst
When I try to make the id column the labels using the following code:
train_labels = data['id'].values
it doesn't work. Instead it shows this long error message:
KeyError Traceback (most recent call last)
C:\Users\win10\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: ';id'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-45-23e840b4d870> in <module>
----> 1 train_labels = data[';id'].values
C:\Users\win10\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2978 if self.columns.nlevels > 1:
2979 return self._getitem_multilevel(key)
-> 2980 indexer = self.columns.get_loc(key)
2981 if is_integer(indexer):
2982 indexer = [indexer]
C:\Users\win10\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'id'
So how can I get any specific column in my data set?
It seems that you've read the file using the wrong separator (sep). By default pd.read_csv uses , as the separator, but ; separators are visible in the output of data.head(). If you are using pd.read_csv, try passing sep=';'.
The code you are using for accessing a specific column (data['id']) seems otherwise correct.
I hope this helps!
The dataset is not comma-separated but semicolon-separated, so you have to pass the parameter sep=';' to the read_csv function, and this will solve your problem.
data = pd.read_csv('file_name', sep = ';')
data['column_name']
You'll be able to fetch the column for this dataset.

Error in sklearn : grid_ridge_m.cv_results_

I am using scikit-learn version 0.22.1, and I am getting an error with both grid_scores_ (0.18) and cv_results_ (0.18+). Since I have sklearn 0.22, I used cv_results_.
fig,ax= plt.subplots()
fig.set_size_inches(12,5)
#df = pd.DataFrame(grid_ridge_m.grid_scores_)
df = pd.DataFrame(grid_ridge_m.cv_results_)
df["alpha"] = df["parameters"].apply(lambda x:x["alpha"])
df["rmsle"] = df["mean_validation_score"].apply(lambda x:-x)
sn.pointplot(data=df,x="alpha",y="rmsle",ax=ax)
ERROR: when I use grid_ridge_m.cv_results_, I get the error below:
KeyError Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'parameters'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-46-69e67dace19f> in <module>
17 #df = pd.DataFrame(grid_ridge_m.grid_scores_)
18 df = pd.DataFrame(grid_ridge_m.cv_results_)
---> 19 df["alpha"] = df["parameters"].apply(lambda x:x["alpha"])
20 df["rmsle"] = df["mean_validation_score"].apply(lambda x:-x)
21 sn.pointplot(data=df,x="alpha",y="rmsle",ax=ax)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\frame.py in __getitem__(self, key)
2993 if self.columns.nlevels > 1:
2994 return self._getitem_multilevel(key)
-> 2995 indexer = self.columns.get_loc(key)
2996 if is_integer(indexer):
2997 indexer = [indexer]
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'parameters'

How to fix KeyError: 'Address' in jupyter notebook

I'm currently learning data analysis with pandas. I was practicing indexing and slicing data frames, and I successfully imported a CSV file named 'supermarkets.csv' using read_csv(). Now I want to slice the data frame, and I intend to use the Address column, which is the most unique column in the file, as the index via the set_index() function, but I keep getting an error whenever I assign the result back to a variable. All of this is within Jupyter Notebook.
The Code:
import pandas
dframe = pandas.read_csv("supermarket.csv")
dframe.set_index("Address") #the outputted the dataframe with the new index
dframe = dframe.set_index("Address") #this is where the issue keeps coming up
The Error Message:
KeyError Traceback (most recent call last)
c:\program files\python35\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Address'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-16-469e201b9d49> in <module>
----> 1 dframe.set_index("Address")
c:\program files\python35\lib\site-packages\pandas\core\frame.py in set_index(self, keys, drop, append, inplace, verify_integrity)
4176 names.append(None)
4177 else:
-> 4178 level = frame[col]._values
4179 names.append(col)
4180 if drop:
c:\program files\python35\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
c:\program files\python35\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Address'
I've also tried this form, but to no avail: data.set_index('Address',inplace=True)

KeyError: 'Message' in Jupyter Notebook

I am new to machine learning and I am facing this issue.
I have uploaded a dataset with two columns, with the headings 'Message' and 'Priority'.
When I run this command, I get this:
'df.columns'
'Index(['Message\tPriority'], dtype='object')'
But, when I run this command, I get the following error:
X = df['Message']
ylabels = df['Priority']
KeyError Traceback (most recent call last)
D:\anna\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Message'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-72-238deec7e797> in <module>
1 # Features and Labels
----> 2 X = df['Message']
3 ylabels = df['Priority']
D:\anna\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
D:\anna\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Message'
I will guess.
The line Index(['Message\tPriority']) shows that this data uses a tab (\t) as the separator, but read_csv() uses , as the separator by default, so you have to add the option sep="\t":
df = pd.read_csv(filename, sep="\t")

Categories