I am new to machine learning and I am facing this issue.
I have uploaded a dataset with two columns, with the headings 'Message' and 'Priority'.
When I run this command, I get this:
'df.columns'
'Index(['Message\tPriority'], dtype='object')'
But, when I run this command, I get the following error:
X = df['Message']
ylabels = df['Priority']
KeyError Traceback (most recent call last)
D:\anna\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Message'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-72-238deec7e797> in <module>
1 # Features and Labels
----> 2 X = df['Message']
3 ylabels = df['Priority']
D:\anna\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
D:\anna\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Message'
I will guess.
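The output Index(['Message\tPriority'], dtype='object') shows that the whole header was read as a single column name, which means this data uses a tab (\t) as the separator. By default, read_csv() uses a comma (,) as the separator, so you have to add the option sep="\t":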
df = pd.read_csv(filename, sep="\t")
Related
I am loading a CSV file which has a datetime column. When I try to convert it using df['times'] = pd.to_datetime(df['times'], format='%Y-%m-%d %H:%M:%S'), it gives a KeyError. Please also see the Jupyter notebook screenshot.
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/Software/Anytrader/venv/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'times'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-6-4f05a9476911> in <module>
----> 1 df['times'] = pd.to_datetime(df['times'], format='%Y-%m-%d %H:%M:%S')
2 df.dtypes
~/Software/Anytrader/venv/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
~/Software/Anytrader/venv/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'times'
From your screenshot, it looks like your column names include quotes. Try this:
df["'times'"] = pd.to_datetime(df["'times'"], format='%Y-%m-%d %H:%M:%S')
Alternatively (and probably better), you could strip the quotes from your column names right after loading the data from the file.
df.columns = df.columns.str.strip("'")
I am using scikit-learn version 0.22.1, and I am getting an error with both grid_scores_ (0.18) and cv_results_ (0.18+). Since I have sklearn 0.22, I used cv_results_.
fig,ax= plt.subplots()
fig.set_size_inches(12,5)
#df = pd.DataFrame(grid_ridge_m.grid_scores_)
df = pd.DataFrame(grid_ridge_m.cv_results_)
df["alpha"] = df["parameters"].apply(lambda x:x["alpha"])
df["rmsle"] = df["mean_validation_score"].apply(lambda x:-x)
sn.pointplot(data=df,x="alpha",y="rmsle",ax=ax)
ERROR: when I use grid_ridge_m.cv_results_, I get the error below:
KeyError Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'parameters'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-46-69e67dace19f> in <module>
17 #df = pd.DataFrame(grid_ridge_m.grid_scores_)
18 df = pd.DataFrame(grid_ridge_m.cv_results_)
---> 19 df["alpha"] = df["parameters"].apply(lambda x:x["alpha"])
20 df["rmsle"] = df["mean_validation_score"].apply(lambda x:-x)
21 sn.pointplot(data=df,x="alpha",y="rmsle",ax=ax)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\frame.py in __getitem__(self, key)
2993 if self.columns.nlevels > 1:
2994 return self._getitem_multilevel(key)
-> 2995 indexer = self.columns.get_loc(key)
2996 if is_integer(indexer):
2997 indexer = [indexer]
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'parameters'
I am doing a CNN project on Google Colab and I have uploaded the image dataset to Google Drive. After reading the CSV file for the labels, I created a dataframe whose first five rows are shown below:
image level
0 10_left 0
1 10_right 0
2 13_left 0
3 13_right 0
4 15_left 1
Now I need to create a column 'path' which contains path of each image.
base_image_dir = 'My Drive/Fist500'
import os
df['path'] = df['image'].map(lambda x: os.path.join(base_image_dir,'{}.jpg'.format(x)))
However on running this I get following error:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2645 try:
-> 2646 return self._engine.get_loc(key)
2647 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'image'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
2 frames
<ipython-input-72-cacf7c6ca99e> in <module>()
1 base_image_dir = 'My Drive/Fist500'
2 import os
----> 3 df['path'] = df['image'].map(lambda x: os.path.join(base_image_dir,'{}.jpg'.format(x)))
4
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __getitem__(self, key)
2798 if self.columns.nlevels > 1:
2799 return self._getitem_multilevel(key)
-> 2800 indexer = self.columns.get_loc(key)
2801 if is_integer(indexer):
2802 indexer = [indexer]
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'image'
I'm currently learning data analysis with pandas. I was practicing indexing and slicing data frames, and I imported a CSV file named 'supermarkets.csv' using read_csv(), which was successful. Now I want to slice the data frame, and I intend to use the Address column, which is the most unique column in the file, as the index by using the set_index() function. However, I keep getting an error any time I assign the result to a variable. All of this is within Jupyter Notebook.
The Code:
import pandas
dframe = pandas.read_csv("supermarket.csv")
dframe.set_index("Address") #the outputted the dataframe with the new index
dframe = dframe.set_index("Address") #this is where the issue keeps coming up
The Error Message:
KeyError Traceback (most recent call last)
c:\program files\python35\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Address'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-16-469e201b9d49> in <module>
----> 1 dframe.set_index("Address")
c:\program files\python35\lib\site-packages\pandas\core\frame.py in set_index(self, keys, drop, append, inplace, verify_integrity)
4176 names.append(None)
4177 else:
-> 4178 level = frame[col]._values
4179 names.append(col)
4180 if drop:
c:\program files\python35\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
c:\program files\python35\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Address'
I've also tried to use this form, but to no avail: data.set_index('Address',inplace=True)
I have 30 CSV files where each file has its own DataFrame (due to the requirements, I cannot merge the DataFrames). I want to have a dictionary, where the key is the name of the CSV file and the value is the DataFrame itself. This is what I have for that:
import pandas as pd
import glob
import os
files = glob.glob('data\*.csv')
roster = {os.path.basename(fp).split('.')[0] : pd.read_csv(fp) for fp in files}
The CSV files have a column called 'Season' where the format is like this: '2018-19', '2017-18', and these values vary from file to file. I want to take only the rows that are after 1980. With the help of jazrael on a previous question, I was able to use his suggestion. However, I am running into a KeyError. From my understanding, that means I am using the wrong column name or the wrong key. However, both of those are correct. This is what my friend jazrael suggested:
dfs_dict = {k:v[v['Season'].str.extract('(\d{4})', expand=False).astype(float) > 1980]
for k, v in dfs_dict.items()}
And this is my error:
KeyError Traceback (most recent call last)
C:\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Season'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-2-8f59bae477f8> in <module>
1 league = {k:v[v['Season'].str.extract('(\d{4})', expand=False).astype(float) > 1980]
----> 2 for k, v in league.items()}
3
4
5 #BOS[BOS['Season'].str.split('-').str[0].astype(int) < 2017
<ipython-input-2-8f59bae477f8> in <dictcomp>(.0)
1 league = {k:v[v['Season'].str.extract('(\d{4})', expand=False).astype(float) > 1980]
----> 2 for k, v in league.items()}
3
4
5 #BOS[BOS['Season'].str.split('-').str[0].astype(int) < 2017
C:\Anaconda\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
C:\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Season'
I am quite new to Python, would appreciate it if anyone can explain what I am doing wrong :)