While running a code block I found the following error:
KeyError Traceback (most recent call last)
<ipython-input-5-1031c1f0832d> in <module>
19 TBM_data=pd.read_csv(files, header=0, sep='\t')
20 df_tbm=pd.DataFrame(TBM_data)
---> 21 df_tbmnew=df_tbm.loc[(df_tbm[124]!=0) & (df_tbm[140]!=0) & (df_tbm[141]!=0),[1,124,140,141]]
22 df_tbmnew['id']=df_tbmnew.index
~\Miniconda3\envs\tensorflow\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2993 if self.columns.nlevels > 1:
2994 return self._getitem_multilevel(key)
-> 2995 indexer = self.columns.get_loc(key)
2996 if is_integer(indexer):
2997 indexer = [indexer]
~\Miniconda3\envs\tensorflow\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 124
I am trying to read multiple .txt files inside a for loop and create subsets from those files using df.loc in Python. The code block is:
files_path=r"E:\CSM Research\Jilin data work\Pre-collapse segments data\1. Stakes (70817-70800)"
read_files=glob.glob(os.path.join(files_path,"*.txt"))
for files in read_files:
x=x+1
TBM_data=pd.read_csv(files, header=0, sep='\t')
df_tbm=pd.DataFrame(TBM_data)
df_tbmnew=df_tbm.loc[(df_tbm[124]!=0) & (df_tbm[140]!=0) & (df_tbm[141]!=0),[1,124,140,141]]
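header=0 tells pandas to take the first line of the CSV as the column names, so the columns are labelled with those header strings and an integer label such as 124 does not exist (hence KeyError: 124). If you want pandas to number the columns automatically, pass header=None. Because the first row of the file (the original column names) would then be read as data, you should probably also skip it by passing skiprows=1.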
Related
I have been trying to implement z-score normalization to all of the numeric values present in combined_data with the following code:
from scipy.stats import zscore
# Calculate the zscores and drop zscores into new column
combined_data['zscore'] = zscore(combined_data['zscore'])
Here, combined_data is the combination of training and testing datasets as a dataframe and passed through one-hot encoding.
I am seeing the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/pandas/core/indexes/base.py:2646, in Index.get_loc(self, key, method, tolerance)
2645 try:
-> 2646 return self._engine.get_loc(key)
2647 except KeyError:
File pandas/_libs/index.pyx:111, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/index.pyx:138, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:1619, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:1627, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'zscore'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
Input In [29], in <cell line: 2>()
1 # Calculate the zscores and drop zscores into new column
----> 2 combined_data['zscore'] = zscore(combined_data['zscore'])
File ~/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/pandas/core/frame.py:2800, in DataFrame.__getitem__(self, key)
2798 if self.columns.nlevels > 1:
2799 return self._getitem_multilevel(key)
-> 2800 indexer = self.columns.get_loc(key)
2801 if is_integer(indexer):
2802 indexer = [indexer]
File ~/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/pandas/core/indexes/base.py:2648, in Index.get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
File pandas/_libs/index.pyx:111, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/index.pyx:138, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:1619, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:1627, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'zscore'
The dataset combined_data contains 257673 rows & 198 columns
Here is the sample data of combined_data
id dur spkts dpkts sbytes dbytes rate sttl dttl sload ... state_CLO state_CON state_ECO state_FIN state_INT state_PAR state_REQ state_RST state_URN state_no
60662 60663 1.193334 10 10 608 646 15.921779 254 252 3673.740967 ... 0 0 0 1 0 0 0 0 0 0
image of sample data
I am new to this kind of error. What am I doing wrong?
[UPDATE: The code was trying to create a separate column with the zscore, which is not possible, as explained below.]
You should apply the function zscore to the whole dataframe, not to a non-existent column:
result = zscore(combined_data)
The result is a numpy array. You cannot make it a column of the original dataframe. But you can create another DataFrame:
pd.DataFrame(result, columns=combined_data.columns, index=combined_data.index)
I am new to machine learning. I have downloaded a breast cancer data set to try to make a classification on it.
When I print the output of the head function (data.head()), I get this:
id;diagnosis;radius_mean;texture_mean;perimeter_mean;area_mean;smoothness_mean;compactness_mean;concavity_mean;concave points_mean;symmetry_mean;fractal_dimension_mean;radius_se;texture_se;perimeter_se;area_se;smoothness_se;compactness_se;concavity_se;concave points_se;symmetry_se;fractal_dimension_se;radius_worst;texture_worst;perimeter_worst;area_worst;smoothness_worst;compactness_worst;concavity_worst;concave points_worst;symmetry_worst;fractal_dimension_worst
When I try to make the id column the labels using the following code:
train_labels = data['id'].values
it doesn't work. Instead it shows this long error message:
KeyError Traceback (most recent call last)
C:\Users\win10\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: ';id'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-45-23e840b4d870> in <module>
----> 1 train_labels = data[';id'].values
C:\Users\win10\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2978 if self.columns.nlevels > 1:
2979 return self._getitem_multilevel(key)
-> 2980 indexer = self.columns.get_loc(key)
2981 if is_integer(indexer):
2982 indexer = [indexer]
C:\Users\win10\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'id'
So how can I get any specific column in my data set?
It seems that, at the very least, you've read the file using the wrong separator (sep). By default pd.read_csv uses , as the separator, but ; separators are visible in the output of data.head(). If you are using pd.read_csv, try passing sep=';'.
The code you are using for accessing a specific column (data['id']) seems otherwise correct.
I hope this helps!
The dataset is not comma-separated but semicolon-separated, so you have to pass the parameter sep=';' to the read_csv function; this will solve your problem.
data = pd.read_csv('file_name', sep = ';')
data['column_name']
You'll be able to fetch the column for this dataset.
I'm currently learning data analysis with pandas. I was practicing indexing and slicing data frames, and I successfully imported a CSV file named 'supermarkets.csv' using read_csv(). Now I want to slice the data frame, using the Address column (the most unique column in the file) as the index via the set_index() function, but I keep getting an error whenever I assign the result back to a variable. All of this is within Jupyter Notebook.
The Code:
import pandas
dframe = pandas.read_csv("supermarket.csv")
dframe.set_index("Address") #the outputted the dataframe with the new index
dframe = dframe.set_index("Address") #this is where the issue keeps coming up
The Error Message:
KeyError Traceback (most recent call last)
c:\program files\python35\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Address'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-16-469e201b9d49> in <module>
----> 1 dframe.set_index("Address")
c:\program files\python35\lib\site-packages\pandas\core\frame.py in set_index(self, keys, drop, append, inplace, verify_integrity)
4176 names.append(None)
4177 else:
-> 4178 level = frame[col]._values
4179 names.append(col)
4180 if drop:
c:\program files\python35\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
c:\program files\python35\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Address'
I've also tried this form, but to no avail: data.set_index('Address',inplace=True)
I have 30 CSV files, where each file has its own DataFrame (due to the requirements, I cannot merge the DataFrames). I want to have a dictionary where the key is the name of the CSV file and the value is the DataFrame itself. This is what I have for that:
import pandas as pd
import glob
import os
files = glob.glob('data\*.csv')
roster = {os.path.basename(fp).split('.')[0] : pd.read_csv(fp) for fp in files}
The CSV files have a column called 'Season' whose values are formatted like '2018-19' or '2017-18', and these values vary from file to file. I want to keep only the rows for seasons after 1980. With the help of jazrael on a previous question, I tried to use his suggestion, but I am running into a KeyError. From my understanding, that means I am using the wrong column name or the wrong key; however, as far as I can tell, both are correct. This is what my friend jazrael suggested:
dfs_dict = {k:v[v['Season'].str.extract('(\d{4})', expand=False).astype(float) > 1980]
for k, v in dfs_dict.items()}
And this is my error:
KeyError Traceback (most recent call last)
C:\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Season'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-2-8f59bae477f8> in <module>
1 league = {k:v[v['Season'].str.extract('(\d{4})', expand=False).astype(float) > 1980]
----> 2 for k, v in league.items()}
3
4
5 #BOS[BOS['Season'].str.split('-').str[0].astype(int) < 2017
<ipython-input-2-8f59bae477f8> in <dictcomp>(.0)
1 league = {k:v[v['Season'].str.extract('(\d{4})', expand=False).astype(float) > 1980]
----> 2 for k, v in league.items()}
3
4
5 #BOS[BOS['Season'].str.split('-').str[0].astype(int) < 2017
C:\Anaconda\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
C:\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Season'
I am quite new to Python, would appreciate it if anyone can explain what I am doing wrong :)
Running the pandas_datareader code throws an error on some stock symbols.
running the following code:
import pandas as pd
import pandas_datareader as dr
%matplotlib inline
df = dr.data.get_data_yahoo('FRE.DE',start='2018-10-1', end='2018-11-30')
throws the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/miniconda2/envs/py37/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Date'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-17-d55e44b68e87> in <module>
----> 1 df = dr.data.get_data_yahoo('FRE.DE',start='2018-10-1', end='2018-11-30')
2 df
~/miniconda2/envs/py37/lib/python3.7/site-packages/pandas_datareader/data.py in get_data_yahoo(*args, **kwargs)
68
69 def get_data_yahoo(*args, **kwargs):
---> 70 return YahooDailyReader(*args, **kwargs).read()
71
72
~/miniconda2/envs/py37/lib/python3.7/site-packages/pandas_datareader/base.py in read(self)
208 if isinstance(self.symbols, (compat.string_types, int)):
209 df = self._read_one_data(self.url,
--> 210 params=self._get_params(self.symbols))
211 # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
212 elif isinstance(self.symbols, DataFrame):
~/miniconda2/envs/py37/lib/python3.7/site-packages/pandas_datareader/yahoo/daily.py in _read_one_data(self, url, params)
140 prices.columns = [col.capitalize() for col in prices.columns]
141 prices['Date'] = to_datetime(
--> 142 to_datetime(prices['Date'], unit='s').dt.date)
143
144 if 'Data' in prices.columns:
~/miniconda2/envs/py37/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
~/miniconda2/envs/py37/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Date'
When I replace 'FRE.DE' with 'FRM.DE', which is a different ticker symbol, it works perfectly.
Thinking that maybe the data for the symbol does not exist, I went to the Yahoo Finance page:
https://de.finance.yahoo.com/quote/FRE.DE/history?p=FRE.DE
and the historical data is displayed there.
For me, your code actually works with FRE.DE but doesn't work with FRM.DE,
which is consistent with the fact that
this page:
https://de.finance.yahoo.com/quote/FRE.DE
is found
and this page:
https://de.finance.yahoo.com/quote/FRM.DE
is not found