data.get_data_yahoo throws error on some ticker symbols - python

Running pandas_datareader code throws an error for some stock symbols.
Running the following code:
import pandas as pd
import pandas_datareader as dr
%matplotlib inline
df = dr.data.get_data_yahoo('FRE.DE',start='2018-10-1', end='2018-11-30')
throws the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/miniconda2/envs/py37/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Date'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-17-d55e44b68e87> in <module>
----> 1 df = dr.data.get_data_yahoo('FRE.DE',start='2018-10-1', end='2018-11-30')
2 df
~/miniconda2/envs/py37/lib/python3.7/site-packages/pandas_datareader/data.py in get_data_yahoo(*args, **kwargs)
68
69 def get_data_yahoo(*args, **kwargs):
---> 70 return YahooDailyReader(*args, **kwargs).read()
71
72
~/miniconda2/envs/py37/lib/python3.7/site-packages/pandas_datareader/base.py in read(self)
208 if isinstance(self.symbols, (compat.string_types, int)):
209 df = self._read_one_data(self.url,
--> 210 params=self._get_params(self.symbols))
211 # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
212 elif isinstance(self.symbols, DataFrame):
~/miniconda2/envs/py37/lib/python3.7/site-packages/pandas_datareader/yahoo/daily.py in _read_one_data(self, url, params)
140 prices.columns = [col.capitalize() for col in prices.columns]
141 prices['Date'] = to_datetime(
--> 142 to_datetime(prices['Date'], unit='s').dt.date)
143
144 if 'Data' in prices.columns:
~/miniconda2/envs/py37/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
~/miniconda2/envs/py37/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Date'
When I replace 'FRE.DE' with 'FRM.DE', which is a different ticker symbol, it works perfectly.
Thinking that maybe the data for the symbol does not exist, I went to the Yahoo Finance page:
https://de.finance.yahoo.com/quote/FRE.DE/history?p=FRE.DE
and there the historical data is displayed.

For me, your code actually works with FRE.DE but doesn't work with FRM.DE,
which is consistent with the fact that
this page:
https://de.finance.yahoo.com/quote/FRE.DE
is found
and this page:
https://de.finance.yahoo.com/quote/FRM.DE
is not found

Related

KeyError in pandas to_datetime

I am loading a CSV file which has a datetime column. When I try to convert it using df['times'] = pd.to_datetime(df['times'], format='%Y-%m-%d %H:%M:%S'), it raises a KeyError. Please also see the Jupyter notebook screenshot.
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/Software/Anytrader/venv/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'times'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-6-4f05a9476911> in <module>
----> 1 df['times'] = pd.to_datetime(df['times'], format='%Y-%m-%d %H:%M:%S')
2 df.dtypes
~/Software/Anytrader/venv/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
~/Software/Anytrader/venv/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'times'
From your screenshot, it looks like your column names include quotes. Try this:
df["'times'"] = pd.to_datetime(df["'times'"], format='%Y-%m-%d %H:%M:%S')
Alternatively (and probably better), you could strip the quotes from your column names right after loading the data from the file.
df.columns = df.columns.str.strip("'")

Error in sklearn : grid_ridge_m.cv_results_

I am using scikit-learn version 0.22.1, and I am getting an error with both grid_scores_ (0.18) and cv_results_ (0.18+). Since I have sklearn 0.22, I used cv_results_.
fig,ax= plt.subplots()
fig.set_size_inches(12,5)
#df = pd.DataFrame(grid_ridge_m.grid_scores_)
df = pd.DataFrame(grid_ridge_m.cv_results_)
df["alpha"] = df["parameters"].apply(lambda x:x["alpha"])
df["rmsle"] = df["mean_validation_score"].apply(lambda x:-x)
sn.pointplot(data=df,x="alpha",y="rmsle",ax=ax)
ERROR: when I use grid_ridge_m.cv_results_, I get the error below:
KeyError Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'parameters'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-46-69e67dace19f> in <module>
17 #df = pd.DataFrame(grid_ridge_m.grid_scores_)
18 df = pd.DataFrame(grid_ridge_m.cv_results_)
---> 19 df["alpha"] = df["parameters"].apply(lambda x:x["alpha"])
20 df["rmsle"] = df["mean_validation_score"].apply(lambda x:-x)
21 sn.pointplot(data=df,x="alpha",y="rmsle",ax=ax)
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\frame.py in __getitem__(self, key)
2993 if self.columns.nlevels > 1:
2994 return self._getitem_multilevel(key)
-> 2995 indexer = self.columns.get_loc(key)
2996 if is_integer(indexer):
2997 indexer = [indexer]
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'parameters'

Key Error : 'image' while adding new column to dataframe

I am doing a CNN project on Google Colab and I have uploaded the image dataset to Google Drive. After reading the CSV file for the labels, I created a DataFrame whose first five rows are shown below:
image level
0 10_left 0
1 10_right 0
2 13_left 0
3 13_right 0
4 15_left 1
Now I need to create a column 'path' which contains the path of each image.
base_image_dir = 'My Drive/Fist500'
import os
df['path'] = df['image'].map(lambda x: os.path.join(base_image_dir,'{}.jpg'.format(x)))
However, on running this I get the following error:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2645 try:
-> 2646 return self._engine.get_loc(key)
2647 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'image'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
2 frames
<ipython-input-72-cacf7c6ca99e> in <module>()
1 base_image_dir = 'My Drive/Fist500'
2 import os
----> 3 df['path'] = df['image'].map(lambda x: os.path.join(base_image_dir,'{}.jpg'.format(x)))
4
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __getitem__(self, key)
2798 if self.columns.nlevels > 1:
2799 return self._getitem_multilevel(key)
-> 2800 indexer = self.columns.get_loc(key)
2801 if is_integer(indexer):
2802 indexer = [indexer]
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'image'

KeyError: 'Message' in Jupyter Notebook

I am new to machine learning and I am facing this issue.
I have uploaded a dataset with two columns with the headings 'Message' and 'Priority'.
When I run this command, I get this:
'df.columns'
'Index(['Message\tPriority'], dtype='object')'
But, when I run this command, I get the following error:
X = df['Message']
ylabels = df['Priority']
KeyError Traceback (most recent call last)
D:\anna\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Message'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-72-238deec7e797> in <module>
1 # Features and Labels
----> 2 X = df['Message']
3 ylabels = df['Priority']
D:\anna\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
D:\anna\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Message'
I will guess.
The line Index(['Message\tPriority'], dtype='object') shows that this data uses a tab (\t) as the separator, but read_csv() uses a comma (,) as the separator by default, so you have to add the option sep="\t":
df = pd.read_csv(filename, sep="\t")

Dict 'Key Error' when formatting dictionary in Python

I have 30 CSV files where each file has its own DataFrame (due to the requirements, I cannot merge the DataFrames). I want to have a dictionary where the key is the name of the CSV file and the value is the DataFrame itself. This is what I have for that:
import pandas as pd
import glob
import os
files = glob.glob('data\*.csv')
roster = {os.path.basename(fp).split('.')[0] : pd.read_csv(fp) for fp in files}
The CSV files have a column called 'Season' where the format is like this: '2018-19', '2017-18', and these values vary from file to file. I want to take only the rows that are after 1980. With the help of jazrael from a previous question, I was able to use his suggestion. However, I am running into a KeyError. From my understanding, that means I am using the wrong column name or the wrong key. However, both of those are correct. This is what my friend jazrael suggested:
dfs_dict = {k:v[v['Season'].str.extract('(\d{4})', expand=False).astype(float) > 1980]
for k, v in dfs_dict.items()}
And this is my error:
KeyError Traceback (most recent call last)
C:\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Season'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-2-8f59bae477f8> in <module>
1 league = {k:v[v['Season'].str.extract('(\d{4})', expand=False).astype(float) > 1980]
----> 2 for k, v in league.items()}
3
4
5 #BOS[BOS['Season'].str.split('-').str[0].astype(int) < 2017
<ipython-input-2-8f59bae477f8> in <dictcomp>(.0)
1 league = {k:v[v['Season'].str.extract('(\d{4})', expand=False).astype(float) > 1980]
----> 2 for k, v in league.items()}
3
4
5 #BOS[BOS['Season'].str.split('-').str[0].astype(int) < 2017
C:\Anaconda\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
C:\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Season'
I am quite new to Python, would appreciate it if anyone can explain what I am doing wrong :)

Categories