Hi everyone! I'm going through this course and am having issues. The line I'm having problems with is
df[f'{ticker}_{i}d'] = (df[ticker].shift(-i) - df[ticker]) / df[ticker]
You can find it in the process_data_for_labels(ticker) function. Can anyone tell me what's going on? I copied his code exactly and am getting the same error.
import bs4 as bs
import requests
import pickle
import datetime as dt
import os
import pandas as pd
import pandas_datareader.data as web
import time
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
from collections import Counter
style.use('dark_background')
def save_sp500_tickers():
    resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text
        tickers.append(ticker.rstrip())
    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    print(tickers)
    return tickers
#save_sp500_tickers()
def get_data_from_yahoo(reload_sp500=False):
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)
    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')
    start = dt.datetime(2015, 1, 1)
    end = dt.datetime(2020, 7, 1)
    for ticker in tickers:
        if not os.path.exists(f'stock_dfs/{ticker}.csv'):
            if '.' in ticker:
                ticker = ticker.replace('.', '-')
            time.sleep(1)
            print(ticker)
            df = web.DataReader(ticker, 'yahoo', start, end)
            df.to_csv('stock_dfs/{}.csv'.format(ticker))
        else:
            print(f'Already have {ticker}')
#get_data_from_yahoo()
def compile_data():
    with open("sp500tickers.pickle", "rb") as f:
        tickers = pickle.load(f)
    main_df = pd.DataFrame()
    for count, ticker in enumerate(tickers):
        if '.' in ticker:
            ticker = ticker.replace('.', '-')
        df = pd.read_csv(f'stock_dfs/{ticker}.csv')
        df.set_index('Date', inplace=True)
        df.rename(columns={'Adj Close': ticker}, inplace=True)
        df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], axis=1, inplace=True)
        if main_df.empty:
            main_df = df
        else:
            main_df = main_df.join(df, how='outer')
        if count % 10 == 0:
            print(count)
    print(main_df.head())
    main_df.to_csv('sp500_joined_closes.csv')
#compile_data()
def visualize_data():
    df = pd.read_csv('sp500_joined_closes.csv')
    #df['AAPL'].plot()
    #plt.show()
    df_corr = df.corr()
    print(df_corr.head())
    data = df_corr.values
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    heatmap = ax.pcolor(data, cmap=plt.cm.RdYlGn)
    fig.colorbar(heatmap)
    ax.set_xticks(np.arange(data.shape[0]) + 0.5, minor=False)
    ax.set_yticks(np.arange(data.shape[1]) + 0.5, minor=False)
    ax.invert_yaxis()
    ax.xaxis.tick_top()
    column_labels = df_corr.columns
    row_labels = df_corr.index
    ax.set_xticklabels(column_labels)
    ax.set_yticklabels(row_labels)
    plt.xticks(rotation=90)
    heatmap.set_clim(-1, 1)
    plt.tight_layout()
    plt.show()
#visualize_data()
# Machine Learning
def process_data_for_labels(ticker):
    hm_days = 7
    df = pd.read_csv('sp500_joined_closes.csv', index_col=0)
    tickers = df.columns.values.tolist()
    df.fillna(0, inplace=True)
    for i in range(1, hm_days+1):
        df[f'{ticker}_{i}d'] = (df[ticker].shift(-i) - df[ticker]) / df[ticker]
    df.fillna(0, inplace=True)
    return tickers, df
def buy_sell_hold(*args):
    cols = [c for c in args]
    requirement = 0.2
    for col in cols:
        if col > requirement:
            return 1
        if col < -requirement:
            return -1
    return 0
def extract_featuresets(ticker):
    tickers, df = process_data_for_labels(ticker)
    end = [eval(f"df[f'{ticker}_{i}']") for i in range(1, 8)]
    df[f'{ticker}_target'] = list(map(
        buy_sell_hold,
        [exec(f"df[f'{ticker}_{i}']") for i in range(1, 8)]
    ))
    vals = df[f'{ticker}_target'].values.tolist()
    str_vals = [str(i) for i in vals]
    print('Data spread: ', Counter(str_vals))
    df.fillna(0, inplace=True)
    df = df.replace([np.inf, -np.inf], np.nan)
    df.dropna(inplace=True)
    df_vals = df[[ticker for ticker in tickers]].pct_change()
    df_vals = df_vals.replace([np.inf, -np.inf], 0)
    df_vals.fillna(0, inplace=True)
    X = df_vals.values
    y = df['{ticker}_target'].values
    return X, y, df

extract_featuresets('APPL')
Error:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2646, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1618, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1626, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'APPL'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "test.py", line 176, in <module>
extract_featuresets('APPL')
File "test.py", line 152, in extract_featuresets
tickers, df = process_data_for_labels(ticker)
File "test.py", line 132, in process_data_for_labels
df[f'{ticker}_{i}d'] = (df[ticker].shift(-i) - df[ticker]) / df[ticker]
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2800, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2648, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1618, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1626, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'APPL'
You've correctly identified where the problem begins, but you need to keep following the breadcrumbs.
The error says "KeyError: 'APPL'", where 'APPL' is one of the stock tickers; the tickers are the column names / keys of your stock price dataframe df (at least, that's what the program expects). In this instance, however, df doesn't contain a key / header called 'APPL'. Maybe something went wrong when the data was loaded, where pd.read_csv is used to read the CSV file? Or maybe the file itself is missing data?
Try opening a Python terminal and simply loading the CSV file. Is it what you (or the program) would expect?
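For example, a quick check along those lines (a sketch; the file name comes from the code above, and the ticker is whatever you passed to extract_featuresets):

import pandas as pd

# Load the joined closes file exactly as process_data_for_labels() does
df = pd.read_csv('sp500_joined_closes.csv', index_col=0)

# Is the ticker you asked for actually a column header?
print('APPL' in df.columns)
# List nearby candidates -- does anything look like the ticker you meant?
print([c for c in df.columns if c.startswith('A')][:10])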
Keep digging!
Related
Here is the mismatch error I keep getting. I'm inputting "202710".
Traceback (most recent call last):
File "nbastatsrecieveit.py", line 29, in <module>
df.columns = headers
File "C:\Users\*\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\generic.py", line 5149, in __setattr__
return object.__setattr__(self, name, value)
File "pandas\_libs\properties.pyx", line 66, in pandas._libs.properties.AxisProperty.__set__
File "C:\Users\*\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\generic.py", line 564, in _set_axis
self._mgr.set_axis(axis, labels)
File "C:\Users\*\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\internals\managers.py", line 226, in set_axis
raise ValueError(
ValueError: Length mismatch: Expected axis has 0 elements, new values have 24 elements
To be honest, I'm not sure how to go about fixing this problem, as it works with specific player IDs but not all of them. Here is the rest of my code:
from nba_api.stats.endpoints import shotchartdetail
import pandas as pd
import json
from openpyxl import Workbook

print('Player ID?')
playerid = input()
filename = str(playerid) + '.xlsx'

response = shotchartdetail.ShotChartDetail(
    team_id=0,
    context_measure_simple='FGA',
    #last_n_games = numGames,
    game_id_nullable='0041900403',
    player_id=playerid
)
content = json.loads(response.get_json())

# transform contents into dataframe
results = content['resultSets'][0]
headers = results['headers']
rows = results['rowSet']
df = pd.DataFrame(rows)
df.columns = headers

# write to excel file
df.to_excel(filename, index=False)
This is because your df is empty for ID 202710, so there are no columns to assign the headers to. Exception handling will resolve the issue here:
df = pd.DataFrame(rows)
try:
    df.columns = headers
except ValueError:
    # rowSet came back empty for this player, so there is nothing to label
    pass
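Alternatively, if you would rather skip the export entirely when the endpoint returns no rows (a sketch, assuming an empty rowSet is the only failure mode you care about):

if rows:
    df = pd.DataFrame(rows, columns=headers)
    df.to_excel(filename, index=False)
else:
    print('No shot chart data for player {}; skipping export.'.format(playerid))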
import bs4 as bs
import datetime as dt
import os
import pandas as pd
import pandas_datareader.data as web
import pickle
import requests
def save_sp500_tickers():
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text
        tickers.append(ticker)
    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    return tickers

# save_sp500_tickers()

def get_data_from_yahoo(reload_sp500=False):
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)
    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')
    start = dt.datetime(2010, 1, 1)
    end = dt.datetime.now()
    for ticker in tickers:
        # just in case your connection breaks, we'd like to save our progress!
        if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
            df = web.DataReader(ticker, 'yahoo', start, end)
            df.reset_index(inplace=True)
            df.set_index("Date", inplace=True)
            df = df.drop("Symbol", axis=1)
            df.to_csv('stock_dfs/{}.csv'.format(ticker))
        else:
            print('Already have {}'.format(ticker))

def compile_data():
    with open("sp500tickers.pickle", "rb") as f:
        tickers = pickle.load(f)
    main_df = pd.DataFrame()
    for count, ticker in enumerate(tickers):
        df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
        # ^ the error occurs here: it states "File stock_dfs/BRK.B.csv does not exist",
        # but that file was never downloaded / stored locally in the first place, so
        # why is this an issue? Full error at the bottom.
        df.set_index('Date', inplace=True)
        df.rename(columns={'Adj Close': ticker}, inplace=True)
        df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], axis=1, inplace=True)
        if main_df.empty:
            main_df = df
        else:
            main_df = main_df.join(df, how='outer')
        if count % 10 == 0:
            print(count)
    print(main_df.head())
    main_df.to_csv('sp500_joined_closes.csv')

compile_data()
pandas_datareader\compat\__init__.py:7: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  from pandas.util.testing import assert_frame_equal
0
10
20
30
40
50
60
Traceback (most recent call last):
File "C:\Users\Desktop\Python tutorials\Python finance Examples\py3tutorialSP500manip.py", line 75, in
compile_data()
File "C:\Users\Desktop\Python tutorials\Python finance Examples\py3tutorialSP500manip.py", line 57, in compile_data
df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
File "C:\Users\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\parsers.py", line 676, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\parsers.py", line 448, in _read
parser = TextFileReader(fp_or_buf, **kwds)
File "C:\Users\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\parsers.py", line 880, in init
self._make_engine(self.engine)
File "C:\Users\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\parsers.py", line 1114, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "C:\Users\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\parsers.py", line 1891, in init
self._reader = parsers.TextReader(src, **kwds)
File "pandas_libs\parsers.pyx", line 374, in pandas._libs.parsers.TextReader.cinit
File "pandas_libs\parsers.pyx", line 674, in pandas._libs.parsers.TextReader._setup_parser_source
FileNotFoundError: [Errno 2] File stock_dfs/BRK.B.csv does not exist: 'stock_dfs/BRK.B.csv'
I am following this tutorial:
https://pythonprogramming.net/combining-stock-prices-into-one-dataframe-python-programming-for-finance/
Right before the error occurs, a call is made to pd.read_csv():
for count, ticker in enumerate(tickers):
    df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
The error occurs when ticker is 'BRK.B' and the program attempts to read data from 'stock_dfs/BRK.B.csv'.
The error message is saying that there is no stock_dfs/BRK.B.csv file on your machine. This is puzzling, since this bit of code should have downloaded all the necessary files:
for ticker in tickers:
    # just in case your connection breaks, we'd like to save our progress!
    if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
        df = web.DataReader(ticker, 'yahoo', start, end)
        df.reset_index(inplace=True)
        df.set_index("Date", inplace=True)
        df = df.drop("Symbol", axis=1)
        df.to_csv('stock_dfs/{}.csv'.format(ticker))
    else:
        print('Already have {}'.format(ticker))
Make sure you ran the download code (directly above) in the same directory where you are running the read code (top). As a quick check, see if there is a folder called stock_dfs/ in your working directory. That folder should contain files like GOOGL.csv, FB.csv, and specifically BRK.B.csv.
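One more thing worth checking, since the very first snippet in this thread replaces '.' with '-' before downloading: Yahoo lists Berkshire as BRK-B rather than BRK.B, so if your download step renamed the ticker but compile_data() doesn't, the file on disk will be stock_dfs/BRK-B.csv while the reader looks for stock_dfs/BRK.B.csv. A minimal sketch of keeping the two steps in sync:

for count, ticker in enumerate(tickers):
    # mirror whatever renaming the download step applied to the ticker
    ticker = ticker.replace('.', '-')
    df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))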
The purpose of this script is to read a CSV file.
The file contains forex data.
The file has 7 columns (Date, Time, Open, High, Low, Close and Volume) and around 600k rows.
After parsing the date and time, the script will do some datetime calculations, such as extracting the month and day.
Then it will run some technical analysis using the TA-Lib library.
Here is the code:
import pandas as pd
import talib


class Data:
    def __init__(self):
        self.df = pd.DataFrame()
        self.names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
        self.open = self.df['Open'].astype(float)
        self.high = self.df['High'].astype(float)
        self.low = self.df['Low'].astype(float)
        self.close = self.df['Close'].astype(float)

    def file(self, file):
        self.df = pd.read_csv(file, names=self.names,
                              parse_dates={'Release Date': ['Date', 'Time']})
        return self.df

    def date(self):
        self.df['Release Date'] = pd.to_datetime(self.df['Release Date'])

    def year(self):
        self.df['year'] = pd.to_datetime(self.df['Release Date']).dt.year

    def month(self):
        self.df['month'] = pd.to_datetime(self.df['Release Date']).dt.month

    def day(self):
        self.df['day'] = pd.to_datetime(self.df['Release Date']).dt.day

    def dema(self):
        # DEMA - Double Exponential Moving Average
        self.df['DEMA'] = talib.DEMA(self.close, timeperiod=30)

    def ema(self):
        # EMA - Exponential Moving Average
        self.df['EMA'] = talib.EMA(self.close, timeperiod=30)

    def HT_TRENDLINE(self):
        # HT_TRENDLINE - Hilbert Transform - Instantaneous Trendline
        self.df['HT_TRENDLINE'] = talib.HT_TRENDLINE(self.close)

    def KAMA(self):
        # KAMA - Kaufman Adaptive Moving Average
        self.df['KAMA'] = talib.KAMA(self.close, timeperiod=30)

    def ma(self):
        # MA - Moving average
        self.df['MA'] = talib.MA(self.close, timeperiod=30, matype=0)

    def print(self):
        return print(self.df.head())


x = Data()
x.file(r"D:\Projects\Project Forex\USDJPY.csv")
x.print()
Here is the error:
Traceback (most recent call last):
File "C:\Users\Sayed\miniconda3\lib\site-packages\pandas\core\indexes\base.py", line 2646, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1619, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1627, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Open'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/Sayed/PycharmProjects/project/Technical Analysis.py", line 55, in <module>
x = Data()
File "C:/Users/Sayed/PycharmProjects/project/Technical Analysis.py", line 9, in __init__
self.open = self.df['Open'].astype(float)
File "C:\Users\Sayed\miniconda3\lib\site-packages\pandas\core\frame.py", line 2800, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\Sayed\miniconda3\lib\site-packages\pandas\core\indexes\base.py", line 2648, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1619, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1627, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Open'
In the __init__ function you are initializing an empty DataFrame without any columns, but one line later you are trying to convert the Open column of that DataFrame to float.
def __init__(self):
    self.df = pd.DataFrame()  # No columns
    self.names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
    self.open = self.df['Open'].astype(float)  # ERROR: 'Open' column does not exist
    self.high = self.df['High'].astype(float)
    self.low = self.df['Low'].astype(float)
    self.close = self.df['Close'].astype(float)
Change your __init__ function to this and it should work!
def __init__(self):
    self.names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
    self.df = pd.DataFrame(columns=self.names)  # Empty dataframe with the expected columns
    self.open = self.df['Open'].astype(float)  # Now the 'Open' column exists
    self.high = self.df['High'].astype(float)
    self.low = self.df['Low'].astype(float)
    self.close = self.df['Close'].astype(float)
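Note that self.open, self.high, self.low and self.close are snapshots taken in __init__, so they are still empty after file() loads the CSV. If the TA-Lib methods are meant to operate on the loaded data, one sketch (assuming that is the intent) is to refresh them inside file():

def file(self, file):
    self.df = pd.read_csv(file, names=self.names,
                          parse_dates={'Release Date': ['Date', 'Time']})
    # refresh the column snapshots so the TA-Lib methods see the loaded data
    self.open = self.df['Open'].astype(float)
    self.high = self.df['High'].astype(float)
    self.low = self.df['Low'].astype(float)
    self.close = self.df['Close'].astype(float)
    return self.df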
I am currently working on world religions data and hoping to organise a dataframe which gives me ['name of country', 'country most adhered religion', 'number of adherence']; however, I encountered an error message. Below is my code.
import pandas as pd
import geopandas
import matplotlib.pyplot as plt
import mapclassify
import pyproj
from pyproj import Proj
from matplotlib.patches import Ellipse, Polygon
import datetime
import numpy as np

countries = geopandas.read_file('../data/world/ne_admin_0_countries.geojson')
hse_size = pd.read_csv('../data/world/houseshold_size_2018.csv', skiprows=4, header=0)
rlgn_adhere = pd.read_csv('../data/world/WRP_national.csv', header=0)

religion_cat = []
rlgn_adhere_top = list(rlgn_adhere.columns.values)
for i in range(3, 38):
    religion_cat.append(rlgn_adhere_top[i])

country_rlgn_adhere = rlgn_adhere.groupby(['name'], as_index=False)
lastest_rlgn_adhere = country_rlgn_adhere['year'].max()
country_latest_adhere = lastest_rlgn_adhere.merge(rlgn_adhere, on=['year', 'name'], how='left')
col_latest_rlgn_pop = ['year', 'name'] + religion_cat
latest_rlgn_pop = country_latest_adhere[col_latest_rlgn_pop]

pop_rlgn = ''
pop_rlgn_cat_num = pd.DataFrame(columns=['name', 'Country Most Adhered Religion', 'Number of Adherence'])
for x in latest_rlgn_pop['name']:
    maximum = 0
    a = pd.DataFrame()
    a = latest_rlgn_pop[latest_rlgn_pop['name'] == x]
    for y in religion_cat:
        b = pd.Series([])
        b = a[str(y)]
        print(b[0])
        if np.invert(np.isnan(b[0])):
            b = int(b[0])
            if (b > maximum):
                maximum = b
                pop_rlgn = y
    a.insert(0, "Number of Adherence", maximum)
    a.insert(0, "Country Most Adhered Religion", pop_rlgn)
    pop_rlgn_cat_num = pop_rlgn_cat_num.append(a[['name', 'Number of Adherence', 'Country Most Adhered Religion']], sort=True)

latest_rlgn_pop = pd.merge(latest_rlgn_pop, pop_rlgn_cat_num, on=['name'])

country_hse_size = hse_size.groupby(['Country or area'], as_index=False)
latest_size = country_hse_size['Reference date (dd/mm/yyyy)'].max()
avg_hse_size = hse_size[['Country or area', 'Reference date (dd/mm/yyyy)', 'Average household size (number of members)']]
country_latest_size = latest_size.merge(avg_hse_size, on=['Country or area', 'Reference date (dd/mm/yyyy)'], how='left')
country_latest_size = country_latest_size.dropna()
country_latest_size_unique = country_latest_size.groupby(['Country or area'], as_index=False)
country_latest_size_unique = country_latest_size_unique['Average household size (number of members)'].mean()

countries = countries[['ADMIN', 'geometry']]
countries.columns = ['Country or area', 'geometry']
countries_household_size = countries.merge(country_latest_size_unique, on='Country or area', how='left')
In my nested for loop, when line 36, print(b[0]), runs the second time, this error message appears in the console:
Traceback (most recent call last):
File "C:\Users\USER\Anaconda3\envs\MaCT\lib\site-packages\pandas\core\series.py", line 1068, in __getitem__
result = self.index.get_value(self, key)
File "C:\Users\USER\Anaconda3\envs\MaCT\lib\site-packages\pandas\core\indexes\base.py", line 4730, in get_value
return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
File "pandas\_libs\index.pyx", line 80, in pandas._libs.index.IndexEngine.get_value
File "pandas\_libs\index.pyx", line 88, in pandas._libs.index.IndexEngine.get_value
File "pandas\_libs\index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 992, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 998, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 0
I haven't found a clue about this error message yet; could someone please help me with it? Thanks.
Here is the link to datasets I've been using:
Country_Household_Religions_Dataset
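For anyone digging into the KeyError: 0 above: plain [] indexing on a Series is label-based, so b[0] only works while the filtered slice happens to contain a row labelled 0; once the filter selects rows with other labels, the lookup fails. A minimal illustration (hypothetical values), with the positional alternative:

import pandas as pd

s = pd.Series([10, 20, 30], index=[5, 6, 7])  # row labels no longer start at 0
# print(s[0])    # raises KeyError: 0 -- there is no label 0 in the index
print(s.iloc[0])  # 10 -- iloc is positional, so the first row is always reachable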
I'm trying to extract the S&P 500 list from Wikipedia; however, when I run the code, it only extracts 90 companies and gives this giant error:
Traceback (most recent call last):
File "D:/Python projects/Pandas_1/S&P500 Tickers.py", line 46, in <module>
get_data_from_yahoo()
File "D:/Python projects/Pandas_1/S&P500 Tickers.py", line 37, in get_data_from_yahoo
df = web.DataReader(ticker, 'yahoo', start, end)
File "C:\Users\UserX\venv\lib\site-packages\pandas_datareader\data.py", line 310, in DataReader
session=session).read()
File "C:\Users\UserX\venv\lib\site-packages\pandas_datareader\base.py", line 210, in read
params=self._get_params(self.symbols))
File "C:\Users\UserX\venv\lib\site-packages\pandas_datareader\yahoo\daily.py", line 129, in _read_one_data
resp = self._get_response(url, params=params)
File "C:\Users\UserX\venv\lib\site-packages\pandas_datareader\base.py", line 155, in _get_response
raise RemoteDataError(msg)
pandas_datareader._utils.RemoteDataError: Unable to read URL: https://finance.yahoo.com/quote/CBOE: CBOE/history?period1=1262311200&period2=1547776799&interval=1d&frequency=1d&filter=history
And the response code is too large to fit into this post.
I am a newbie, so I don't know what to try. My code is:
import bs4 as bs
import datetime as dt
import os
import pandas_datareader.data as web
import pickle
import requests
def save_sp500_tickers():
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text
        tickers.append(ticker)
    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    return tickers

def get_data_from_yahoo(reload_sp500=False):
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)
    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')
    start = dt.datetime(2010, 1, 1)
    end = dt.datetime.now()
    for ticker in tickers:
        if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
            df = web.DataReader(ticker, 'yahoo', start, end)
            df.reset_index(inplace=True)
            df.set_index("Date", inplace=True)
            df = df.drop("Symbol", axis=1)
            df.to_csv('stock_dfs/{}.csv'.format(ticker))
        else:
            print('Already have {}'.format(ticker))

get_data_from_yahoo()
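A detail worth noticing in the failing URL: the symbol is rendered as "CBOE: CBOE", which suggests the scraped ticker still carries whitespace (the trailing newline from the Wikipedia table cell), so Yahoo receives a malformed symbol. The first snippet in this thread handles this with rstrip(); a sketch of the same fix here (strip() assumed equivalent for this purpose):

for row in table.findAll('tr')[1:]:
    ticker = row.findAll('td')[0].text.strip()  # drop the trailing '\n' from the cell text
    tickers.append(ticker)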