Exception has occurred: KeyError - python

So I'm attempting to use python to scan an excel file of stock data and help screen it. The following error is occurring however. It should ask what file you want to screen, and then apply a series of conditions to determine which stocks meet the screening parameters. Then save it as another spreadsheet with what passed the screens.
Exception has occurred: KeyError
'Symbol'
File "D:\Program Files\Projects\TrendTemplate.py", line 27, in <module>
stock=str(stocklist["Symbol"][i])
Code:
import pandas as pd
from pandas_datareader import data as pdr
import yfinance as yf
from tkinter import Tk
from tkinter.filedialog import askopenfilename
import os
from pandas import ExcelWriter
yf.pdr_override()
start =dt.datetime(2017,12,1)
now = dt.datetime.now()
root = Tk()
ftypes = [(".xlsm","*.xlsx",".xls")]
ttl = "Title"
dir1 = 'C:\\'
filePath = askopenfilename(filetypes = ftypes, initialdir = dir1, title = ttl)
stocklist = pd.read_excel(filePath)
#stocklist=stocklist.head()
exportList= pd.DataFrame(columns=['Stock', "RS_Rating", "50 Day MA", "150 Day Ma", "200 Day MA", "52 Week Low", "52 week High"])
for i in stocklist.index:
stock=str(stocklist["Symbol"][i])
RS_Rating=stocklist["RS Rating"][i]
try:
df = pdr.get_data_yahoo(stock, start, now)
smaUsed=[50,150,200]
for x in smaUsed:
sma=x
df["SMA_"+str(sma)]=round(df.iloc[:,4].rolling(window=sma).mean(),2)
currentClose=df["Adj Close"][-1]
moving_average_50=df["SMA_50"][-1]
moving_average_150=df["SMA_150"][-1]
moving_average_200=df["SMA_200"][-1]
low_of_52week=min(df["Adj Close"][-260:])
high_of_52week=max(df["Adj Close"][-260:])
try:
moving_average_200_20 = df["SMA_200"][-20]
except Exception:
moving_average_200_20=0
#Condition 1: Current Price > 150 SMA and > 200 SMA
if(currentClose>moving_average_150>moving_average_200):
cond_1=True
else:
cond_1=False
#Condition 2: 150 SMA and > 200 SMA
if(moving_average_150>moving_average_200):
cond_2=True
else:
cond_2=False
#Condition 3: 200 SMA trending up for at least 1 month (ideally 4-5 months)
if(moving_average_200>moving_average_200_20):
cond_3=True
else:
cond_3=False
#Condition 4: 50 SMA> 150 SMA and 50 SMA> 200 SMA
if(moving_average_50>moving_average_150>moving_average_200):
#print("Condition 4 met")
cond_4=True
else:
#print("Condition 4 not met")
cond_4=False
#Condition 5: Current Price > 50 SMA
if(currentClose>moving_average_50):
cond_5=True
else:
cond_5=False
#Condition 6: Current Price is at least 30% above 52 week low (Many of the best are up 100-300% before coming out of consolidation)
if(currentClose>=(1.3*low_of_52week)):
cond_6=True
else:
cond_6=False
#Condition 7: Current Price is within 25% of 52 week high
if(currentClose>=(.75*high_of_52week)):
cond_7=True
else:
cond_7=False
#Condition 8: IBD RS rating >70 and the higher the better
if(RS_Rating>70):
cond_8=True
else:
cond_8=False
if(cond_1 and cond_2 and cond_3 and cond_4 and cond_5 and cond_6 and cond_7 and cond_8):
exportList = exportList.append({'Stock': stock, "RS_Rating": RS_Rating, "50 Day MA": moving_average_50, "150 Day Ma": moving_average_150, "200 Day MA": moving_average_200, "52 Week Low": low_of_52week, "52 week High": high_of_52week}, ignore_index=True)
except Exception:
print("No data on "+stock)
print(exportList)
newFile=os.path.dirname(filePath)+"/ScreenOutputTrendTemplate.xlsx"
writer= ExcelWriter(newFile)
exportList.to_excel(writer,"Sheet1")
writer.save()

Related

Python - error importing Nasdaq stock list

Im preparing a python code , to screen stocks from the SP500 , DOW and Nasdaq.
SP500 and DOW importing data stocks is working properly , but when I try to import Nasdaq always get similar error, related to timestamp.
See below:
My code:
import talib
from yahoo_fin.stock_info import get_data
import yahoo_fin.stock_info as si
from datetime import datetime
list = si.tickers_nasdaq()
# Get current date and time
now = datetime.now().strftime("%m_%d_%Y_%I_%M_%S")
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d')
# Create file to save results
f = open(f'C:/Users/fco_j/OneDrive/Escritorio/CHAT GPT/Python/Reports/dow_results_{now}.csv', 'w')
# print table header to file
f.write('Ticker, ClosePrice, SMA200, SMA20, RSI, RelVol\n')
# Define cache_data function
def cache_data(data, stock):
data.to_pickle(f'C:/Users/fco_j/OneDrive/Escritorio/CHAT GPT/Python/Pickle/{stock}.pkl')
for stock in list:
# Download historical data for past year
data = si.get_data(stock, start_date=start_date, end_date=end_date)
last_price = data["close"][-1]
# Get 150 and 20 simple moving averages using Talib
sma150 = talib.SMA(data['close'], timeperiod=150)[-1]
sma20 = talib.SMA(data['close'], timeperiod=20)[-1]
rsi = talib.RSI(data['close'], timeperiod=14)
# Calculate Relative Volume
rel_vol = data['volume'] / talib.SMA(data['volume'].values.astype(float), timeperiod = 50)
# Cache data
cache_data(data, stock)
# Filter stocks with relative volume (time period 20) over 1
if last_price > sma150 and last_price > sma20 and rsi[-1] > 50 and rel_vol[-1] > 1:
# Print results to file
f.write(f"{stock},{last_price},{sma150},{sma20},{rsi[-1]},{rel_vol[-1]}\n")
f.close()
The error:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_11208/2663596324.py in
26 for stock in dow_list:
27 # Download historical data for past year
---> 28 data = si.get_data(stock, start_date=start_date, end_date=end_date)
29 last_price = data["close"][-1]
30 # Get 150 and 20 simple moving averages using Talib
~\anaconda3\envs\PyFinance\lib\site-packages\yahoo_fin\stock_info.py in get_data(ticker, start_date, end_date, index_as_date, interval, headers)
98
99 # get the date info
--> 100 temp_time = data["chart"]["result"][0]["timestamp"]
101
102 if interval != "1m":
KeyError: 'timestamp'
The code is working with si.tickers_dow() and si.tickers_sp500() , but not with si.tickers_nasdaq() .
Not sure if a dataframe issue.

How to handle invalid date string input

My project is relatively straight forward. I am attempting to create a web-scraping tool that retrieves a random event from any given wikipedia article for a given date. The format of the URL is: url = f"https://en.wikipedia.org/wiki/{month}_{day}" where the month is the full name of the month followed by the day.
What I'm trying to achieve:
What I'm trying to achieve specifically here is that if an invalid date such as June 31 or Feb 30 were input, then the function below stops and returns a Please provide a valid date as its output without an error message.
Attempted Solution:
I've tried this with an if statement mapping a set of months to a set of dates but it's pretty wonky, as shown bellow:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
month = 'January'
day = '99'
url = f"https://en.wikipedia.org/wiki/{month}_{day}"
thirty = [*range(1,31)]
thirty = [str(x) for x in thirty]
thirty_one = [*range(1,32)]
thirty_one = [str(x) for x in thirty_one]
twenty_nine = [*range(1,30)]
twenty_nine = [str(x) for x in twenty_nine]
soup = BeautifulSoup(requests.get(url).content, "html.parser")
def wikiscraper():
events = []
if (month == set(['April','June','September','November']) and day != set(thirty))| \
(month == set(['January','March','May','July','August','October','December']) and day != set(thirty_one))| \
(month == set(['February']) and day != set(twenty_nine)):
return print("Please provide a valid date")
else:
for li in soup.select("h3 + ul > li"):
if (h2 := li.find_previous("h2")) and (h2.find(id="Events")):
date, event = li.text.replace("–", "-").split(" - ", maxsplit=1)
events.append((date, event))
events = pd.DataFrame(events)
cols = ['year','event']
events = pd.DataFrame(events)
events.columns = cols
pd.options.display.max_colwidth = 300
events['combined'] = 'On this date in the year'+' '+events.year+' '+events.event
events = events[['combined']]
return events.sample()
wikiscraper()
which returns
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-130-b89d9001cf84> in <module>
----> 1 wikiscraper()
<ipython-input-129-2c547a995093> in wikiscraper()
18 cols = ['year','event']
19 events = pd.DataFrame(events)
---> 20 events.columns = cols
21
22 pd.options.display.max_colwidth = 300
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in __setattr__(self, name, value)
5150 try:
5151 object.__getattribute__(self, name)
-> 5152 return object.__setattr__(self, name, value)
5153 except AttributeError:
5154 pass
pandas/_libs/properties.pyx in pandas._libs.properties.AxisProperty.__set__()
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in _set_axis(self, axis, labels)
562 def _set_axis(self, axis: int, labels: Index) -> None:
563 labels = ensure_index(labels)
--> 564 self._mgr.set_axis(axis, labels)
565 self._clear_item_cache()
566
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/internals/managers.py in set_axis(self, axis, new_labels)
224
225 if new_len != old_len:
--> 226 raise ValueError(
227 f"Length mismatch: Expected axis has {old_len} elements, new "
228 f"values have {new_len} elements"
ValueError: Length mismatch: Expected axis has 0 elements, new values have 2 elements
I also tried it with assert which works, but I want to keep it clean without an assertion error and just have a printed output requesting a valid date. The if statement I'm sure is not a very "pythonic" way of doing it either, although getting to run with the desired output is the bigger priority.
My ultimate goal is to simply get the function to stop and Please provide a valid date if the string input is not compatible with real dates.
edit. The solution was simple
if (month in set(['April','June','September','November']) and day not in set(thirty))| \
(month in set(['January','March','May','July','August','October','December']) and day not in set(thirty_one))| \
(month in set(['February']) and day not in set(twenty_nine)):
return print("Please provide a valid date")
just use in and not == User Timus to the rescue on a dumb mistake.

List index out of range error when using Pandas and Yahoo_fin

This is a modified version of a program from a tutorial that extracts data from all of the stocks in the S&P 500 and picks stocks that match the criteria you specify.
The issue is that when I run the program List index out of range [stock symbol] pops up and those stocks are skipped and aren't added to the final CSV file.
Example:
list index out of range for ABMD
list index out of range for ABT
list index out of range for ADBE
list index out of range for ADI
I'm not really sure what the issue is, I would greatly appreciate it if someone would explain it to me! Also, I am not applying any of the specifying criteria yet and am just trying to get all of the stock data into the CSV file. Make sure to create a database named stock_data if you try the program. Thanks!
My code:
import pandas_datareader as web
import pandas as pd
from yahoo_fin import stock_info as si
import datetime as dt
dow_list = si.tickers_dow()
sp_list = si.tickers_sp500()
tickers = sp_list
'''tickers = list(set(tickers))
tickers.sort()'''
start = dt.datetime.now() - dt.timedelta(days=365)
end = dt.datetime.now()
sp500_df = web.DataReader('^GSPC', 'yahoo', start, end)
sp500_df['Pct Change'] = sp500_df['Adj Close'].pct_change()
sp500_return = (sp500_df['Pct Change'] + 1).cumprod()[-1]
return_list = []
final_df = pd.DataFrame(columns=['Ticker', 'Latest_Price', 'Score', 'PE_Ratio', 'PEG_Ratio', 'SMA_150', 'SMA_200', '52_Week_Low', '52_Week_High'])
counter = 0
for ticker in tickers:
df = web.DataReader(ticker, 'yahoo', start, end)
df.to_csv(f'stock_data/{ticker}.csv')
df['Pct Change'] = df['Adj Close'].pct_change()
stock_return = (df['Pct Change'] + 1).cumprod()[-1]
returns_compared = round((stock_return / sp500_return), 2)
return_list.append(returns_compared)
counter += 1
if counter == 100:
break
best_performers = pd.DataFrame(list(zip(tickers, return_list)), columns=['Ticker', 'Returns Compared'])
best_performers['Score'] = best_performers['Returns Compared'].rank(pct=True) * 100
best_performers = best_performers[best_performers['Score'] >= best_performers['Score'].quantile(0)] #picks stocks in top 25 percentile
for ticker in best_performers['Ticker']:
try:
df = pd.read_csv(f'stock_data/{ticker}.csv', index_col=0)
moving_averages = [150, 200]
for ma in moving_averages:
df['SMA_' + str(ma)] = round(df['Adj Close'].rolling(window=ma).mean(), 2)
latest_price = df['Adj Close'][-1]
pe_ratio = float(si.get_quote_table(ticker)['PE Ratio (TTM)'])
peg_ratio = float(si.get_stats_valuation(ticker)[1][4])
moving_average_150 = df['SMA_150'][-1]
moving_average_200 = df['SMA_200'][-1]
low_52week = round(min(df['Low'][-(52*5):]), 2)
high_52week = round(min(df['High'][-(52 * 5):]), 2)
score = round(best_performers[best_performers['Ticker'] == ticker]['Score'].tolist()[0])
condition_1 = latest_price > moving_average_150 > moving_average_200
condition_2 = latest_price >= (1.3 * low_52week)
condition_3 = latest_price >= (0.75 * high_52week)
condition_4 = pe_ratio < 25
condition_5 = peg_ratio < 2
final_df = final_df.append({'Ticker': ticker,
'Latest_Price': latest_price,
'Score': score,
'PE_Ratio': pe_ratio,
'PEG_Ratio': peg_ratio,
'SMA_150': moving_average_150,
'SMA_200': moving_average_200,
'52_Week_Low': low_52week,
'52_Week_High': high_52week}, ignore_index=True)
except Exception as e:
print(f"{e} for {ticker}")
final_df.sort_values(by='Score', ascending=False)
pd.set_option('display.max_columns', 10)
print(final_df)
final_df.to_csv('final.csv')
I have done the error shooting on your behalf. As a conclusion, I see that you have not checked the contents of the acquisition of the individual indicator data.
They are being added to the dictionary format and empty data frames as they are in index and named series. I believe that is the root cause of the error.
Specifying the last data and retrieving the values
iloc is not used.
52*5 lookbacks for 253 data
In addition, when additional indicators are acquired for the acquired issue data, there are cases where they can be acquired for the same issue, and cases where they cannot. (The cause is unknown.) Therefore, it may be necessary to change the method of processing pe_ratio and peg_ratio after obtaining them in advance.
for ticker in best_performers['Ticker']:
#print(ticker)
try:
df = pd.read_csv(f'stock_data/{ticker}.csv')#, index_col=0
moving_averages = [150, 200]
for ma in moving_averages:
df['SMA_' + str(ma)] = round(df['Adj Close'].rolling(window=ma).mean(), 2)
latest_price = df['Adj Close'][-1:].values[0]
pe_ratio = float(si.get_quote_table(ticker)['PE Ratio (TTM)'])
moving_average_150 = df['SMA_150'][-1:].values[0]
moving_average_200 = df['SMA_200'][-1:].values[0]
low_52week = round(min(df['Low'][-(52*1):]), 2)
high_52week = round(min(df['High'][-(52*1):]), 2)
#print(low_52week, high_52week)
score = round(best_performers[best_performers['Ticker'] == ticker]['Score'].tolist()[0])
#print(score)
#print(ticker, latest_price,score,pe_ratio,moving_average_200,low_52week,high_52week)
final_df = final_df.append({'Ticker': ticker,
'Latest_Price': latest_price,
'Score': score,
'PE_Ratio': pe_ratio,
'SMA_150': moving_average_150,
'SMA_200': moving_average_200,
'52_Week_Low': low_52week,
'52_Week_High': high_52week}, ignore_index=True)
#print(final_df)
except Exception as e:
print(f"{e} for {ticker}")
final_df
Ticker Latest_Price Score PE_Ratio SMA_150 SMA_200 52_Week_Low 52_Week_High
0 A 123.839996 40 31.42 147.26 150.31 123.06 126.75
1 AAP 218.250000 70 22.23 220.66 216.64 190.79 202.04
2 AAPL 165.070007 80 29.42 161.85 158.24 150.10 154.12
3 ABC 161.899994 90 21.91 132.94 129.33 132.00 137.79
4 ADBE 425.470001 10 42.46 552.19 571.99 407.94 422.38
Note
Some stocks are missing because additional indicators could not be obtained.
(tickers = sp_list[:10] tested on the first 10)

How to make stock Register with Pandas?

I have two pandas Dataframe of a Location which at some point receive at dispatch certain Item.
#Item Received 'tolo'
ID Date Challan No From Location To Location Item Quantity Remark
7 2021-09-16 124 Admin dkl-kunjakant pipe 500 hji
10 2021-09-01 345 Admin dkl-kunjakant pipe 1000 ert
#Item Dispatched 'frlo'
ID Date Challan No From Location To Location Item Quantity Remark
3 2021-09-01 236 dkl-kunjakant Dkl-deulasahi pipe 145 asa
8 2021-09-24 10 dkl-kunjakant Dkl-deulasahi pipe 50 hji
9 2021-09-23 540 dkl-kunjakant Dkl-deulasahi pipe 40 rty
My objective is to calculate closing Balance at the end of certain date.
To calculate closing balance the formula is simple
Previous day Clsoing Bal + Received - Dispatched = Current day closing balance
For this my code have to iterate through each row of dataframe and calculate closing balance of each day. my code:
frames = [tolo, frlo]
result = pd.concat(frames)
result['Date'] = pd.to_datetime(result.Date)
result.sort_values(by='Date')
main_columns = ['Date', 'Opening Stock', 'Received', 'Total', 'Dispatched', 'Closing Balance', 'Challan No', 'Remarks']
main_df = pd.DataFrame(columns=main_columns)
for col in main_df.columns:
main_df[col].values[:] = 0
main_df['Date'] = result['Date']
main_df['Challan No'] = result['Challan No']
for _, row in result.iterrows():
# print(row['c1'], row['c2'])
# main_df['Date'] = row['Date']
if row['To Location'] == godown_name:
main_df['Received'] = row['Quantity']
main_df['Remarks'] = row['To Location']
elif row['From Location'] == godown_name:
main_df['Dispatched'] = row['Quantity']
main_df['Remarks'] = row['From Location']
for _, row in main_df.iterrows():
row['Opening Stock'] = row['Closing Balance'].shift()
row['Total'] = row['Opening Stock'] + row['Received']
row['Closing Balance'] = row['Total'] - row['Dispatched']
print(main_df.head())
But by iterating through rows it throws me Attribute error in the pandas shift function.
You have a circular dependency: Opening Stock is dependent on Closing Stock, and Stock Stock is in turn dependent on Opening Stock. This is unsolvable. You have to calculate one variable another way.
Also you can replaced a lot of your code with vectorized code:. Try this:
main_df = pd.concat([tolo, frlo])
main_df['Date'] = pd.to_datetime(main_df['Date'])
main_df.sort_values('Date', inplace=True)
godown_name = 'dkl-kunjakant' # I may get this wrong
cond = main_df['To Location'] == godown_name
main_df['Received'] = np.where(cond, main_df['Quantity'], 0)
main_df['Dispatched'] = np.where(cond, 0, main_df['Quantity'])
main_df['Remarks'] = np.where(cond, main_df['To Location'], main_df['From Location'])
main_df['Closing Balance'] = (main_df['Received'] - main_df['Dispatched']).cumsum()
main_df['Opening Stock'] = main_df['Closing Balance'].shift(fill_value=0)
main_df['Total'] = main_df['Opening Stock'] + main_df['Received']

Improving a stock market algorithm

I am trying to make this code look more attractive for potential employers that view it on my GitHub account. The code essentially loops through a CSV file and searches each symbol with the yfinance wrapper for the Yahoo-Finance API. It makes a few checks about the stock and decides whether it is a suitable investment. There are many try except clauses since API can return empty fields in the pandas dataframe. Currently I think it can be improved since it has multiple nested if statements with many try except statements. All feedback is greatly appreciated.
import yfinance as yf
import pandas as pd
import openpyxl
import csv
import math
import traceback
# Not a penny stock
# Earnings increase of at least 33% over 10 years using 3 year averages - 10% over 4 years since the API only contains the most recent 4 years
# Current price no more than 1.5x book value per share
# P/E ratio <= 15
# Long term debt no more than 110% current assets
# Current assets 1.5x current liabilities
symbol_array = []
failed_search = []
with open('companylist.csv') as file:
reader = csv.reader(file)
ticker_data = iter(reader) # skip the first value since it is the header
next(ticker_data)
for row in ticker_data:
ticker = row[0]
print('Searching: ', ticker)
try:
try:
company = yf.Ticker(ticker)
company_info = company.info
except:
print('Not a company')
continue # skip the ticker since it is not a company or the API doesn't have any information about the security
company_balance_sheet = company.balance_sheet
company_earnings = company.earnings
if company_balance_sheet.empty or company_earnings.empty:
continue # if balance sheets or earnings reports are not available, skip the search
column_date = company.balance_sheet.columns[0] # latest date on balance sheet to take data from
current_assets = company.balance_sheet.at['Total Current Assets', column_date]
try: # previous close price can be under 'previousClose' or 'regularMarketPrice' in company_info
current_price = company_info['previousClose']
except:
current_price = company_info['regularMarketPrice']
if current_price >= 10: # check if stock is penny stock
try:
long_term_debt = company.balance_sheet.at['Long Term Debt', column_date]
if math.isnan(long_term_debt):
long_term_debt = 0
except:
long_term_debt=0
if long_term_debt < (current_assets * 1.1):
current_liabilities = company.balance_sheet.at['Total Current Liabilities', column_date]
if current_liabilities < (1.5 * current_assets):
try:
pe_ratio = company_info['trailingPE'] # check if P/E ratio is available, assign pe_ratio 0 if it is not
except:
pe_ratio = 0
if pe_ratio <= 15:
try:
book_value = company_info['bookValue']
if type(book_value) != float: # book_value can be "None" in the company_info object
book_value = 0
except:
book_value = 0
if current_price < (book_value*1.5):
earnings_first = company.earnings.iat[0, 1]
earnings_last = company.earnings.iat[len(company.earnings)-1, 1]
if earnings_last >= earnings_first*1.1:
symbol_array.append(company_info['symbol'])
else:
print('Step 6 fail. Earnings growth too low')
else:
print('Step 5 fail. Current price too high')
else:
print('Step 4 fail. P/E ratio too high')
else:
print('Step 3 fail. Current liabilities too high')
else:
print('Step 2 fail. Long term debt too high')
else:
print('Step 1 fail. Penny stock')
except Exception as e:
print(traceback.format_exc()) # code to point out any errors in the main try statement
failed_search.append(ticker)
print(ticker, ' failed to search.')
print(e)
print('Failed searches:')
for failure in failed_search:
print(failure)
print('Potential Investments:')
for symbol in symbol_array:
print(symbol)

Categories