Im preparing a python code , to screen stocks from the SP500 , DOW and Nasdaq.
SP500 and DOW importing data stocks is working properly , but when I try to import Nasdaq always get similar error, related to timestamp.
See below:
My code:
import talib
from yahoo_fin.stock_info import get_data
import yahoo_fin.stock_info as si
from datetime import datetime
list = si.tickers_nasdaq()
# Get current date and time
now = datetime.now().strftime("%m_%d_%Y_%I_%M_%S")
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d')
# Create file to save results
f = open(f'C:/Users/fco_j/OneDrive/Escritorio/CHAT GPT/Python/Reports/dow_results_{now}.csv', 'w')
# print table header to file
f.write('Ticker, ClosePrice, SMA200, SMA20, RSI, RelVol\n')
# Define cache_data function
def cache_data(data, stock):
data.to_pickle(f'C:/Users/fco_j/OneDrive/Escritorio/CHAT GPT/Python/Pickle/{stock}.pkl')
for stock in list:
# Download historical data for past year
data = si.get_data(stock, start_date=start_date, end_date=end_date)
last_price = data["close"][-1]
# Get 150 and 20 simple moving averages using Talib
sma150 = talib.SMA(data['close'], timeperiod=150)[-1]
sma20 = talib.SMA(data['close'], timeperiod=20)[-1]
rsi = talib.RSI(data['close'], timeperiod=14)
# Calculate Relative Volume
rel_vol = data['volume'] / talib.SMA(data['volume'].values.astype(float), timeperiod = 50)
# Cache data
cache_data(data, stock)
# Filter stocks with relative volume (time period 20) over 1
if last_price > sma150 and last_price > sma20 and rsi[-1] > 50 and rel_vol[-1] > 1:
# Print results to file
f.write(f"{stock},{last_price},{sma150},{sma20},{rsi[-1]},{rel_vol[-1]}\n")
f.close()
The error:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_11208/2663596324.py in
26 for stock in dow_list:
27 # Download historical data for past year
---> 28 data = si.get_data(stock, start_date=start_date, end_date=end_date)
29 last_price = data["close"][-1]
30 # Get 150 and 20 simple moving averages using Talib
~\anaconda3\envs\PyFinance\lib\site-packages\yahoo_fin\stock_info.py in get_data(ticker, start_date, end_date, index_as_date, interval, headers)
98
99 # get the date info
--> 100 temp_time = data["chart"]["result"][0]["timestamp"]
101
102 if interval != "1m":
KeyError: 'timestamp'
The code is working with si.tickers_dow() and si.tickers_sp500() , but not with si.tickers_nasdaq() .
Not sure if a dataframe issue.
Related
This is a modified version of a program from a tutorial that extracts data from all of the stocks in the S&P 500 and picks stocks that match the criteria you specify.
The issue is that when I run the program List index out of range [stock symbol] pops up and those stocks are skipped and aren't added to the final CSV file.
Example:
list index out of range for ABMD
list index out of range for ABT
list index out of range for ADBE
list index out of range for ADI
I'm not really sure what the issue is, I would greatly appreciate it if someone would explain it to me! Also, I am not applying any of the specifying criteria yet and am just trying to get all of the stock data into the CSV file. Make sure to create a database named stock_data if you try the program. Thanks!
My code:
import pandas_datareader as web
import pandas as pd
from yahoo_fin import stock_info as si
import datetime as dt
dow_list = si.tickers_dow()
sp_list = si.tickers_sp500()
tickers = sp_list
'''tickers = list(set(tickers))
tickers.sort()'''
start = dt.datetime.now() - dt.timedelta(days=365)
end = dt.datetime.now()
sp500_df = web.DataReader('^GSPC', 'yahoo', start, end)
sp500_df['Pct Change'] = sp500_df['Adj Close'].pct_change()
sp500_return = (sp500_df['Pct Change'] + 1).cumprod()[-1]
return_list = []
final_df = pd.DataFrame(columns=['Ticker', 'Latest_Price', 'Score', 'PE_Ratio', 'PEG_Ratio', 'SMA_150', 'SMA_200', '52_Week_Low', '52_Week_High'])
counter = 0
for ticker in tickers:
df = web.DataReader(ticker, 'yahoo', start, end)
df.to_csv(f'stock_data/{ticker}.csv')
df['Pct Change'] = df['Adj Close'].pct_change()
stock_return = (df['Pct Change'] + 1).cumprod()[-1]
returns_compared = round((stock_return / sp500_return), 2)
return_list.append(returns_compared)
counter += 1
if counter == 100:
break
best_performers = pd.DataFrame(list(zip(tickers, return_list)), columns=['Ticker', 'Returns Compared'])
best_performers['Score'] = best_performers['Returns Compared'].rank(pct=True) * 100
best_performers = best_performers[best_performers['Score'] >= best_performers['Score'].quantile(0)] #picks stocks in top 25 percentile
for ticker in best_performers['Ticker']:
try:
df = pd.read_csv(f'stock_data/{ticker}.csv', index_col=0)
moving_averages = [150, 200]
for ma in moving_averages:
df['SMA_' + str(ma)] = round(df['Adj Close'].rolling(window=ma).mean(), 2)
latest_price = df['Adj Close'][-1]
pe_ratio = float(si.get_quote_table(ticker)['PE Ratio (TTM)'])
peg_ratio = float(si.get_stats_valuation(ticker)[1][4])
moving_average_150 = df['SMA_150'][-1]
moving_average_200 = df['SMA_200'][-1]
low_52week = round(min(df['Low'][-(52*5):]), 2)
high_52week = round(min(df['High'][-(52 * 5):]), 2)
score = round(best_performers[best_performers['Ticker'] == ticker]['Score'].tolist()[0])
condition_1 = latest_price > moving_average_150 > moving_average_200
condition_2 = latest_price >= (1.3 * low_52week)
condition_3 = latest_price >= (0.75 * high_52week)
condition_4 = pe_ratio < 25
condition_5 = peg_ratio < 2
final_df = final_df.append({'Ticker': ticker,
'Latest_Price': latest_price,
'Score': score,
'PE_Ratio': pe_ratio,
'PEG_Ratio': peg_ratio,
'SMA_150': moving_average_150,
'SMA_200': moving_average_200,
'52_Week_Low': low_52week,
'52_Week_High': high_52week}, ignore_index=True)
except Exception as e:
print(f"{e} for {ticker}")
final_df.sort_values(by='Score', ascending=False)
pd.set_option('display.max_columns', 10)
print(final_df)
final_df.to_csv('final.csv')
I have done the error shooting on your behalf. As a conclusion, I see that you have not checked the contents of the acquisition of the individual indicator data.
They are being added to the dictionary format and empty data frames as they are in index and named series. I believe that is the root cause of the error.
Specifying the last data and retrieving the values
iloc is not used.
52*5 lookbacks for 253 data
In addition, when additional indicators are acquired for the acquired issue data, there are cases where they can be acquired for the same issue, and cases where they cannot. (The cause is unknown.) Therefore, it may be necessary to change the method of processing pe_ratio and peg_ratio after obtaining them in advance.
for ticker in best_performers['Ticker']:
#print(ticker)
try:
df = pd.read_csv(f'stock_data/{ticker}.csv')#, index_col=0
moving_averages = [150, 200]
for ma in moving_averages:
df['SMA_' + str(ma)] = round(df['Adj Close'].rolling(window=ma).mean(), 2)
latest_price = df['Adj Close'][-1:].values[0]
pe_ratio = float(si.get_quote_table(ticker)['PE Ratio (TTM)'])
moving_average_150 = df['SMA_150'][-1:].values[0]
moving_average_200 = df['SMA_200'][-1:].values[0]
low_52week = round(min(df['Low'][-(52*1):]), 2)
high_52week = round(min(df['High'][-(52*1):]), 2)
#print(low_52week, high_52week)
score = round(best_performers[best_performers['Ticker'] == ticker]['Score'].tolist()[0])
#print(score)
#print(ticker, latest_price,score,pe_ratio,moving_average_200,low_52week,high_52week)
final_df = final_df.append({'Ticker': ticker,
'Latest_Price': latest_price,
'Score': score,
'PE_Ratio': pe_ratio,
'SMA_150': moving_average_150,
'SMA_200': moving_average_200,
'52_Week_Low': low_52week,
'52_Week_High': high_52week}, ignore_index=True)
#print(final_df)
except Exception as e:
print(f"{e} for {ticker}")
final_df
Ticker Latest_Price Score PE_Ratio SMA_150 SMA_200 52_Week_Low 52_Week_High
0 A 123.839996 40 31.42 147.26 150.31 123.06 126.75
1 AAP 218.250000 70 22.23 220.66 216.64 190.79 202.04
2 AAPL 165.070007 80 29.42 161.85 158.24 150.10 154.12
3 ABC 161.899994 90 21.91 132.94 129.33 132.00 137.79
4 ADBE 425.470001 10 42.46 552.19 571.99 407.94 422.38
Note
Some stocks are missing because additional indicators could not be obtained.
(tickers = sp_list[:10] tested on the first 10)
I want to print only the price for a stock form yfinance, this is what I get/have now :
ticker = "aapl"
start = datetime.now().strftime('%Y-%m-%d')
end = datetime.now().strftime('%Y-%m-%d')
data = pdr.get_data_yahoo(ticker, start, end)
data['EMA10'] = data['Close'].ewm(span=10, adjust=False).mean()
print(data['EMA10'])
and this is the response :
Date
2022-03-04 163.169998
Name: EMA10, dtype: float64
I only want to print 163....
You obtain a pd.Series. To select the top-most value within that series just do data['EMA10'][0].
The entire code is given below:
from datetime import datetime
import pandas_datareader as pdr
ticker = "AAPL"
start = datetime.now().strftime('%Y-%m-%d')
end = datetime.now().strftime('%Y-%m-%d')
data = pdr.get_data_yahoo(ticker, start, end)
data['EMA10'] = data['Close'].ewm(span=10, adjust=False).mean()
print(data['EMA10'][0])
Output:
163.1699981689453
I'm trying this snippet (from web) in my script which calculates StochRSI.
The error output I get:
Traceback (most recent call last):
File "C:\Users\user2\PycharmProjects\xxx2\main.py", line 99, in <module>
main()
File "C:\Users\user2\PycharmProjects\xxx2\main.py", line 87, in main
stoch_rsi = StochRSI(closing_data)
File "C:\Users\user2\PycharmProjects\xxx2\main.py", line 56, in StochRSI
delta = series.diff().dropna()
AttributeError: 'numpy.ndarray' object has no attribute 'diff'
Tried searching for answers but couldn't find any to fix the problem. What's wrong here?
EDIT: Please check the full code below. I hope it clears out what I'm doing... I used numpy to get closing prices. I used that to get live EMA, and it worked fine - and now I'm trying to get StochRSI's live value.
import datetime
import config
import csv
import os.path
import sys
import numpy as np
import pandas as pd
import requests
import talib
from binance.client import Client
from binance.enums import *
from time import sleep
def get_data():
historical_data = client.get_historical_klines(symbol=config.SYMBOL, interval=config.TIME_PERIOD, start_str="1 year ago UTC", klines_type=HistoricalKlinesType.SPOT)
return_data = []
for each in historical_data:
kline = float(each[4])
return_data.append(kline)
return np.array(return_data)
def StochRSI(series, period=14, smoothK=3, smoothD=3):
# Calculate RSI
delta = series.diff().dropna()
ups = delta * 0
downs = ups.copy()
ups[delta > 0] = delta[delta > 0]
downs[delta < 0] = -delta[delta < 0]
ups[ups.index[period-1]] = np.mean(ups[:period]) #first value is sum of avg gains
ups = ups.drop(ups.index[:(period-1)])
downs[downs.index[period-1]] = np.mean(downs[:period]) #first value is sum of avg losses
downs = downs.drop(downs.index[:(period-1)])
rs = ups.ewm(com=period-1, min_periods=0, adjust=False, ignore_na=False).mean() / \
downs.ewm(com=period-1, min_periods=0, adjust=False, ignore_na=False).mean()
rsi = 100 - 100 / (1 + rs)
# Calculate StochRSI
stochrsi = (rsi - rsi.rolling(period).min()) / (rsi.rolling(period).max() - rsi.rolling(period).min())
stochrsi_K = stochrsi.rolling(smoothK).mean()
stochrsi_D = stochrsi_K.rolling(smoothD).mean()
return stochrsi, stochrsi_K, stochrsi_D
def main():
ema_200 = None
last_ema_200 = None
while True:
closing_data = get_data()
last_candle = closing_data[-1]
ema_200 = talib.EMA(closing_data, 10)[-1]
stoch_rsi = StochRSI(closing_data)
if last_candle > ema_200:
print(f"Price {last_candle} is above EMA 200 {ema_200} | RSI {stoch_rsi}")
elif last_candle < ema_200:
print(f"Price {last_candle} is below EMA 200 {ema_200} | RSI {stoch_rsi}")
if __name__ == "__main__":
# Client
client = Client(config.API_KEY, config.API_SECRET_KEY, tld='com')
print(f"Authenticated")
main()
This function is expecting a pandas DataFrame or Series but is getting a numpy ndarray.
So I'm attempting to use python to scan an excel file of stock data and help screen it. The following error is occurring however. It should ask what file you want to screen, and then apply a series of conditions to determine which stocks meet the screening parameters. Then save it as another spreadsheet with what passed the screens.
Exception has occurred: KeyError
'Symbol'
File "D:\Program Files\Projects\TrendTemplate.py", line 27, in <module>
stock=str(stocklist["Symbol"][i])
Code:
import pandas as pd
from pandas_datareader import data as pdr
import yfinance as yf
from tkinter import Tk
from tkinter.filedialog import askopenfilename
import os
from pandas import ExcelWriter
yf.pdr_override()
start =dt.datetime(2017,12,1)
now = dt.datetime.now()
root = Tk()
ftypes = [(".xlsm","*.xlsx",".xls")]
ttl = "Title"
dir1 = 'C:\\'
filePath = askopenfilename(filetypes = ftypes, initialdir = dir1, title = ttl)
stocklist = pd.read_excel(filePath)
#stocklist=stocklist.head()
exportList= pd.DataFrame(columns=['Stock', "RS_Rating", "50 Day MA", "150 Day Ma", "200 Day MA", "52 Week Low", "52 week High"])
for i in stocklist.index:
stock=str(stocklist["Symbol"][i])
RS_Rating=stocklist["RS Rating"][i]
try:
df = pdr.get_data_yahoo(stock, start, now)
smaUsed=[50,150,200]
for x in smaUsed:
sma=x
df["SMA_"+str(sma)]=round(df.iloc[:,4].rolling(window=sma).mean(),2)
currentClose=df["Adj Close"][-1]
moving_average_50=df["SMA_50"][-1]
moving_average_150=df["SMA_150"][-1]
moving_average_200=df["SMA_200"][-1]
low_of_52week=min(df["Adj Close"][-260:])
high_of_52week=max(df["Adj Close"][-260:])
try:
moving_average_200_20 = df["SMA_200"][-20]
except Exception:
moving_average_200_20=0
#Condition 1: Current Price > 150 SMA and > 200 SMA
if(currentClose>moving_average_150>moving_average_200):
cond_1=True
else:
cond_1=False
#Condition 2: 150 SMA and > 200 SMA
if(moving_average_150>moving_average_200):
cond_2=True
else:
cond_2=False
#Condition 3: 200 SMA trending up for at least 1 month (ideally 4-5 months)
if(moving_average_200>moving_average_200_20):
cond_3=True
else:
cond_3=False
#Condition 4: 50 SMA> 150 SMA and 50 SMA> 200 SMA
if(moving_average_50>moving_average_150>moving_average_200):
#print("Condition 4 met")
cond_4=True
else:
#print("Condition 4 not met")
cond_4=False
#Condition 5: Current Price > 50 SMA
if(currentClose>moving_average_50):
cond_5=True
else:
cond_5=False
#Condition 6: Current Price is at least 30% above 52 week low (Many of the best are up 100-300% before coming out of consolidation)
if(currentClose>=(1.3*low_of_52week)):
cond_6=True
else:
cond_6=False
#Condition 7: Current Price is within 25% of 52 week high
if(currentClose>=(.75*high_of_52week)):
cond_7=True
else:
cond_7=False
#Condition 8: IBD RS rating >70 and the higher the better
if(RS_Rating>70):
cond_8=True
else:
cond_8=False
if(cond_1 and cond_2 and cond_3 and cond_4 and cond_5 and cond_6 and cond_7 and cond_8):
exportList = exportList.append({'Stock': stock, "RS_Rating": RS_Rating, "50 Day MA": moving_average_50, "150 Day Ma": moving_average_150, "200 Day MA": moving_average_200, "52 Week Low": low_of_52week, "52 week High": high_of_52week}, ignore_index=True)
except Exception:
print("No data on "+stock)
print(exportList)
newFile=os.path.dirname(filePath)+"/ScreenOutputTrendTemplate.xlsx"
writer= ExcelWriter(newFile)
exportList.to_excel(writer,"Sheet1")
writer.save()
I wrote a script to scrape Yahoo Finance stock data using the Yahoo_Fin package
The aim of the script is to grab company financials to be able to perform some calculations. The input to the script is a txt file with a list of company ticker symbols. The output is also supposed to be a txt with only the companies that match a certain number of established criteria.
The script does occasionally work with a small txt file (20 tickers or less) however it does sometimes give me the following error (without me changing any code)
"None of ['Breakdown'] are in the columns" with Breakdown being the index column I set for the df.
I have run the script dozens of times and sometimes it works, sometimes it doesn't. Ran it in Atom and Jupyter Notebook and still have no clue what is causing the problem. I have also updated pandas and all necessary packages.
This is the code:
import pandas as pd
import statistics as stat
from yahoo_fin.stock_info import *
stock_list = [line.rstrip('\n') for line in open("test.txt", "r")]
#print(stock_list)
## The balance sheet df ##
balance_sheet = {ticker: get_balance_sheet(ticker)
for ticker in stock_list}
## The income statement df ##
income_statement = {ticker: get_income_statement(ticker)
for ticker in stock_list}
bs_data=[]
for i in range(0,len(stock_list)):
one_ticker = pd.DataFrame(balance_sheet[stock_list[i]])
one_ticker = one_ticker.set_index('Breakdown')
bs_data.append(one_ticker)
#print(bs_data)
income_data=[]
#one_ticker =[]
for i in range(0,len(stock_list)):
one_ticker = pd.DataFrame(income_statement[stock_list[i]])
one_ticker = one_ticker.set_index('Breakdown')
income_data.append(one_ticker)
#print(income_data)
## These are the balance sheet variables ##
for loop_counter in range(0,len(stock_list)):
# Total Assets
total_assets = (bs_data[loop_counter].loc['Total Assets'].astype(int))
avg_total_assets = stat.mean(total_assets)
#print(avg_total_assets)
# Total Current Liabilities
total_current_liabilities = (bs_data[loop_counter].loc['Total Current Liabilities'].astype(int))
avg_total_current_liabilities = stat.mean(total_current_liabilities)
#print(avg_total_current_liabilities)
#Total Liabilities
total_liabilities = (bs_data[loop_counter].loc['Total Liabilities'].astype(int))
avg_total_liabilities = stat.mean(total_liabilities)
#print(avg_total_liabilities)
## These are the income statement variables ##
# Total Revenue
total_revenue = (income_data[loop_counter].loc['Total Revenue']).astype(int)
avg_total_revenue = stat.mean(total_revenue)
#print(avg_total_revenue)
# Operating Income
operating_income = (income_data[loop_counter].loc['Operating Income or Loss']).astype(int)
avg_operating_income = stat.mean(operating_income)
#print(avg_operating_income)
# Total Operating Expenses
total_operating_expenses = (income_data[loop_counter].loc['Total Operating Expenses'].astype(int))
avg_total_operating_expenses = stat.mean(total_operating_expenses)
#print(avg_total_operating_expenses)
# EBIT
ebit = (avg_total_revenue-avg_total_operating_expenses)
#print(ebit)
## Calculations ##
opm = (avg_operating_income) / (avg_total_revenue)
#print(opm)
roce = (ebit) / ((avg_total_assets) - (avg_total_current_liabilities))
#print(roce)
leverage = (avg_total_liabilities) / (avg_total_assets)
#print(leverage)
#print("Leverage: " + str(round(leverage,2)))
#print("OPM: " + str(round(opm*100,2)) + "%")
#print("ROCE: " + str(round(roce*100,2)) + "%")
## Save to file ##
#print(leverage)
#print(opm)
#print(roce)
if leverage < 1.00 and roce >= 0.2 and opm >= 0.2:
#print("We have a match!")
outfile = open("results.txt", "a")
outfile.write(stock_list[loop_counter])
outfile.write("\n")
outfile.close()
Any clues to what might be the problem??
Update #2 Code:
import pandas as pd
import statistics as stat
from yahooquery import *
# Ticker input here
stock_list = [line.rstrip('\n') for line in open("test.txt", "r")]
#for stock in stock_list:
tickers = Ticker(stock_list)
# Get balance sheet
for stock in stock_list:
#print(stock)
bs = tickers.balance_sheet()
bs = pd.DataFrame(bs)
bs = bs.set_index('endDate')
#print(bs)
## Balance sheet variables to extract ##
# Total Assets
total_assets = bs['totalAssets']
avg_total_assets = stat.mean(total_assets)
# Total Current Liabilities
total_current_liabilities = bs['totalCurrentLiabilities']
avg_total_current_liabilities = stat.mean(total_current_liabilities)
# Total Liabilities
total_liabilities = bs['totalLiab']
avg_total_liabilities = stat.mean(total_liabilities)
## Get income statement ##
inst = tickers.income_statement()
inst = pd.DataFrame(inst)
inst = inst.set_index('endDate')
## Income statement variables to extract ##
# Total Revenue#
total_revenue = inst['totalRevenue']
avg_total_revenue = stat.mean(total_revenue)
# Operating Income
operating_income = inst['operatingIncome']
avg_operating_income = stat.mean(operating_income)
# Total Operating Expenses
total_operating_expenses = inst['totalOperatingExpenses']
avg_total_operating_expenses = stat.mean(total_operating_expenses)
# EBIT
ebit = (avg_total_revenue-avg_total_operating_expenses)
## Parameters ##
opm = (avg_operating_income) / (avg_total_revenue)
roce = (ebit) / ((avg_total_assets) - (avg_total_current_liabilities))
leverage = (avg_total_liabilities) / (avg_total_assets)
## Save to file ##
#print("Hello!")
if leverage < 1.00 and roce >= 0.2 and opm >= 0.2:
#print("Hello")
outfile = open("yahoo_query_results.txt", "w+")
outfile.write(stock)
outfile.write("\n")
outfile.close()