I would like to achieve the following.
I would like the function to output a separate data frame for each stock for example
I would like to save each data frame to a separate csv
the code doesnt work and i am unable to output the and save the separate csvs. Can you help me output separate dataframes and export the respective CSVS?
def getdata(stock: str):
# Company Quote Group of Items
company_quote = requests.get(f"https://financialmodelingprep.com/api/v3/quote/{stock}?apikey=demo")
company_quote = company_quote.json()
share_price = float("{0:.2f}".format(company_quote[0]['price']))
# Balance Sheet Group of Items
BS = requests.get(f"https://financialmodelingprep.com/api/v3/income-statement/{stock}?period=quarter&limit=400&apikey=demo")
BS = BS.json()
#Total Debt
debt = float("{0:.2f}".format(float(BS[0]['totalDebt'])/10**9))
#Total Cash
cash = float("{0:.2f}".format(float(BS[0]['cashAndShortTermInvestments'])/10**9))
# Income Statement Group of Items
IS = requests.get(f"https://financialmodelingprep.com/api/v3/income-statement/{stock}?period=quarter&limit=400&apikey=demo")
IS = IS.json()
# Most Recent Quarterly Revenue
qRev = float("{0:.2f}".format(float(IS[0]['revenue'])/10**9))
# Company Profile Group of Items
company_info = requests.get(f"https://financialmodelingprep.com/api/v3/profile/{stock}?apikey=demo")
company_info = company_info.json()
# Chief Executive Officer
ceo = (company_info[0]['ceo'])
return (share_price, cash, debt, qRev, ceo)
stocks = ('AAPL')
d = {}
for stock in stocks:
df[stock] = pd.DataFrame(getdata, columns=['Share Price','Total Cash', 'Total Debt', 'Q3 2019 Revenue', 'CEO'], index=tickers)
print(d)
I'm not sure to understand your question. But if the fact is that you want a different DataFrame for each ticker, here is a solution.
Instead of :
for stock in stocks:
df[stock] = pd.DataFrame(....
Try:
for stock in stocks:
globals()['df_%s' %stock] = pd.DataFrame(...
# And to save it, inside the loop
globals()['df_%s' %stock].to_csv(stock+'.csv')
EDIT:
Thanx for the add. Here is the code
import joblib
from joblib import Parallel,delayed
import requests
import pandas as pd
def getdata(stock):
# Company Quote Group of Items
company_quote = requests.get(f"https://financialmodelingprep.com/api/v3/quote/{stock}?apikey=demo")
company_quote = company_quote.json()
share_price = float("{0:.2f}".format(company_quote[0]['price']))
# Balance Sheet Group of Items
BS = requests.get(f"https://financialmodelingprep.com/api/v3/balance-sheet-statement/{stock}?period=quarter&limit=400&apikey=demo")
BS = BS.json()
#Total Debt
debt = float("{0:.2f}".format(float(BS[0]['totalDebt'])/10**9))
#Total Cash
cash = float("{0:.2f}".format(float(BS[0]['cashAndShortTermInvestments'])/10**9))
# Income Statement Group of Items
IS = requests.get(f"https://financialmodelingprep.com/api/v3/income-statement/{stock}?period=quarter&limit=400&apikey=demo")
IS = IS.json()
# Most Recent Quarterly Revenue
qRev = float("{0:.2f}".format(float(IS[0]['revenue'])/10**9))
# Company Profile Group of Items
company_info = requests.get(f"https://financialmodelingprep.com/api/v3/profile/{stock}?apikey=demo")
company_info = company_info.json()
# Chief Executive Officer
ceo = (company_info[0]['ceo'])
globals()['df_%s' %stock] = pd.DataFrame({'symbol':[stock],'debt':[debt],'cash':[cash],'qRev':[qRev],'ceo':[ceo]})
globals()['df_%s' %stock].to_csv(stock+'.csv')
return(globals()['df_%s' %stock])
stocks = ['AAPL'] #, 'MSFT', 'GOOG', 'T', 'CSCO', 'INTC', 'ORCL', 'AMZN', 'FB', 'TSLA', 'NVDA']
number_of_cpu = joblib.cpu_count()
delayed_funcs = [delayed(getdata)(stock) for stock in stocks]
parallel_pool = Parallel(n_jobs=number_of_cpu,prefer="processes") # processes threads
globals()['df_%s' %stock] = parallel_pool(delayed_funcs)
df_AAPL
OUTPUT
It isn't necessary to return the DataFrame, as you save it in the function. But I did it to show you the possibility.
Related
This is my first experience in Python and Stackoverflow :)
I try to update my xls file with Portfolio using Yfinance.
I'm interested in two parameters on each stock : current price and sector.
I try to update my Pandas DataFrame using the code below.
It works fine for the "Price" but I can't extract "Sector" - get an error below.
What I'm doing wrong here?
Well, there is a "sector" key in the dictionary for each stock. I try to update the Excel file, so there isn't much code
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[271], line 7
5 stock_info = yf.Ticker(ticker).info
6 price = stock_info['regularMarketPrice']
----> 7 sector = str(stock_info['sector'])
8 ibkr.loc[i, ['Price of share']] = price
9 ibkr.loc[i, ['Sector']] = sector
KeyError: 'sector'
The code:
import yfinance as yf
import pandas as pd
import numpy as np <br/> ibkr = pd.read_excel("BKR_WISHLIST.xlsx") <br/> ibkr.columns = ['Company_name', 'Symbol', 'Number of shares', 'Price of share', 'Total_value_share, USD'] <br/> ibkr.dropna(subset=['Total_value_share, USD', 'Number of shares'], inplace=True) <br/> ibkr.insert(2, "Sector", "XXX") <br/> ibkr.reset_index(drop = True, inplace = True) <br/> my_tickers = ibkr["Symbol"].tolist()
tickers = yf.Tickers(my_tickers)
# i = 0
for ticker in my_tickers:
stock_info = yf.Ticker(ticker).info
# price = stock_info['regularMarketPrice']
# sector = stock_info['sector']
ibkr.loc[i, 'Price of share'] = stock_info['regularMarketPrice']
#ibkr.loc[i, 'Sector'] = stock_info["sector"]
i += 1
my_tickers = ibkr["Symbol"].tolist()
tickers = yf.Tickers(my_tickers)
i = 0
for ticker in my_tickers:
stock_info = yf.Ticker(ticker).info
price = stock_info['regularMarketPrice']
sector = stock_info['sector']
ibkr.loc[i, ['Price of share']] = price
ibkr.loc[i, ['Sector']] = sector
i = I+1
Thank you guys for your help!
I don't know how to # you here, but I hope you will this post.
The problem was that the stock_info for some objects (ETF) did not have a "sector" key.
So with help of #Damian Satterthwaite-Phillips and #twister_void this code did work:
my_tickers = ibkr["Symbol"].tolist()
tickers = yf.Tickers(my_tickers)
i = 0
for ticker in my_tickers:
stock_info = yf.Ticker(ticker).info
ibkr.loc[i, 'Sector'] = stock_info.get('sector', 'NA')
ibkr.loc[i, "Price of share"] = stock_info["regularMarketPrice"]
i += 1
There might be a problem with the value in the sector that's why KeyError is raised because the value is not found in some case you can handle with loop
for ticker in my_tickers:
stock_info = yf.Ticker(ticker).info
ibkr.loc[i, "Sector"] = stock_info["sector"]
if stock_info["sector"] is None or stock_info["sector"] == "":
ibkr.loc[i, "Sector"] = "NA"
ibkr.loc[i, "Price"] = stock_info["regularMarketPrice"]
i += 1
The stock_info object does not have a sector key:
For example,
yf.Ticker('MSFT').info
returns
{'regularMarketPrice': None, 'preMarketPrice': None, 'logo_url': ''}
It looks like it might have had sector at one point (e.g.: How to get Industry Data from Yahoo Finance using Python?), but it may be that Yahoo Finance no longer publishes these data.
The API seems to be broken. I have tried upgrading yfinance from 1.86 all the way up to 2.x. Whether I put in a stock ticker or a fund ticker or ETF, I only get back the 3 keys: {'regularMarketPrice': None, 'preMarketPrice': None, 'logo_url': ''}. I don't think this is a "sector" problem at all... it seems more like a Yahoo problem... noworky.
This is a modified version of a program from a tutorial that extracts data from all of the stocks in the S&P 500 and picks stocks that match the criteria you specify.
The issue is that when I run the program List index out of range [stock symbol] pops up and those stocks are skipped and aren't added to the final CSV file.
Example:
list index out of range for ABMD
list index out of range for ABT
list index out of range for ADBE
list index out of range for ADI
I'm not really sure what the issue is, I would greatly appreciate it if someone would explain it to me! Also, I am not applying any of the specifying criteria yet and am just trying to get all of the stock data into the CSV file. Make sure to create a database named stock_data if you try the program. Thanks!
My code:
import pandas_datareader as web
import pandas as pd
from yahoo_fin import stock_info as si
import datetime as dt
dow_list = si.tickers_dow()
sp_list = si.tickers_sp500()
tickers = sp_list
'''tickers = list(set(tickers))
tickers.sort()'''
start = dt.datetime.now() - dt.timedelta(days=365)
end = dt.datetime.now()
sp500_df = web.DataReader('^GSPC', 'yahoo', start, end)
sp500_df['Pct Change'] = sp500_df['Adj Close'].pct_change()
sp500_return = (sp500_df['Pct Change'] + 1).cumprod()[-1]
return_list = []
final_df = pd.DataFrame(columns=['Ticker', 'Latest_Price', 'Score', 'PE_Ratio', 'PEG_Ratio', 'SMA_150', 'SMA_200', '52_Week_Low', '52_Week_High'])
counter = 0
for ticker in tickers:
df = web.DataReader(ticker, 'yahoo', start, end)
df.to_csv(f'stock_data/{ticker}.csv')
df['Pct Change'] = df['Adj Close'].pct_change()
stock_return = (df['Pct Change'] + 1).cumprod()[-1]
returns_compared = round((stock_return / sp500_return), 2)
return_list.append(returns_compared)
counter += 1
if counter == 100:
break
best_performers = pd.DataFrame(list(zip(tickers, return_list)), columns=['Ticker', 'Returns Compared'])
best_performers['Score'] = best_performers['Returns Compared'].rank(pct=True) * 100
best_performers = best_performers[best_performers['Score'] >= best_performers['Score'].quantile(0)] #picks stocks in top 25 percentile
for ticker in best_performers['Ticker']:
try:
df = pd.read_csv(f'stock_data/{ticker}.csv', index_col=0)
moving_averages = [150, 200]
for ma in moving_averages:
df['SMA_' + str(ma)] = round(df['Adj Close'].rolling(window=ma).mean(), 2)
latest_price = df['Adj Close'][-1]
pe_ratio = float(si.get_quote_table(ticker)['PE Ratio (TTM)'])
peg_ratio = float(si.get_stats_valuation(ticker)[1][4])
moving_average_150 = df['SMA_150'][-1]
moving_average_200 = df['SMA_200'][-1]
low_52week = round(min(df['Low'][-(52*5):]), 2)
high_52week = round(min(df['High'][-(52 * 5):]), 2)
score = round(best_performers[best_performers['Ticker'] == ticker]['Score'].tolist()[0])
condition_1 = latest_price > moving_average_150 > moving_average_200
condition_2 = latest_price >= (1.3 * low_52week)
condition_3 = latest_price >= (0.75 * high_52week)
condition_4 = pe_ratio < 25
condition_5 = peg_ratio < 2
final_df = final_df.append({'Ticker': ticker,
'Latest_Price': latest_price,
'Score': score,
'PE_Ratio': pe_ratio,
'PEG_Ratio': peg_ratio,
'SMA_150': moving_average_150,
'SMA_200': moving_average_200,
'52_Week_Low': low_52week,
'52_Week_High': high_52week}, ignore_index=True)
except Exception as e:
print(f"{e} for {ticker}")
final_df.sort_values(by='Score', ascending=False)
pd.set_option('display.max_columns', 10)
print(final_df)
final_df.to_csv('final.csv')
I have done the error shooting on your behalf. As a conclusion, I see that you have not checked the contents of the acquisition of the individual indicator data.
They are being added to the dictionary format and empty data frames as they are in index and named series. I believe that is the root cause of the error.
Specifying the last data and retrieving the values
iloc is not used.
52*5 lookbacks for 253 data
In addition, when additional indicators are acquired for the acquired issue data, there are cases where they can be acquired for the same issue, and cases where they cannot. (The cause is unknown.) Therefore, it may be necessary to change the method of processing pe_ratio and peg_ratio after obtaining them in advance.
for ticker in best_performers['Ticker']:
#print(ticker)
try:
df = pd.read_csv(f'stock_data/{ticker}.csv')#, index_col=0
moving_averages = [150, 200]
for ma in moving_averages:
df['SMA_' + str(ma)] = round(df['Adj Close'].rolling(window=ma).mean(), 2)
latest_price = df['Adj Close'][-1:].values[0]
pe_ratio = float(si.get_quote_table(ticker)['PE Ratio (TTM)'])
moving_average_150 = df['SMA_150'][-1:].values[0]
moving_average_200 = df['SMA_200'][-1:].values[0]
low_52week = round(min(df['Low'][-(52*1):]), 2)
high_52week = round(min(df['High'][-(52*1):]), 2)
#print(low_52week, high_52week)
score = round(best_performers[best_performers['Ticker'] == ticker]['Score'].tolist()[0])
#print(score)
#print(ticker, latest_price,score,pe_ratio,moving_average_200,low_52week,high_52week)
final_df = final_df.append({'Ticker': ticker,
'Latest_Price': latest_price,
'Score': score,
'PE_Ratio': pe_ratio,
'SMA_150': moving_average_150,
'SMA_200': moving_average_200,
'52_Week_Low': low_52week,
'52_Week_High': high_52week}, ignore_index=True)
#print(final_df)
except Exception as e:
print(f"{e} for {ticker}")
final_df
Ticker Latest_Price Score PE_Ratio SMA_150 SMA_200 52_Week_Low 52_Week_High
0 A 123.839996 40 31.42 147.26 150.31 123.06 126.75
1 AAP 218.250000 70 22.23 220.66 216.64 190.79 202.04
2 AAPL 165.070007 80 29.42 161.85 158.24 150.10 154.12
3 ABC 161.899994 90 21.91 132.94 129.33 132.00 137.79
4 ADBE 425.470001 10 42.46 552.19 571.99 407.94 422.38
Note
Some stocks are missing because additional indicators could not be obtained.
(tickers = sp_list[:10] tested on the first 10)
Return Data from function as dictionary and store it in data frame.
While run it using for loop getting error.
import pyowm
from pyowm.utils import config
from pyowm.utils import timestamps
owm = pyowm.OWM(" your free api key from OpenWeatherMap")
mgr = owm.weather_manager()
data =[]
# Create function to get weather details
def get_weather(city):
observation = mgr.weather_at_place(city)
l = observation.weather
Wind_Speed = l.wind()['speed']
Temp = l.temperature('celsius')['temp']
Max_temp = l.temperature('celsius')['temp_max']
Min_temp = l.temperature('celsius')['temp_min']
#Heat_index = l.heat_index
Humidity = l.humidity
Pressure = l.pressure['press']
weather = {"City": city, "Wind_Speed" : Wind_Speed, "Temp":
Temp,"Max_temp":Max_temp, "Min_temp":Min_temp,
"Humidity":Humidity, "Pressure":Pressure}
return weather
for city in df_location['City']:
get_weather(city)
df = df.append(data, True)
Want to store that weather details in same df with relative city.
Current df_location is like:
I am trying to make this code look more attractive for potential employers that view it on my GitHub account. The code essentially loops through a CSV file and searches each symbol with the yfinance wrapper for the Yahoo-Finance API. It makes a few checks about the stock and decides whether it is a suitable investment. There are many try except clauses since API can return empty fields in the pandas dataframe. Currently I think it can be improved since it has multiple nested if statements with many try except statements. All feedback is greatly appreciated.
import yfinance as yf
import pandas as pd
import openpyxl
import csv
import math
import traceback
# Not a penny stock
# Earnings increase of at least 33% over 10 years using 3 year averages - 10% over 4 years since the API only contains the most recent 4 years
# Current price no more than 1.5x book value per share
# P/E ratio <= 15
# Long term debt no more than 110% current assets
# Current assets 1.5x current liabilities
symbol_array = []
failed_search = []
with open('companylist.csv') as file:
reader = csv.reader(file)
ticker_data = iter(reader) # skip the first value since it is the header
next(ticker_data)
for row in ticker_data:
ticker = row[0]
print('Searching: ', ticker)
try:
try:
company = yf.Ticker(ticker)
company_info = company.info
except:
print('Not a company')
continue # skip the ticker since it is not a company or the API doesn't have any information about the security
company_balance_sheet = company.balance_sheet
company_earnings = company.earnings
if company_balance_sheet.empty or company_earnings.empty:
continue # if balance sheets or earnings reports are not available, skip the search
column_date = company.balance_sheet.columns[0] # latest date on balance sheet to take data from
current_assets = company.balance_sheet.at['Total Current Assets', column_date]
try: # previous close price can be under 'previousClose' or 'regularMarketPrice' in company_info
current_price = company_info['previousClose']
except:
current_price = company_info['regularMarketPrice']
if current_price >= 10: # check if stock is penny stock
try:
long_term_debt = company.balance_sheet.at['Long Term Debt', column_date]
if math.isnan(long_term_debt):
long_term_debt = 0
except:
long_term_debt=0
if long_term_debt < (current_assets * 1.1):
current_liabilities = company.balance_sheet.at['Total Current Liabilities', column_date]
if current_liabilities < (1.5 * current_assets):
try:
pe_ratio = company_info['trailingPE'] # check if P/E ratio is available, assign pe_ratio 0 if it is not
except:
pe_ratio = 0
if pe_ratio <= 15:
try:
book_value = company_info['bookValue']
if type(book_value) != float: # book_value can be "None" in the company_info object
book_value = 0
except:
book_value = 0
if current_price < (book_value*1.5):
earnings_first = company.earnings.iat[0, 1]
earnings_last = company.earnings.iat[len(company.earnings)-1, 1]
if earnings_last >= earnings_first*1.1:
symbol_array.append(company_info['symbol'])
else:
print('Step 6 fail. Earnings growth too low')
else:
print('Step 5 fail. Current price too high')
else:
print('Step 4 fail. P/E ratio too high')
else:
print('Step 3 fail. Current liabilities too high')
else:
print('Step 2 fail. Long term debt too high')
else:
print('Step 1 fail. Penny stock')
except Exception as e:
print(traceback.format_exc()) # code to point out any errors in the main try statement
failed_search.append(ticker)
print(ticker, ' failed to search.')
print(e)
print('Failed searches:')
for failure in failed_search:
print(failure)
print('Potential Investments:')
for symbol in symbol_array:
print(symbol)
I wrote a script to scrape Yahoo Finance stock data using the Yahoo_Fin package
The aim of the script is to grab company financials to be able to perform some calculations. The input to the script is a txt file with a list of company ticker symbols. The output is also supposed to be a txt with only the companies that match a certain number of established criteria.
The script does occasionally work with a small txt file (20 tickers or less) however it does sometimes give me the following error (without me changing any code)
"None of ['Breakdown'] are in the columns" with Breakdown being the index column I set for the df.
I have run the script dozens of times and sometimes it works, sometimes it doesn't. Ran it in Atom and Jupyter Notebook and still have no clue what is causing the problem. I have also updated pandas and all necessary packages.
This is the code:
import pandas as pd
import statistics as stat
from yahoo_fin.stock_info import *
stock_list = [line.rstrip('\n') for line in open("test.txt", "r")]
#print(stock_list)
## The balance sheet df ##
balance_sheet = {ticker: get_balance_sheet(ticker)
for ticker in stock_list}
## The income statement df ##
income_statement = {ticker: get_income_statement(ticker)
for ticker in stock_list}
bs_data=[]
for i in range(0,len(stock_list)):
one_ticker = pd.DataFrame(balance_sheet[stock_list[i]])
one_ticker = one_ticker.set_index('Breakdown')
bs_data.append(one_ticker)
#print(bs_data)
income_data=[]
#one_ticker =[]
for i in range(0,len(stock_list)):
one_ticker = pd.DataFrame(income_statement[stock_list[i]])
one_ticker = one_ticker.set_index('Breakdown')
income_data.append(one_ticker)
#print(income_data)
## These are the balance sheet variables ##
for loop_counter in range(0,len(stock_list)):
# Total Assets
total_assets = (bs_data[loop_counter].loc['Total Assets'].astype(int))
avg_total_assets = stat.mean(total_assets)
#print(avg_total_assets)
# Total Current Liabilities
total_current_liabilities = (bs_data[loop_counter].loc['Total Current Liabilities'].astype(int))
avg_total_current_liabilities = stat.mean(total_current_liabilities)
#print(avg_total_current_liabilities)
#Total Liabilities
total_liabilities = (bs_data[loop_counter].loc['Total Liabilities'].astype(int))
avg_total_liabilities = stat.mean(total_liabilities)
#print(avg_total_liabilities)
## These are the income statement variables ##
# Total Revenue
total_revenue = (income_data[loop_counter].loc['Total Revenue']).astype(int)
avg_total_revenue = stat.mean(total_revenue)
#print(avg_total_revenue)
# Operating Income
operating_income = (income_data[loop_counter].loc['Operating Income or Loss']).astype(int)
avg_operating_income = stat.mean(operating_income)
#print(avg_operating_income)
# Total Operating Expenses
total_operating_expenses = (income_data[loop_counter].loc['Total Operating Expenses'].astype(int))
avg_total_operating_expenses = stat.mean(total_operating_expenses)
#print(avg_total_operating_expenses)
# EBIT
ebit = (avg_total_revenue-avg_total_operating_expenses)
#print(ebit)
## Calculations ##
opm = (avg_operating_income) / (avg_total_revenue)
#print(opm)
roce = (ebit) / ((avg_total_assets) - (avg_total_current_liabilities))
#print(roce)
leverage = (avg_total_liabilities) / (avg_total_assets)
#print(leverage)
#print("Leverage: " + str(round(leverage,2)))
#print("OPM: " + str(round(opm*100,2)) + "%")
#print("ROCE: " + str(round(roce*100,2)) + "%")
## Save to file ##
#print(leverage)
#print(opm)
#print(roce)
if leverage < 1.00 and roce >= 0.2 and opm >= 0.2:
#print("We have a match!")
outfile = open("results.txt", "a")
outfile.write(stock_list[loop_counter])
outfile.write("\n")
outfile.close()
Any clues to what might be the problem??
Update #2 Code:
import pandas as pd
import statistics as stat
from yahooquery import *
# Ticker input here
stock_list = [line.rstrip('\n') for line in open("test.txt", "r")]
#for stock in stock_list:
tickers = Ticker(stock_list)
# Get balance sheet
for stock in stock_list:
#print(stock)
bs = tickers.balance_sheet()
bs = pd.DataFrame(bs)
bs = bs.set_index('endDate')
#print(bs)
## Balance sheet variables to extract ##
# Total Assets
total_assets = bs['totalAssets']
avg_total_assets = stat.mean(total_assets)
# Total Current Liabilities
total_current_liabilities = bs['totalCurrentLiabilities']
avg_total_current_liabilities = stat.mean(total_current_liabilities)
# Total Liabilities
total_liabilities = bs['totalLiab']
avg_total_liabilities = stat.mean(total_liabilities)
## Get income statement ##
inst = tickers.income_statement()
inst = pd.DataFrame(inst)
inst = inst.set_index('endDate')
## Income statement variables to extract ##
# Total Revenue#
total_revenue = inst['totalRevenue']
avg_total_revenue = stat.mean(total_revenue)
# Operating Income
operating_income = inst['operatingIncome']
avg_operating_income = stat.mean(operating_income)
# Total Operating Expenses
total_operating_expenses = inst['totalOperatingExpenses']
avg_total_operating_expenses = stat.mean(total_operating_expenses)
# EBIT
ebit = (avg_total_revenue-avg_total_operating_expenses)
## Parameters ##
opm = (avg_operating_income) / (avg_total_revenue)
roce = (ebit) / ((avg_total_assets) - (avg_total_current_liabilities))
leverage = (avg_total_liabilities) / (avg_total_assets)
## Save to file ##
#print("Hello!")
if leverage < 1.00 and roce >= 0.2 and opm >= 0.2:
#print("Hello")
outfile = open("yahoo_query_results.txt", "w+")
outfile.write(stock)
outfile.write("\n")
outfile.close()