Data generation incomplete: Python random - python

I am trying to write a script to generate data, using the random package. When I execute the script everything seems to work fine, but when I check the results I find that the script fails to generate the last 100+ rows for some reason.
Can someone suggest why this could be happening?
from __future__ import print_function
from faker import Faker;
import random;

## Value declaration
population = 3;
product = 3;
years = 3;
months = 13;
days = 30;
tax = 3.5;

## Define Column Header
Column_Names = ("Population_ID",";","Product_Name",";","Product_ID",";","Year",";",
                "Month",";","Day","Quantity_sold",";","Sales_Price",";","Discount",
                ";","Actual_Sales_Price",tax);

## Function to generate sales related information
def sales_data():
    for x in range(0,1):
        quantity_sold = random.randint(5,20);
        discount = random.choice(range(5,11));
        sales_price = random.uniform(20,30);
    return quantity_sold,round(sales_price,2),discount,round((sales_price)-(sales_price*discount)+(sales_price*tax));

## Format the month to quarter and return the value
def quarter(month):
    if month >= 1 and month <= 3:
        return "Q1";
    elif month > 3 and month <= 6:
        return "Q2";
    elif month > 6 and month <= 9:
        return "Q3";
    else:
        return "Q4";

## Generate product_id
def product_name():
    str2 = "PROD";
    sample2 = random.sample([1,2,3,4,5,6,7,8,9],5);
    string_list = [];
    for x in sample2:
        string_list.append(str(x));
    return (str2+''.join(string_list));

### Main starts here ###
result_log = open("C:/Users/Sangamesh.sangamad/Dropbox/Thesis/Data Preparation/GenData.csv",'w')
print (Column_Names, result_log);

### Loop and Generate Data ###
for pop in range(0,population):
    pop = random.randint(55000,85000);
    for prod_id in range(0,product):
        product_name2 = product_name();
        for year in range(1,years):
            for month in range(1,months):
                for day in range(1,31):
                    a = sales_data();
                    rows = str(pop)+";"+product_name2+";"+str(prod_id)+";"+str(year)+";"+str(month)+";"+quarter(month)+";"+str(day)+";"+str(a[0])+";"+str(a[1])+";"+str(a[2])+";"+str(tax)+";"+str(a[3]);
                    print(rows,file=result_log);
                    #print (rows);
tax = tax+1;

You need to close a file to have its buffers flushed:

result_log.close()

Better still, use the file object as a context manager and have the with statement close it for you when the block exits:

filename = "C:/Users/Sangamesh.sangamad/Dropbox/Thesis/Data Preparation/GenData.csv"
with open(filename, 'w') as result_log:
    # code writing to result_log

Rather than manually writing strings with delimiters in between, you should really use the csv module:

import csv

# ..

column_names = (
    "Population_ID", "Product_Name", "Product_ID", "Year",
    "Month", "Day", "Quantity_sold", "Sales_Price", "Discount",
    "Actual_Sales_Price", tax)

# ..

with open(filename, 'wb') as result_log:
    writer = csv.writer(result_log, delimiter=';')
    writer.writerow(column_names)

    # looping
    row = [pop, product_name2, prod_id, year, month, quarter(month), day,
           a[0], a[1], a[2], tax, a[3]]
    writer.writerow(row)
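Putting the two suggestions together, a minimal end-to-end sketch could look like the following. This is an illustration, not the original script: it is written for Python 3, where csv.writer wants a text-mode file opened with newline='', and it inlines simplified stand-ins for the question's sales_data(), product_name() and quarter() helpers; the shortened file path is also just for the example.

import csv
import random

tax = 3.5

def sales_data():
    quantity_sold = random.randint(5, 20)
    discount = random.choice(range(5, 11))
    sales_price = random.uniform(20, 30)
    actual = round(sales_price - sales_price * discount + sales_price * tax)
    return quantity_sold, round(sales_price, 2), discount, actual

def quarter(month):
    # same mapping as the if/elif chain above, written as arithmetic
    return "Q%d" % ((month - 1) // 3 + 1)

def product_name():
    return "PROD" + ''.join(str(x) for x in random.sample(range(1, 10), 5))

column_names = ("Population_ID", "Product_Name", "Product_ID", "Year", "Month",
                "Quarter", "Day", "Quantity_sold", "Sales_Price", "Discount",
                "Tax", "Actual_Sales_Price")

filename = "GenData.csv"  # shortened path for the example
with open(filename, 'w', newline='') as result_log:
    writer = csv.writer(result_log, delimiter=';')
    writer.writerow(column_names)
    for _ in range(3):                        # population
        population_id = random.randint(55000, 85000)
        for prod_id in range(3):              # product
            prod = product_name()
            for year in range(1, 3):          # years
                for month in range(1, 13):    # months
                    for day in range(1, 31):  # days
                        qty, price, disc, actual = sales_data()
                        writer.writerow([population_id, prod, prod_id, year, month,
                                         quarter(month), day, qty, price, disc,
                                         tax, actual])
# leaving the with-block closes the file, so every buffered row reaches the disk

Because the with statement closes the file on exit, nothing stays stuck in the write buffer, which is what was truncating the last rows of the original output.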

Related

Embedding python Matplotlib Graph in html using Pyscript

I've written a python program that takes some inputs and turns them into a matplotlib graph. Specifically, it displays wealth distributions by percentile for a country of the user's choosing. However, these inputs are currently given by changing variables in the program.
I want to put this code on a website, allowing users to choose any country and see the wealth distribution for that country, as well as how they compare. Essentially, I am trying to recreate this: https://wid.world/income-comparator/
The Python code is all done, but I am struggling to incorporate it into an HTML file. I was trying to use PyScript, but the page currently loads forever and displays nothing. I'd rather not rewrite it in JavaScript (mainly because I don't know JS). My guess is that it has something to do with the code importing CSV files from my device.
import csv
from typing import List
import matplotlib.pyplot as plt
import collections
import math
from forex_python.converter import CurrencyRates

# ---------------- #
# whether or not the graph includes the top 1 percent in the graph (makes the rest of the graph visible!)
one_percent = False # True or False
# pick which country(ies) you want to view
country = 'China' # String
# what currency should the graph use
currency_used = 'Canada' # String
# if you want to compare an income
compare_income = True # True or False
# what income do you want to compare
income = 100000 # Int
# ---------------- #

codes = {}
# get dictionary of monetary country codes
monetary_codes = {}
with open('codes-all.csv') as csv_file:
    list = csv.reader(csv_file, delimiter=',')
    for row in list:
        if row[5] == "":
            monetary_codes[row[0]] = (row[2], row[1])

# get dictionary of country names and codes for WID
with open('WID_countries.csv') as csv_file:
    WID_codes = csv.reader(csv_file, delimiter=',')
    next(WID_codes)
    for row in WID_codes:
        if len(row[0]) == 2:
            if row[2] != "":
                monetary_code = monetary_codes[row[1].upper()][0]
                currency_name = monetary_codes[row[1].upper()][1]
                codes[row[1].upper()] = (row[0], monetary_code, currency_name)
            elif row[2] == "":
                codes[row[1].upper()] = (row[0], 'USD', 'United States Dollar')
        elif row[0][0] == 'U' and row[0][1] == 'S':
            codes[row[1].upper()] = (row[0], 'USD', 'United States Dollar')

# converts user input to upper case
country = country.upper()
currency_used = currency_used.upper()

# gets conversion rate
c = CurrencyRates()
conversion_rate = c.get_rate(codes[country][1], codes[currency_used][1])

# convert money into correct currency
def convert_money(conversion_rate, value):
    return float(value) * conversion_rate

# get and clean data
def get_data(country):
    aptinc = {}
    # cleaning the data
    with open(f'country_data/WID_data_{codes[country][0]}.csv') as csv_file:
        data = csv.reader(csv_file, delimiter=';')
        for row in data:
            # I only care about the year 2021 and the variable 'aptinc'
            if 'aptinc992' in row[1] and row[3] == '2021':
                # translates percentile string into a numerical value
                index = 0
                for i in row[2]:
                    # index 0 is always 'p', so we get rid of that
                    if index == 0:
                        row[2] = row[2][1:]
                    # each string has a p in the middle of the numbers we care about. I also only
                    # care about the rows which measure a single percentile
                    # (upper bound - lower bound <= 1)
                    elif i == 'p':
                        lb = float(row[2][:index - 1])
                        ub = float(row[2][index:])
                        # if the top one percent is being filtered out adds another requirement
                        if not one_percent:
                            if ub - lb <= 1 and ub <= 99:
                                row[2] = ub
                            else:
                                row[2] = 0
                        else:
                            if ub - lb <= 1:
                                row[2] = ub
                            else:
                                row[2] = 0
                    index += 1
                # adds wanted, cleaned data to a dictionary. Also converts all values to one currency
                if row[2] != 0:
                    aptinc[row[2]] = convert_money(conversion_rate, row[4])
    return aptinc

# find the closest percentile to an income
def closest_percentile(income, data):
    closest = math.inf
    percentile = float()
    for i in data:
        difference = income - data[i]
        if abs(difference) < closest:
            closest = difference
            percentile = i
    return percentile

# ---------------- #
unsorted_data = {}
percentiles = []
average_income = []

# gets data for the country
data = get_data(country)
for i in data:
    unsorted_data[i] = data[i]

# sorts the data
sorted = collections.OrderedDict(sorted(unsorted_data.items()))
for i in sorted:
    percentiles.append(i)
    average_income.append(data[i])

# makes countries pretty for printing
country = country.lower()
country = country.capitalize()

# calculates where the income places against incomes from country(ies)
blurb = ""
if compare_income:
    percentile = closest_percentile(income, sorted)
    blurb = f"You are richer than {round(percentile)} percent of {country}'s population"

# plot this data!
plt.plot(percentiles, average_income)
plt.title(f'{country} Average Annual Income by Percentile')
plt.xlabel(f'Percentile\n{blurb}')
plt.ylabel(f'Average Annual Income of {country}({codes[currency_used][1]})')
plt.axvline(x=99, color='r', label='99th percentile', linestyle=':')
if compare_income:
    plt.axvline(x=percentile, color='g', label=f'{income} {codes[currency_used][2]}')
plt.legend(bbox_to_anchor=(0, 1), loc='upper left')
plt.show()

Using user input as variables in Python

I am trying to implement a "user-friendly" portfolio optimization program in Python.
Since I am still a beginner, I have not quite managed to pull it off.
The only input the program should need is the stock codes.
I have tried to create an MWE below:
import numpy as np
import yfinance as yf
import pandas as pd

def daily_returns(price):
    price = price.to_numpy()
    shift_1 = price[1:]
    shift_2 = price[:-1]
    return (shift_1 - shift_2)/shift_1

def annual_returns(price):
    price = price.to_numpy()
    start = price[0]
    end = price[len(price)-1]
    return (end-start)/start

def adjusting(price):
    adj = len(price)
    diff = adj - adjvalue
    if diff != 0:
        price_new = price[:-diff]
    else:
        price_new = price
    return price_new

#Minimal Reproducible Example
#getting user input
names = input('Stock codes:')
names = names.split()
a = len(names)
msft = yf.Ticker(names[0])
aapl = yf.Ticker(names[1])

#import data
hist_msft = msft.history(interval='1d',start='2020-01-01',end='2020-12-31')
hist_msft = pd.DataFrame(hist_msft,columns=['Close'])
#hist_msft = hist_msft.to_numpy()
hist_aapl = aapl.history(interval='1d',start='2020-01-01',end='2020-12-31')
hist_aapl = pd.DataFrame(hist_aapl,columns=['Close'])
#hist_aapl = hist_aapl.to_numpy()

#daily returns
aapl_daily_returns = daily_returns(hist_aapl)
aapl_daily_returns = np.ravel(aapl_daily_returns)
msft_daily_returns = daily_returns(hist_msft)
msft_daily_returns = np.ravel(msft_daily_returns)

#adjusting for different trading periods
adjvalue = min(len(aapl_daily_returns),len(msft_daily_returns))
aapl_adj = adjusting(aapl_daily_returns)
msft_adj = adjusting(msft_daily_returns)

#annual returns
aapl_ann_returns = annual_returns(hist_aapl)
msft_ann_returns = annual_returns(hist_msft)

#inputs for optimization
cov_mat = np.cov([aapl_adj,msft_adj])*252
ann_returns = np.concatenate((aapl_ann_returns,msft_ann_returns))
Now I just want the code to work with a varying, unknown number of inputs. I have read a lot about global variables and tried to figure it out with dictionaries, but couldn't really make any progress.
I think using a for loop can solve your problem!
...
names = input('Stock codes:')
names = names.split()

for name in names:
    # analyze here
    # I don't know anything about stocks so I won't write anything here
...
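If you want the same calculations to scale to any number of tickers, one minimal sketch of that loop (my own illustration, assuming yfinance, numpy and pandas are installed; the variable names are hypothetical) is to keep the per-ticker results in dictionaries keyed by ticker instead of one hard-coded variable per stock:

import numpy as np
import pandas as pd
import yfinance as yf

def daily_returns(price):
    p = price.to_numpy().ravel()
    return (p[1:] - p[:-1]) / p[1:]

def annual_returns(price):
    p = price.to_numpy().ravel()
    return (p[-1] - p[0]) / p[0]

names = input('Stock codes: ').split()

daily = {}   # ticker -> array of daily returns
annual = {}  # ticker -> annual return (scalar)
for name in names:
    hist = yf.Ticker(name).history(interval='1d', start='2020-01-01', end='2020-12-31')
    close = pd.DataFrame(hist, columns=['Close'])
    daily[name] = daily_returns(close)
    annual[name] = annual_returns(close)

# trim every return series to the shortest one so the covariance matrix lines up
shortest = min(len(r) for r in daily.values())
adjusted = [daily[name][:shortest] for name in names]

cov_mat = np.cov(adjusted) * 252
ann_returns = np.array([annual[name] for name in names])

cov_mat and ann_returns then have one row/entry per entered ticker, so the optimization inputs no longer depend on how many codes the user typed.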

Key Error: "None of --- are in the columns"

I wrote a script to scrape Yahoo Finance stock data using the Yahoo_Fin package.
The aim of the script is to grab company financials to be able to perform some calculations. The input to the script is a txt file with a list of company ticker symbols. The output is also supposed to be a txt file with only the companies that match a certain number of established criteria.
The script does occasionally work with a small txt file (20 tickers or less); however, it sometimes gives me the following error (without me changing any code):
"None of ['Breakdown'] are in the columns", with Breakdown being the index column I set for the df.
I have run the script dozens of times and sometimes it works, sometimes it doesn't. I have run it in Atom and in a Jupyter Notebook and still have no clue what is causing the problem. I have also updated pandas and all the necessary packages.
This is the code:
import pandas as pd
import statistics as stat
from yahoo_fin.stock_info import *

stock_list = [line.rstrip('\n') for line in open("test.txt", "r")]
#print(stock_list)

## The balance sheet df ##
balance_sheet = {ticker: get_balance_sheet(ticker)
                 for ticker in stock_list}
## The income statement df ##
income_statement = {ticker: get_income_statement(ticker)
                    for ticker in stock_list}

bs_data = []
for i in range(0, len(stock_list)):
    one_ticker = pd.DataFrame(balance_sheet[stock_list[i]])
    one_ticker = one_ticker.set_index('Breakdown')
    bs_data.append(one_ticker)
#print(bs_data)

income_data = []
#one_ticker =[]
for i in range(0, len(stock_list)):
    one_ticker = pd.DataFrame(income_statement[stock_list[i]])
    one_ticker = one_ticker.set_index('Breakdown')
    income_data.append(one_ticker)
#print(income_data)

## These are the balance sheet variables ##
for loop_counter in range(0, len(stock_list)):
    # Total Assets
    total_assets = (bs_data[loop_counter].loc['Total Assets'].astype(int))
    avg_total_assets = stat.mean(total_assets)
    #print(avg_total_assets)
    # Total Current Liabilities
    total_current_liabilities = (bs_data[loop_counter].loc['Total Current Liabilities'].astype(int))
    avg_total_current_liabilities = stat.mean(total_current_liabilities)
    #print(avg_total_current_liabilities)
    # Total Liabilities
    total_liabilities = (bs_data[loop_counter].loc['Total Liabilities'].astype(int))
    avg_total_liabilities = stat.mean(total_liabilities)
    #print(avg_total_liabilities)

    ## These are the income statement variables ##
    # Total Revenue
    total_revenue = (income_data[loop_counter].loc['Total Revenue']).astype(int)
    avg_total_revenue = stat.mean(total_revenue)
    #print(avg_total_revenue)
    # Operating Income
    operating_income = (income_data[loop_counter].loc['Operating Income or Loss']).astype(int)
    avg_operating_income = stat.mean(operating_income)
    #print(avg_operating_income)
    # Total Operating Expenses
    total_operating_expenses = (income_data[loop_counter].loc['Total Operating Expenses'].astype(int))
    avg_total_operating_expenses = stat.mean(total_operating_expenses)
    #print(avg_total_operating_expenses)
    # EBIT
    ebit = (avg_total_revenue - avg_total_operating_expenses)
    #print(ebit)

    ## Calculations ##
    opm = (avg_operating_income) / (avg_total_revenue)
    #print(opm)
    roce = (ebit) / ((avg_total_assets) - (avg_total_current_liabilities))
    #print(roce)
    leverage = (avg_total_liabilities) / (avg_total_assets)
    #print(leverage)
    #print("Leverage: " + str(round(leverage,2)))
    #print("OPM: " + str(round(opm*100,2)) + "%")
    #print("ROCE: " + str(round(roce*100,2)) + "%")

    ## Save to file ##
    #print(leverage)
    #print(opm)
    #print(roce)
    if leverage < 1.00 and roce >= 0.2 and opm >= 0.2:
        #print("We have a match!")
        outfile = open("results.txt", "a")
        outfile.write(stock_list[loop_counter])
        outfile.write("\n")
        outfile.close()
Any clues as to what might be the problem?
Update #2 Code:
import pandas as pd
import statistics as stat
from yahooquery import *

# Ticker input here
stock_list = [line.rstrip('\n') for line in open("test.txt", "r")]
#for stock in stock_list:
tickers = Ticker(stock_list)

# Get balance sheet
for stock in stock_list:
    #print(stock)
    bs = tickers.balance_sheet()
    bs = pd.DataFrame(bs)
    bs = bs.set_index('endDate')
    #print(bs)

    ## Balance sheet variables to extract ##
    # Total Assets
    total_assets = bs['totalAssets']
    avg_total_assets = stat.mean(total_assets)
    # Total Current Liabilities
    total_current_liabilities = bs['totalCurrentLiabilities']
    avg_total_current_liabilities = stat.mean(total_current_liabilities)
    # Total Liabilities
    total_liabilities = bs['totalLiab']
    avg_total_liabilities = stat.mean(total_liabilities)

    ## Get income statement ##
    inst = tickers.income_statement()
    inst = pd.DataFrame(inst)
    inst = inst.set_index('endDate')

    ## Income statement variables to extract ##
    # Total Revenue
    total_revenue = inst['totalRevenue']
    avg_total_revenue = stat.mean(total_revenue)
    # Operating Income
    operating_income = inst['operatingIncome']
    avg_operating_income = stat.mean(operating_income)
    # Total Operating Expenses
    total_operating_expenses = inst['totalOperatingExpenses']
    avg_total_operating_expenses = stat.mean(total_operating_expenses)
    # EBIT
    ebit = (avg_total_revenue - avg_total_operating_expenses)

    ## Parameters ##
    opm = (avg_operating_income) / (avg_total_revenue)
    roce = (ebit) / ((avg_total_assets) - (avg_total_current_liabilities))
    leverage = (avg_total_liabilities) / (avg_total_assets)

    ## Save to file ##
    #print("Hello!")
    if leverage < 1.00 and roce >= 0.2 and opm >= 0.2:
        #print("Hello")
        outfile = open("yahoo_query_results.txt", "w+")
        outfile.write(stock)
        outfile.write("\n")
        outfile.close()
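One defensive pattern worth trying on the original yahoo_fin version (my own sketch under the assumption that the intermittent failures come from Yahoo occasionally returning an empty or differently shaped page, which yahoo_fin scrapes from HTML; not a confirmed fix): check for the 'Breakdown' column before calling set_index(), so a bad response identifies the ticker instead of raising the KeyError.

import pandas as pd
from yahoo_fin.stock_info import get_balance_sheet

def indexed_balance_sheet(ticker):
    # returns the balance sheet indexed by 'Breakdown', or None if Yahoo
    # returned something unexpected for this ticker
    df = pd.DataFrame(get_balance_sheet(ticker))
    if 'Breakdown' not in df.columns:
        print(ticker + ": no 'Breakdown' column in the scraped data, skipping")
        return None
    return df.set_index('Breakdown')

# keep only the tickers that came back in the expected shape
bs_data = [x for x in (indexed_balance_sheet(t) for t in ['AAPL', 'MSFT']) if x is not None]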

How to check limit range from csv file in python script

I am trying to fetch some values from a database and need to check the lower and upper limits of some variables, which are stored in a text file like this, separated by \t. The text file looks like:
Variable lower_limit upper_limit
temperature 20 40
pressure 0 100
temperature2 0 30
temperature3 20 25
and the data in the database looks like:
usec temperature_data temperature2_data
1456411800 25 15
1456412400 45 25
1456413000 28 19
So I start by checking whether the variable is in the text file; if yes, then I need to check the limits of that variable. So far I have only been successful in verifying the name of the variable, but I am unable to check the limits.
My code is as follows:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import datetime as dt
import sys
import time
import datetime
import calendar
import numpy as np
import mysql.connector
import datetime
import numpy as np
import pandas as pd
import mysql.connector
from mysql.connector import errorcode

# starting day, month and year
start_day = dt.datetime(2016, 02, 25)
# total number of dates to visualize
number = 11
num_total = 11
# enter limit range
upper_limit = 250   # these are hardcoded values which I want to
                    # replace; instead of hard-coding them, I want to
                    # check these limit values from the text file
lower_limit = 0
# start day in epoch time format
start_time = 1456411800
# variable name and filepath
filepath = '/home/robbyy/files/limit.txt'
vari_name = 'temperature2'
# database name, user and password details and query to fetch respective data
usr = 'roby'
password = 'xxxx'
db_name = 'roby_data'
insert_query = ("SELECT usec , temperature2_data "
                "FROM rob_table WHERE usec >= %s "
                "AND usec <= %s")

def generate_data():
    num = num_total
    cnx = mysql.connector.connect(user=usr, password=password,
                                  database=db_name)
    cursor = cnx.cursor()
    query = insert_query
    for i in range(number):
        current_start_ts = (start_time + (i*86400))
        current_day = datetime.datetime.fromtimestamp(current_start_ts)
        # print 'cd: ', current_day
        current_end_ts = (start_time + (i*86400)) + 86399
        cursor.execute(query, (current_start_ts * 1000000,
                               current_end_ts * 1000000))
        rows = cursor.fetchall()
        rows_arr = np.array(rows)
        # print 'rows all here li: ', rows
        with open(filepath, 'r') as f:
            limit_file = f.read()
        limits = {}
        for line in limit_file.splitlines():
            print 'line to see:', line
            variable, lower, upper = line.split()
            if not variable == 'Variable':
                limits[variable] = {'lower': int(lower),
                                    'upper': int(upper)}
        print 'limits: ', limits
        if vari_name in data:
            pass
            if len(rows_arr) == 0:
                continue
                # print 'no data is here'
            else:
                for item, index in rows_arr:
                    if index >= upper_limit or index <= lower_limit:
                        print 'data exceeds limit: ', index
                    else:
                        continue
                        # print 'data is within range: ', index
        else:
            print 'sorry: this variable name is invalid'
    start = start_day
    dates = [start + dt.timedelta(days=i) for i in range(num)]
    return dates

def main():
    dates = generate_data()

main()
If someone could help or guide me on how to check the lower and upper limits from the text file for the required variable, instead of hard-coding the values in the script, I would be grateful.
Thanks
Just parse the limits file and, for example, create a dict out of it. Something like this:
def parse_limits(file):
    with open(file, 'r') as f:
        limit_file = f.read()
    limits = {}
    for line in limit_file.splitlines():
        variable, lower, upper = line.split()
        if not variable == 'Variable':
            limits[variable] = {'lower': int(lower),
                                'upper': int(upper)}
    return limits
That would result in a nested dict as follows:
{
    'pressure': {'upper': 100, 'lower': 0},
    'temperature2': {'upper': 30, 'lower': 0},
    'temperature': {'upper': 40, 'lower': 20},
    'temperature3': {'upper': 25, 'lower': 20}
}
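A small usage sketch of that dict (my own illustration, written with Python 3 print() rather than the question's Python 2 print statements; the sample rows mirror the temperature column from the question):

limits = parse_limits('/home/robbyy/files/limit.txt')
vari_name = 'temperature'

# sample (usec, value) rows like the ones fetched from the database
rows = [(1456411800, 25), (1456412400, 45), (1456413000, 28)]

if vari_name in limits:
    for usec, value in rows:
        if value >= limits[vari_name]['upper'] or value <= limits[vari_name]['lower']:
            print('data exceeds limit:', value)  # 45 is above temperature's 20-40 range
else:
    print('sorry: this variable name is invalid')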
Edit:
As requested, your final code might look something like this:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import datetime as dt
import sys
import time
import datetime
import calendar
import numpy as np
import mysql.connector
import datetime
import numpy as np
import pandas as pd
import mysql.connector
from mysql.connector import errorcode

# starting day, month and year
start_day = dt.datetime(2016, 02, 25)
# total number of dates to visualize
number = 11
num_total = 11
# enter limit range
upper_limit = 250   # these are hardcoded values which I want to
                    # replace; instead of hard-coding them, I want to
                    # check these limit values from the text file
lower_limit = 0
# start day in epoch time format
start_time = 1456411800
# variable name and filepath
filepath = '/home/robbyy/files/limit.txt'
vari_name = 'temperature2'
# database name, user and password details and query to fetch respective data
usr = 'roby'
password = 'xxxx'
db_name = 'roby_data'
insert_query = ("SELECT usec , temperature2_data "
                "FROM rob_table WHERE usec >= %s "
                "AND usec <= %s")

def parse_limits(file):
    with open(file, 'r') as f:
        limit_file = f.read()
    limits = {}
    for line in limit_file.splitlines():
        variable, lower, upper = line.split()
        if not variable == 'Variable':
            limits[variable] = {'lower': int(lower),
                                'upper': int(upper)}
    return limits

limits = parse_limits(filepath)

def generate_data():
    num = num_total
    cnx = mysql.connector.connect(user=usr, password=password,
                                  database=db_name)
    cursor = cnx.cursor()
    query = insert_query
    for i in range(number):
        current_start_ts = (start_time + (i*86400))
        current_day = datetime.datetime.fromtimestamp(current_start_ts)
        # print 'cd: ', current_day
        current_end_ts = (start_time + (i*86400)) + 86399
        cursor.execute(query, (current_start_ts * 1000000,
                               current_end_ts * 1000000))
        rows = cursor.fetchall()
        rows_arr = np.array(rows)
        # print 'rows all here li: ', rows
        print 'limits: ', limits
        if vari_name in data:
            if len(rows_arr) == 0:
                continue
                # print 'no data is here'
            else:
                for item, index in rows_arr:
                    if (index >= limits[vari_name]['upper'] or
                            index <= limits[vari_name]['lower']):
                        print 'data exceeds limit: ', index
                    else:
                        continue
                        # print 'data is within range: ', index
        else:
            print 'sorry: this variable name is invalid'
    start = start_day
    dates = [start + dt.timedelta(days=i) for i in range(num)]
    return dates

def main():
    dates = generate_data()

main()

pandas does not append to df as it should of each line of iteration

I have a df that does not behave. Please help me train it!
I need each iteration that makes it all the way through to the deepest nested 'if' statement (i.e. satisfies all my requirements) to be appended to my df 'df_comp_KPIs'.
Why does this code not work?
Any ideas?
import time
import urllib.request, urllib.error, urllib.parse
import pandas as pd
import csv
import urllib.request, urllib.error, urllib.parse
from bs4 import BeautifulSoup as bs4

start = time.time()  # Start script timer

# Creating the df that will save my results in the Yahoo KPI iterations
#global df_comp_KPIs
df_comp_KPIs = pd.DataFrame()  #columns = ('Ticker','Mark.Cap','PriceToBook','PEG5','TrailPE12Mo','DeptToEquit.')

ofInterest = ['AAN', 'ANF', 'ANCX', 'ACE', 'ATVI', 'AET', 'AGCO', 'ATSG', 'AWH', 'ALL', 'AFAM', 'ALJ']
evenBetter = []

# add some more to powers as necessary
powers = {'M': 10 ** 6, 'B': 10 ** 9, 'T': 10 ** 12}

# Convert the string from Market Cap col to float.
def stringNoToFloat(s):
    try:
        power = s[-1]
        return float(s[:-1]) * powers[power]
    except TypeError:
        return s
#df.applymap(stringNoToFloat)

def yahoostats(ticker):
    try:
        print ('doing',ticker)
        url = 'http://finance.yahoo.com/q/ks?s='+ticker
        page = urllib.request.urlopen(url)
        soup = bs4(page)
        page.close()
        # Lookup pbr and return the next 'td' tag-content
        mcap = soup.find(text='Market Cap (intraday)').findNext('td').string
        # print('Market Cap:', mcap)
        pbr = float(soup.find(text='Price/Book (mrq):').findNext('td').string)
        # print('Ticker %s, Price to book ratio: %1.2f' %(ticker, float(pbr))) # format 2 dig.
        # print()
        if float(pbr) < 3:
            # print ('price to book ratio:',ticker,pbr)
            PEG5 = float(soup.find(text='PEG Ratio (5 yr expected)').findNext('td').string)
            # print('Ticker %s, PEG Ratio (5 yr expected): %1.2f' %(ticker, PEG5)) # format 2 dig.
            if 0 < float(PEG5) < 3:
                #print 'PEG forward 5 years',PEG5
                DE = float(soup.find(text='Total Debt/Equity (mrq):').findNext('td').string)
                # print('Ticker %s, Total Debt/Equity (mrq): %1.2f' %(ticker, DE)) # format 2 dig.
                #
                # #if 0 < float(DE) < 2:
                #
                PE12 = float(soup.find(text='Trailing P/E (ttm, intraday):').findNext('td').string)
                # print ('Trailing PE (12mo):', PE12)
                if float(PE12) < 15:
                    evenBetter.append(ticker)
                    df_comp_KPIs = df_comp_KPIs.append({'Ticker':ticker,'Mark.Cap':mcap,'PriceToBook':pbr,'PEG5':PEG5,'TrailPE12Mo':PE12,'DeptToEquit':DE}, ignore_index = True) #
                    df_comp_KPIs = df_comp_KPIs.sort(['PriceToBook','PEG5'], ascending=[1,1]) # , ignore_index=True
                    print('____________________________')
                    print('')
                    print(ticker,'meets requirements')
                    print('Market Cap (intraday):', mcap)
                    print('price to book:',pbr)
                    print('PEG forward 5 years',PEG5)
                    print('Trailing PE (12mo):',PE12)
                    print('Debt to Equity:',DE)
                    print('____________________________')
                    # saving ticker KPIs to csv #
                    df_comp_KPIs.to_csv('df_company_KPIs.csv')
    except Exception as e:
        print ('failed in the main loop:',str(e))
        pass
    return mcap, pbr, PEG5, PE12, DE

if __name__ == '__main__':
    for eachticker in ofInterest:
        yahoostats(eachticker)
        # time.sleep(.05)

    print(evenBetter)
    print()
    print('Company screener finished in %.1f seconds' %(time.time()-start))

    # Convert string with MarketCap to float in one go on the mcap col in df
    df_comp_KPIs['Mark.Cap'].applymap(stringNoToFloat)
OK. It appears I ended up with this error because of a missing global declaration of my df, so the code could not see the df created outside the function.
So the beginning of my function should look like this instead of the above:
def yahoostats(ticker):
    global df_comp_KPIs
    try:
        print ('doing',ticker)
        url = 'http://finance.yahoo.com/q/ks?s='+ticker
        page = urllib.request.urlopen(url)
This solves the issue and I can alter, call, or do anything else with my df.
I found the answer here:
http://eli.thegreenplace.net/2011/05/15/understanding-unboundlocalerror-in-python
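The global fix above works; an alternative sketch (my own, not the poster's fix) is to avoid the module-level DataFrame entirely: have the function return a plain dict of KPIs and build the frame once after the loop. This also sidesteps DataFrame.append and DataFrame.sort, which have since been removed from pandas in favour of pd.concat/constructing once and sort_values. The KPI values below are placeholders standing in for the scraped ones.

import pandas as pd

def collect_kpis(ticker):
    # in the real script these values come from the BeautifulSoup scraping in
    # yahoostats(); returning None would mean the ticker failed the screen
    return {'Ticker': ticker, 'Mark.Cap': '1.2B', 'PriceToBook': 1.5,
            'PEG5': 1.2, 'TrailPE12Mo': 12.0, 'DeptToEquit': 0.8}

ofInterest = ['AAN', 'ANF', 'ANCX']
rows = [collect_kpis(t) for t in ofInterest]
df_comp_KPIs = pd.DataFrame([r for r in rows if r is not None])
df_comp_KPIs = df_comp_KPIs.sort_values(['PriceToBook', 'PEG5'], ascending=[True, True])
df_comp_KPIs.to_csv('df_company_KPIs.csv')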
