When I run the code below, it keeps telling me that the DataFrame object is not callable.
import os
from datetime import datetime, date

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
import matplotlib.pyplot as plot
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D


class OptionDataWebGleaner():

    def __init__(self):
        ticker = pd.read_csv('Yahoo_ticker_List.csv')['AUB.AX'].values
        stock = raw_input('Please give the ticker of your selected option?\n')
        if stock in ticker:
            self.stock = stock
        else:
            raise TypeError('Your option is not available here.')
        date_norm = raw_input('Please give your maturity date in the format of mm/dd/yyyy\n')
        maturity_date = datetime.strptime(date_norm, '%m/%d/%Y').date()
        self.maturity_date = maturity_date
        self.today = date.today()
        dates = ['1481846400', '1484870400', '1487289600']
        maturity_dates = [date(2016, 12, 16), date(2017, 1, 20), date(2017, 2, 17)]
        date_dict = {}
        for v in zip(dates, maturity_dates):
            date_dict[v[1]] = v[0]
        try:
            self.d = date_dict[self.maturity_date]
        except:
            print('Your maturity date is not available')
        option = raw_input('Please give the type of your option, either call or put\n')
        self.option_type = option + 's'

    @property
    def crawl_data(self):  # option_type: calls or puts (str)
        stock = self.stock
        option_type = self.option_type
        maturity_date = self.maturity_date
        d = self.d
        chromedriver = "/Users/Miya/Downloads/chromedriver.exe"
        os.environ["webdriver.chrome.driver"] = chromedriver
        driver = webdriver.Chrome(chromedriver)
        today = self.today
        ## Get the url
        url = 'http://finance.yahoo.com/quote/' + stock + '/options?date=' + d
        ## Crawl data
        driver.get(url)
        html_source = driver.page_source
        ## Beautifulsoup
        soup = BeautifulSoup(html_source, 'html.parser')
        if soup.find('table', option_type) is not None:
            stock_price = [float(i.text) for i in soup.findAll('span', 'Fz(36px)')]
            title = [i.text for i in soup.find('table', option_type).find_all('th')]
            text = [i.text for i in soup.find('table', option_type).find_all('td')]
            rows = [row for row in soup.find('table', option_type).find_all("tr")]
            l_table = len(rows) - 1
            ## call/put data
            dictionary = {}
            dictionary['maturity_date'] = [maturity_date] * l_table
            dictionary['date'] = [today] * l_table
            dictionary['stock_price'] = stock_price * l_table
            for j in range(10):
                key = title[j]
                dictionary[key] = []
                for i in range(l_table):
                    dictionary[key].append(text[10 * i + j])
            ## write into dataframe
            dataframe = pd.DataFrame(dictionary)
            return dataframe

    def clean_data(self):
        dataframe = self.crawl_data()
        print('Remove unexpected symbols...')
        columns_to_set = ['Last Price', 'Open Interest', 'Strike', 'Volume', 'Implied Volatility']
        for i in columns_to_set:
            series = dataframe[i]
            series_new = []
            for j in series:
                j = str(j)
                j_new = ''.join(ch for ch in j if (ch != '%') and (ch != ','))
                series_new.append(j_new)
            dataframe[i] = series_new
        print('Change the data type...')
        ## change the dtype
        columns_to_change = ['Last Price', 'Open Interest', 'Strike', 'Volume', 'stock_price', 'Implied Volatility']
        for i in columns_to_change:
            dataframe_cleaned[i] = dataframe[i].astype(float)
        print("Remove missing values...")
        dataframe_cleaned = dataframe_cleaned.dropna()
        # print("Clean Outliers...")
        # dataframe = dataframe.loc[dataframe['Implied Volatility'] <= 2]
        return dataframe_cleaned

    def save_file(self):
        save_file = raw_input("Do you want to save the file into csv? Type Y for yes, N for no\n ")
        d = self.d
        stock = self.stock
        df_option = self.clean_data()
        if save_file == 'Y':
            csv_name = stock + d + '.csv'
            df_option.to_csv(csv_name)
            print("File Saved!")

    def viz(self):
        dataframe = self.clean_data()
        stock = self.stock
        time_to_maturity = []
        dataframe = dataframe.sort_values(by='Strike')
        ## grab dataframe, then relevant data
        for i, j in zip(dataframe.maturity_date, dataframe.date):
            time_to_maturity.append((i - j).days / 365)
        strike_price = dataframe['Strike']
        # generate pseudo-implied volatility by using strike price and time-to-maturity as parameters
        implied_vol = dataframe['Implied Volatility'].values
        strike_price, time_to_maturity = np.meshgrid(strike_price, time_to_maturity)
        fig = plot.figure(figsize=(10, 5))  ## a plot object
        ax = Axes3D(fig)  # create a 3D object/handle
        ## plot surface: array row/column stride (step size: 2)
        surf = ax.plot_surface(strike_price, time_to_maturity, implied_vol, rstride=2, cstride=2,
                               cmap=cm.coolwarm, linewidth=0.5, antialiased=False)
        # set x, y, z labels
        ax.set_xlabel('Strike Price')
        ax.set_ylabel('time to maturity')
        ax.set_zlabel('implied volatility%')
        plot.suptitle(stock)
        plot.show()

    def summary(self):
        dataframe = self.clean_data
        print(dataframe.describe())


OptionDataWebGleaner().viz()
The problem is the @property decorator on crawl_data. This answer explains how the property decorator actually works, but basically: with the decorator applied, self.crawl_data is the DataFrame returned by the method, not the method itself. So self.crawl_data() in the first line of clean_data tries to call the DataFrame, not the method.
Here's an example:
>>> class Test(object):
... @property
... def example(self):
... return 1
...
>>> t = Test()
>>> t.example
1
>>> t.example()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'int' object is not callable
This question really could have done with the stack trace. It would have led us right to the line with the problematic call.
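For completeness, here is a minimal sketch of the two usual fixes, using a toy class in place of OptionDataWebGleaner; either change resolves the error, but don't apply both:

class Example(object):
    def crawl_data(self):            # fix 1: remove the @property decorator...
        return 'the data'

    def clean_data(self):
        return self.crawl_data()     # ...and keep calling it as a normal method


class ExampleWithProperty(object):
    @property
    def crawl_data(self):            # fix 2: keep @property...
        return 'the data'

    def clean_data(self):
        return self.crawl_data       # ...and drop the parentheses at the call site


print(Example().clean_data())              # prints: the data
print(ExampleWithProperty().clean_data())  # prints: the data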
Related
This works if I remove the schedule, but if I leave it in I receive a KeyError for 'Symbol':
def tweet_and_archive(sl):
    ticker_l = []
    name_l = []
    price_l = []
    price_out_l = []
    date_time = []
    for index, row in sl.iterrows():
        Stock = row['Symbol']
        Price = row['Price']
        Price_out = row['Price Out']
        name_ = row['Name']
        Date_ = row['DateTime']
        if ...

schedule.every().monday.at('12:31').do(lambda: tweet_and_archive(short_list))

while True:
    schedule.run_pending()
    time.sleep(1)
This is the short_list dataframe:
  Symbol                             Name  Price  % Change  Price Out    DateTime
0  ANGPY  Anglo American Platinum Limited  25.82      7.14        NaN  28/02/2022
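As an aside that may be worth checking: schedule's do() accepts the job's arguments directly (its signature is do(job_func, *args, **kwargs)), so the lambda isn't needed and the DataFrame that exists at scheduling time is passed explicitly. A minimal sketch, assuming short_list is built before the job is registered:

import time

import pandas as pd
import schedule


def tweet_and_archive(sl):
    # iterate the rows exactly as in the question
    for index, row in sl.iterrows():
        print(row['Symbol'])


# hypothetical stand-in for the real short_list built elsewhere
short_list = pd.DataFrame({'Symbol': ['ANGPY'],
                           'Name': ['Anglo American Platinum Limited'],
                           'Price': [25.82]})

# pass the function and its argument instead of wrapping them in a lambda
schedule.every().monday.at('12:31').do(tweet_and_archive, short_list)

while True:
    schedule.run_pending()
    time.sleep(1)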
I am trying to scrape the "PRINCIPAL STOCKHOLDERS" table from the linked text file and convert it to a CSV file. Right now I am only half successful: I can locate the table and parse it, but somehow I cannot convert the parsed text into a standard table. My code is attached. Can someone help me with it?
import requests
import pandas as pd

url = r'https://www.sec.gov/Archives/edgar/data/1034239/0000950124-97-003372.txt'

# Different approach, the first approach does not work
filing_url = requests.get(url)
content = filing_url.text
splited_data = content.split('\n')
table_title = 'PRINCIPAL STOCKHOLDERS'
END_TABLE_LINE = '- ------------------------'

def find_no_line_start_table(table_title, splited_data):
    found_no_lines = []
    for index, line in enumerate(splited_data):
        if table_title in line:
            found_no_lines.append(index)
    return found_no_lines

table_start = find_no_line_start_table(table_title, splited_data)
# I need help with locating the table. If I locate the table using the above
# function, it returns two locations and I have to choose the correct one manually.
table_start = table_start[1]

def get_start_data_table(table_start, splited_data):
    for index, row in enumerate(splited_data[table_start:]):
        if '<C>' in row:
            return table_start + index

def get_end_table(start_table_data, splited_data):
    for index, row in enumerate(splited_data[start_table_data:]):
        if END_TABLE_LINE in row:
            return start_table_data + index

def row(l):
    l = l.split()
    number_columns = 8
    if len(l) >= number_columns:
        data_row = [''] * number_columns
        first_column_done = False
        index = 0
        for w in l:
            if not first_column_done:
                data_row[0] = ' '.join([data_row[0], w])
                if ':' in w:
                    first_column_done = True
            else:
                index += 1
                data_row[index] = w
        return data_row

start_line = get_start_data_table(table_start, splited_data)
end_line = get_end_table(start_line, splited_data)
table = splited_data[start_line:end_line]

# I also need help with converting the text table to a CSV file; somehow the
# following function does not recognize my columns.
def take_table(table):
    owner = []
    Num_share = []
    middle = []
    middle_1 = []
    middle_2 = []
    middle_3 = []
    prior_offering = []
    after_offering = []
    for r in table:
        data_row = row(r)
        if data_row:
            col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8 = data_row
            owner.append(col_1)
            Num_share.append(col_2)
            middle.append(col_3)
            middle_1.append(col_4)
            middle_2.append(col_5)
            middle_3.append(col_6)
            prior_offering.append(col_7)
            after_offering.append(col_8)
    table_data = {'owner': owner, 'Num_share': Num_share, 'middle': middle, 'middle_1': middle_1,
                  'middle_2': middle_2, 'middle_3': middle_3, 'prior_offering': prior_offering,
                  'after_offering': after_offering}
    return table_data

# print(table)
dict_table = take_table(table)
a = pd.DataFrame(dict_table)
a.to_csv('trail.csv')
I think what you need to do is
pd.DataFrame.from_dict(dict_table)
instead of
pd.DataFrame(dict_table)
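As a quick, hedged illustration with a toy dict shaped like take_table's output (one list per column, all the same length):

import pandas as pd

# hypothetical stand-in for dict_table
dict_table = {'owner': ['John Smith:', 'Jane Doe:'],
              'Num_share': ['1,000', '2,500'],
              'prior_offering': ['10%', '25%']}

a = pd.DataFrame.from_dict(dict_table)
print(a)        # two rows, one named column per key
a.to_csv('trail.csv', index=False)

Note that both constructors require the column lists to be of equal length, so if the CSV still comes out wrong, it is worth printing each data_row to confirm that row() really returns eight fields for every table line.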
I am learning Python, and right now I am trying to examine percent change in stock values from a database. However, one of the variables I am examining comes back of type Series, and whenever I try to convert it to a float for multiplication and division, I receive the error "TypeError: cannot convert the series to <class 'float'>". I have seen solutions that suggest using .astype(float), but that didn't work for me. Any help would be appreciated.
import pandas as pd
import os
import time
from datetime import datetime
import matplotlib.pyplot as plt  # needed for the plotting section below

path = "C:/Users/andre/AppData/Local/Programs/Python/Python37/SciKit-learn Tutorial/intraQuarter"

def Key_Stats(gather="Total Debt/Equity (mrq)"):
    statspath = path + '/_KeyStats'
    stock_list = [x[0] for x in os.walk(statspath)]
    counter = 0
    df = pd.DataFrame(columns=['Date', 'Unix', 'Folder', 'DE Ratio', 'Price',
                               'Stock_pct_change', 'SP500', 'SP500_pct_change', 'Difference'])
    sp500_df = pd.read_csv("YAHOO-INDEX_GSPC.csv")
    ticker_list = []
    for each_dir in stock_list[1:]:
        each_file = os.listdir(each_dir)
        folder = each_dir.split("\\")[1]
        ticker_list.append(folder)
        # Reset starting point for each directory
        starting_stock_value = False
        starting_sp500_value = False
        if len(each_file) > 0:
            for file in each_file:
                date_stamp = datetime.strptime(file, '%Y%m%d%H%M%S.html')
                unix_time = time.mktime(date_stamp.timetuple())
                full_file_path = each_dir + '/' + file
                file_content_source = open(full_file_path, 'r').read()
                try:
                    value = file_content_source.split(gather)[1].split('<td class="yfnc_tabledata1">')[1].split('</td>')[0]
                    try:
                        sp500_date = datetime.fromtimestamp(unix_time).strftime('%Y-%m-%d')
                        row = sp500_df[(sp500_df['Date'] == sp500_date)]
                        sp500_value = row['Adj Close']
                        print(type(sp500_value))
                        print(sp500_value)
                    except:
                        sp500_date = datetime.fromtimestamp(unix_time - 259200).strftime('%Y-%m-%d')
                        row = sp500_df[(sp500_df['Date'] == sp500_date)]
                        sp500_value = row['Adj Close']
                    try:
                        stock_price = file_content_source.split('</small><big><b>')[1].split('</b></big>')[0]
                        if stock_price.endswith('</span>'):
                            stock_price = stock_price.split('>')[1].split('</span')[0]
                    except IndexError:
                        try:
                            stock_price = file_content_source.split('</small><big><b>')[1].split('</span>')[0].split('>')[1]
                        except IndexError:
                            try:
                                stock_price = file_content_source.split('<span id="yfs_')
                                seglist = []  # Created a list to store all the possible numbers that arise
                                for parts in stock_price:
                                    segment = parts.split('</span>')[0].split('>')[1]
                                    try:
                                        # The numbers are usually 4, 5, or 6 characters long; check that str is a number
                                        if (len(segment) == 4 or len(segment) == 5 or len(segment) == 6) and float(segment) >= 0:
                                            seglist.append(segment)  # Add potential number to list
                                        stock_price = seglist[0]  # Noticed the first number is usually the correct one
                                    except ValueError:
                                        pass
                            except IndexError:
                                print('Error in Folder:', folder, ' File: ', file, ' Stock Price=', stock_price)
                    # print('Folder:', folder, ' File', file, ' Stock Price: ', stock_price)
                    if not starting_stock_value:
                        starting_stock_value = float(stock_price)
                    if not starting_sp500_value:
                        starting_sp500_value = float(sp500_value)
                    # percentage change = (new - old) / old x 100
                    stock_pct_change = ((float(stock_price) - starting_stock_value) / starting_stock_value) * 100
                    # -------------------------------------------------------------------------------
                    # ERROR OCCURS HERE!!!!
                    sp500_pct_change = ((float(sp500_value) - starting_sp500_value) / starting_sp500_value) * 100
                    # -------------------------------------------------------------------------------
                    df = df.append({'Date': date_stamp, 'Unix': unix_time,
                                    'Folder': folder, 'DE Ratio': value,
                                    'Price': stock_price,
                                    'Stock_pct_change': stock_pct_change,
                                    'SP500': sp500_value,
                                    'SP500_pct_change': sp500_pct_change,
                                    'Difference': stock_pct_change - sp500_pct_change},
                                   ignore_index=True)
                except IndexError:
                    stock_price = file_content_source.split('<span id="yfs_')[5].split('</span>')[0].split('>')[1]
                    print('Error in Folder:', folder, ' File: ', file, "Value=", value, 'Stock Price=', stock_price)
    # Plot
    for each_ticker in ticker_list:
        try:
            plot_df = df[(df['Folder'] == each_ticker)]
            plot_df = plot_df.set_index(['Date'])
            plot_df['Difference'].plot(label=each_ticker)
            plt.legend()
        except:
            pass
    plt.show()

Key_Stats()
Error:
<class 'pandas.core.series.Series'>
2997 1131.130005
Name: Adj Close, dtype: float64
<class 'pandas.core.series.Series'>
2947 1129.439941
Name: Adj Close, dtype: float64
<class 'pandas.core.series.Series'>
2778 1198.680054
Name: Adj Close, dtype: float64
<class 'pandas.core.series.Series'>
Series([], Name: Adj Close, dtype: float64)
Traceback (most recent call last):
File "C:\Users\andre\AppData\Local\Programs\Python\Python37\SciKit-learn Tutorial\Tutorial 6 - Playing with the Data (pct_change).py", line 103, in <module>
Key_Stats()
File "C:\Users\andre\AppData\Local\Programs\Python\Python37\SciKit-learn Tutorial\Tutorial 6 - Playing with the Data (pct_change).py", line 83, in Key_Stats
sp500_pct_change = ((float(sp500_value) - starting_sp500_value) / starting_sp500_value) * 100
File "C:\Users\andre\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\series.py", line 93, in wrapper
"{0}".format(str(converter)))
TypeError: cannot convert the series to <class 'float'>
I guess we are working on the same project and on the same path. Here it is; I hope you understand where to insert this portion of the code:
try:
    sp500_date = datetime.fromtimestamp(unix_time).strftime('%Y-%m-%d')
    row = sp500_df[sp500_df["Date"] == sp500_date]
    sp500_value = row["Adj Close"]
    sp500_value1 = sp500_value.values[0]
    print(sp500_value1)
except:
    sp500_date = datetime.fromtimestamp(unix_time - 259200).strftime('%Y-%m-%d')
    row = sp500_df[sp500_df["Date"] == sp500_date]
    sp500_value = row["Adj Close"]
    sp500_value1 = sp500_value.values[0]
    print(sp500_value1)
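The underlying issue is that row['Adj Close'] is a one-row Series, not a scalar, and float() refuses to convert a Series (the empty Series printed just before the traceback is a date with no match, which is why the fallback except branch exists). A minimal sketch with made-up numbers:

import pandas as pd

# hypothetical one-row slice, like sp500_df[sp500_df['Date'] == sp500_date]
sp500_df = pd.DataFrame({'Date': ['2010-01-04'], 'Adj Close': [1132.99]})
row = sp500_df[sp500_df['Date'] == '2010-01-04']
sp500_value = row['Adj Close']     # a Series of length 1

# float(sp500_value) raises TypeError; extract the scalar first:
print(sp500_value.values[0])       # 1132.99, as in the answer above
print(sp500_value.iloc[0])         # equivalent positional access

# an unmatched date yields an empty Series, where .values[0] would raise
# IndexError instead, so checking row.empty first is a sensible guard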
I have developed a program in Python that needs to take some arguments from the command line in order to run. But I keep getting the same error:
Traceback (most recent call last):
File "<string>", line 1, in <fragment>
invalid syntax: <string>, line 1, pos 16
I haven't the faintest idea what is wrong with my code, so I present it below in case someone can help me:
import QSTK.qstkutil.qsdateutil as du
import QSTK.qstkutil.tsutil as tsu
import QSTK.qstkutil.DataAccess as da
import datetime as dt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import math
import copy
import QSTK.qstkstudy.EventProfiler as ep
import csv
import sys
import argparse

def readData(li_startDate, li_endDate, ls_symbols):
    # Create datetime objects for Start and End dates (STL)
    dt_start = dt.datetime(li_startDate[0], li_startDate[1], li_startDate[2])
    dt_end = dt.datetime(li_endDate[0], li_endDate[1], li_endDate[2])
    # Initialize daily timestamp: closing prices, so timestamp should be hours=16 (STL)
    dt_timeofday = dt.timedelta(hours=16)
    # Get a list of trading days between the start and end dates (QSTK)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    # Create an object of the QSTK-dataaccess class with Yahoo as the source (QSTK)
    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    # Keys to be read from the data
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    # Read the data and map it to ls_keys via dict() (i.e. Hash Table structure)
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))
    return [d_data, dt_start, dt_end, dt_timeofday, ldt_timestamps]

def marketsim(cash, orders_file, values_file):
    orders = pd.read_csv(orders_file, index_col='Date', parse_dates=True, header=None)
    ls_symbols = list(set(orders['X.4'].values))
    df_lastrow = len(orders) - 1
    dt_start = dt.datetime(orders.get_value(0, 'X.1'), orders.get_value(0, 'X.2'), orders.get_value(0, 'X.3'))
    dt_end = dt.datetime(orders.get_value(df_lastrow, 'X.1'), orders.get_value(df_lastrow, 'X.2'),
                         orders.get_value(df_lastrow, 'X.3') + 1)
    #d_data = readData(dt_start,dt_end,ls_symbols)
    # Initialize daily timestamp: closing prices, so timestamp should be hours=16 (STL)
    dt_timeofday = dt.timedelta(hours=16)
    # Get a list of trading days between the start and end dates (QSTK)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    # Create an object of the QSTK-dataaccess class with Yahoo as the source (QSTK)
    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    # Keys to be read from the data
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    # Read the data and map it to ls_keys via dict() (i.e. Hash Table structure)
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))
    ls_symbols.append("_CASH")
    trades = pd.DataFrame(index=list(ldt_timestamps[0]), columns=list(ls_symbols))
    current_cash = cash
    trades["_CASH"][ldt_timestamps[0]] = current_cash
    current_stocks = dict()
    for symb in ls_symbols:
        current_stocks[symb] = 0
        trades[symb][ldt_timestamps[0]] = 0
    for row in orders.iterrows():
        row_data = row[1]
        current_date = dt.datetime(row_data['X.1'], row_data['X.2'], row_data['X.3'], 16)
        symb = row_data['X.4']
        stock_value = d_data['close'][symb][current_date]
        stock_amount = row_data['X.6']
        if row_data['X.5'] == "Buy":
            current_cash = current_cash - (stock_value * stock_amount)
            trades["_CASH"][current_date] = current_cash
            current_stocks[symb] = current_stocks[symb] + stock_amount
            trades[symb][current_date] = current_stocks[symb]
        else:
            current_cash = current_cash + (stock_value * stock_amount)
            trades["_CASH"][current_date] = current_cash
            current_stocks[symb] = current_stocks[symb] - stock_amount
            trades[symb][current_date] = current_stocks[symb]
    #trades.fillna(method='ffill',inplace=True)
    #trades.fillna(method='bfill',inplace=False)
    trades.fillna(0)
    #alt_cash = current_cash
    #alt_cash = trades.cumsum()
    value_data = pd.DataFrame(index=list(ldt_timestamps), columns=list("V"))
    value_data = value_data.fillna(0)
    value_data = value_data.cumsum(axis=0)
    for day in ldt_timestamps:
        value = 0
        for sym in ls_symbols:
            if sym == "_CASH":
                value = value + trades[sym][day]
            else:
                value = value + trades[sym][day] * d_data['close'][sym][day]
        value_data["V"][day] = value
    fileout = open(values_file, "w")
    for row in value_data.iterrows():
        fileout.writelines(str(row[0].strftime('%Y,%m,%d')) + ", " + str(row[1]["V"].round()) + "\n")
    fileout.close()

def main(argv):
    if len(sys.argv) != 3:
        print "Invalid arguments for marketsim.py. It should be of the following syntax: marketsim.py orders_file.csv values_file.csv"
        sys.exit(0)
    #initial_cash = int(sys.argv[1])
    initial_cash = 1000000
    ordersFile = str(sys.argv[1])
    valuesFile = str(sys.argv[2])
    marketsim(initial_cash, ordersFile, valuesFile)

if __name__ == "__main__":
    main(sys.argv[1:])
The input I gave to the command line was:
python marketsim.py orders.csv values.csv
I guess that the problem lies either in the imports or in the main function (including the if below def main(argv)).
I should point out that the files orders.csv and values.csv exist and are located in the same folder.
I hope I have made everything clear.
So, I am looking forward to reading your answers, community-mates! :D
Thank you!
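One thing worth ruling out before digging into the code: python marketsim.py orders.csv values.csv is an operating-system shell command, and typing it at an already-running Python >>> prompt (or an IDE's interactive console) raises exactly an "invalid syntax" error, because it is not valid Python. A minimal, hypothetical stand-in script for exercising just the argument handling, separate from the QSTK logic:

# minimal_args.py -- hypothetical stand-in, not part of the original program
import sys

def main(argv):
    if len(sys.argv) != 3:
        print("Usage: minimal_args.py orders_file.csv values_file.csv")
        sys.exit(0)
    print("orders file: " + sys.argv[1])
    print("values file: " + sys.argv[2])

if __name__ == "__main__":
    main(sys.argv[1:])

Run it from the shell as python minimal_args.py orders.csv values.csv; if that works while marketsim.py does not, the problem is in the program body rather than in the invocation.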
I am getting a TypeError for the following code. The output of the code should be the graph on this page: http://www.realclearpolitics.com/epolls/other/president_obama_job_approval-1044.html. When I run the code, the error is raised in this part of the code:
reduce_the_data = new_take_page[new_colors.keys()].sum(axis=1)/100
import re

import numpy as np
import pandas as pd
import requests
import matplotlib.pyplot as plt
from pattern import web  # assumption: web.Element comes from the pattern library

def get_poll_data(poll_id):
    url = "http://charts.realclearpolitics.com/charts/%i.xml" % int(poll_id)
    return requests.get(url).text  # is used to get the text from a url

def color_function(xml):
    dom = web.Element(xml)
    colors_dict = {}
    for i in dom.by_tag('graph'):
        name = i.attributes['title']
        hex_colors = i.attributes['color']
        colors_dict[name] = hex_colors
    return colors_dict

def strip(s):
    re.sub(r'[\W_]+', '', s)

def take_page(xml):
    dom = web.Element(xml)
    final = {}
    charts_page = dom.by_tag('series')[0]
    y = {i.attributes['xid']: str(i.content) for i in charts_page.by_tag('value')}
    key_of_y = y.keys()
    final['date'] = pd.to_datetime([y[j] for j in key_of_y])
    for each_value in dom.by_tag('graph'):
        title_name = each_value.attributes['title']
        new_dict = {n.attributes['xid']: float(n.content) if n.content else np.nan
                    for n in each_value.by_tag('value')}
        final[title_name] = [new_dict[k] for k in key_of_y]
    finals = pd.DataFrame(final)
    finals = finals.sort(columns=['date'])
    return finals

def new_func(poll_id):
    new_poll_id = get_poll_data(poll_id)
    new_take_page = take_page(new_poll_id)
    new_colors = color_function(new_poll_id)
    new_take_page = new_take_page.rename(columns={c: strip(c) for c in new_take_page.columns})
    reduce_the_data = new_take_page[new_colors.keys()].sum(axis=1) / 100
    for x in new_colors.items():
        new_take_page[x] /= reduce_the_data
    for sticker, color in new_colors.items():
        plt.plot(new_take_page.date, new_take_page[sticker], color=color, label=sticker)
    plt.xticks(rotation=60)
    plt.legend(loc='best')
    plt.xlabel("Date")
    plt.ylabel("Normalized Poll percentage")
>> new_func(1044)
>> plt.title("Polling")
TypeError: 'NoneType' object is not iterable
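One detail visible in the code above, offered as a hedged observation rather than a full diagnosis: strip() calls re.sub but never returns its result, and a Python function without a return statement returns None, so every renamed column name becomes None. A minimal demonstration of how that produces this class of TypeError:

import re

def strip(s):
    re.sub(r'[\W_]+', '', s)       # result computed but never returned

def strip_fixed(s):
    return re.sub(r'[\W_]+', '', s)

print(strip('Approve '))           # None -- the function falls off the end
print(strip_fixed('Approve '))     # Approve

try:
    for ch in strip('Approve '):   # iterating None fails
        pass
except TypeError as e:
    print(e)                       # 'NoneType' object is not iterable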