Convert a list of DataFrames into a single DataFrame - Python

Code:
from datetime import date
from datetime import timedelta
from nsepy import get_history
import pandas as pd
end1 = date.today()
start1 = end1 - timedelta(days=25)
exp_date1 = date(2022,8,25)
exp_date2 = date(2022,9,29)
# stock = ['HDFCLIFE']
stock = ['RELIANCE','HDFCBANK','INFY','ICICIBANK','HDFC','TCS','KOTAKBANK','LT','SBIN','HINDUNILVR','AXISBANK',
'ITC','BAJFINANCE','BHARTIARTL','ASIANPAINT','HCLTECH','MARUTI','TITAN','BAJAJFINSV','TATAMOTORS',
'TECHM','SUNPHARMA','TATASTEEL','M&M','WIPRO','ULTRACEMCO','POWERGRID','HINDALCO','NTPC','NESTLEIND',
'GRASIM','ONGC','JSWSTEEL','HDFCLIFE','INDUSINDBK','SBILIFE','DRREDDY','ADANIPORTS','DIVISLAB','CIPLA',
'BAJAJ-AUTO','TATACONSUM','UPL','BRITANNIA','BPCL','EICHERMOT','HEROMOTOCO','COALINDIA','SHREECEM','IOC']
target_stocks = []
# oi_change = []
for stock in stock:
    stock_jan = get_history(symbol=stock,
                            start=start1,
                            end=end1,
                            futures=True,
                            expiry_date=exp_date1)
    stock_feb = get_history(symbol=stock,
                            start=start1,
                            end=end1,
                            futures=True,
                            expiry_date=exp_date2)
    delivery_per_age = get_history(symbol=stock,
                                   start=start1,
                                   end=end1)
    symbol_s = get_history(symbol=stock,
                           start=start1,
                           end=end1)
    oi_combined = pd.concat([stock_jan['Change in OI'] + stock_feb['Change in OI']])
    total_oi = pd.concat([stock_jan['Open Interest'] + stock_feb['Open Interest']])
    delivery_vol = pd.concat([delivery_per_age['Deliverable Volume']])
    # delivery_per = pd.concat([delivery_per_age['%Deliverble']*100])
    na_me = pd.concat([symbol_s['Symbol']])
    close = pd.concat([delivery_per_age['Close']])
    df = pd.DataFrame(na_me)
    df['TOTAL_OPN_INT'] = total_oi
    df['OI_COMBINED'] = oi_combined
    df['%_CHANGE'] = ((df['OI_COMBINED'] / df['TOTAL_OPN_INT']) * 100).__round__(2)
    df['AVG_OI_COMBINED'] = df['OI_COMBINED'].rolling(5).mean()
    # df['DELIVERY_VOL'] = delivery_vol
    # df['AVG_DELIVERY_VOL'] = df['DELIVERY_VOL'].rolling(5).mean()
    # df['DELIVERY_PER'] = delivery_per
    # df['AVG_DELIVERY_%'] = df['DELIVERY_PER'].rolling(5).mean()
    df['_CLOSE_PRICE_'] = close
    pd.set_option('display.max_columns', 8)
    pd.set_option('display.width', 200)
    # print(df)
    cond = ((df.loc[df.index[-5:-1], '%_CHANGE'].agg(min) > 0) | (df.loc[df.index[-6:-1], '%_CHANGE'].agg(min) > 0)) & (df.loc[df.index[-1], '%_CHANGE'] < 0)
    if(cond):
        target_stocks.append(df)
print(target_stocks)
Output:
[ Symbol TOTAL_OPN_INT OI_COMBINED %_CHANGE AVG_OI_COMBINED _CLOSE_PRICE_
Date
2022-07-19 HINDUNILVR 1015800 313200 30.83 NaN 2567.95
2022-07-20 HINDUNILVR 1617900 602100 37.21 NaN 2604.50
2022-07-21 HINDUNILVR 2355000 737100 31.30 NaN 2607.45
2022-07-22 HINDUNILVR 3671400 1316400 35.86 NaN 2640.60
2022-07-25 HINDUNILVR 5421300 1749900 32.28 943740.0 2623.60
2022-07-26 HINDUNILVR 6886200 1464900 21.27 1174080.0 2547.10
2022-07-27 HINDUNILVR 8522700 1636500 19.20 1380960.0 2581.95
2022-07-28 HINDUNILVR 10300200 1777500 17.26 1589040.0 2620.10
2022-07-29 HINDUNILVR 10250100 -50100 -0.49 1315740.0 2637.40
2022-08-01 HINDUNILVR 10237200 -12900 -0.13 963180.0 2593.00
2022-08-02 HINDUNILVR 10178700 -58500 -0.57 658500.0 2635.25
2022-08-03 HINDUNILVR 10208400 29700 0.29 337140.0 2626.35
2022-08-04 HINDUNILVR 10289700 81300 0.79 -2100.0 2627.95
2022-08-05 HINDUNILVR 10334100 44400 0.43 16800.0 2645.40
2022-08-08 HINDUNILVR 10350000 15900 0.15 22560.0 2650.35
2022-08-10 HINDUNILVR 10422900 72900 0.70 48840.0 2642.80
2022-08-11 HINDUNILVR 10432800 9900 0.09 44880.0 2613.70
2022-08-12 HINDUNILVR 10378200 -54600 -0.53 17700.0 2594.95]
Process finished with exit code 0.
Problem:
When I ran the code on 12-Aug I got the output displayed above, which is a list. How can I convert that list of target_stocks into a pandas DataFrame?
When I tried df2 = pd.DataFrame(target_stocks), it throws the error "Must pass 2-d input, shape (4, 18, 16)".

You are appending each DataFrame to a plain Python list, and pd.DataFrame() cannot build a single frame from that list directly. Instead of target_stocks = [], make it target_stocks = pd.DataFrame() (an empty DataFrame). Then change:
if(cond):
    target_stocks.append(df)
to
if(cond):
    target_stocks = pd.concat([target_stocks, df])
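Alternatively, if you prefer to keep target_stocks as a list, you can leave the append as it is and call pd.concat on the whole list once after the loop. A minimal sketch of that variant, using made-up stand-in frames rather than the real get_history data:
import pandas as pd

# made-up stand-in frames, one per stock that met the condition
frames = [pd.DataFrame({'Symbol': ['AAA'], '_CLOSE_PRICE_': [10.0]}),
          pd.DataFrame({'Symbol': ['BBB'], '_CLOSE_PRICE_': [20.0]})]

target_stocks = []
for df in frames:
    target_stocks.append(df)          # collect, exactly as in your loop

# one concat after the loop turns the list into a single DataFrame
combined = pd.concat(target_stocks) if target_stocks else pd.DataFrame()
print(combined)
Either way you end up with one combined DataFrame.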
To add a blank row at the end of the dataframe if the condition is met, add the code below. This finds the length of your data frame and adds a blank row (created by placing an empty value in every column):
target_stocks.loc[len(target_stocks)]=['']*len(target_stocks.columns)
All together:
from datetime import date
from datetime import timedelta
from nsepy import get_history
import pandas as pd
end1 = date.today()
start1 = end1 - timedelta(days=25)
exp_date1 = date(2022,8,25)
exp_date2 = date(2022,9,29)
# stock = ['HDFCLIFE']
stock = ['RELIANCE','HDFCBANK','INFY','ICICIBANK','HDFC','TCS','KOTAKBANK','LT','SBIN','HINDUNILVR','AXISBANK',
'ITC','BAJFINANCE','BHARTIARTL','ASIANPAINT','HCLTECH','MARUTI','TITAN','BAJAJFINSV','TATAMOTORS',
'TECHM','SUNPHARMA','TATASTEEL','M&M','WIPRO','ULTRACEMCO','POWERGRID','HINDALCO','NTPC','NESTLEIND',
'GRASIM','ONGC','JSWSTEEL','HDFCLIFE','INDUSINDBK','SBILIFE','DRREDDY','ADANIPORTS','DIVISLAB','CIPLA',
'BAJAJ-AUTO','TATACONSUM','UPL','BRITANNIA','BPCL','EICHERMOT','HEROMOTOCO','COALINDIA','SHREECEM','IOC']
target_stocks = pd.DataFrame()
# oi_change = []
for stock in stock:
    stock_jan = get_history(symbol=stock,
                            start=start1,
                            end=end1,
                            futures=True,
                            expiry_date=exp_date1)
    stock_feb = get_history(symbol=stock,
                            start=start1,
                            end=end1,
                            futures=True,
                            expiry_date=exp_date2)
    delivery_per_age = get_history(symbol=stock,
                                   start=start1,
                                   end=end1)
    symbol_s = get_history(symbol=stock,
                           start=start1,
                           end=end1)
    oi_combined = pd.concat([stock_jan['Change in OI'] + stock_feb['Change in OI']])
    total_oi = pd.concat([stock_jan['Open Interest'] + stock_feb['Open Interest']])
    delivery_vol = pd.concat([delivery_per_age['Deliverable Volume']])
    # delivery_per = pd.concat([delivery_per_age['%Deliverble']*100])
    na_me = pd.concat([symbol_s['Symbol']])
    close = pd.concat([delivery_per_age['Close']])
    df = pd.DataFrame(na_me)
    df['TOTAL_OPN_INT'] = total_oi
    df['OI_COMBINED'] = oi_combined
    df['%_CHANGE'] = ((df['OI_COMBINED'] / df['TOTAL_OPN_INT']) * 100).__round__(2)
    df['AVG_OI_COMBINED'] = df['OI_COMBINED'].rolling(5).mean()
    # df['DELIVERY_VOL'] = delivery_vol
    # df['AVG_DELIVERY_VOL'] = df['DELIVERY_VOL'].rolling(5).mean()
    # df['DELIVERY_PER'] = delivery_per
    # df['AVG_DELIVERY_%'] = df['DELIVERY_PER'].rolling(5).mean()
    df['_CLOSE_PRICE_'] = close
    pd.set_option('display.max_columns', 8)
    pd.set_option('display.width', 200)
    # print(df)
    cond = ((df.loc[df.index[-5:-1], '%_CHANGE'].agg(min) > 0) | (df.loc[df.index[-6:-1], '%_CHANGE'].agg(min) > 0)) & (df.loc[df.index[-1], '%_CHANGE'] < 0)
    if(cond):
        target_stocks = pd.concat([target_stocks, df])
        target_stocks.loc[len(target_stocks)] = [''] * len(target_stocks.columns)
target_stocks
Output:

Related

Python algo trading pandas data clean up and call from another function

I have the code below and it works fine when I execute it from the console. Now I want to convert it into a class with a constructor so that I can call it from elsewhere and get the data.
from kiteconnect import KiteConnect
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
import math
import numpy as np
import talib
import json
# Setting Spyder environmental variables
pd.set_option("display.max_columns", 30)
pd.set_option("display.max_rows", 500)
pd.set_option("display.width", 1000)
with open('C:/AlgoTrade/TradeApp/config/brokerapp.json') as f:
    data = json.load(f)
access_token = data['accessToken']
api = data['appKey']
kite=KiteConnect(api_key=api)
kite.set_access_token(access_token)
End_Time = datetime.now().date()
Start_Time = End_Time + relativedelta(days=-30)
# Function to calculate DATR
def DATR():
    BF_DATR = pd.DataFrame(kite.historical_data(260105, Start_Time, End_Time, "day", False, True))
    atr = (talib.ATR(BF_DATR["high"], BF_DATR["low"], BF_DATR["close"], timeperiod=14)).tail(1)
    atr2p = math.ceil(atr * .02)
    return atr, atr2p
DATR, DATRP = DATR()
print("DATR : %2d, 2 percentage of DATR : %3d" % (DATR, DATRP))
# Find explosive candles in 5 mins range
class Zones:
    def __init__(self, data):
        self.data = data
    def candle_type(self):
        """this column will be true if the candle is up"""
        self.data['candle_type'] = np.where(self.data['open'] < self.data['close'], "Bullish", "Bearish")
    def body_spread(self):
        """the body of the candle"""
        self.data['body'] = (self.data['close'] - self.data['open']).abs()
        self.data['spread'] = self.data['high'] - self.data['low']
    def is_exciting_candle(self):
        self.data['candle_size'] = np.where(((self.data['body'] > (0.55*self.data['spread'])) & (self.data['body'] > DATRP)), "Exciting", "Basing")
    def clean(self):
        """removes unneeded columns"""
        self.data.drop(['volume', 'oi', 'body', 'spread'], axis=1, inplace=True)
    def run_all(self):
        """run all the methods"""
        self.candle_type()
        self.body_spread()
        self.is_exciting_candle()
        self.clean()
Start_Time = End_Time + relativedelta(days=-100)
data = pd.DataFrame(kite.historical_data(260105,Start_Time,End_Time,"5minute",False,True))
data['date'] = pd.to_datetime(data['date']).apply(lambda x: x.replace(tzinfo=None))
sample = Zones(data=data)
sample.run_all()
data = sample.data
print(data)
Now I need to convert the above code into a class with a constructor and call it from another function, which should return the pandas DataFrame.
The expected output is that when we call it from another function it returns the data:
BANKNIFTYDATA = zone.data()
print(BANKNIFTYDATA)
date open high low close candle_type candle_size
0 2022-08-04 09:15:00 38111.05 38230.55 38111.05 38166.65 Bullish Basing
1 2022-08-04 09:20:00 38165.00 38165.00 38079.90 38098.80 Bearish Exciting
2 2022-08-04 09:25:00 38099.90 38157.50 38096.75 38113.05 Bullish Basing
3 2022-08-04 09:30:00 38114.45 38141.20 38086.10 38108.85 Bearish Basing
4 2022-08-04 09:35:00 38106.10 38115.00 38051.50 38066.40 Bearish Exciting
5 2022-08-04 09:40:00 38060.65 38111.75 38054.00 38081.95 Bullish Basing
6 2022-08-04 09:45:00 38081.80 38151.40 38080.75 38133.60 Bullish Exciting
7 2022-08-04 09:50:00 38133.85 38170.20 38131.05 38161.75 Bullish Exciting
8 2022-08-04 09:55:00 38163.20 38166.40 38112.10 38129.30 Bearish Exciting
9 2022-08-04 10:00:00 38131.70 38141.15 38093.10 38117.65 Bearish Basing
I got my answer..
class NiftybankData:
    def __init__(self):
        End_Time = datetime.now().date()
        Start_Time = End_Time + relativedelta(days=-100)
    def data(self):
        data = pd.DataFrame(kite.historical_data(260105, Start_Time, End_Time, "5minute", False, True))
        data['date'] = pd.to_datetime(data['date']).apply(lambda x: x.replace(tzinfo=None))
        data['candle_type'] = np.where(data['open'] < data['close'], "Bullish", "Bearish")
        data['body'] = (data['close'] - data['open']).abs()
        data['spread'] = data['high'] - data['low']
        data['candle_size'] = np.where(((data['body'] > (0.55*data['spread'])) & (data['body'] > DATRP)), "Exciting", "Basing")
        data.drop(['volume', 'oi', 'body', 'spread'], axis=1, inplace=True)
        return data
NiftyBank = NiftybankData().data()
print(NiftyBank)
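One caveat worth noting: in this version End_Time and Start_Time are local variables inside __init__, so data() only resolves them because names with the same values still exist at module level from the earlier script. Storing them on self keeps the class self-contained. A minimal sketch of that variant (the instrument_token, kite and datrp parameters are illustrative additions, assuming the same connected KiteConnect session and DATR threshold as above):
from datetime import datetime
from dateutil.relativedelta import relativedelta
import numpy as np
import pandas as pd

class NiftybankData:
    def __init__(self, instrument_token=260105):
        # keep the window and token on the instance instead of relying on module-level globals
        self.instrument_token = instrument_token
        self.End_Time = datetime.now().date()
        self.Start_Time = self.End_Time + relativedelta(days=-100)

    def data(self, kite, datrp):
        # kite: a connected KiteConnect instance; datrp: the 2% DATR threshold computed earlier
        data = pd.DataFrame(kite.historical_data(self.instrument_token, self.Start_Time, self.End_Time, "5minute", False, True))
        data['date'] = pd.to_datetime(data['date']).apply(lambda x: x.replace(tzinfo=None))
        data['candle_type'] = np.where(data['open'] < data['close'], "Bullish", "Bearish")
        data['body'] = (data['close'] - data['open']).abs()
        data['spread'] = data['high'] - data['low']
        data['candle_size'] = np.where((data['body'] > 0.55 * data['spread']) & (data['body'] > datrp), "Exciting", "Basing")
        return data.drop(['volume', 'oi', 'body', 'spread'], axis=1)

# usage, assuming kite and DATRP from the script above
# BANKNIFTYDATA = NiftybankData().data(kite, DATRP)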

List index out of range error when using Pandas and Yahoo_fin

This is a modified version of a program from a tutorial that extracts data from all of the stocks in the S&P 500 and picks stocks that match the criteria you specify.
The issue is that when I run the program, "list index out of range [stock symbol]" pops up, and those stocks are skipped and aren't added to the final CSV file.
Example:
list index out of range for ABMD
list index out of range for ABT
list index out of range for ADBE
list index out of range for ADI
I'm not really sure what the issue is, I would greatly appreciate it if someone would explain it to me! Also, I am not applying any of the specifying criteria yet and am just trying to get all of the stock data into the CSV file. Make sure to create a database named stock_data if you try the program. Thanks!
My code:
import pandas_datareader as web
import pandas as pd
from yahoo_fin import stock_info as si
import datetime as dt
dow_list = si.tickers_dow()
sp_list = si.tickers_sp500()
tickers = sp_list
'''tickers = list(set(tickers))
tickers.sort()'''
start = dt.datetime.now() - dt.timedelta(days=365)
end = dt.datetime.now()
sp500_df = web.DataReader('^GSPC', 'yahoo', start, end)
sp500_df['Pct Change'] = sp500_df['Adj Close'].pct_change()
sp500_return = (sp500_df['Pct Change'] + 1).cumprod()[-1]
return_list = []
final_df = pd.DataFrame(columns=['Ticker', 'Latest_Price', 'Score', 'PE_Ratio', 'PEG_Ratio', 'SMA_150', 'SMA_200', '52_Week_Low', '52_Week_High'])
counter = 0
for ticker in tickers:
    df = web.DataReader(ticker, 'yahoo', start, end)
    df.to_csv(f'stock_data/{ticker}.csv')
    df['Pct Change'] = df['Adj Close'].pct_change()
    stock_return = (df['Pct Change'] + 1).cumprod()[-1]
    returns_compared = round((stock_return / sp500_return), 2)
    return_list.append(returns_compared)
    counter += 1
    if counter == 100:
        break
best_performers = pd.DataFrame(list(zip(tickers, return_list)), columns=['Ticker', 'Returns Compared'])
best_performers['Score'] = best_performers['Returns Compared'].rank(pct=True) * 100
best_performers = best_performers[best_performers['Score'] >= best_performers['Score'].quantile(0)] #picks stocks in top 25 percentile
for ticker in best_performers['Ticker']:
    try:
        df = pd.read_csv(f'stock_data/{ticker}.csv', index_col=0)
        moving_averages = [150, 200]
        for ma in moving_averages:
            df['SMA_' + str(ma)] = round(df['Adj Close'].rolling(window=ma).mean(), 2)
        latest_price = df['Adj Close'][-1]
        pe_ratio = float(si.get_quote_table(ticker)['PE Ratio (TTM)'])
        peg_ratio = float(si.get_stats_valuation(ticker)[1][4])
        moving_average_150 = df['SMA_150'][-1]
        moving_average_200 = df['SMA_200'][-1]
        low_52week = round(min(df['Low'][-(52*5):]), 2)
        high_52week = round(min(df['High'][-(52 * 5):]), 2)
        score = round(best_performers[best_performers['Ticker'] == ticker]['Score'].tolist()[0])
        condition_1 = latest_price > moving_average_150 > moving_average_200
        condition_2 = latest_price >= (1.3 * low_52week)
        condition_3 = latest_price >= (0.75 * high_52week)
        condition_4 = pe_ratio < 25
        condition_5 = peg_ratio < 2
        final_df = final_df.append({'Ticker': ticker,
                                    'Latest_Price': latest_price,
                                    'Score': score,
                                    'PE_Ratio': pe_ratio,
                                    'PEG_Ratio': peg_ratio,
                                    'SMA_150': moving_average_150,
                                    'SMA_200': moving_average_200,
                                    '52_Week_Low': low_52week,
                                    '52_Week_High': high_52week}, ignore_index=True)
    except Exception as e:
        print(f"{e} for {ticker}")
final_df.sort_values(by='Score', ascending=False)
pd.set_option('display.max_columns', 10)
print(final_df)
final_df.to_csv('final.csv')
I have done the troubleshooting on your behalf. In short, the contents of the individual indicator look-ups are never checked: values that come back as dict-shaped results and label-indexed Series are appended to an empty data frame as-is, and I believe that is the root cause of the error. Specifically:
The last row's value is retrieved with [-1] as a plain index; iloc (or a slice plus .values) is not used.
The 52*5 look-back asks for 260 rows when only about 253 rows of data are available.
In addition, when the extra indicators are requested for a given ticker, sometimes they can be retrieved and sometimes they cannot (the cause is unknown), so it may be necessary to fetch pe_ratio and peg_ratio up front and change how they are processed.
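To illustrate the label-versus-position point, a minimal sketch with made-up numbers (not the original data):
import pandas as pd

s = pd.Series([10.0, 20.0, 30.0])   # default integer index 0, 1, 2
print(s.iloc[-1])                   # 30.0 -- positional, always safe for "last value"
print(s[-1:].values[0])             # 30.0 -- the slice + .values pattern used in the loop below
# print(s[-1])                      # with an integer index this is a *label* lookup and raises KeyError
Here is the loop reworked along those lines: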
for ticker in best_performers['Ticker']:
    #print(ticker)
    try:
        df = pd.read_csv(f'stock_data/{ticker}.csv')#, index_col=0
        moving_averages = [150, 200]
        for ma in moving_averages:
            df['SMA_' + str(ma)] = round(df['Adj Close'].rolling(window=ma).mean(), 2)
        latest_price = df['Adj Close'][-1:].values[0]
        pe_ratio = float(si.get_quote_table(ticker)['PE Ratio (TTM)'])
        moving_average_150 = df['SMA_150'][-1:].values[0]
        moving_average_200 = df['SMA_200'][-1:].values[0]
        low_52week = round(min(df['Low'][-(52*1):]), 2)
        high_52week = round(min(df['High'][-(52*1):]), 2)
        #print(low_52week, high_52week)
        score = round(best_performers[best_performers['Ticker'] == ticker]['Score'].tolist()[0])
        #print(score)
        #print(ticker, latest_price, score, pe_ratio, moving_average_200, low_52week, high_52week)
        final_df = final_df.append({'Ticker': ticker,
                                    'Latest_Price': latest_price,
                                    'Score': score,
                                    'PE_Ratio': pe_ratio,
                                    'SMA_150': moving_average_150,
                                    'SMA_200': moving_average_200,
                                    '52_Week_Low': low_52week,
                                    '52_Week_High': high_52week}, ignore_index=True)
        #print(final_df)
    except Exception as e:
        print(f"{e} for {ticker}")
final_df
Ticker Latest_Price Score PE_Ratio SMA_150 SMA_200 52_Week_Low 52_Week_High
0 A 123.839996 40 31.42 147.26 150.31 123.06 126.75
1 AAP 218.250000 70 22.23 220.66 216.64 190.79 202.04
2 AAPL 165.070007 80 29.42 161.85 158.24 150.10 154.12
3 ABC 161.899994 90 21.91 132.94 129.33 132.00 137.79
4 ADBE 425.470001 10 42.46 552.19 571.99 407.94 422.38
Note
Some stocks are missing because additional indicators could not be obtained.
(Tested on the first 10 tickers with tickers = sp_list[:10].)

KeyError for column that exists in dataframe

This works if I remove the schedule, but if I leave it in I receive a KeyError for 'Symbol'.
def tweet_and_archive(sl):
    ticker_l = []
    name_l = []
    price_l = []
    price_out_l = []
    date_time = []
    for index, row in sl.iterrows():
        Stock = row['Symbol']
        Price = row['Price']
        Price_out = row['Price Out']
        name_ = row['Name']
        Date_ = row['DateTime']
        if ...
schedule.every().monday.at('12:31').do(lambda: tweet_and_archive(short_list))
while True:
    schedule.run_pending()
    time.sleep(1)
This is the short_list dataframe:
Symbol Name Price % Change Price Out DateTime
0 ANGPY Anglo American Platinum Limited 25.82 7.14 NaN 28/02/2022

Dataframe and updating a new column value in a for loop

I am trying to update a value in a dataframe using a method and a for loop. I pass the dataframe into the method and use a for loop to calculate the value I want to put into the last column.
Here is the method
def vwap2(df):
    sumTpv = 0.00
    sumVolume = 0
    dayVwap = 0.00
    for i, row in df.iterrows():
        #Get all values from each row
        #Find typical price
        tp = (row['HIGH'] + row['LOW'] + row['CLOSE'] + row['OPEN']) / 4
        tpv = tp * row['VOLUME']
        sumTpv = sumTpv + tpv
        sumVolume = sumVolume + row['VOLUME']
        vwap = sumTpv / sumVolume
        #Find VWAP
        #df.assign(VWAP = vwap)
        #row.assign(VWAP = vwap)
        #row["VWAP"] = vwap
        df.set_value(row, 'VWAP', vwap)
        df = df.reindex(row = row)
        df[row] = df[row].astype(float)
        dayVwap = dayVwap + vwap
    print('Day VWAP = ', dayVwap)
    print('TPV sum = ', sumTpv)
    print('Day Volume = ', sumVolume)
    return df
The DataFrame already has the column in it, because I add it before passing the df into the method, like this:
df["VWAP"] = ""
#do vwap calculation
df = vwap2(df)
But the values are either all the same (which they should not be) or not written at all. I tried a few things, but with no success.
Updates
Here is the data that I am using, I am pulling it from Google each time:
CLOSE HIGH LOW OPEN VOLUME TP \
2018-05-10 22:30:00 97.3600 97.48 97.3000 97.460 371766 97.86375
1525991460000000000 97.2900 97.38 97.1800 97.350 116164 97.86375
1525991520000000000 97.3100 97.38 97.2700 97.270 68937 97.86375
1525991580000000000 97.3799 97.40 97.3101 97.330 46729 97.86375
1525991640000000000 97.2200 97.39 97.2200 97.365 64823 97.86375
TPV SumTPV SumVol VWAP
2018-05-10 22:30:00 3.722224e+08 1.785290e+09 18291710 97.601027
1525991460000000000 3.722224e+08 1.785290e+09 18291710 97.601027
1525991520000000000 3.722224e+08 1.785290e+09 18291710 97.601027
1525991580000000000 3.722224e+08 1.785290e+09 18291710 97.601027
1525991640000000000 3.722224e+08 1.785290e+09 18291710 97.601027
As you can see, all the calculated columns hold the same value in every row.
Here is what I am using right now:
def vwap2(df):
    sumTpv = 0.00
    sumVolume = 0
    dayVwap = 0.00
    for i, row in df.iterrows():
        #Get all values from each row
        #Find typical price
        tp = (row['HIGH'] + row['LOW'] + row['CLOSE'] + row['OPEN']) / 4
        df['TP'] = tp
        tpv = tp * row['VOLUME']
        df['TPV'] = tpv
        sumTpv = sumTpv + tpv
        df['SumTPV'] = sumTpv
        sumVolume = sumVolume + row['VOLUME']
        df['SumVol'] = sumVolume
        vwap = sumTpv / sumVolume
        #Find VWAP
        #row.assign(VWAP = vwap)
        #row["VWAP"] = vwap
        #df.set_value(row, 'VWAP', vwap)
        df["VWAP"] = vwap
        dayVwap = dayVwap + vwap
    print('Day VWAP = ', dayVwap)
    print('TPV sum = ', sumTpv)
    print('Day Volume = ', sumVolume)
    return df
IIUC, you don't need a loop, or even apply - you can use direct column assignment and cumsum() to get what you're looking for.
Some example data:
import numpy as np
import pandas as pd
N = 20
high = np.random.random(N)
low = np.random.random(N)
close = np.random.random(N)
opening = np.random.random(N)
volume = np.random.random(N)
data = {"HIGH":high, "LOW":low, "CLOSE":close, "OPEN":opening, "VOLUME":volume}
df = pd.DataFrame(data)
df.head()
CLOSE HIGH LOW OPEN VOLUME
0 0.848676 0.260967 0.004188 0.139342 0.931406
1 0.771065 0.356639 0.495715 0.652106 0.988217
2 0.288206 0.567776 0.023687 0.809410 0.134134
3 0.832711 0.508586 0.031569 0.120774 0.891948
4 0.857051 0.391618 0.155635 0.069054 0.628036
Assign the tp and tpv columns directly, then apply cumsum to get sumTpv and sumVolume:
df["tp"] = (df['HIGH'] + df['LOW'] + df['CLOSE'] + df['OPEN']) / 4
df["tpv"] = df.tp * df['VOLUME']
df["sumTpv"] = df.tpv.cumsum()
df["sumVolume"] = df.VOLUME.cumsum()
df["vwap"] = df.sumTpv.div(df.sumVolume)
df.head()
CLOSE HIGH LOW OPEN VOLUME tp tpv \
0 0.848676 0.260967 0.004188 0.139342 0.931406 0.313293 0.291803
1 0.771065 0.356639 0.495715 0.652106 0.988217 0.568881 0.562178
2 0.288206 0.567776 0.023687 0.809410 0.134134 0.422270 0.056641
3 0.832711 0.508586 0.031569 0.120774 0.891948 0.373410 0.333063
4 0.857051 0.391618 0.155635 0.069054 0.628036 0.368340 0.231331
sumTpv sumVolume vwap
0 0.291803 0.931406 0.313293
1 0.853982 1.919624 0.444869
2 0.910622 2.053758 0.443393
3 1.243685 2.945706 0.422203
4 1.475016 3.573742 0.412737
Update (per OP comment):
To get dayVwap as the sum of all vwap, use dayVwap = df.vwap.sum().
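A side note on why every row looked identical in the loop version: assignments such as df['TPV'] = tpv inside the loop broadcast the scalar from the current iteration across the entire column, so after the last pass every row holds the final iteration's values. A minimal sketch with toy data:
import pandas as pd

df = pd.DataFrame({'VOLUME': [1, 2, 3]})
for i, row in df.iterrows():
    df['double'] = row['VOLUME'] * 2   # scalar broadcast: rewrites the whole column every pass
print(df['double'].tolist())           # [6, 6, 6] -- only the last iteration's value remains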

Retrieve ScannerSubscription results using IbPy

I'm struggling with the results of a ScannerSubscription.
For example, if I request:
qqq_id = 0
subscript = ScannerSubscription()
subscript.numberOfRows(15)
subscript.m_scanCode = 'HIGH_OPEN_GAP'
subscript.m_instrument = 'STK'
subscript.m_averageOptionVolumeAbove = ''
subscript.m_couponRateAbove = ''
subscript.m_couponRateBelow = ''
subscript.m_abovePrice = '5'
subscript.m_belowPrice = ''
subscript.m_marketCapAbove = ''
subscript.m_marketCapBelow = ''
subscript.m_aboveVolume = '100000'
subscript.m_stockTypeFilter = 'ALL'
subscript.locationCode('STK.US.MAJOR')
tws_conn.reqScannerSubscription(qqq_id, subscript)
tws_conn.reqScannerParameters()
I received a scannerData response like this:
<scannerData reqId=0, rank=0, contractDetails=<ib.ext.ContractDetails.ContractDetails object at 0x00000000036EFA58>, distance=None, benchmark=None, projection=None, legsStr=None>
etc...
But I cannot retrieve the result values, for example:
reqScannerParameters() xml result specifies <colId>390</colId> as the colId for the Gap value:
<ScanType>
<displayName>Top Close-to-Open % Gainers</displayName>
<scanCode>HIGH_OPEN_GAP</scanCode>
<instruments>STK,STOCK.NA,STOCK.EU,STOCK.HK,FUT.US,FUT.HK,FUT.EU,FUT.NA</instruments>
<absoluteColumns>false</absoluteColumns>
<Columns varName="columns">
<Column>
<colId>390</colId>
<name>Gap</name>
<display>true</display>
<section>m</section>
<displayType>DATA</displayType>
</Column>
How do I retrieve the GAP value?
Is this even possible ?
Now I'm sure you're supposed to request data after getting the contract.
import pandas as pd
scans = 15
res = pd.DataFrame(index = range(scans), columns = ['sym','open','close','calc']).fillna(0)
msgs = []
from ib.ext.Contract import Contract
from ib.opt import ibConnection, message
from ib.ext.TickType import TickType as tt
def tickPrice(msg):
    global scans
    if msg.field in [tt.OPEN, tt.CLOSE]:
        res.loc[msg.tickerId, tt.getField(msg.field)] = msg.price
        op = res.loc[msg.tickerId, 'open']
        cl = res.loc[msg.tickerId, 'close']
        if op > 0 and cl > 0 and res.loc[msg.tickerId, 'calc'] == 0:
            res.loc[msg.tickerId, 'calc'] = ((op-cl)*100/cl)
            con.cancelMktData(msg.tickerId)
            scans -= 1
            if scans == 0:
                print(res)
                con.disconnect()
def snapshot(msg):
    res.loc[msg.rank, 'sym'] = msg.contractDetails.m_summary.m_symbol
    #tt.OPEN (14) isn't coming with snapshot
    con.reqMktData(str(msg.rank), msg.contractDetails.m_summary, "", False)
def watcher(msg):
    #print (msg)
    msgs.append(msg)
def scanData(msg):
    snapshot(msg)
def scanDataEnd(msg):
    con.cancelScannerSubscription(qqq_id)
con = ibConnection(port=7497, clientId=888)
con.registerAll(watcher)
con.unregister(watcher, message.scannerData)
con.register(scanData, message.scannerData)
con.unregister(watcher, message.scannerDataEnd)
con.register(scanDataEnd, message.scannerDataEnd)
con.unregister(watcher, message.tickPrice)
con.register(tickPrice, message.tickPrice)
con.connect()
from ib.ext.ScannerSubscription import ScannerSubscription
qqq_id = 0
subscript = ScannerSubscription()
subscript.numberOfRows(15)
subscript.m_scanCode = 'HIGH_OPEN_GAP'
subscript.m_instrument = 'STK'
subscript.m_averageOptionVolumeAbove ='0'
subscript.m_abovePrice = '5'
subscript.m_aboveVolume = '100000'
con.reqScannerSubscription(qqq_id, subscript)
res at 1 pm est =
sym open close calc
0 TAC 4.95 4.25 16.470588
1 CTRP 44.80 40.99 9.294950
2 IIIN 39.26 36.58 7.326408
3 LFC 14.60 13.63 7.116654
4 ACH 11.59 10.87 6.623735
5 KALV 9.01 8.38 7.517900
6 OMER 13.25 12.75 3.921569
7 DWTI 68.00 66.50 2.255639
8 WLDN 23.75 23.43 1.365770
9 BZQ 19.67 18.73 5.018687
10 JNUG 6.55 6.43 1.866252
11 GXP PRB 50.78 49.80 1.967871
12 AU 10.85 10.59 2.455146
13 USLV 13.07 12.81 2.029664
14 CBD 16.60 16.03 3.555833
I don't know why they don't come in rank order??
