Dataframe and updating a new column value in a for loop

Dataframe and updating a new column value in a for loop - python

I am trying to update a value in a dataframe using a method and a for loop. I pass the dataframe into the method and use a for loop to calculate the value I want to put into the last column.
Here is the method
# Iterates over the rows of df, accumulating typical-price*volume and volume,
# attempts to store the running VWAP in the 'VWAP' column, prints the totals
# and returns df. (Indentation was lost in this listing.)
def vwap2(df):
sumTpv = 0.00
sumVolume = 0
dayVwap = 0.00
for i, row in df.iterrows():
#Get all values from each row
#Find typical price
tp = (row['HIGH'] + row['LOW'] + row['CLOSE'] + row['OPEN']) / 4
tpv = tp * row['VOLUME']
sumTpv= sumTpv + tpv
sumVolume = sumVolume + row['VOLUME']
# Running VWAP so far: cumulative tp*volume divided by cumulative volume.
vwap = sumTpv / sumVolume
#Find VWAP
#df.assign(VWAP = vwap)
#row.assign(VWAP = vwap)
#row["VWAP"] = vwap
# NOTE(review): the first argument is the row Series, not the index label i
# yielded by iterrows(). DataFrame.set_value was also removed in pandas 1.0.
df.set_value(row, 'VWAP', vwap)
# NOTE(review): the next two lines likewise pass the row Series where an index
# or column label would be expected - confirm what was intended.
df = df.reindex(row = row)
df[row] = df[row].astype(float)
# dayVwap accumulates the running VWAP values themselves.
dayVwap = dayVwap + vwap
print('Day VWAP = ', dayVwap)
print('TPV sum = ', sumTpv)
print('Day Volume = ', sumVolume)
return df
And the Dataframe already has the column in it as I add it to it before I pass the df into the method. Like this
df["VWAP"] = ""
#do vwap calculation
df = vwap2(df)
But the values are either all the same, which they should not be, or are not written at all. I tried a few things, but without success.
Updates
Here is the data that I am using, I am pulling it from Google each time:
CLOSE HIGH LOW OPEN VOLUME TP \
2018-05-10 22:30:00 97.3600 97.48 97.3000 97.460 371766 97.86375
1525991460000000000 97.2900 97.38 97.1800 97.350 116164 97.86375
1525991520000000000 97.3100 97.38 97.2700 97.270 68937 97.86375
1525991580000000000 97.3799 97.40 97.3101 97.330 46729 97.86375
1525991640000000000 97.2200 97.39 97.2200 97.365 64823 97.86375
TPV SumTPV SumVol VWAP
2018-05-10 22:30:00 3.722224e+08 1.785290e+09 18291710 97.601027
1525991460000000000 3.722224e+08 1.785290e+09 18291710 97.601027
1525991520000000000 3.722224e+08 1.785290e+09 18291710 97.601027
1525991580000000000 3.722224e+08 1.785290e+09 18291710 97.601027
1525991640000000000 3.722224e+08 1.785290e+09 18291710 97.601027
As you can see all the calculated stuff is the same.
Here is what I am using right now.
# Second attempt: iterates over the rows of df, accumulating typical-price*volume
# and volume, writes the intermediate values into df, prints the totals and
# returns df. (Indentation was lost in this listing.)
def vwap2(df):
sumTpv = 0.00
sumVolume = 0
dayVwap = 0.00
for i, row in df.iterrows():
#Get all values from each row
#Find typical price
tp = (row['HIGH'] + row['LOW'] + row['CLOSE'] + row['OPEN']) / 4
# NOTE(review): df['TP'] = tp assigns one scalar to the WHOLE column, so each
# iteration overwrites every row. The same applies to 'TPV', 'SumTPV', 'SumVol'
# and 'VWAP' below, which is why all rows end up holding identical values.
df['TP'] = tp
tpv = tp * row['VOLUME']
df['TPV'] = tpv
sumTpv= sumTpv + tpv
df['SumTPV'] = sumTpv
sumVolume = sumVolume + row['VOLUME']
df['SumVol'] = sumVolume
# Running VWAP so far: cumulative tp*volume divided by cumulative volume.
vwap = sumTpv / sumVolume
#Find VWAP
#row.assign(VWAP = vwap)
#row["VWAP"] = vwap
#df.set_value(row, 'VWAP', vwap)
df["VWAP"] = vwap
# dayVwap accumulates the running VWAP values themselves.
dayVwap = dayVwap + vwap
print('Day VWAP = ', dayVwap)
print('TPV sum = ', sumTpv)
print('Day Volume = ', sumVolume)
return df

IIUC, you don't need a loop, or even apply - you can use direct column assignment and cumsum() to get what you're looking for.
Some example data:
import numpy as np
import pandas as pd
N = 20
high = np.random.random(N)
low = np.random.random(N)
close = np.random.random(N)
opening = np.random.random(N)
volume = np.random.random(N)
data = {"HIGH":high, "LOW":low, "CLOSE":close, "OPEN":opening, "VOLUME":volume}
df = pd.DataFrame(data)
df.head()
CLOSE HIGH LOW OPEN VOLUME
0 0.848676 0.260967 0.004188 0.139342 0.931406
1 0.771065 0.356639 0.495715 0.652106 0.988217
2 0.288206 0.567776 0.023687 0.809410 0.134134
3 0.832711 0.508586 0.031569 0.120774 0.891948
4 0.857051 0.391618 0.155635 0.069054 0.628036
Assign the tp and tpv columns directly, then apply cumsum to get sumTpv and sumVolume:
df["tp"] = (df['HIGH'] + df['LOW'] + df['CLOSE'] + df['OPEN']) / 4
df["tpv"] = df.tp * df['VOLUME']
df["sumTpv"] = df.tpv.cumsum()
df["sumVolume"] = df.VOLUME.cumsum()
df["vwap"] = df.sumTpv.div(df.sumVolume)
df.head()
CLOSE HIGH LOW OPEN VOLUME tp tpv \
0 0.848676 0.260967 0.004188 0.139342 0.931406 0.313293 0.291803
1 0.771065 0.356639 0.495715 0.652106 0.988217 0.568881 0.562178
2 0.288206 0.567776 0.023687 0.809410 0.134134 0.422270 0.056641
3 0.832711 0.508586 0.031569 0.120774 0.891948 0.373410 0.333063
4 0.857051 0.391618 0.155635 0.069054 0.628036 0.368340 0.231331
sumTpv sumVolume vwap
0 0.291803 0.931406 0.313293
1 0.853982 1.919624 0.444869
2 0.910622 2.053758 0.443393
3 1.243685 2.945706 0.422203
4 1.475016 3.573742 0.412737
Update (per OP comment):
To get dayVwap as the sum of all vwap, use dayVwap = df.vwap.sum().

Related

convert a list of dataframe into another dataframe

Code:
from datetime import date
from datetime import timedelta
from nsepy import get_history
import pandas as pd
# Screen a fixed list of NSE symbols: for each one, combine the open-interest
# figures of two futures expiries with the cash-market close, and keep the
# per-symbol frame when open interest was building up and then dropped on the
# latest day. Matching frames are collected in the list `target_stocks`.
end1 = date.today()
start1 = end1 - timedelta(days=25)
exp_date1 = date(2022, 8, 25)
exp_date2 = date(2022, 9, 29)
# stocks = ['HDFCLIFE']
stocks = ['RELIANCE','HDFCBANK','INFY','ICICIBANK','HDFC','TCS','KOTAKBANK','LT','SBIN','HINDUNILVR','AXISBANK',
          'ITC','BAJFINANCE','BHARTIARTL','ASIANPAINT','HCLTECH','MARUTI','TITAN','BAJAJFINSV','TATAMOTORS',
          'TECHM','SUNPHARMA','TATASTEEL','M&M','WIPRO','ULTRACEMCO','POWERGRID','HINDALCO','NTPC','NESTLEIND',
          'GRASIM','ONGC','JSWSTEEL','HDFCLIFE','INDUSINDBK','SBILIFE','DRREDDY','ADANIPORTS','DIVISLAB','CIPLA',
          'BAJAJ-AUTO','TATACONSUM','UPL','BRITANNIA','BPCL','EICHERMOT','HEROMOTOCO','COALINDIA','SHREECEM','IOC']

# Display options are process-wide, so set them once instead of per symbol.
pd.set_option('display.max_columns', 8)
pd.set_option('display.width', 200)

target_stocks = []
# oi_change = []
# The loop variable has its own name: `for stock in stock` rebinds the list
# to a single symbol string.
for symbol in stocks:
    stock_jan = get_history(symbol=symbol,
                            start=start1,
                            end=end1,
                            futures=True,
                            expiry_date=exp_date1)
    stock_feb = get_history(symbol=symbol,
                            start=start1,
                            end=end1,
                            futures=True,
                            expiry_date=exp_date2)
    # Cash-market history. It is fetched once and reused for the symbol and
    # close columns (the original issued the identical request twice).
    delivery_per_age = get_history(symbol=symbol,
                                   start=start1,
                                   end=end1)

    df = pd.DataFrame(delivery_per_age['Symbol'])
    df['TOTAL_OPN_INT'] = stock_jan['Open Interest'] + stock_feb['Open Interest']
    df['OI_COMBINED'] = stock_jan['Change in OI'] + stock_feb['Change in OI']
    df['%_CHANGE'] = ((df['OI_COMBINED'] / df['TOTAL_OPN_INT']) * 100).round(2)
    df['AVG_OI_COMBINED'] = df['OI_COMBINED'].rolling(5).mean()
    # df['DELIVERY_VOL'] = delivery_per_age['Deliverable Volume']
    # df['AVG_DELIVERY_VOL'] = df['DELIVERY_VOL'].rolling(5).mean()
    # df['DELIVERY_PER'] = delivery_per_age['%Deliverble'] * 100
    # df['AVG_DELIVERY_%'] = df['DELIVERY_PER'].rolling(5).mean()
    df['_CLOSE_PRICE_'] = delivery_per_age['Close']
    # print(df)

    # No rows for this symbol: nothing to evaluate, and indexing the last
    # row below would raise.
    if df.empty:
        continue

    pct_change = df['%_CHANGE']
    # OI change positive on every one of the preceding 4 (or 5) sessions,
    # then negative on the latest session.
    cond = ((pct_change.iloc[-5:-1].min() > 0) | (pct_change.iloc[-6:-1].min() > 0)) & (pct_change.iloc[-1] < 0)
    if(cond):
        target_stocks.append(df)
print(target_stocks)
PRODUCT:
[ Symbol TOTAL_OPN_INT OI_COMBINED %_CHANGE AVG_OI_COMBINED _CLOSE_PRICE_
Date
2022-07-19 HINDUNILVR 1015800 313200 30.83 NaN 2567.95
2022-07-20 HINDUNILVR 1617900 602100 37.21 NaN 2604.50
2022-07-21 HINDUNILVR 2355000 737100 31.30 NaN 2607.45
2022-07-22 HINDUNILVR 3671400 1316400 35.86 NaN 2640.60
2022-07-25 HINDUNILVR 5421300 1749900 32.28 943740.0 2623.60
2022-07-26 HINDUNILVR 6886200 1464900 21.27 1174080.0 2547.10
2022-07-27 HINDUNILVR 8522700 1636500 19.20 1380960.0 2581.95
2022-07-28 HINDUNILVR 10300200 1777500 17.26 1589040.0 2620.10
2022-07-29 HINDUNILVR 10250100 -50100 -0.49 1315740.0 2637.40
2022-08-01 HINDUNILVR 10237200 -12900 -0.13 963180.0 2593.00
2022-08-02 HINDUNILVR 10178700 -58500 -0.57 658500.0 2635.25
2022-08-03 HINDUNILVR 10208400 29700 0.29 337140.0 2626.35
2022-08-04 HINDUNILVR 10289700 81300 0.79 -2100.0 2627.95
2022-08-05 HINDUNILVR 10334100 44400 0.43 16800.0 2645.40
2022-08-08 HINDUNILVR 10350000 15900 0.15 22560.0 2650.35
2022-08-10 HINDUNILVR 10422900 72900 0.70 48840.0 2642.80
2022-08-11 HINDUNILVR 10432800 9900 0.09 44880.0 2613.70
2022-08-12 HINDUNILVR 10378200 -54600 -0.53 17700.0 2594.95]
Process finished with exit code 0.
Problem:
When I ran the code on 12 Aug I got the output shown above, which is a list. How can I convert that list, target_stocks, into a pandas DataFrame?
When I tried df2 = pd.DataFrame(target_stocks), it raised the error "must pass 2-d input. shape(4,18,16)".

You are appending a dataframe to an empty list. This method does not work for dataframes. Instead of having target_stocks = [] make it target_stocks = pd.DataFrame() (an empty dataframe). Then change:
if(cond):
target_stocks.append(df)
to
if(cond):
target_stocks = pd.concat([target_stocks, df])
To add a blank row at the end of the dataframe if the condition is met, add the code below. This finds the length of your data frame and adds a blank row (created by placing an empty value in every column):
target_stocks.loc[len(target_stocks)]=['']*len(target_stocks.columns)
All together:
from datetime import date
from datetime import timedelta
from nsepy import get_history
import pandas as pd
# Screen a fixed list of NSE symbols: for each one, combine the open-interest
# figures of two futures expiries with the cash-market close, and keep the
# per-symbol frame when open interest was building up and then dropped on the
# latest day. Matching frames are stacked into the single DataFrame
# `target_stocks`, each followed by a blank separator row.
end1 = date.today()
start1 = end1 - timedelta(days=25)
exp_date1 = date(2022, 8, 25)
exp_date2 = date(2022, 9, 29)
# stocks = ['HDFCLIFE']
stocks = ['RELIANCE','HDFCBANK','INFY','ICICIBANK','HDFC','TCS','KOTAKBANK','LT','SBIN','HINDUNILVR','AXISBANK',
          'ITC','BAJFINANCE','BHARTIARTL','ASIANPAINT','HCLTECH','MARUTI','TITAN','BAJAJFINSV','TATAMOTORS',
          'TECHM','SUNPHARMA','TATASTEEL','M&M','WIPRO','ULTRACEMCO','POWERGRID','HINDALCO','NTPC','NESTLEIND',
          'GRASIM','ONGC','JSWSTEEL','HDFCLIFE','INDUSINDBK','SBILIFE','DRREDDY','ADANIPORTS','DIVISLAB','CIPLA',
          'BAJAJ-AUTO','TATACONSUM','UPL','BRITANNIA','BPCL','EICHERMOT','HEROMOTOCO','COALINDIA','SHREECEM','IOC']

# Display options are process-wide, so set them once instead of per symbol.
pd.set_option('display.max_columns', 8)
pd.set_option('display.width', 200)

target_stocks = pd.DataFrame()
# oi_change = []
# The loop variable has its own name: `for stock in stock` rebinds the list
# to a single symbol string.
for symbol in stocks:
    stock_jan = get_history(symbol=symbol,
                            start=start1,
                            end=end1,
                            futures=True,
                            expiry_date=exp_date1)
    stock_feb = get_history(symbol=symbol,
                            start=start1,
                            end=end1,
                            futures=True,
                            expiry_date=exp_date2)
    # Cash-market history. It is fetched once and reused for the symbol and
    # close columns (the original issued the identical request twice).
    delivery_per_age = get_history(symbol=symbol,
                                   start=start1,
                                   end=end1)

    df = pd.DataFrame(delivery_per_age['Symbol'])
    df['TOTAL_OPN_INT'] = stock_jan['Open Interest'] + stock_feb['Open Interest']
    df['OI_COMBINED'] = stock_jan['Change in OI'] + stock_feb['Change in OI']
    df['%_CHANGE'] = ((df['OI_COMBINED'] / df['TOTAL_OPN_INT']) * 100).round(2)
    df['AVG_OI_COMBINED'] = df['OI_COMBINED'].rolling(5).mean()
    # df['DELIVERY_VOL'] = delivery_per_age['Deliverable Volume']
    # df['AVG_DELIVERY_VOL'] = df['DELIVERY_VOL'].rolling(5).mean()
    # df['DELIVERY_PER'] = delivery_per_age['%Deliverble'] * 100
    # df['AVG_DELIVERY_%'] = df['DELIVERY_PER'].rolling(5).mean()
    df['_CLOSE_PRICE_'] = delivery_per_age['Close']
    # print(df)

    # No rows for this symbol: nothing to evaluate, and indexing the last
    # row below would raise.
    if df.empty:
        continue

    pct_change = df['%_CHANGE']
    # OI change positive on every one of the preceding 4 (or 5) sessions,
    # then negative on the latest session.
    cond = ((pct_change.iloc[-5:-1].min() > 0) | (pct_change.iloc[-6:-1].min() > 0)) & (pct_change.iloc[-1] < 0)
    if(cond):
        target_stocks = pd.concat([target_stocks, df])
        # Blank separator row after each matching symbol: one empty string
        # per column, stored under the next integer label.
        target_stocks.loc[len(target_stocks)] = [''] * len(target_stocks.columns)
target_stocks
Output:

List index out of range error when using Pandas and Yahoo_fin

This is a modified version of a program from a tutorial that extracts data from all of the stocks in the S&P 500 and picks stocks that match the criteria you specify.
The issue is that when I run the program List index out of range [stock symbol] pops up and those stocks are skipped and aren't added to the final CSV file.
Example:
list index out of range for ABMD
list index out of range for ABT
list index out of range for ADBE
list index out of range for ADI
I'm not really sure what the issue is, I would greatly appreciate it if someone would explain it to me! Also, I am not applying any of the specifying criteria yet and am just trying to get all of the stock data into the CSV file. Make sure to create a database named stock_data if you try the program. Thanks!
My code:
import pandas_datareader as web
import pandas as pd
from yahoo_fin import stock_info as si
import datetime as dt
# Downloads one year of prices for up to 100 S&P 500 tickers, ranks them by
# return relative to the index, then collects price/valuation figures per
# ticker into final_df and writes it to final.csv.
# (Indentation was lost in this listing.)
dow_list = si.tickers_dow()
sp_list = si.tickers_sp500()
tickers = sp_list
'''tickers = list(set(tickers))
tickers.sort()'''
start = dt.datetime.now() - dt.timedelta(days=365)
end = dt.datetime.now()
# Index benchmark: cumulative return of ^GSPC over the window.
sp500_df = web.DataReader('^GSPC', 'yahoo', start, end)
sp500_df['Pct Change'] = sp500_df['Adj Close'].pct_change()
sp500_return = (sp500_df['Pct Change'] + 1).cumprod()[-1]
return_list = []
final_df = pd.DataFrame(columns=['Ticker', 'Latest_Price', 'Score', 'PE_Ratio', 'PEG_Ratio', 'SMA_150', 'SMA_200', '52_Week_Low', '52_Week_High'])
counter = 0
# First pass: cache each ticker's history as CSV and record its return
# relative to the index. Stops after 100 tickers.
for ticker in tickers:
df = web.DataReader(ticker, 'yahoo', start, end)
df.to_csv(f'stock_data/{ticker}.csv')
df['Pct Change'] = df['Adj Close'].pct_change()
stock_return = (df['Pct Change'] + 1).cumprod()[-1]
returns_compared = round((stock_return / sp500_return), 2)
return_list.append(returns_compared)
counter += 1
if counter == 100:
break
# zip() stops at the shorter input, so only tickers with a recorded return are kept.
best_performers = pd.DataFrame(list(zip(tickers, return_list)), columns=['Ticker', 'Returns Compared'])
best_performers['Score'] = best_performers['Returns Compared'].rank(pct=True) * 100
# NOTE(review): quantile(0) is the minimum score, so this filter keeps every
# row; the trailing comment describes a top-25% cut that the code does not make.
best_performers = best_performers[best_performers['Score'] >= best_performers['Score'].quantile(0)] #picks stocks in top 25 percentile
# Second pass: per-ticker indicators. Any exception is printed and the
# ticker is skipped, so it never reaches final_df.
for ticker in best_performers['Ticker']:
try:
df = pd.read_csv(f'stock_data/{ticker}.csv', index_col=0)
moving_averages = [150, 200]
for ma in moving_averages:
df['SMA_' + str(ma)] = round(df['Adj Close'].rolling(window=ma).mean(), 2)
latest_price = df['Adj Close'][-1]
pe_ratio = float(si.get_quote_table(ticker)['PE Ratio (TTM)'])
# NOTE(review): [1][4] indexes the returned valuation table by position, so
# it depends on the table's shape for each ticker - confirm this is where
# the reported "list index out of range" originates.
peg_ratio = float(si.get_stats_valuation(ticker)[1][4])
moving_average_150 = df['SMA_150'][-1]
moving_average_200 = df['SMA_200'][-1]
low_52week = round(min(df['Low'][-(52*5):]), 2)
# NOTE(review): this takes min() of the 'High' column; a 52-week high would
# presumably be max() - confirm.
high_52week = round(min(df['High'][-(52 * 5):]), 2)
score = round(best_performers[best_performers['Ticker'] == ticker]['Score'].tolist()[0])
# condition_1..condition_5 are computed but not used anywhere in this listing.
condition_1 = latest_price > moving_average_150 > moving_average_200
condition_2 = latest_price >= (1.3 * low_52week)
condition_3 = latest_price >= (0.75 * high_52week)
condition_4 = pe_ratio < 25
condition_5 = peg_ratio < 2
final_df = final_df.append({'Ticker': ticker,
'Latest_Price': latest_price,
'Score': score,
'PE_Ratio': pe_ratio,
'PEG_Ratio': peg_ratio,
'SMA_150': moving_average_150,
'SMA_200': moving_average_200,
'52_Week_Low': low_52week,
'52_Week_High': high_52week}, ignore_index=True)
except Exception as e:
print(f"{e} for {ticker}")
# NOTE(review): sort_values returns a new frame; the result is not assigned,
# so final_df is printed and saved unsorted.
final_df.sort_values(by='Score', ascending=False)
pd.set_option('display.max_columns', 10)
print(final_df)
final_df.to_csv('final.csv')

I have done the troubleshooting on your behalf. In conclusion, it looks like you have not checked what is actually returned when each individual indicator is fetched.
The values are being added to the dictionary, and then to the empty data frame, as indexed, named Series rather than as plain values. I believe that is the root cause of the error.
Specifying the last data and retrieving the values
iloc is not used.
52*5 lookbacks for 253 data
In addition, fetching the additional indicators for a ticker sometimes succeeds and sometimes fails, even for the same ticker. (The cause is unknown.) Therefore, it may be necessary to fetch pe_ratio and peg_ratio in advance and change how they are processed afterwards.
# For each ranked ticker, reads its cached CSV, computes the moving averages
# and price extremes, fetches the P/E ratio and appends one row to final_df.
# Any exception is printed and the ticker is skipped.
# (Indentation was lost in this listing.)
for ticker in best_performers['Ticker']:
#print(ticker)
try:
# Read without index_col so the frame keeps a default integer index.
df = pd.read_csv(f'stock_data/{ticker}.csv')#, index_col=0
moving_averages = [150, 200]
for ma in moving_averages:
df['SMA_' + str(ma)] = round(df['Adj Close'].rolling(window=ma).mean(), 2)
# [-1:].values[0] takes the last row by position and unwraps it to a scalar.
latest_price = df['Adj Close'][-1:].values[0]
pe_ratio = float(si.get_quote_table(ticker)['PE Ratio (TTM)'])
moving_average_150 = df['SMA_150'][-1:].values[0]
moving_average_200 = df['SMA_200'][-1:].values[0]
# NOTE(review): -(52*1) selects the last 52 rows of the file, and both lines
# use min() - including the one for the high. Confirm the intended window
# and whether the high should use max().
low_52week = round(min(df['Low'][-(52*1):]), 2)
high_52week = round(min(df['High'][-(52*1):]), 2)
#print(low_52week, high_52week)
score = round(best_performers[best_performers['Ticker'] == ticker]['Score'].tolist()[0])
#print(score)
#print(ticker, latest_price,score,pe_ratio,moving_average_200,low_52week,high_52week)
# NOTE(review): DataFrame.append was removed in pandas 2.0; on current pandas
# this needs pd.concat or a list of row dicts instead.
final_df = final_df.append({'Ticker': ticker,
'Latest_Price': latest_price,
'Score': score,
'PE_Ratio': pe_ratio,
'SMA_150': moving_average_150,
'SMA_200': moving_average_200,
'52_Week_Low': low_52week,
'52_Week_High': high_52week}, ignore_index=True)
#print(final_df)
except Exception as e:
print(f"{e} for {ticker}")
final_df
Ticker Latest_Price Score PE_Ratio SMA_150 SMA_200 52_Week_Low 52_Week_High
0 A 123.839996 40 31.42 147.26 150.31 123.06 126.75
1 AAP 218.250000 70 22.23 220.66 216.64 190.79 202.04
2 AAPL 165.070007 80 29.42 161.85 158.24 150.10 154.12
3 ABC 161.899994 90 21.91 132.94 129.33 132.00 137.79
4 ADBE 425.470001 10 42.46 552.19 571.99 407.94 422.38
Note
Some stocks are missing because additional indicators could not be obtained.
(tickers = sp_list[:10] tested on the first 10)

How to make stock Register with Pandas?

I have two pandas DataFrames for a location which, at various points, receives and dispatches certain items.
#Item Received 'tolo'
ID Date Challan No From Location To Location Item Quantity Remark
7 2021-09-16 124 Admin dkl-kunjakant pipe 500 hji
10 2021-09-01 345 Admin dkl-kunjakant pipe 1000 ert
#Item Dispatched 'frlo'
ID Date Challan No From Location To Location Item Quantity Remark
3 2021-09-01 236 dkl-kunjakant Dkl-deulasahi pipe 145 asa
8 2021-09-24 10 dkl-kunjakant Dkl-deulasahi pipe 50 hji
9 2021-09-23 540 dkl-kunjakant Dkl-deulasahi pipe 40 rty
My objective is to calculate closing Balance at the end of certain date.
To calculate closing balance the formula is simple
Previous day Closing Bal + Received - Dispatched = Current day closing balance
For this my code have to iterate through each row of dataframe and calculate closing balance of each day. my code:
# Combines the received (tolo) and dispatched (frlo) frames and tries to build
# a per-row stock register in main_df. (Indentation was lost in this listing.)
frames = [tolo, frlo]
result = pd.concat(frames)
result['Date'] = pd.to_datetime(result.Date)
# NOTE(review): sort_values returns a new frame; the result is not assigned,
# so `result` stays in concatenation order.
result.sort_values(by='Date')
main_columns = ['Date', 'Opening Stock', 'Received', 'Total', 'Dispatched', 'Closing Balance', 'Challan No', 'Remarks']
main_df = pd.DataFrame(columns=main_columns)
# main_df has no rows at this point, so this loop has nothing to fill with 0.
for col in main_df.columns:
main_df[col].values[:] = 0
main_df['Date'] = result['Date']
main_df['Challan No'] = result['Challan No']
for _, row in result.iterrows():
# print(row['c1'], row['c2'])
# main_df['Date'] = row['Date']
# NOTE(review): each assignment below writes one scalar to the WHOLE column of
# main_df, so every iteration overwrites the values set by earlier ones.
if row['To Location'] == godown_name:
main_df['Received'] = row['Quantity']
main_df['Remarks'] = row['To Location']
elif row['From Location'] == godown_name:
main_df['Dispatched'] = row['Quantity']
main_df['Remarks'] = row['From Location']
for _, row in main_df.iterrows():
# NOTE(review): row['Closing Balance'] is a single cell value, not a Series,
# so .shift() is not available on it - this is the reported AttributeError.
# The assignments also target `row`, the object yielded by iterrows(), not main_df.
row['Opening Stock'] = row['Closing Balance'].shift()
row['Total'] = row['Opening Stock'] + row['Received']
row['Closing Balance'] = row['Total'] - row['Dispatched']
print(main_df.head())
But by iterating through rows it throws me Attribute error in the pandas shift function.

You have a circular dependency: Opening Stock is dependent on Closing Balance, and Closing Balance is in turn dependent on Opening Stock. This is unsolvable. You have to calculate one variable another way.
You can also replace a lot of your code with vectorized code. Try this:
# Vectorized stock register: stack receipts and dispatches, order them by
# date, and derive the running balance with a cumulative sum.
main_df = pd.concat([tolo, frlo])
main_df['Date'] = pd.to_datetime(main_df['Date'])
main_df.sort_values('Date', inplace=True)
godown_name = 'dkl-kunjakant' # I may get this wrong
# True for rows delivered TO the godown; every other row is treated as a dispatch.
cond = main_df['To Location'] == godown_name
main_df['Received'] = np.where(cond, main_df['Quantity'], 0)
main_df['Dispatched'] = np.where(cond, 0, main_df['Quantity'])
main_df['Remarks'] = np.where(cond, main_df['To Location'], main_df['From Location'])
# Closing balance is the running total of (received - dispatched), so it does
# not need the opening stock as an input.
main_df['Closing Balance'] = (main_df['Received'] - main_df['Dispatched']).cumsum()
# Opening stock of a row is the previous row's closing balance (0 for the first row).
main_df['Opening Stock'] = main_df['Closing Balance'].shift(fill_value=0)
main_df['Total'] = main_df['Opening Stock'] + main_df['Received']

Calculating ADX indicator with pandas DF / Smoothing average issues

I've been having issues calculating ADX indicator with pandas DF in Python. Been racking my brains the past few days regarding what is going wrong. My thinking is something to do with smoothing average maybe? the last couple of lines from the results are: The ADX for the last row '2021-07-03' should be around 33.
i. date. open. high. low. close. +DI. -DM EMa. -DI. DX. ADX.
396 2021-06-30 35894.90 36088.87 34013.34 35036.58 ... 0.629251 -132.284380 -4.690877 1.309852 5.418229
397 2021-07-01 35048.78 35048.78 32720.03 33506.98 ... -4.476247 57.794871 2.098175 2.764602 5.479026
398 2021-07-02 33502.26 33971.12 32698.40 33796.99 ... -9.798669 52.972888 2.071717 1.536231 5.483165
399 2021-07-03 33831.62 34807.59 33322.38 34632.63 ... -4.374875 -37.287497 -1.544599 0.478130 5.475540
My code below:
def adx_calc(df, period=14):
    """Add Wilder's ADX and its intermediate columns to *df* in place.

    Reads the 'high', 'low' and 'close' columns and writes 'TR', 'ATR',
    '+DM', '-DM', '+DM_EMA', '-DM_EMA', '+DI', '-DI', 'DX' and 'ADX'.
    The first row has no previous bar, so its values are NaN.

    Args:
        df: DataFrame with 'high', 'low' and 'close' columns, oldest row first.
        period: Smoothing length used for ATR, the directional movement
            averages and ADX (Wilder's default is 14).

    Returns:
        The same DataFrame object, for convenience.

    Raises:
        ValueError: If *period* is smaller than 1.
    """
    if period < 1:
        raise ValueError("period must be >= 1")

    def _wilder(series):
        # Wilder's smoothing is an exponential average with alpha = 1/period
        # (not span=period, which decays roughly twice as fast).
        return series.ewm(alpha=1.0 / period, adjust=False).mean()

    high = df['high']
    low = df['low']
    prev_close = df['close'].shift(1)

    # True range: the largest of the bar's own range and the two gaps to the
    # previous close. Both gaps are taken as absolute values.
    true_range = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    df['TR'] = true_range.where(prev_close.notna())

    # Directional movement: only the larger of the two moves counts, and only
    # when it is positive; otherwise the bar contributes 0.
    up_move = high.diff()
    down_move = -low.diff()
    has_prev = up_move.notna() & down_move.notna()
    plus_dm = up_move.where((up_move > down_move) & (up_move > 0), 0.0)
    minus_dm = down_move.where((down_move > up_move) & (down_move > 0), 0.0)
    df['+DM'] = plus_dm.where(has_prev)
    df['-DM'] = minus_dm.where(has_prev)

    # Each smoothed series is computed once over the whole column instead of
    # being recomputed for every row.
    df['ATR'] = _wilder(df['TR'])
    df['+DM_EMA'] = _wilder(df['+DM'])
    df['-DM_EMA'] = _wilder(df['-DM'])

    df['+DI'] = 100 * df['+DM_EMA'] / df['ATR']
    df['-DI'] = 100 * df['-DM_EMA'] / df['ATR']

    # DX is expressed on a 0-100 scale, like the DI lines.
    df['DX'] = 100 * (df['+DI'] - df['-DI']).abs() / (df['+DI'] + df['-DI'])
    df['ADX'] = _wilder(df['DX'])
    return df

Timestamp not showing up in yfinance package in Python

I am trying to pull stock price history at 1 hour intervals through the Yahoo Finance API using the yfinance package. I run the following code.
import yfinance as yf
msft = yf.Ticker("MSFT")
df = msft.history(period = "5d", interval = "1h")
df.reset_index(inplace = True)
print(df["Date"][0])
print(df["Date"][1])
print(df["Date"][2])
I get the output
2020-04-03 00:00:00
2020-04-03 00:00:00
2020-04-03 00:00:00
Why are the timestamps all 00:00:00? The stock prices are actually at 1 hour intervals and seem correct. The dates also change correctly after 7 rows. Just the timestamps are all 0s. I can just postprocess the timestamps as I know the intervals. Just curious if I am doing something wrong here. Is this how the package is supposed to work?

Have you tried using “60m” as the interval argument? Appears there is an issue you can see here: https://github.com/ranaroussi/yfinance/issues/125

To those who are new to yfinance this is how to extract the data from the yfinance history() function in more detail.
yfinance uses a module called Pandas. The data structures returned from the yfinance API are Pandas objects.
The object returned by the history() function is a Pandas DataFrame object. They are like 2 dimensional arrays, with extras.
For DataFrame objects, there is a columns field which contains an array of column names, and an index field which contains an array of index objects applicable to the columns. The indexes are of a fixed type, and can be objects themselves. In the DataFrame object returned by the yfinance history() function, the indexes are Pandas Timestamp objects. (Pandas allows using any type for the indexes, for example plain integers or strings or other objects would be also allowed)
There is an in-depth description of Pandas datastructures here and here.
Each column in the DataFrame object is a Pandas Series object which is like a one dimensional array. The columns can be accessed by the column names from the DataFrame object. The column values in each column can be accessed using the index objects. Every column uses the same indexes. The Python array notation [ ] can be used to access the fields in the Pandas objects.
This is how to access the data:
def zeroX(n):
    """Return *n* as a string, left-padded with a zero to at least two characters.

    Used for month, day, hour, minute and second fields, e.g. 7 -> "07".
    """
    # zfill pads after any sign, so a negative value is not turned into "0-5".
    return str(n).zfill(2)
def dump_Pandas_Timestamp(ts):
    """Format the date part of *ts* as "YYYY-MM-DD".

    *ts* is any object with integer ``year``, ``month`` and ``day``
    attributes, such as the Timestamp objects used as the frame's index.
    """
    # To include the time of day as well, append:
    #   f" {ts.hour:02d}:{ts.minute:02d}:{ts.second:02d}"
    return f"{ts.year}-{ts.month:02d}-{ts.day:02d}"
def dump_Pandas_DataFrame(DF):
    """Render *DF* as text, one line per index entry.

    Each line is the formatted index timestamp, a space, and the row's
    values as comma-separated "column = value" pairs, ending in a newline.
    """
    lines = []
    for indexItem in DF.index:
        ts = dump_Pandas_Timestamp(indexItem)
        # Look each value up by column name first, then by index label.
        fields = ", ".join(
            f"{colname} = {DF[colname][indexItem]}" for colname in DF.columns
        )
        lines.append(f"{ts} {fields}\n")
    # Join once at the end instead of growing a string inside the loop.
    return "".join(lines)
msft = yf.Ticker("MSFT")
# get historical market data
hist = msft.history(period="1mo", interval="1d")
print ("hist = " + dump_Pandas_DataFrame(hist))
Output:
hist = 2020-07-08 Open = 210.07, High = 213.26, Low = 208.69, Close = 212.83, Volume = 33600000, Dividends = 0, Stock Splits = 0
2020-07-09 Open = 216.33, High = 216.38, Low = 211.47, Close = 214.32, Volume = 33121700, Dividends = 0, Stock Splits = 0
2020-07-10 Open = 213.62, High = 214.08, Low = 211.08, Close = 213.67, Volume = 26177600, Dividends = 0, Stock Splits = 0
2020-07-13 Open = 214.48, High = 215.8, Low = 206.5, Close = 207.07, Volume = 38135600, Dividends = 0, Stock Splits = 0
2020-07-14 Open = 206.13, High = 208.85, Low = 202.03, Close = 208.35, Volume = 37591800, Dividends = 0, Stock Splits = 0
2020-07-15 Open = 209.56, High = 211.33, Low = 205.03, Close = 208.04, Volume = 32179400, Dividends = 0, Stock Splits = 0
2020-07-16 Open = 205.4, High = 205.7, Low = 202.31, Close = 203.92, Volume = 29940700, Dividends = 0, Stock Splits = 0
2020-07-17 Open = 204.47, High = 205.04, Low = 201.39, Close = 202.88, Volume = 31635300, Dividends = 0, Stock Splits = 0
2020-07-20 Open = 205.0, High = 212.3, Low = 203.01, Close = 211.6, Volume = 36884800, Dividends = 0, Stock Splits = 0
2020-07-21 Open = 213.66, High = 213.94, Low = 208.03, Close = 208.75, Volume = 38105800, Dividends = 0, Stock Splits = 0
2020-07-22 Open = 209.2, High = 212.3, Low = 208.39, Close = 211.75, Volume = 49605700, Dividends = 0, Stock Splits = 0
2020-07-23 Open = 207.19, High = 210.92, Low = 202.15, Close = 202.54, Volume = 67457000, Dividends = 0, Stock Splits = 0
2020-07-24 Open = 200.42, High = 202.86, Low = 197.51, Close = 201.3, Volume = 39827000, Dividends = 0, Stock Splits = 0
2020-07-27 Open = 201.47, High = 203.97, Low = 200.86, Close = 203.85, Volume = 30160900, Dividends = 0, Stock Splits = 0
2020-07-28 Open = 203.61, High = 204.7, Low = 201.74, Close = 202.02, Volume = 23251400, Dividends = 0, Stock Splits = 0
2020-07-29 Open = 202.5, High = 204.65, Low = 202.01, Close = 204.06, Volume = 19632600, Dividends = 0, Stock Splits = 0
2020-07-30 Open = 201.0, High = 204.46, Low = 199.57, Close = 203.9, Volume = 25079600, Dividends = 0, Stock Splits = 0
2020-07-31 Open = 204.4, High = 205.1, Low = 199.01, Close = 205.01, Volume = 51248000, Dividends = 0, Stock Splits = 0
2020-08-03 Open = 211.52, High = 217.64, Low = 210.44, Close = 216.54, Volume = 78983000, Dividends = 0, Stock Splits = 0
2020-08-04 Open = 214.17, High = 214.77, Low = 210.31, Close = 213.29, Volume = 49280100, Dividends = 0, Stock Splits = 0
2020-08-05 Open = 214.9, High = 215.0, Low = 211.57, Close = 212.94, Volume = 28858600, Dividends = 0, Stock Splits = 0
2020-08-06 Open = 212.34, High = 216.37, Low = 211.55, Close = 216.35, Volume = 32656800, Dividends = 0, Stock Splits = 0
2020-08-07 Open = 214.85, High = 215.7, Low = 210.93, Close = 212.48, Volume = 27789600, Dividends = 0, Stock Splits = 0

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Dataframe and updating a new column value in a for loop - python

Related

convert a list of dataframe into another dataframe

List index out of range error when using Pandas and Yahoo_fin

How to make stock Register with Pandas?

Calculating ADX indicator with pandas DF / Smoothing average issues

Timestamp not showing up in yfinance package in Python

Categories

Resources