import pandas as pd
import pandas_datareader.data as web
from datetime import datetime
# Download daily quotes for every ticker and stack them into one frame.
start_date = '2019-11-26'
end_date = datetime.now().strftime('%Y-%m-%d')  # strftime already returns a str
tickers = ['IBM', 'AAPL', 'GOOG']
# NOTE: the frames are concatenated as returned, with no column added to say
# which ticker a row belongs to.
df = pd.concat(
    [web.DataReader(ticker, 'yahoo', start_date, end_date) for ticker in tickers]
).reset_index()
# Raise the column display limit only for the duration of the print.
with pd.option_context('display.max_columns', 999):
    print(df)
When I run my code, I can see only "Date High Low Open Close Volume Adj Close" values.
What I want to see is the stocks' names before the Date!
Please, help me out...
It always gives data without stocks' names so you have to add names before you concatenate data.
tickers = ['IBM', 'AAPL', 'GOOG']
data = []
for ticker in tickers:
    # Tag every row with its ticker before the frames are stacked; relies on
    # `web`, `start_date` and `end_date` already being defined.
    df = web.DataReader(ticker, 'yahoo', start_date, end_date)
    df['Name'] = ticker
    data.append(df)
df = pd.concat(data).reset_index()
# Move 'Name' to the front so it is shown before 'Date'.
df.insert(0, 'Name', df.pop('Name'))
Related
I'm running this code to get the data below, but this is taking a lot of time to load. Is there a more optimized way to run it better? P.S. I'm working on google colab.
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
import pandas_datareader.data as web
import requests
import json
import pandas as pd
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
# Build one row per (stock, month-end date) with net debt, market cap and EV.
stocks = ['AYX', 'TEAM', 'DDOG', 'MDB', 'TDC', 'CFLT']
start_date = '2021-10-1'
current_date = datetime.now().strftime("%Y-%m-%d")
# freq='M' yields month-end dates between the start date and today.
date_range = pd.date_range(start=start_date, end=current_date, freq='M')
dates = [date.strftime("%Y-%m-%d") for date in date_range]

# Rows are collected in a list and turned into a frame once at the end;
# DataFrame.append is not available in pandas 2.x.
rows = []
for stock in stocks:
    for date in dates:
        # NOTE(review): neither lookup below uses `date`, so the same ticker
        # data is requested again for every date of a stock.
        info = yf.Ticker(stock).info
        info1 = yf.Ticker(stock).fast_info
        NetDebt = info['totalDebt'] - info['totalCash']
        marketcap = info1['market_cap']
        asofDate = date
        rows.append({
            'Date': asofDate,
            'Stock': stock,
            'NetDebt': NetDebt,
            'marketcap': marketcap,
            'EV': marketcap + NetDebt,
        })
df = pd.DataFrame(rows, columns=['Date', 'Stock', 'NetDebt', 'marketcap', 'EV'])
print(df)
I have a list of daily transactions that I am trying to plot on a line graph. I decided to group by month and year and sum those groupings. The data plots on the Plotly line graph as expected except the end dates are 30 days behind. This makes it difficult if I want to add/subtract the dates to obtain a certain date range.
To get a certain date range, I am not using the grouped dates but the original dates and applying relativedelta. How can I resolve this?
import pandas as pd
from datetime import datetime, timedelta
import plotly.express as px
import sqlite3
import numpy as np
from dateutil.relativedelta import relativedelta
# Sample transactions spread evenly over June-July 2022.
data = {
    'Transaction_type': ['Debit', 'Debit', 'Credit', 'Debit', 'Debit', 'Debit', 'Debit', 'Credit', 'Debit', 'Debit'],
    'Amount': [40, 150, 1000, 60, 80, 120, 80, 1000, 500, 80],
}
df = pd.DataFrame(data)
df['Date'] = pd.date_range(start='6/1/2022', end='7/30/2022', periods=len(df))
df['Date'] = pd.to_datetime(df['Date'])
# Month bucket as 'YYYY-MM' text; strftime returns strings, not datetimes.
df['year_month'] = df['Date'].dt.strftime('%Y-%m')

# Income Expense Visual
Income_Expense = df.copy()
# Relabel for the line legend. The data holds capitalised 'Credit'/'Debit',
# so the mapping must use the same spelling to match anything.
Income_Expense['Transaction_type'] = Income_Expense['Transaction_type'].replace(
    {'Credit': 'Income', 'Debit': 'Expense'}
)
# Total amount per transaction type and month.
Income_Expense = pd.pivot_table(
    Income_Expense,
    values=['Amount'],
    index=['Transaction_type', 'year_month'],
    aggfunc='sum',
).reset_index()
scatter_plot = px.line(
    Income_Expense,
    x='year_month',
    y='Amount',
    color='Transaction_type',
    title='Income and Expense',
    color_discrete_sequence=['red', 'green'],
    # Keyed by the colour column so 'Expense' takes the first colour (red)
    # and 'Income' the second (green).
    category_orders={'Transaction_type': ['Expense', 'Income']},
)
scatter_plot.update_layout(legend_traceorder='reversed')
scatter_plot.update_layout(yaxis_tickformat=',')
scatter_plot.show()
The cause of the problem is the strftime() call, which converts your dates to strings. From that point onwards, plotly treats each date as a string, so the axis labels are not what you want. You can run Income_Expense.info() to check the column types.
So, you need to leave the dates in the datetime format. pandas Grouper can be used to group the dates by monthly frequency. You can then plot it and specify the date format so that plotly understands that these are dates. Below is the updated code.
Note that Date needs to be in index for grouper to work. So, first I do this by the set_index(), then use the grouper with frequency as month along Transaction type, then do a sum and reset_index. This will create a dataframe that looks like the one you had, except that these are now datetime, not strings.
import pandas as pd
from datetime import datetime, timedelta
import plotly.express as px
import sqlite3
import numpy as np
from dateutil.relativedelta import relativedelta
# Sample transactions spread evenly over June-July 2022.
data = {
    'Transaction_type': ['Debit', 'Debit', 'Credit', 'Debit', 'Debit', 'Debit', 'Debit', 'Credit', 'Debit', 'Debit'],
    'Amount': [40, 150, 1000, 60, 80, 120, 80, 1000, 500, 80],
}
df = pd.DataFrame(data)
df['Date'] = pd.date_range(start='6/1/2022', end='7/30/2022', periods=len(df))
df['Date'] = pd.to_datetime(df['Date'])
# Text month label; no longer used by the plot, which groups on real dates.
df['year_month'] = df['Date'].dt.strftime('%Y-%m')

# Income Expense Visual
Income_Expense = df.copy()
# Relabel for the line legend, matching the capitalised values in the data.
Income_Expense['Transaction_type'] = Income_Expense['Transaction_type'].replace(
    {'Credit': 'Income', 'Debit': 'Expense'}
)
# Group the relabelled frame by calendar month and transaction type. 'Date'
# is moved into the index so the Grouper can bucket on it, and only 'Amount'
# is summed so the text column is left out of the aggregation.
Income_Expense = (
    Income_Expense.set_index('Date')
    .groupby([pd.Grouper(freq="M"), 'Transaction_type'])['Amount']
    .sum()
    .reset_index()
)
scatter_plot = px.line(
    Income_Expense,
    x='Date',
    y='Amount',
    color='Transaction_type',
    title='Income and Expense',
    color_discrete_sequence=['red', 'green'],
    # Keyed by the colour column so 'Expense' is red and 'Income' is green.
    category_orders={'Transaction_type': ['Expense', 'Income']},
)
scatter_plot.update_layout(legend_traceorder='reversed')
scatter_plot.update_layout(yaxis_tickformat=',')
# 'Date' is still a datetime column, so a date tick format can be applied.
scatter_plot.update_xaxes(tickformat="%d-%b-%Y")
scatter_plot.show()
I am new to Python. I tried to get 1-hour data for the VIX index from 1990-01-01 until now, but I got this error. Could anyone help, please?
# Snippet as posted in the question (loop body re-indented); it assumes
# `dt` (datetime) and `web` (pandas_datareader.data) are already imported.
ticker = '^VIX'
start = dt.datetime(1990, 1, 1)
for i in range(24):
    end = dt.datetime(2022, 1, 1)
    prices = web.DataReader(ticker, 'yahoo', start, end)['Close']
    returns = prices.pct_change()
    last_price = prices[-1]
    print(prices)
    # NOTE: `end` never changes, so from the second pass on start == end and
    # the request covers an empty date range.
    start = end
KeyError: 'Date'
The whole problem is that you use a for-loop with start = end, so in the second iteration you try to get data from start='2022,1,1' to end='2022,1,1'; this range doesn't contain any data, and that causes the error.
You should run it without for-loop
import datetime as dt
import pandas_datareader.data as web
ticker = '^VIX'
start = dt.datetime(1990, 1, 1)
end = dt.datetime(2022, 5, 1)  # almost today

# A single request covers the whole range; no loop is needed.
data = web.DataReader(ticker, 'yahoo', start, end)

close_prices = data['Close']
print(close_prices)

# .iloc is explicitly positional; a bare [-1] on a date-indexed Series relies
# on the integer being reinterpreted as a position.
last_price = close_prices.iloc[-1]
print('last:', last_price)

# Row-over-row percentage change for every column ...
returns = data.pct_change()
print(returns)

# ... and for the closing prices alone.
close_returns = close_prices.pct_change()
print(close_returns)
EDIT:
DataReader doesn't have an option to read 1-hour data.
The yfinance module can read 1-hour data, but only for the last 7-8 days.
import yfinance as yf
# Request hourly bars for the VIX index over an eight-day period and show them.
data = yf.download(
    '^VIX',
    interval='1h',
    period="8d",
)
print(data)
When I pull stock data from yfinance, can I create other columns of data that manipulate the 'date' column? I am new to python and still learning a lot. I have created other columns using the stock price data, but I cannot figure out how to manipulate the 'date' column.
For example, 10/26/2020, I would like to create columns with the following data:
day_of_week, Monday = 1
year = 2020
month = 10
day = 26
week = 44
trade_day = 207
import pandas as pd
import numpy as np
import yfinance as yf
import pandas_datareader as pdr
import datetime as dt
import matplotlib.pyplot as plt
##Get stock price data
# Symbol whose price history is downloaded.
ticker = 'NVDA'

# Time window: from the start date below up to the moment the script runs.
startyear, startmonth, startday = 2017, 1, 1
start = dt.datetime(year=startyear, month=startmonth, day=startday)
now = dt.datetime.now()

# Download the price history through pandas_datareader's Yahoo helper.
df = pdr.get_data_yahoo(ticker, start, now)

# Derived columns: change of the adjusted close versus the previous row, and
# the high-low range of each row.
adj_close = df['Adj Close']
df['% Change'] = adj_close.div(adj_close.shift(1)).sub(1)
df['Range'] = df['High'].sub(df['Low'])

# Bare expression: shows the frame when run as the last line of a notebook cell.
df
You want to use the index of your dataframe, which is of type pd.DatetimeIndex.
To split the date into new columns:
# Work on a copy and add one column per calendar component of the index.
dates = df.index
new_df = df.copy()
new_df['year'] = dates.year
new_df['month'] = dates.month
new_df['day'] = dates.day
To carry out arithmetic operations relative to the first trade date:
# Reference date for the week/year offsets: the earliest date in the index.
start_date = df.index.min()
# Running count of rows (trading days) within each calendar year, starting at
# 1. Subtracting days-of-month would only compare calendar day numbers.
# NOTE: the count for the first year is only the true trading-day number if
# the data starts at that year's first trading day.
new_df['trade_day'] = new_df.groupby(new_df.index.year).cumcount() + 1
# ISO week number via isocalendar(); the `.week` attribute of a DatetimeIndex
# is not available in pandas 2.x.
new_df['trade_week'] = (
    df.index.isocalendar().week.to_numpy(dtype='int64') - start_date.week
)
new_df['trade_year'] = df.index.year - start_date.year
new_df['day_of_week'] = df.index.weekday  # Monday=0 ... Sunday=6
new_df['days_in_month'] = df.index.days_in_month
new_df['day_name'] = df.index.day_name()
new_df['month_name'] = df.index.month_name()
Choose another start date
# Use a fixed reference date instead of the earliest date in the index.
start_date = pd.to_datetime('2017-01-01')
I did figure out most of the problem. I cannot figure out how to calculate the 'trade date'.
# Convert the 'Date' index to a 'Date' column
df.reset_index(inplace=True)
# Create columns manipulating 'Date'
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day
df['Week of Year'] = df['Date'].dt.isocalendar().week
# dayofweek counts Monday as 0; shift by one so that Monday = 1.
df['Day of Week'] = df['Date'].dt.dayofweek + 1
I am trying to scrape the stock price of the same company on a daily basis for the next 30 days using Python. I used list indexing and .append(), but the initial value gets replaced as soon as the updated price is added. How can I make a list of the prices of the same stock over 30 days?
# Catalyst Pharmaceuticals
# New York Stock Exchange
import requests
import pytz
from bs4 import BeautifulSoup
import datetime
import csv
# Fetch and parse the page once, when the script starts.
r = requests.get('https://robinhood.com/collections/technology')
html = r.content
soup = BeautifulSoup(html, 'html.parser')

# Prices and their timestamps, accumulated across calls to websc().
price_list = []
dttm = []


def websc():
    """Record the scraped price with a GMT timestamp and append it to the CSV.

    Walks the children of the third ``a.rh-hyperlink`` element of the parsed
    page, stores each child's text and the current GMT time in the
    module-level ``price_list`` and ``dttm`` lists, prints the accumulated
    (price, timestamp) pairs and appends them to
    'Catalyst Pharmaceuticals Monthly.csv'.

    NOTE(review): the original indentation was lost; the loop is assumed to
    end after the two ``append`` calls -- confirm.
    """
    global a_price
    # i=10
    for p in soup.find_all('a', {'class': 'rh-hyperlink'})[2]:
        a_price = p.text
        dd = datetime.datetime.now(pytz.timezone("GMT"))
        dd = dd.strftime("%Y-%m-%d %H:%M:%S")
        price_list.append(a_price)
        dttm.append(dd)
    d = list(zip(price_list, dttm))
    print(d)
    # Append mode keeps rows written by earlier runs. The `with` block closes
    # the file even if writing fails, and newline='' is what the csv module
    # expects for files it writes.
    with open('Catalyst Pharmaceuticals Monthly.csv', 'a', newline='') as csv_file:
        csv.writer(csv_file).writerows(d)


websc()
You need to open the file in append mode rather than write mode if you don't want to overwrite the file
Can't you just loop through some tickers, push everything into a dataframe, and then export that to a CSV?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.optimize as sco
import datetime as dt
import math
from datetime import datetime, timedelta
from pandas_datareader import data as wb
from sklearn.cluster import KMeans
np.random.seed(777)

# Fixed download window.
start = '2020-01-01'
end = '2020-08-27'
# Alternative: a rolling window ending today.
# N = 165
# start = datetime.now() - timedelta(days=N)
# end = dt.datetime.today()

tickers = ['AAPL', 'MSFT', 'GOOG', 'SBUX', 'MCD', 'NKE']
thelen = len(tickers)

# One frame per ticker, each holding a 'ticker' label and its adjusted close.
price_data = []
for ticker in tickers:
    try:
        prices = wb.DataReader(ticker, start=start, end=end, data_source='yahoo')[['Adj Close']]
    except Exception as exc:
        # Report the ticker that failed and why, then carry on with the rest.
        # Catching Exception (not a bare except) lets Ctrl-C still interrupt.
        print(ticker, exc)
    else:
        price_data.append(prices.assign(ticker=ticker)[['ticker', 'Adj Close']])

# Stack the per-ticker frames into one long frame.
df = pd.concat(price_data)

# Bare expressions: these only display something when run as the last line of
# a notebook cell.
df.dtypes
df.head()
df.shape

# finally....
df.to_csv('file_name.csv')
Try that and post back if you need something else, related to this.