Here is my python sample for scraping some finance data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import dates as mdates
import datetime as dt
from mplfinance.original_flavor import candlestick_ohlc
import warnings
warnings.filterwarnings("ignore")
# fix_yahoo_finance is used to fetch data
import yfinance as yf
yf.pdr_override()
# input
symbol = 'AAPL'
start = '2018-01-01'
end = '2020-12-24'
# Read data
df = yf.download(symbol,start,end)
# View Columns
df.head()
df['Absolute_Return'] = 100 * (df['Adj Close'] - df['Adj Close'].shift(1))/df['Adj Close'].shift(1)
df.head(20)
fig = plt.figure(figsize=(14,10))
ax1 = plt.subplot(2, 1, 1)
ax1.plot(df['Adj Close'])
ax1.set_title('Stock '+ symbol +' Closing Price')
ax1.set_ylabel('Price')
ax2 = plt.subplot(2, 1, 2)
ax2.plot(df['Absolute_Return'] , label='Absolute Return', color='red')
#ax2.axhline(y=0, color='blue', linestyle='--')
#ax2.axhline(y=0.5, color='darkblue')
#ax2.axhline(y=-0.5, color='darkblue')
ax2.grid()
ax2.set_ylabel('Absolute Return')
ax2.set_xlabel('Date')
ax2.legend(loc='best')
Problem is, if I put them in Jupyter notebook and run block by block, everything works fine. But when I put them in a normal .py file then run it, it stops in halfway, right at
df = yf.download(symbol,start,end)
Anyone please explain that for me?
Related
#imports
import pandas as pd
import yfinance as yf
from datetime import date, timedelta
from matplotlib import pyplot as plt
import numpy as np
plt.style.use('fivethirtyeight')
#Get Albemarle Information
ALBINFO = yf.Ticker("ALB")
# Valid options are 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y,
# 5y, 10y and ytd.
print(ALBINFO.history(period="2y"))
#Graph the Close Values
plt.figure(figsize = (12.2,4.5))
plt.plot(ALBINFO['Close'], label = 'Close')
plt.xticks(rotation = 45)
plt.title('Close Price History')
plt.xlabel('Date')
plt.ylabel('Price USD')
plt.show()
I am writing this code on Python Visual Studio Code.
I am trying to be a graphical representation of the ALB closing stock price.
However, whenever I run the code, only a blank screen shows up.
The computer states that there is a problem with the line
plt.plot(ALBINFO['Close'], label = 'Close').
Could someone help understand what is wrong with this line?
Thank you
I coded the lines above and excepted a graph of closing prices but only got a blank graph.
You need to store a dataframe(object) in order to use it. In this case, ALBINFO is overwritten:
import pandas as pd
import yfinance as yf
from datetime import date, timedelta
from matplotlib import pyplot as plt
import numpy as np
plt.style.use('fivethirtyeight')
#Get Albemarle Information
ALBINFO = yf.Ticker("ALB")
# Valid options are 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y,
# 5y, 10y and ytd.
ALBINFO = ALBINFO.history(period="2y")
#Graph the Close Values
plt.figure(figsize = (12.2,4.5))
plt.plot(ALBINFO['Close'], label = 'Close')
plt.xticks(rotation = 45)
plt.title('Close Price History')
plt.xlabel('Date')
plt.ylabel('Price USD')
plt.show()
I'm trying to plot the stock price and the earnings on the graph but for some reason I'm getting this:
Graph1
Please see my code below:
import matplotlib.pyplot as plt
import yfinance as yf
import pandas
import pandas_datareader
import matplotlib
t = yf.Ticker("T")
df1 = t.earnings
df1['Earnings'].plot(label = 'earnings', figsize = (15,7), color='green')
print(df1)
df2 = t.history(start = '2018-01-01', end = '2021-01-01', actions = False, rounding = True)
df2['Close'].plot(label = 'price', figsize = (15,7),color = 'blue')
plt.show()
Could someone help me?
Thanks in advance.
Plotting in pandas is easy to create graphs, but if you try to overlay them with time series data, as in this example, you will encounter problems. There are many approaches, but the method that I find easiest is to convert the data level to the gregorian calendar managed by matplotlib and create the graph. Finally, you can either convert it to your preferred formatting, etc., or use the automatic formatter and locator.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import yfinance as yf
import pandas as pd
t = yf.Ticker("T")
df1 = t.earnings
df1.index = pd.to_datetime(df1.index, format='%Y')
df1.index = mdates.date2num(df1.index)
ax = df1['Earnings'].plot(label='earnings', figsize=(15, 7), color='green')
df2 = t.history(start='2018-01-01', end='2021-01-01', actions=False, rounding=True)
df2.index = mdates.date2num(df2.index)
df2['Close'].plot(label='price', ax=ax,color='blue', secondary_y=True)
#ax.set_xticklabels([x.strftime('%Y-%m') for x in mdates.num2date(df2.index)][::125])
locator = mdates.AutoDateLocator()
formatter = mdates.ConciseDateFormatter(locator)
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(formatter)
plt.show()
**ERROR: ValueError: object of too small depth for desired array
Main Script:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from sklearn import linear_model
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import datetime
import backtrader as bt
from backtrader.feeds import PandasData
import backtrader.analyzers as btanalyzers
plt.style.use('seaborn-colorblind')
ticker = 'TSLA'
start = datetime.datetime(2000, 1, 1)
end = datetime.datetime(2030, 12, 28)
stock = yf.download(ticker, progress=True, actions=True, start=start, end=end)
stock = stock['Adj Close']
stock = pd.DataFrame(stock)
stock.head()
stock.rename(columns = {"Adj Close": ticker}, inplace=True)
stock['returns'] = np.log(stock/stock.shift(1))
stock.dropna(inplace=True)
stock['direction'] = np.sign(stock['returns']).astype(int)
stock.head(10)
fig, ax = plt.subplots(2, 1, sharex=True, figsize=(12, 6))
ax[0].plot(stock[ticker], label = f'{ticker} Adj Close')
ax[0].set(title=f'{ticker} Closing Price', ylabel='Price')
ax[0].grid(True)
ax[0].legend()
ax[1].plot(stock['returns'], label = 'Daily Returns')
ax[1].set(title=f'{ticker} Daily Returns', ylabel='Returns')
ax[1].grid(True)
ax[1].legend()
lags= [1,2,3,4,5]
cols = []
for lag in lags:
col = f'rtn_lag{lag}'
stock[col] = stock['returns'].shift(lag)
cols.append(col)
stock.dropna(inplace=True)
stock.head(2)
def create_bins(data, bins=[0]):
global col_bin
cols_bin = []
for col in cols:
col_bin = col+'_bin'
data[col_bin] = np.digitize(data[col], bins=bin)
cols_bin.append(col_bin)
create_bins(stock)
stock.head()
print(stock)
plt.show()
The script that causes the error(function_base.py):
(Note: the script is way to long to indent so I'll just the line that makes the error; and the script comes with pycharm, so it is supposed to run right, but it didn't!)
mono = _monotonicity(bins)
I have 2 different stock charts. I like to compare the relative development in 2 different periods which are not of same length. The attached code does that correctly allthough with inefficient code.
What I can't solve is to plot to time axis for both subplots correctly over or under the respective subplot. Under the second subplot the time axis is always fully stretched alltough I used "sharex=ax3"?
The code for Jupyter notebook, Python 3.6. is below:
import matplotlib.pyplot as plt
from matplotlib import style
import matplotlib.dates as mdates
from mpl_finance import candlestick2_ohlc
import pandas as pd
import pandas_datareader.data as web
import matplotlib.dates as mdates
import matplotlib.cbook as cbook
from matplotlib.dates import date2num, DayLocator, DateFormatter
import sys
import warnings
df1=pd.read_csv('/var/samba/FD/YHO/Index/^HUI.csv', parse_dates=True, index_col=0)
df2=pd.read_csv('/var/samba/FD/YHO/Index/^GSPC.csv', parse_dates=True, index_col=0)
years = mdates.YearLocator() # every year
months = mdates.MonthLocator() # every month
years_fmt = mdates.DateFormatter('%Y')
start1=datetime(2007,10, 1)
end1=datetime(2009,10, 1)
df1_1=df1[start1:end1]
df2_1=df2[start1:end1]
df1_1['pct'] = df1_1['Close'].pct_change().cumsum()
df2_1['pct'] = df2_1['Close'].pct_change().cumsum()
df1_1['Elapsed_days'] = df1_1.index-df1_1.index[0]
df2_1['Elapsed_days'] = df2_1.index-df2_1.index[0]
df1_1['DateN'] = date2num(pd.to_datetime(df1_1.index).tolist())
df2_1['DateN'] = date2num(pd.to_datetime(df2_1.index).tolist())
start2=datetime(2019,10, 1)
end2=datetime(2020,3, 18)
df1_2=df1[start2:end2]
df2_2=df2[start2:end2]
df1_2['pct'] = df1_2['Close'].pct_change().cumsum()
df2_2['pct'] = df2_2['Close'].pct_change().cumsum()
df1_2['Elapsed_days'] = df1_2.index-df1_2.index[0]
df2_2['Elapsed_days'] = df2_2.index-df2_2.index[0]
df1_2['DateN'] = date2num(pd.to_datetime(df1_2.index).tolist())
df2_2['DateN'] = date2num(pd.to_datetime(df2_2.index).tolist())
%matplotlib inline
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(15,8),dpi=300)
ax1=plt.figtext(.1,.9,'Period over period charts for HUI, SP500')
ax1=plt.subplot2grid((12,1),(1,0),rowspan=5,colspan=1)
ax2=plt.subplot2grid((12,1),(6,0),rowspan=5,colspan=1, sharex=ax1)
ax3=plt.subplot2grid((12,1),(0,0),rowspan=1,colspan=1)
ax4=plt.subplot2grid((12,1),(11,0),rowspan=1,colspan=1, sharex=ax3)
ax1.xaxis.set_major_locator(years)
ax1.xaxis.set_major_formatter(years_fmt)
ax1.xaxis.set_minor_locator(months)
ax1.plot(df1_1.Elapsed_days,df1_1['pct'], label='HUI')
ax1.plot(df2_1.Elapsed_days,df2_1['pct'], label='SP500')
ax1.legend()
ax1.grid(True)
ax1.set_title('')
ax2.plot(df1_2.Elapsed_days,df1_2['pct'], label='HUI')
ax2.plot(df2_2.Elapsed_days,df2_2['pct'], label='SP500')
ax2.legend()
ax2.grid(True)
ax2.set_title('')
ax3.plot(df1_1.index,df1_1['Dividends'])
ax3.axes.yaxis.set_visible(False)
ax3.set_frame_on(False)
ax4.plot(df2_1.index,df2_1['Dividends'])
ax4.axes.yaxis.set_visible(False)
ax4.spines['top'].set_visible(False)
ax4.spines['right'].set_visible(False)
ax4.spines['left'].set_visible(False)
This code gives plot of candlesticks with moving averages but the x-axis is in index, I need the x-axis in dates.
What changes are required?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_finance import candlestick2_ohlc
#date format in data-> dd-mm-yyyy
nif = pd.read_csv('data.csv')
#nif['Date'] = pd.to_datetime(nif['Date'], format='%d-%m-%Y', utc=True)
mavg = nif['Close'].ewm(span=50).mean()
mavg1 = nif['Close'].ewm(span=13).mean()
fg, ax1 = plt.subplots()
cl = candlestick2_ohlc(ax=ax1,opens=nif['Open'],highs=nif['High'],lows=nif['Low'],closes=nif['Close'],width=0.4, colorup='#77d879', colordown='#db3f3f')
mavg.plot(ax=ax1,label='50_ema')
mavg1.plot(color='k',ax=ax1, label='13_ema')
plt.legend(loc=4)
plt.subplots_adjust(left=0.09, bottom=0.20, right=0.94, top=0.90, wspace=0.2, hspace=0)
plt.show()
Output:
I also had a lot of "fun" with this in the past... Here is one way of doing it using mdates:
import pandas as pd
import pandas_datareader.data as web
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib.finance import candlestick_ohlc
import matplotlib.dates as mdates
ticker = 'MCD'
start = dt.date(2014, 1, 1)
#Gathering the data
data = web.DataReader(ticker, 'yahoo', start)
#Calc moving average
data['MA10'] = data['Adj Close'].rolling(window=10).mean()
data['MA60'] = data['Adj Close'].rolling(window=60).mean()
data.reset_index(inplace=True)
data['Date']=mdates.date2num(data['Date'].astype(dt.date))
#Plot candlestick chart
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax2 = fig.add_subplot(111)
ax3 = fig.add_subplot(111)
ax1.xaxis_date()
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y'))
ax2.plot(data.Date, data['MA10'], label='MA_10')
ax3.plot(data.Date, data['MA60'], label='MA_60')
plt.ylabel("Price")
plt.title(ticker)
ax1.grid(True)
plt.legend(loc='best')
plt.xticks(rotation=45)
candlestick_ohlc(ax1, data.values, width=0.6, colorup='g', colordown='r')
plt.show()
Output:
Hope this helps.
Simple df:
Using plotly:
import plotly.figure_factory
fig = plotly.figure_factory.create_candlestick(df.open, df.high, df.low, df.close, dates=df.ts)
fig.show()
will automatically parse the ts column to be displayed correctly on x.
Clunky workaround here, derived from other post (if i can find again, will reference). Using a pandas df, plot by index and then reference xaxis tick labels to date strings for display. Am new to python / matplotlib, and this this solution is not so flexible, but it works basically. Also using a pd index for plotting removes the blank 'weekend' daily spaces on market price data.
Matplotlib xaxis index as dates
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_finance import candlestick2_ohlc
from mpl_finance import candlestick_ohlc
%matplotlib notebook # for Jupyter
# Format m/d/Y,Open,High,Low,Close,Adj Close,Volume
# csv data does not include NaN, or 'weekend' lines,
# only dates from which prices are recorded
DJIA = pd.read_csv('yourFILE.csv') #Format m/d/Y,Open,High,
Low,Close,Adj Close,Volume
print(DJIA.head())
fg, ax1 = plt.subplots()
cl =candlestick2_ohlc(ax=ax1,opens=DJIA['Open'],
highs=DJIA['High'],lows=DJIA['Low'],
closes=DJIA['Close'],width=0.4, colorup='#77d879',
colordown='#db3f3f')
ax1.set_xticks(np.arange(len(DJIA)))
ax1.set_xticklabels(DJIA['Date'], fontsize=6, rotation=-90)
plt.show()