Visualizing Market Closing Values in Python Using MatPlotLib and YahooFinance - python

#imports
import pandas as pd
import yfinance as yf
from datetime import date, timedelta
from matplotlib import pyplot as plt
import numpy as np
plt.style.use('fivethirtyeight')
#Get Albemarle Information
ALBINFO = yf.Ticker("ALB")
# Valid options are 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y,
# 5y, 10y and ytd.
print(ALBINFO.history(period="2y"))
#Graph the Close Values
plt.figure(figsize = (12.2,4.5))
plt.plot(ALBINFO['Close'], label = 'Close')
plt.xticks(rotation = 45)
plt.title('Close Price History')
plt.xlabel('Date')
plt.ylabel('Price USD')
plt.show()
I am writing this code in Python using Visual Studio Code.
I am trying to build a graphical representation of the ALB closing stock price.
However, whenever I run the code, only a blank screen shows up.
The computer states that there is a problem with the line
plt.plot(ALBINFO['Close'], label = 'Close').
Could someone help understand what is wrong with this line?
Thank you
I coded the lines above and expected a graph of closing prices but only got a blank graph.

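You need to store the DataFrame returned by history() in a variable in order to use it; the original code only printed it, so ALBINFO was still the Ticker object. In this case, ALBINFO is overwritten with the DataFrame: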
import pandas as pd
import yfinance as yf
from datetime import date, timedelta
from matplotlib import pyplot as plt
import numpy as np
plt.style.use('fivethirtyeight')
#Get Albemarle Information
ALBINFO = yf.Ticker("ALB")
# Valid options are 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y,
# 5y, 10y and ytd.
ALBINFO = ALBINFO.history(period="2y")
#Graph the Close Values
plt.figure(figsize = (12.2,4.5))
plt.plot(ALBINFO['Close'], label = 'Close')
plt.xticks(rotation = 45)
plt.title('Close Price History')
plt.xlabel('Date')
plt.ylabel('Price USD')
plt.show()

Related

Bitcoin Chart with log scale Python

I'm using Python (beginner) and I want to plot the Bitcoin price in log scale but without seeing the log price, I want to see the linear price.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from cryptocmd import CmcScraper
from math import e
from matplotlib.ticker import ScalarFormatter
# -------------IMPORT THE DATA----------------
btc_data = CmcScraper("BTC", "28-04-2012", "27-11-2022", True, True, "USD")
# Create a Dataframe
df = btc_data.get_dataframe()
#Set the index as Date instead of numerical value
df = df.set_index(pd.DatetimeIndex(df["Date"].values))
df
#Plot the Data
plt.style.use('fivethirtyeight')
plt.figure(figsize =(20, 10))
plt.title("Bitcoin Price", fontsize=18)
plt.yscale("log")
plt.plot(df["Close"])
plt.xlabel("Date", fontsize=15)
plt.ylabel("Price", fontsize=15)
plt.show()
My output
As you can see we have log scale price but I want to see "100 - 1 000 - 10 000" instead of "10^2 - 10^3 - 10^4" on the y axis.
Does anyone know how to solve this?
Have a nice day!
Welcome to Stackoverflow!
You were getting there, the following code will yield what you want (I simply added some fake data + 1 line of code to your plotting code):
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
y = [10**x for x in np.arange(0, 5, 0.1)]
x = [x for x in np.linspace(2018, 2023, len(y))]
#Plot the Data
plt.style.use('fivethirtyeight')
plt.figure(figsize =(20, 10))
plt.title("Bitcoin Price", fontsize=18)
plt.yscale("log")
plt.plot(x, y)
plt.xlabel("Date", fontsize=15)
plt.ylabel("Price", fontsize=15)
plt.gca().get_yaxis().set_major_formatter(ticker.ScalarFormatter())
plt.show()
This generates the following figure:
The fundamental lines are these:
import matplotlib.ticker as ticker
plt.gca().get_yaxis().set_major_formatter(ticker.ScalarFormatter())
Explanation: plt.gca() gets the currently active axis object. This object is the one we want to adapt. And the actual thing we want to adapt is the way our ticks get formatted for our y axis. Hence the latter part: .get_yaxis().set_major_formatter(). Now, we only need to choose which formatter. I chose ScalarFormatter, which is the default for scalars. More info on your choices can be found here.
Hope this helps!

Is there any way to show mean in box plot using Python?

I'm just starting using Matplotlib, and I'm trying to learn how to draw a box plot in Python using Colab.
My problem is: I'm not able to put the median on the graph. The graph just showed the quartiles, mean, and outliers. Can someone help me?
My code is the following.
from google.colab import auth
auth.authenticate_user()
import gspread
import numpy as np
from oauth2client.client import GoogleCredentials
gc = gspread.authorize(GoogleCredentials.get_application_default())
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as pl
sns.set_theme(style="ticks", color_codes=True)
wb = gc.open_by_url('URL_JUST_FOR_EXAMPLE')
boxplot = wb.worksheet('control-Scale10to100')
boxplotData = boxplot.get_all_values()
df = pd.DataFrame(boxplotData[1:], columns=boxplotData[0])
df.drop(df.columns[0], 1, inplace=True)
df = df.apply(pd.to_numeric, errors='ignore')
df.dtypes
df.describe()
dfBoxPlotData = df.iloc[:,4:15]
dfBoxPlotData.apply(pd.to_numeric)
dfBoxPlotData.head()
props = dict(whiskers="Black", medians="Black", caps="Black")
ax = df.plot.box(rot=90, fontsize=14, figsize=(15, 8), color=props, patch_artist=True, grid=False, meanline=True, showmeans=True, meanprops=dict(color='red'))
I tried running your code with a sample data set where the mean and median are distinct, and like @tdy showed, as long as the parameters showmeans=True and meanline=True are being passed to the df.plot.box method, the mean and median should both show up. Is it possible that in your data set, the mean and median are close enough together that they're hard to distinguish?
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as pl
mu, sigma = 50., 10. # mean and standard deviation
np.random.seed(42)
s = np.random.normal(mu, sigma, 30)
df = pd.DataFrame({'values':s})
props = dict(whiskers="Black", medians="Black", caps="Black")
ax = df.plot.box(rot=90, fontsize=14, figsize=(15, 8), color=props, patch_artist=True, grid=False, meanline=True, showmeans=True, meanprops=dict(color='red'))
pl.show()

pyplot line plotting erratically

I have these two simple CSV data sets, but when plotting them the 'mon' line goes strange toward the end.
Plotting one chart on its own is fine, but when the two charts are plotted together the 'monarch' one goes strange.
Thanks in advance.
Here is the code
import pandas as pd
from matplotlib import pyplot as plt
def run_plot1():
    """Plot the one-bed rates from the two CSV files on a single chart.

    Reads './data/ashburn.csv' and './data/monarch1bed.csv', draws the
    'Ash1bed' and 'Mon1bed' columns against each file's 'Date' column,
    saves the figure to 'data/sample.png' and then shows it.
    """
    # NOTE: the 'Date' columns are used exactly as read_csv returns them
    # (no date parsing is requested here).
    df_ash = pd.read_csv('./data/ashburn.csv')
    df_mon = pd.read_csv('./data/monarch1bed.csv')
    plt.grid(True)
    plt.plot(df_ash['Date'], df_ash['Ash1bed'], label='Ashburn 1 bed')
    plt.plot(df_mon['Date'], df_mon['Mon1bed'], label='Monarch 1 bed')
    plt.xlabel("Date")
    plt.ylabel("Rate")
    plt.style.use("fivethirtyeight")
    plt.title("One Bed Comparison")
    plt.legend()
    plt.savefig('data/sample.png')
    plt.tight_layout()
    plt.show()


run_plot1()
and the CSV data:
Date,Ash1bed,Ash2bed,Ash3bed
08-01,306,402
22-01,181,286,349
05-02,176,281,336
19-02,188,293,369
04-03,201,306,402
18-03,209
01-04,217,394,492
15-04,209,354,455
29-04,197,302,387
13-05,205,326,414
27-05,217,362,473
10-06,390,532
08-07,415
22-07,415
05-08,415
19-08,415
15-09,290,452,594
and another :
Date,Mon1bed
08-01,230
05-02,160
19-02,160
04-03,190
18-03,190
01-04,260
15-04,260
29-04,260
13-05,300
27-05,330
10-06,330
24-06,350
08-07,350
22-07,350
05-08,350
19-08,350
02-09,350
The basic reason for the erratic plot is that the Date columns
in both DataFrames are of string type.
Convert them to datetime:
df_ash.Date = pd.to_datetime(df_ash.Date, format='%d-%m')
df_mon.Date = pd.to_datetime(df_mon.Date, format='%d-%m')
But to have "reader friendly" X-axis labels, a number of additional
steps are required.
Start from necessary imports:
from pandas.plotting import register_matplotlib_converters
import matplotlib.dates as mdates
Then register matplotlib converters:
register_matplotlib_converters()
And to get proper printout, run:
fig, ax = plt.subplots() # figsize=(10, 6)
ax.grid(True)
ax.plot(df_ash['Date'], df_ash['Ash1bed'], label='Ashburn 1 bed')
ax.plot(df_mon['Date'], df_mon['Mon1bed'], label='Monarch 1 bed')
plt.xlabel("Date")
plt.ylabel("Rate")
plt.style.use("fivethirtyeight")
plt.title("One Bed Comparison")
plt.legend()
dm_fmt = mdates.DateFormatter('%d-%m')
ax.xaxis.set_major_formatter(dm_fmt)
plt.xticks(rotation=45);
For your data I got:
You should convert the date variable to a date format
df1.Date = pd.to_datetime(df1.Date, format='%d-%m')
df2.Date = pd.to_datetime(df2.Date, format='%d-%m')
plt.plot(df1.Date, df1.Ash1bed)
plt.plot(df2.Date, df2.Mon1bed)

candlestick plot from pandas dataframe, replace index by dates

This code plots candlesticks with moving averages, but the x-axis shows the row index; I need the x-axis to show dates.
What changes are required?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_finance import candlestick2_ohlc
#date format in data-> dd-mm-yyyy
nif = pd.read_csv('data.csv')
#nif['Date'] = pd.to_datetime(nif['Date'], format='%d-%m-%Y', utc=True)
mavg = nif['Close'].ewm(span=50).mean()
mavg1 = nif['Close'].ewm(span=13).mean()
fg, ax1 = plt.subplots()
cl = candlestick2_ohlc(ax=ax1,opens=nif['Open'],highs=nif['High'],lows=nif['Low'],closes=nif['Close'],width=0.4, colorup='#77d879', colordown='#db3f3f')
mavg.plot(ax=ax1,label='50_ema')
mavg1.plot(color='k',ax=ax1, label='13_ema')
plt.legend(loc=4)
plt.subplots_adjust(left=0.09, bottom=0.20, right=0.94, top=0.90, wspace=0.2, hspace=0)
plt.show()
Output:
I also had a lot of "fun" with this in the past... Here is one way of doing it using mdates:
import pandas as pd
import pandas_datareader.data as web
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib.finance import candlestick_ohlc
import matplotlib.dates as mdates
ticker = 'MCD'
start = dt.date(2014, 1, 1)
#Gathering the data
data = web.DataReader(ticker, 'yahoo', start)
#Calc moving average
data['MA10'] = data['Adj Close'].rolling(window=10).mean()
data['MA60'] = data['Adj Close'].rolling(window=60).mean()
data.reset_index(inplace=True)
data['Date']=mdates.date2num(data['Date'].astype(dt.date))
#Plot candlestick chart
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax2 = fig.add_subplot(111)
ax3 = fig.add_subplot(111)
ax1.xaxis_date()
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y'))
ax2.plot(data.Date, data['MA10'], label='MA_10')
ax3.plot(data.Date, data['MA60'], label='MA_60')
plt.ylabel("Price")
plt.title(ticker)
ax1.grid(True)
plt.legend(loc='best')
plt.xticks(rotation=45)
candlestick_ohlc(ax1, data.values, width=0.6, colorup='g', colordown='r')
plt.show()
Output:
Hope this helps.
Simple df:
Using plotly:
import plotly.figure_factory
fig = plotly.figure_factory.create_candlestick(df.open, df.high, df.low, df.close, dates=df.ts)
fig.show()
will automatically parse the ts column to be displayed correctly on x.
Clunky workaround here, derived from another post (if I can find it again, I will reference it). Using a pandas df, plot by index and then set the x-axis tick labels to the date strings for display. I am new to Python / matplotlib, and this solution is not so flexible, but it basically works. Also, using a pd index for plotting removes the blank 'weekend' daily spaces on market price data.
Matplotlib xaxis index as dates
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_finance import candlestick2_ohlc
from mpl_finance import candlestick_ohlc
%matplotlib notebook # for Jupyter
# Expected CSV format: m/d/Y,Open,High,Low,Close,Adj Close,Volume
# The csv data does not include NaN or 'weekend' lines,
# only dates for which prices are recorded.
DJIA = pd.read_csv('yourFILE.csv')
print(DJIA.head())

fg, ax1 = plt.subplots()
cl = candlestick2_ohlc(
    ax=ax1,
    opens=DJIA['Open'],
    highs=DJIA['High'],
    lows=DJIA['Low'],
    closes=DJIA['Close'],
    width=0.4,
    colorup='#77d879',
    colordown='#db3f3f',
)

# The chart is plotted by row index, so put one tick on every row and
# label it with the matching string from the 'Date' column.
ax1.set_xticks(np.arange(len(DJIA)))
ax1.set_xticklabels(DJIA['Date'], fontsize=6, rotation=-90)
plt.show()

Trying to plot timedelta, but the yaxis is in 1e14, want format HH:MM:SS

I am plotting date time on the xaxis (which is actual dates) and then timedelta on the yaxis, which is actually time spans, or amount of time. Originally I was using date time for the yaxis, but I came across the usecase where the time values went over 24 hours, and then it broke the code. So instead I had to use timedelta in order to accommodate these values. But when I try to plot it using plot_date, the yaxis with the timedelta values comes out funny.
I have my information stored in a dataframe originally, and then change the values to a timedelta. This is the code I have to output this graph
import datetime as dt
import matplotlib.dates as mdates
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib as mpl
from matplotlib.backends.backend_pdf import PdfPages
plt.close('all')
#put data into dataframe
location='D:\CAT'
csvpath=location+('\metrics_summaryTEST.csv')
print csvpath
df=pd.read_csv(csvpath)
#setup plot/figure
media = set(df.mediaNumber.values)
num_plots = len(media)
ax = plt.gca()
pdfpath=location+('\metrics_graphs.pdf')
pp = PdfPages(pdfpath)
#declaring some variables
publishTimevals=np.zeros(len(df.publishTime.values),dtype="S20")
xdates=np.zeros(len(df.publishTime.values),dtype="S20")
ytimes=np.zeros(len(df.totalProcessTime.values),dtype="S8")
for f in sorted(media):
name = f
plt.figure(f)
plt.clf()
color = next(ax._get_lines.color_cycle)
#PROCESS PUBLISHTIME
publishTimevals= df.loc[df['mediaNumber']==f,['publishTime']]
xdates = map(lambda x: mpl.dates.date2num(dt.datetime.strptime(x, '%Y-%m-%d %H:%M')),publishTimevals.publishTime)
#PROCESS TOTALPROCESSTIME
totalProcessTimevals= df.loc[df['mediaNumber']==f,['totalProcessTime']]
ytimes = pd.to_timedelta(totalProcessTimevals.totalProcessTime)
plt.plot_date(xdates,ytimes,'o-',label='totalProcessTime',color=color)
print ytimes
plt.show()
#format the plot
plt.gcf().autofmt_xdate()
plt.xlabel('publishTime')
plt.ylabel('ProcessTime HH:MM:SS')
plt.legend(loc=8, bbox_to_anchor=(0.5,-0.3),ncol=3,prop={'size':9})
ax.grid('on')
plt.title('%s Processing Time' % (f))
plt.margins(0.05)
#plt.grid('on')
plt.minorticks_on()
plt.grid(which = 'minor', alpha = 0.3)
plt.grid(which = 'major', alpha = 0.7)
plt.show()
Could anyone point out what's going on here?

Categories