Plot a line on a OHLC minutes graph by mpl_finance - python

Using the code below, I can successfully plot an OHLC graph:
names = pd.Series(data.index.strftime("%d-%m-%y").unique())
indexs = pd.Series(data.index.date).unique()
ohlc = data[data.index.date == indexs[0]].copy()
ohlc['mdate'] = [mdates.date2num(d) for d in ohlc.index]
ohlc['SMA10'] = ohlc["Close"].rolling(10).mean()
fig, ax = plt.subplots(figsize = (10,5))
mpl_finance.candlestick2_ohlc(ax,ohlc['Open'],ohlc['High'],ohlc['Low'],ohlc['Close'], width = 0.6)
xdate = ohlc.index
def mydate(x, pos):
    """Tick formatter mapping an integer candle position to its timestamp.

    Parameters:
        x: tick location supplied by matplotlib; truncated to an int and
            used as a position into ``xdate``.
        pos: tick index supplied by matplotlib (unused).

    Returns:
        ``xdate[int(x)]`` when the position lies inside the data, otherwise
        an empty string so that out-of-range ticks stay unlabeled.
    """
    i = int(x)
    # A negative position would not raise IndexError: xdate[-1] silently
    # wraps around and labels a tick left of the first candle with the last
    # timestamp. Treat it as out of range instead.
    if i < 0:
        return ''
    try:
        return xdate[i]
    except IndexError:
        return ''
ax.xaxis.set_major_formatter(ticker.FuncFormatter(mydate))
fig.autofmt_xdate()
fig.tight_layout()
plt.show()
However, when I add this line
ax.plot(ohlc.mdate, ohlc["SMA10"], color ="green", label = "SMA50"),
I get an empty graph with two tiny vertical lines. What is wrong here, please?
Open High Low ... Volume mdate SMA10
Date_Time ...
2018-02-13 11:55:00 7169.7 7172.4 7167.0 ... 444 736738.496528 NaN
2018-02-13 12:00:00 7171.6 7174.2 7164.2 ... 578 736738.500000 NaN
2018-02-13 12:05:00 7174.2 7174.7 7170.7 ... 458 736738.503472 NaN
2018-02-13 12:10:00 7172.0 7175.7 7171.2 ... 401 736738.506944 NaN
2018-02-13 12:15:00 7174.7 7176.7 7173.0 ... 389 736738.510417 NaN
This is the columns of my data
Index(['Open', 'High', 'Low', 'Close', 'Volume', 'mdate', 'SMA10'], dtype='object')

replace
ax.plot(ohlc.mdate, ohlc["SMA10"], color ="green", label = "SMA50")
by
# candlestick2_ohlc draws the candles at integer positions 0..len(ohlc)-1
# (that is why the tick formatter looks dates up with xdate[int(x)]), so the
# moving average must use the same integer positions; date values on the
# x-axis would squash the candles into a thin line again.
# The label also now names the series that is actually plotted (SMA10).
ax.plot(range(len(ohlc)), ohlc["SMA10"], color="green", label="SMA10")

Related

How to generate 2-yaxis graphs on a panel data per id?

I have a dataset, df that looks like this:
Date
Code
City
State
Quantity x
Quantity y
Population
Cases
Deaths
2019-01
10001
Los Angeles
CA
445
0
0
2019-01
10002
Sacramento
CA
4450
556
0
0
2020-03
12223
Houston
TX
440
4440
35000000
23
11
...
...
...
...
...
...
...
...
...
2021-07
10002
Sacramento
CA
3220
NA
5444000
211
22
My start and end date are the same for all cities. I have over 4000 different cities, and would like to plot a 2-yaxis graph for each city, using something similar to the following code:
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots(figsize=(9,9))
color = 'tab:red'
ax1.set_xlabel('Date')
ax1.set_ylabel('Quantity X', color=color)
ax1.plot(df['Quantity x'], color=color)
ax1.tick_params(axis='y', labelcolor=color)
ax2 = ax1.twinx()
color2 = 'tab:blue'
ax2.set_ylabel('Deaths', color=color2)
ax2.plot(df['Deaths'], color=color2)
ax2.tick_params(axis='y', labelcolor=color2)
plt.show()
I would like to create a loop so that the code above runs once for every Code (each Code identifies one City), plotting Quantity x and Deaths, and saves each graph into a folder. How can I create a loop that does that and starts a new graph for every different Code?
Observations: Some values in df['Quantity x'] and df['Population'] are left blank.
If I understood you correctly, you are looking for a filtering functionality:
import matplotlib.pyplot as plt
import pandas as pd
def plot_quantity_and_death(df, output_dir="."):
    """Plot 'Quantity x' and 'Deaths' over 'Date' on twin y-axes and save a PNG.

    Parameters:
        df: DataFrame holding the rows of a single city code. It must
            contain the columns 'Date', 'Code', 'Quantity x' and 'Deaths'.
        output_dir: folder the image is written to; created when missing.
            Defaults to the current working directory.

    The file is named ``Code_<code>.png`` after the first 'Code' value in
    ``df``. Nothing is plotted or saved when ``df`` has no rows.
    """
    import os  # local import keeps the snippet self-contained

    if df.empty:
        # No rows means no code to name the file after.
        return

    fig, ax1 = plt.subplots(figsize=(9, 9))

    color = 'tab:red'
    ax1.set_xlabel('Date')
    ax1.set_ylabel('Quantity X', color=color)
    # Plot against the Date column: the axis is labelled 'Date', and the
    # row index of a filtered frame is just the original row number.
    ax1.plot(df['Date'], df['Quantity x'], color=color)
    ax1.tick_params(axis='y', labelcolor=color)
    ax1.tick_params(axis='x', labelrotation=45)

    # Second y-axis sharing the same x-axis.
    ax2 = ax1.twinx()
    color2 = 'tab:blue'
    ax2.set_ylabel('Deaths', color=color2)
    ax2.plot(df['Date'], df['Deaths'], color=color2)
    ax2.tick_params(axis='y', labelcolor=color2)

    # Save and close this specific figure rather than relying on pyplot's
    # "current figure", then release it so thousands of plots do not pile up.
    os.makedirs(output_dir, exist_ok=True)
    fig.savefig(os.path.join(output_dir, f"Code_{df['Code'].iloc[0]}.png"))
    plt.close(fig)
df = pd.DataFrame() # this needs to be replaced by your dataset
# groupby splits the data into one sub-frame per city code in a single pass,
# instead of re-filtering the whole dataset once for every code.
# sort=False keeps the codes in order of first appearance; rows whose Code is
# missing are skipped rather than producing an empty frame.
for _, city_df in df.groupby('Code', sort=False):
    plot_quantity_and_death(city_df)

plot and draw curves in python matplotlib without ignoring first and last Nan values from the graph figure

I have csv format file like the below table
depth
x1
x2
x3
1000
15
Nan
Nan
1001
10
Nan
Nan
1002
5
Nan
Nan
1003
8
10
Nan
1004
12
11.11111111
Nan
1010
13
17.77777778
14.16666667
1011
14
18.88888889
15
1012
15
20
15.71428571
1013
16
20.55555556
16.42857143
1014
17
21.11111111
17.14285714
1017
20
22.77777778
19.28571429
1018
21
23.33333333
20
1019
22
23.88888889
20.83333333
1024
27
17.5
25
1025
28
15
25
1026
25
Nan
Nan
1027
26
Nan
Nan
1028
7
Nan
Nan
I want to plot the x1, x2 and x3 columns versus the depth column, but these columns sometimes contain NaN values at the start and end. I want every curve to be drawn over the full depth range, without the leading and trailing NaN rows being cut off the axis.
The code below is my attempt to plot the curves, but each plot always starts and ends at the first and last valid values and ignores the leading and trailing NaN values:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
df = pd.read_csv("result.csv")
fig = plt.figure(figsize=(15, 12), dpi=100, tight_layout=True)
gs = gridspec.GridSpec(nrows=1, ncols=5, wspace=0)
fig.add_subplot(gs[0, 1])
plt.plot(df['x1'],df["depth"], linewidth=2, color='black', marker="o", markersize=3)
plt.gca().invert_yaxis()
fig.add_subplot(gs[0,2 ])
plt.plot(df["x2"],df["depth"], linewidth =2, color='black', marker="o", markersize=3)
plt.gca().invert_yaxis()
fig.add_subplot(gs[0,3])
plt.plot(df["x3"],df["depth"], linewidth =2, color='black', marker="o", markersize=3)
plt.gca().invert_yaxis()
plt.show()
The current result:
The desired result is shown in the image below, where the y-axes of all curves start from the same depth:
You need to share the y axis with the other y axis:
fig, axs = plt.subplots(
    1, 3,
    figsize=(15, 12),
    dpi=100,
    tight_layout=True,
    gridspec_kw={'wspace': 0},
)
# One panel per curve. Every panel after the first is tied to the first
# panel's y-axis right after it has been drawn.
first_panel = axs[0]
for panel, column in zip(axs, ('x1', 'x2', 'x3')):
    panel.plot(df[column], df.depth, '-ok', lw=2, ms=3)
    if panel is not first_panel:
        panel.sharey(first_panel)

Different binning for histplot as JointGrid (x,y) marginal plot

I have a pandas dataframe like this:
Date
Weight
Year
Month
Day
Week
DayOfWeek
0
2017-11-13
76.1
2017
11
13
46
0
1
2017-11-14
76.2
2017
11
14
46
1
2
2017-11-15
76.6
2017
11
15
46
2
3
2017-11-16
77.1
2017
11
16
46
3
4
2017-11-17
76.7
2017
11
17
46
4
...
...
...
...
...
...
...
...
I created a JointGrid with:
g = sns.JointGrid(data=df,
x="Date",
y="Weight",
marginal_ticks=True,
height=6,
ratio=2,
space=.05)
Then I defined the joint and marginal plots:
g.plot_joint(sns.scatterplot,
hue=df["Year"],
alpha=.4,
legend=True)
g.plot_marginals(sns.histplot,
multiple="stack",
bins=20,
hue=df["Year"])
Result is this.
Now the question is: "is it possible to specify different binning for the two histplot resulting in the x and y marginal plot?"
I don't think there is a built-in way to do that, but you can plot directly on the marginal axes using the plotting function of your choice, like so:
penguins = sns.load_dataset('penguins')
data = penguins
x_col, y_col, hue_col = "bill_length_mm", "bill_depth_mm", "species"

g = sns.JointGrid(data=data, x=x_col, y=y_col, hue=hue_col)
g.plot_joint(sns.scatterplot)

# Options common to both marginal histograms; only the plotted variable,
# the bin count and the target axes differ between them.
marginal_opts = dict(data=data, hue=hue_col, legend=False, multiple='stack')
# top marginal: 5 bins along x
sns.histplot(x=x_col, bins=5, ax=g.ax_marg_x, **marginal_opts)
# right marginal: 40 bins along y
sns.histplot(y=y_col, bins=40, ax=g.ax_marg_y, **marginal_opts)

error in dataframe loc when applying these two dataframes

I'm trying to put these two dataframes (data2 and trades) together to make a plot that looks like this: https://i.stack.imgur.com/pR8bW.png
data2:
Close
2015-08-28 113.290001
2015-08-31 112.760002
2015-09-01 107.720001
2015-09-02 112.339996
2015-09-03 110.370003
2015-09-04 109.269997
2015-09-08 112.309998
2015-09-09 110.150002
2015-09-10 112.570000
2015-09-11 114.209999
trades:
Trades
2015-08-28 3.0
2015-08-31 3.0
2015-09-01 3.0
2015-09-02 3.0
2015-09-03 2.0
code:
import matplotlib.pyplot as plt
fig = plt.figure()
ax1 = fig.add_subplot(111, ylabel='Portfolio value in $')
data2["Close"].plot(ax=ax1, lw=2.)
ax1.plot(data2.loc[trades.Trades == 2.0].index, data2.total[trades.Trades == 2.0],
'^', markersize=10, color='m')
ax1.plot(data2.loc[trades.Trades == 3.0].index,
data2.total[trades.Trades == 3.0],
'v', markersize=10, color='k')
plt.show()
But this gives the following error:
---------------------------------------------------------------------------
IndexingError Traceback (most recent call last)
<ipython-input-38-9cde686354a8> in <module>()
7 data2["Close"].plot(ax=ax1, lw=2.)
8
----> 9 ax1.plot(data2.loc[trades.Trades == 2.0].index, data2.total[trades.Trades == 2.0],
10 '^', markersize=10, color='m')
11 ax1.plot(data2.loc[trades.Trades == 3.0].index,
3 frames
/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py in check_bool_indexer(index, key)
2316 if mask.any():
2317 raise IndexingError(
-> 2318 "Unalignable boolean Series provided as "
2319 "indexer (index of the boolean Series and of "
2320 "the indexed object do not match)."
IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match).
The indexes of the two data frames are different, so a boolean Series built from trades cannot be used directly to index data2. I've taken the approach of defining masks for the data2 dataframe that are based on values in the trades dataframe, and it works.
Additionally, your sample code referred to a column named total, which does not exist. I've updated it to use Close.
import pandas as pd
import io
import matplotlib.pyplot as plt
# Sample data: the header names one column, each row has two fields, so the
# first field (the date) becomes the index. Raw strings keep the regex
# separator free of invalid-escape warnings.
data2 = pd.read_csv(io.StringIO(""" Close
2015-08-28 113.290001
2015-08-31 112.760002
2015-09-01 107.720001
2015-09-02 112.339996
2015-09-03 110.370003
2015-09-04 109.269997
2015-09-08 112.309998
2015-09-09 110.150002
2015-09-10 112.570000
2015-09-11 114.209999"""), sep=r"\s+")
trades = pd.read_csv(io.StringIO(""" Trades
2015-08-28 3.0
2015-08-31 3.0
2015-09-01 3.0
2015-09-02 3.0
2015-09-03 2.0"""), sep=r"\s+")

# make sure it's dates
data2 = data2.reset_index().assign(index=lambda x: pd.to_datetime(x["index"])).set_index("index")
trades = trades.reset_index().assign(index=lambda x: pd.to_datetime(x["index"])).set_index("index")

fig = plt.figure()
ax1 = fig.add_subplot(111, ylabel='Portfolio value in $')
data2["Close"].plot(ax=ax1, lw=2.)

# Select the dates on which each trade value occurs. The boolean Series has
# to filter trades.index; taking `.index` of the comparison itself would
# return every trade date regardless of the value, making both masks equal.
mask2 = data2.index.isin(trades.index[trades.Trades == 2.0])
mask3 = data2.index.isin(trades.index[trades.Trades == 3.0])

# Trades == 2.0 -> magenta up-triangles, Trades == 3.0 -> black down-triangles
ax1.plot(data2.index[mask2], data2.Close[mask2],
         '^', markersize=10, color='m')
ax1.plot(data2.index[mask3], data2.Close[mask3],
         'v', markersize=10, color='k')
plt.show()
output

X-Axis scales not matching with 2 data sets on same plot

I have 2 datasets that I'm trying to plot on the same figure. They share a common column that I'm using for the X-axis, however one of my sets of data is collected annually and the other monthly so the number of data points in each set is significantly different.
Pyplot is not plotting the X values for each set where I would expect when I plot both sets on the same graph
When I plot just my annually collected data set I get:
When I plot just my monthly collected data set I get:
But when I plot the two sets overlayed (code below) I get:
tframe:
10003 Date
0 257 201401
1 216 201402
2 417 201403
3 568 201404
4 768 201405
5 836 201406
6 798 201407
7 809 201408
8 839 201409
9 796 201410
tax_for_zip_data:
TAX BRACKET $1 under $25,000 ... Date
2 5740 ... 201301
0 5380 ... 201401
1 5320 ... 201501
3 5030 ... 201601
So I did as wwii suggested in the comments and converted my Date columns to datetime objects:
tframe:
10003 Date
0 257 2014-01-31
1 216 2014-02-28
2 417 2014-03-31
3 568 2014-04-30
4 768 2014-05-31
5 836 2014-06-30
6 798 2014-07-31
7 809 2014-08-31
8 839 2014-09-30
9 796 2014-10-31
tax_for_zip_data:
TAX BRACKET $1 under $25,000 ... Date
2 5740 ... 2013-01-31
0 5380 ... 2014-01-31
1 5320 ... 2015-01-31
3 5030 ... 2016-01-31
But the dates are still plotting offset,
None of my data goes back to 2012- Jan 2013 is the earliest. The tax_for_zip_data are all offset by a year. If I plot just that set alone it plots properly.
fig, ax1 = plt.subplots(sharex = True)
color = "tab:red"
ax1.set_xlabel('Date')
ax1.set_ylabel('Trips', color = color)
tframe.plot(kind = 'line',x = 'Date', y = "10003", ax = ax1, color = color)
ax1.tick_params(axis = 'y', labelcolor = color)
ax2 = ax1.twinx()
color = "tab:blue"
ax2.set_ylabel('Num Returns', color = color)
tax_for_zip_data.plot(kind = 'line', x = 'Date', y = tax_for_zip_data.columns[:-1], ax = ax2)
ax2.tick_params(axis = 'y', labelcolor = color)
plt.show()
If you can make the DataFrame index a datetime index plotting is easier.
# Columns are separated by TWO spaces on purpose: the r'\s{2,}' delimiter
# splits only on runs of two or more whitespace characters, so headers that
# contain single spaces ('TAX BRACKET', '$1 under $25,000') stay intact.
s = '''10003  Date
257  201401
216  201402
417  201403
568  201404
768  201405
836  201406
798  201407
809  201408
839  201409
796  201410
'''
df1 = pd.read_csv(io.StringIO(s), delimiter=r'\s{2,}', engine='python')
# Index by the parsed month (YYYYMM -> first day of that month).
df1.index = pd.to_datetime(df1['Date'], format='%Y%m')

s = '''TAX BRACKET  $1 under $25,000  Date
2  5740  201301
0  5380  201401
1  5320  201501
3  5030  201601
'''
df2 = pd.read_csv(io.StringIO(s), delimiter=r'\s{2,}', engine='python')
df2.index = pd.to_datetime(df2['Date'], format='%Y%m')
You don't need to specify an argument for plot's x parameter.
fig, ax1 = plt.subplots(sharex=True)

# Left axis: trips from df1, drawn and labelled in red.
color = "tab:red"
ax1.set_xlabel('Date')
ax1.set_ylabel('Trips', color=color)
df1.plot(y="10003", kind='line', ax=ax1, color=color)
ax1.tick_params(axis='y', labelcolor=color)

# Right axis: a twin of ax1 for the returns from df2, labelled in blue.
# Neither plot call passes x, so each frame is drawn against its own index.
ax2 = ax1.twinx()
color = "tab:blue"
ax2.set_ylabel('Num Returns', color=color)
df2.plot(y='$1 under $25,000', kind='line', ax=ax2)
ax2.tick_params(axis='y', labelcolor=color)

plt.show()
plt.close()

Categories