I was able to run the "mpl_finance" candlestick_ohlc function and the graph appeared as expected, using the following (only relevant) code:
# Tick placement: a major tick on every Monday, a minor tick on every day.
mondays = WeekdayLocator(MONDAY)
alldays = DayLocator()
weekFormatter = DateFormatter('%b %d')  # renders like "Jan 12"
dayFormatter = DateFormatter('%d')  # renders like "12"; not used below

fig, ax = plt.subplots()
fig.subplots_adjust(bottom=0.2)

x_axis = ax.xaxis
x_axis.set_major_locator(mondays)
x_axis.set_minor_locator(alldays)
x_axis.set_major_formatter(weekFormatter)

# One (date number, open, high, low, close) row per quote.
date_numbers = mdates.date2num(quotes.index.to_pydatetime())
ohlc_rows = zip(date_numbers,
                quotes['open'], quotes['high'],
                quotes['low'], quotes['close'])
candlestick_ohlc(ax, ohlc_rows, width=0.6)

ax.xaxis_date()
ax.autoscale_view()

# `ax` is the current axes here, so its tick labels are the ones being styled.
plt.setp(ax.get_xticklabels(), rotation=45, horizontalalignment='right')
plt.title('PETR4 daily quotes')
plt.show()
Now I would like to "add" on this graph (say) a horizontal red line at y = 26.5 ... how should I proceed?
(My real question is: how/where should I type something like axhline(...) so that I am able to make new data appear inside the same graph?)
Thanks!
Sure, DavidG. Thanks again for your help. Hope to see you in other posts.
Interested readers will be able to adapt the "real stuff" below (it's working)!
# Candlestick chart of the daily quotes with the closing prices overlaid as
# blue diamonds.

# rcParams only supplies the default size for figures created afterwards, so
# it is set before plt.subplots() to make it apply to this figure.
plt.rcParams['figure.figsize'] = [10, 10]

mondays = WeekdayLocator(MONDAY)  # major ticks on the mondays
alldays = DayLocator()  # minor ticks on the days
weekFormatter = DateFormatter('%b %d')  # e.g., Jan 12
dayFormatter = DateFormatter('%d')  # e.g., 12

fig, aux = plt.subplots()
fig.subplots_adjust(bottom=0.2)
aux.xaxis.set_major_locator(mondays)
aux.xaxis.set_minor_locator(alldays)
aux.xaxis.set_major_formatter(weekFormatter)

candlestick_ohlc(aux, zip(mdates.date2num(quotes.index.to_pydatetime()),
                          quotes['open'], quotes['high'],
                          quotes['low'], quotes['close']),
                 width=0.6)

# Overlay the closing prices on the same axes. A single call draws every
# marker, so no loop is needed; repeating it would only stack identical
# markers on top of each other.
aux.plot(quotes.index, quotes.close, 'd', color='blue')

aux.xaxis_date()
aux.autoscale_view()
plt.setp(aux.get_xticklabels(), rotation=45, horizontalalignment='right')
plt.title('USIM5 daily quotes')

# Render the figure. Passing the candlestick_ohlc function to display() would
# only echo the function object, not the chart.
plt.show()
(The blue dots were added to the graph created by the module used/mentioned.)
Regards,
fskilnik
Related
I have data for all the time I've spent coding. This data is represented as a dictionary where the key is the date and the value is a list of tuples containing the time I started a coding session and how long the coding session lasted.
I have successfully plotted this on a broken_barh using the below code, where the y-axis is the date, the x-axis is the time in that day and each broken bar is an individual session.
# One row of bars per dictionary entry: row i holds that entry's sessions and
# is one unit tall.
for i, subSessions in enumerate(sessions.values()):
    plt.broken_barh(subSessions, (i, 1))

# Map each month label ('%Y-%m') to its y position: the number of days between
# `start` and the first day of that month.
months = {}
start = getStartMonth()
for month in period_range(start=start, end=datetime.today(), freq="M"):
    label = str(month)
    months[label] = (datetime.strptime(label, '%Y-%m') - start).days

# Pass real lists so the tick locations and labels are plain sequences.
plt.yticks(list(months.values()), list(months.keys()))

# The x-axis is in seconds since midnight: one tick per hour, labelled "H:00".
plt.xticks(range(0, 24 * 3600, 3600),
           [f"{hour}:00" for hour in range(24)],
           rotation=45)
plt.gca().invert_yaxis()
plt.show()
I want to use this data to discover what times of the day I spend the most time coding, but it isn't very clear from the above chart so I'd like to display it as a line graph or heatmap where the y-axis is the number of days I spent coding at the time on the x-axis (or, in other words, how many sessions are present in that column of the above chart). How do I accomplish this?
You can find some great examples of how to create a heatmap from matplotlib website.
Here is a basic code with some random data:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Row labels 0..23; column labels are the days of January 2022 as MM/DD.
index_labels = np.arange(24)
january_days = pd.date_range(start='1/1/2022', end='1/31/2022')
column_labels = january_days.strftime('%m/%d')

# Reproducible random data: one integer in [0, 60) per (row, column) cell.
np.random.seed(12345)
n_rows, n_cols = len(index_labels), len(column_labels)
data = np.random.randint(0, 60, size=(n_rows, n_cols))
df = pd.DataFrame(data, index=index_labels, columns=column_labels)
#heatmap function
def heatmap(df, ax, cbarlabel="", cmap="Greens", label_num_dec_place=0):
    """Draw ``df`` as a heatmap on ``ax`` with a colorbar and labelled ticks.

    Args:
        df: DataFrame whose values are drawn as the image; its columns label
            the x ticks and its index labels the y ticks.
        ax: Matplotlib Axes to draw on.
        cbarlabel: Label for the colorbar.
        cmap: Colormap passed to ``imshow``.
        label_num_dec_place: Kept for backward compatibility; not used.

    Returns:
        Tuple ``(im, cbar)``: the image created by ``imshow`` and its colorbar.
    """
    # The frame is only read, so no defensive copy is needed.
    im = ax.imshow(df.values, cmap=cmap)

    # Customized colorbar attached to the same axes.
    cbar = ax.figure.colorbar(im, ax=ax, fraction=0.05, extend='both', extendfrac=0.05)
    cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom", fontsize=14)

    # One tick per column / row, labelled from the frame.
    ax.set_xticks(np.arange(df.shape[1]), labels=df.columns, fontsize=12)
    ax.set_yticks(np.arange(df.shape[0]), labels=list(df.index), fontsize=12)

    # Column labels go on top of the image instead of below it.
    ax.tick_params(axis='x', top=True, bottom=False,
                   labeltop=True, labelbottom=False)

    ax.spines[:].set_visible(False)
    # No grid over the image. A grid() call that passes line properties always
    # switches the grid on (and the string "False" is truthy anyway), so the
    # grid is simply turned off here.
    ax.grid(False)

    # Rotation of tick labels
    plt.setp(ax.get_xticklabels(), rotation=-60,
             ha="right", rotation_mode=None)
    plt.setp(ax.get_yticklabels(), rotation=30)

    return im, cbar
#plotting and saving
# White 20x8 inch figure at 200 dpi.
white = (1, 1, 1)
fig, ax = plt.subplots(figsize=(20, 8), dpi=200, facecolor=white)
heatmap(df=df, ax=ax, cbarlabel="time (min)", cmap="Greens", label_num_dec_place=0)

# Save with a tight bounding box, reusing the figure's own face colour.
save_options = {
    'bbox_inches': 'tight',
    'facecolor': fig.get_facecolor(),
    'transparent': True,
}
plt.savefig('time_heatmap.png', **save_options)
Output:
One way to do it is to use sampling. Choose how many samples you want to take in a given interval (the precision, for example 288 samples per day) and split each interval by that number of samples and count how many sessions are within this sample. The downside to this is that it can't be 100% precise and increasing the precision increases the time it takes to generate (for me, it takes several minutes to generate a second-precise image, though this level of precision makes little to no difference to the result).
Here is some code which can produce both a heatmap and a line graph
# Configuration options
precisionPerDay = 288  # number of samples (x positions) per day
timeTicksPerDay = 24  # number of labelled ticks on the time axis
timeTickRotation = 60
timeTickFontSize = 6
heatmap = True  # True: draw a heatmap; False: draw a line graph

# Constants
hoursInDay = 24
secondsInHour = 3600
secondsInDay = hoursInDay * secondsInHour
xInterval = secondsInDay / precisionPerDay  # seconds covered by one sample
# NOTE: despite its name, this is the number of samples between two ticks.
timeTickSecondInterval = precisionPerDay / timeTicksPerDay
timeTickHourInterval = hoursInDay / timeTicksPerDay

# Calculating x-axis (time) ticks
xAxis = range(precisionPerDay)
timeTickLabels = []
timeTickLocations = []
for timeTick in range(timeTicksPerDay):
    # Integer arithmetic avoids float truncation, which could otherwise drop
    # a sample or a minute when the division is not exact in binary.
    timeTickLocations.append(timeTick * precisionPerDay // timeTicksPerDay)
    minuteOfDay = timeTick * hoursInDay * 60 // timeTicksPerDay
    hour, minute = divmod(minuteOfDay, 60)
    timeTickLabels.append(f"{hour:02d}:{minute:02d}")
# Calculating y-axis (height)
# heights[k] counts the sessions that overlap sample k, i.e. the window
# [k * xInterval, (k + 1) * xInterval) measured in seconds.
heights = []
for dayX in xAxis:
    rangeStart = dayX * xInterval
    rangeEnd = rangeStart + xInterval
    y = 0
    # The loop names must not reuse `sessions`: rebinding it would replace the
    # dictionary with one entry's list and break the next pass of the outer loop.
    for daySessions in sessions.values():
        for sessionStart, sessionLength in daySessions:
            if sessionStart < rangeEnd and sessionStart + sessionLength > rangeStart:
                y += 1
    heights.append(y)
# Plotting data
# NOTE(review): the branch bodies are reconstructed from flattened source;
# ylim is assumed to belong to the line-graph branch only — confirm.
if heatmap:
    # A single-row image: one cell per sample, coloured by its count.
    plt.yticks([])
    plt.imshow([heights], aspect="auto")
else:
    plt.plot(xAxis, heights)
    plt.ylim(ymin=0)
plt.xlim(xmin=0, xmax=len(heights))
plt.xlabel("Time of day")
plt.ylabel("How often I've coded at that time")
plt.xticks(timeTickLocations, timeTickLabels,
           fontsize=timeTickFontSize, rotation=timeTickRotation)
plt.show()
And here are some sample results
Graph produced by same configuration options shown in above code
Same data but as a line graph with a lower precision (24 per day) and more time ticks (48)
How are x-ticks set manually in seaborn (sns) in Python?
This might be a duplicate of: How to set x axis ticklabels in a seaborn plot, but the solution did not work for us.
We would like the x-ticks to start from 2020-01, but as data is only available from 2020-02, it doesn't automatically start the x-ticks on 2020-01. Although it will be an empty space, we would still like to include 2020-01. Following is the function.
def create_lineplot(dataframe):
    """Plot the 2020 temperature readings as lines with monthly x ticks.

    Keeps the ``dev_id``, ``temp_20``, ``temp_60`` and ``datetime`` columns,
    restricts the rows to the year 2020, draws one line per temperature
    column, saves the figure to ``lineplot.png`` and shows it.

    Args:
        dataframe: DataFrame containing at least the four columns named
            above; ``datetime`` must be convertible by ``pd.to_datetime``.
    """
    # Explicit copy so the column assignment below cannot trigger pandas'
    # SettingWithCopyWarning.
    dataframe = dataframe[['dev_id', 'temp_20', 'temp_60', 'datetime']].copy()
    dataframe["datetime"] = pd.to_datetime(dataframe["datetime"])
    soil = dataframe[dataframe['datetime'].dt.year == 2020]

    # Long format: one row per (datetime, dev_id, temperature column).
    mdf = pd.melt(soil, id_vars=['datetime', 'dev_id'], var_name='Temperature')

    # relplot is figure-level and creates its own figure, so the tick setup
    # has to target the grid's axes; a separately created subplot would stay
    # empty and the plotted axes would keep their default ticks.
    g = sns.relplot(data=mdf, x='datetime', y='value', kind='line',
                    hue='Temperature', height=5, aspect=3)
    g._legend.remove()
    axes = g.axes.flat[0]

    # Separate locator instances: a locator should serve a single tick set.
    axes.xaxis.set_major_locator(mdates.MonthLocator())
    axes.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    axes.xaxis.set_minor_locator(mdates.MonthLocator())
    # Start the axis at January 2020 even when the data begins later, so the
    # 2020-01 tick appears without relabelling (and thereby shifting) the data.
    axes.set_xlim(left=pd.Timestamp('2020-01-01'))

    plt.xticks(rotation='vertical')
    plt.tight_layout()
    plt.legend(loc='upper right')
    plt.savefig('lineplot.png')
    plt.show()
When we include following:
g.set_xticklabels(['2020-01','2020-02','2020-03','2020-04','2020-05','2020-06','2020-07','2020-08', '2020-09', '2020-10', '2020-11', '2020-12'])
between
g = sns.relplot(data=mdf, x='datetime', y='value', kind='line', hue='Temperature', height=5, aspect=3)
--- HERE ---
g._legend.remove()
then the tick is added as desired, but the values are stretched so it seems like there is data in 2020-01 as well.
Following is an example of the data:
Bonus
How to align the ticks after adding a new one?
I am looking to add a shaded box to my plot below. I want the box to go from Aug 25-Aug 30 and to run the length of the Y axis.
The following is my code for the two plots I have made...
# Load the measurements and pull out the three columns that get plotted.
df = pd.read_excel('salinity_temp.xlsx')
dates, sal, temp = df['Date'], df['Salinity'], df['Temperature']

# Two stacked panels sharing the x-axis: salinity on top, temperature below.
fig, axes = plt.subplots(2, 1, figsize=(8, 8), sharex=True)
salinity_ax, temperature_ax = axes

salinity_ax.plot(dates, sal, lw=5, color="red")
salinity_ax.set_ylabel('Salinity (PSU)')
salinity_ax.set_title('Salinity', fontsize=14)

temperature_ax.set_title('Temperature', fontsize=14)
temperature_ax.plot(dates, temp, lw=5, color="blue")
temperature_ax.set_ylabel('Temperature (C)')
temperature_ax.set_xlabel('Dates, 2017', fontsize=12)

# Date ticks formatted like "Aug 25" on both panels, then both x-axes are
# marked as date axes.
for panel in (temperature_ax, salinity_ax):
    panel.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%b %d'))
for panel in (temperature_ax, salinity_ax):
    panel.xaxis_date()
I want the shaded box to highlight when Hurricane Harvey hit Houston, Texas (Aug 25- Aug 30). My data looks like:
Date Salinity Temperature
20-Aug 15.88144647 31.64707184
21-Aug 18.83088846 31.43848419
22-Aug 19.51015264 31.47655487
23-Aug 23.41655369 31.198349
24-Aug 25.16410124 30.63014984
25-Aug 25.2273574 28.8677597
26-Aug 28.35557667 27.49458313
27-Aug 18.52829235 25.92834473
28-Aug 7.423231661 24.06635284
29-Aug 0.520394177 23.47881317
30-Aug 0.238508327 23.90857697
31-Aug 0.143210364 24.30892944
1-Sep 0.206473387 25.20442963
2-Sep 0.241343182 26.32663727
3-Sep 0.58000503 26.93431854
4-Sep 1.182055098 27.8212738
5-Sep 3.632014919 28.23947906
6-Sep 4.672006985 27.29686737
7-Sep 5.938766377 26.8693161
8-Sep 9.107671159 26.48963928
9-Sep 8.180587303 26.05213165
10-Sep 6.200532091 25.73104858
11-Sep 5.144526191 25.60035706
12-Sep 5.106032451 25.73139191
13-Sep 4.279492562 26.06132507
14-Sep 5.255868992 26.74919128
15-Sep 8.026764063 27.23724365
I have tried using the rectangle function in this link (https://discuss.analyticsvidhya.com/t/how-to-add-a-patch-in-a-plot-in-python/5518) however can't seem to get it to work properly.
Independent of your specific data, it sounds like you need axvspan. Try running this after your plotting code:
# Shade the same date range on every subplot; with no ymin/ymax given,
# axvspan covers the full height of the axes.
for ax in axes:
    ax.axvspan('2017-08-25', '2017-08-30', color='black', alpha=0.5)
This will work if dates = df['Date'] is stored as type datetime64. It might not work with other datetime data types, and it won't work if dates contains date strings.
I want to draw a plot in matplotlib that shows the temperature of August in 2016 and 2017. x-axis is time and y-axis is temperature. I try to stack 2 plots (one for 2016, one for 2017) on top of each other by sharing the x-axis that ranges from 2016-08-01 00:00:00 to 2016-08-31 23:00:00 and showing only the day of the month.
import matplotlib.dates as mdates
myFmt = mdates.DateFormatter('%d')

# times series from 2016-08-01 00:00:00 to 2016-08-31 23:00:00
x = stats_august_2016.MESS_DATUM
# temperature in 08.2016
y1 = stats_august_2016.TT_TU
# temperature in 08.2017
y2 = stats_august_2017.TT_TU

# rcParams only supplies the default size for figures created afterwards, so
# it is set before plt.subplots() to make it apply to this figure.
plt.rcParams["figure.figsize"] = (12, 8)
f, ax = plt.subplots()

# Both years are drawn against the 2016 timestamps so they share one x-axis.
# plot temp in 08.2016
ax.plot(x, y1, 'yellow', label = '2016')
# plot temp in 08.2017
ax.plot(x, y2, 'red', label = '2017')

# format x-axis to show only days of the month
ax.xaxis.set_major_formatter(myFmt)
ax.grid(True)

plt.xlabel("Day of the Month", fontsize = 20, color = 'Blue')
plt.xticks(fontsize = 15)
# Raw string: "\d" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"Temperature ($\degree$C)", fontsize = 20, color = 'Blue')
plt.yticks(fontsize = 15)
ax.set_ylim(5, 35)
plt.title("Temperature in August 2016 and 2017", fontsize = 30, color = 'DarkBlue')
plt.legend(prop = {'size': 20}, frameon = True, fancybox = True, shadow = True, framealpha = 1, bbox_to_anchor=(1.22, 1))
plt.show()
Everything looks fine except the last tick of x-axis is somehow 2016-09-01 00:00:00. And the result looks odd with the 1 at the end.
How can I fix this?
The problem is that your data ranges until late on the 31st of August of each year
# times series from 2016-08-01 00:00:00 to 2016-08-31 23:00:00
Matplotlib is then autoscaling the axis reaching up to the first day of the next month, displayed as a 1 in your chosen format. If you want to avoid this, you can set the x limit of the axis to the last timestamp of your data
# Assumes x is a pandas Series (a DataFrame column, as in the question): pick
# the first and last values by position, because x[-1] would be looked up as
# an index label rather than as "the last element".
ax.set_xlim([x.iloc[0], x.iloc[-1]])
The whitespace margin left and right of your axis will disappear then, though. If you want to keep this margin and still want to avoid the ticklabel of 1st of september, you can hide the last x-tick label with
# Hide only the label of the last major x tick; the axis limits are untouched.
last_major_tick = ax.xaxis.get_major_ticks()[-1]
last_major_tick.label1.set_visible(False)
try:
# Put the right edge of the x-axis at 2016-08-30 00:00. NOTE(review): the
# question's data runs to 2016-08-31 23:00, so points after this timestamp
# fall outside the visible range.
ax.set_xlim(right=pd.Timestamp("2016-08-30 00:00:00"))
This will set the limit to day 30th.
I'm not managing to plot matplotlib.finance.candlestick without the weekends (blank spaces between every 5 candlesticks). The example from Matplotlib's website doesn't exclude weekends either and the way to exclude weekends on other plots doesn't seem to apply to CandleSticks.
Has anybody come across this before?
ps. as requested, here is the example:
#!/usr/bin/env python
from pylab import *
from matplotlib.dates import DateFormatter, WeekdayLocator, HourLocator, \
DayLocator, MONDAY
from matplotlib.finance import quotes_historical_yahoo, candlestick,\
plot_day_summary, candlestick2
# (Year, month, day) tuples suffice as args for quotes_historical_yahoo
# Start and end of the quote range as (year, month, day) tuples.
date1 = ( 2004, 2, 1)
date2 = ( 2004, 4, 12 )
mondays = WeekdayLocator(MONDAY) # major ticks on the mondays
alldays = DayLocator() # minor ticks on the days
weekFormatter = DateFormatter('%b %d') # Eg, Jan 12
# NOTE(review): dayFormatter is not used anywhere in the visible code.
dayFormatter = DateFormatter('%d') # Eg, 12
# Quotes for the 'INTC' ticker between date1 and date2.
quotes = quotes_historical_yahoo('INTC', date1, date2)
# figure, setp, gca and show are not imported by name; presumably they come
# from the star import of pylab.
fig = figure()
fig.subplots_adjust(bottom=0.2)
ax = fig.add_subplot(111)
ax.xaxis.set_major_locator(mondays)
ax.xaxis.set_minor_locator(alldays)
ax.xaxis.set_major_formatter(weekFormatter)
#plot_day_summary(ax, quotes, ticksize=3)
# The x position of each candle comes from the quotes themselves, so days
# without a quote (the weekends the question asks about) leave a gap.
candlestick(ax, quotes, width=0.6)
ax.xaxis_date()
ax.autoscale_view()
# Rotate the tick labels of the current axes by 45 degrees, right-aligned.
setp( gca().get_xticklabels(), rotation=45, horizontalalignment='right')
show()
After your 'quotes' line:
# Replace each quote's first field with the quote's position in the list and
# keep the remaining fields unchanged.
weekday_quotes = [(position, *quote[1:]) for position, quote in enumerate(quotes)]
then
# Plot the re-indexed quotes: the first field of each row is now a running
# position (0, 1, 2, ...) instead of a date.
candlestick(ax, weekday_quotes, width=0.6)
This will plot the data without the gaps between weekdays, now you have to change the xticks back to dates, preferably mondays. Assuming your first quote was a monday:
import matplotlib.dates as mdates
# One tick every fifth quote, labelled with that quote's original date
# formatted like "Feb-02".
ax.set_xticks(range(0, len(weekday_quotes), 5))
tick_dates = (mdates.num2date(quotes[index][0]) for index in ax.get_xticks())
ax.set_xticklabels([tick_date.strftime('%b-%d') for tick_date in tick_dates])
This is pretty gross but seems to get the job done - good luck!
While @JMJR's answer works, I find this to be more robust:
def plot(x):
    """Plot ``close`` per ``code`` against a gap-free date position.

    Every distinct ``baropen_datetime`` index value is numbered 1, 2, 3, ...
    in sorted order, and that number is used as the x coordinate, so dates
    that are absent from the data leave no gap on the axis.

    Args:
        x: DataFrame whose index has the levels ``baropen_datetime`` and
            ``code`` and which provides ``close`` values. An ``idx`` column
            holding the positions is added to it in place.
    """
    plt.figure()
    plt.title("VIX curve")

    datetimes = x.index.get_level_values('baropen_datetime')
    # give each date an index (1-based, in sorted order)
    position_of = {
        stamp: position
        for position, stamp in enumerate(sorted(datetimes.unique()), start=1)
    }
    # use the index
    x['idx'] = [position_of[stamp] for stamp in datetimes]

    # plot using index: one labelled line per code. A plain loop draws each
    # group exactly once and does not collect the plt.plot() return values.
    for code, group in x.groupby('code'):
        plt.plot(group['idx'].values, group['close'].values, label=code)

    plt.legend()
    plt.show()
    plt.close()