How can I use a date from a Sqlite database on the x-axis to make a bar graph with matplotlib?
If I convert the date to unix timestamp the graph works, but I would like to get something like this: http://i.stack.imgur.com/ouKBy.png
lowestNumber = self.c.execute('SELECT number,date, time FROM testDB ORDER BY number ASC LIMIT 1')
for rows in lowestNumber:
datesLow = rows[1]#returns 2016-02-23
splitDate = datesLow.split('-' )
spaces = ""
# tabs = '/'
# tabsDatesLow = tabs.join( splitDate )
joinDatesLow = spaces.join( splitDate )
x = int(joinDatesLow)
plt.bar(x,low, label="Minimum number of players", color="red")
plt.show()
You need to have an integer time format for plotting dates in matplotlib, and then a date formatting object is passed to format the axes. Matplotlib's date2num function can do this for you. Another good example is Matplotlib's documentation with an example here: http://matplotlib.org/examples/pylab_examples/date_demo1.html. Here is a solution yo may find useful:
import datetime
import matplotlib.pyplot as plt
from matplotlib.dates import AutoDateLocator, AutoDateFormatter, date2num
#make my own data:
date = '2016-02-23'
low = 10
#how to format dates:
date_datetime = datetime.datetime.strptime(date, '%Y-%m-%d')
int_date = date2num( date_datetime)
#create plots:
fig, ax = plt.subplots()
#plot data:
ax.bar(int_date,low, label="Minimum number of players", color="red")
#format date strings on xaxis:
locator = AutoDateLocator()
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter( AutoDateFormatter(locator) )
#adjust x limits and apply autoformatter fordisplay of dates
min_date = date2num( datetime.datetime.strptime('2016-02-16', '%Y-%m-%d') )
max_date = date2num( datetime.datetime.strptime('2016-02-28', '%Y-%m-%d') )
ax.set_xlim([min_date, max_date])
fig.autofmt_xdate()
#show plot:
plt.show()
Related
I want to plot date vs time graph using matplot lib. The issue I am facing is that due to access of data many lines are showing on the xaxis and I can't find a way to plot my time on xaxis cleanly with one hour gap. Say i have data in my list as string as ['6:01','6:30','7:20','7:25']. I want to divide my xaxis from 6:00 to 7:00 and the time points between them should be plotted based on time.
Note: time list is just and example I want to do this for whole 24 hour.
I tried to use ticks and many other options to complete my task but unfortunatly I am stuck at this problem. My data is in csv file.
Below is my code:
def arrivalGraph():
from datetime import datetime, timedelta
from matplotlib import pyplot as plt
from matplotlib import dates as mpl_dates
with open("Timetable2021.csv","r") as f:
fileData = f.readlines()
del fileData[0]
date = []
train1 = []
for data in fileData:
ind = data.split(",")
date.append(datetime.strptime(ind[0],"%d/%m/%Y").date())
train1Time = datetime.strptime(ind[1],"%H:%M").time()
train1.append(train1Time.strftime("%H:%M"))
plt.style.use("seaborn")
plt.figure(figsize = (10,10))
plt.plot_date(train1,date)
plt.gcf().autofmt_xdate()#gcf is get current figure - autofmt is auto format
dateformater = mpl_dates.DateFormatter("%b ,%d %Y")
plt.gca().xaxis.set_major_formatter(dateformater) # to format the xaxis
plt.xlabel("Date")
plt.ylabel("Time")
plt.title("Train Time vs Date Schedule")
plt.tight_layout()
plt.show()
When i run the code i get the following output:
output of above code
Assuming that every single minute that every single minute is present in train1 (i.e. train1 = ["00:00", "00:01", "00:02", "00:03", ... , "23:59"]), you can use plt.xticks() by generating an array representing xticks with empty string on every minute which is not 0.
unique_times = sorted(set(train1))
xticks = ['' if time[-2:]!='00' else time for time in unique_times]
plt.style.use("seaborn")
plt.figure(figsize = (10,10))
plt.plot_date(train1,date)
plt.gcf().autofmt_xdate()#gcf is get current figure - autofmt is auto format
dateformater = mpl_dates.DateFormatter("%b ,%d %Y")
# I think you wanted to format the yaxis instead of xaxis
plt.gca().yaxis.set_major_formatter(dateformater) # to format the yaxis
plt.ylabel("Date")
plt.xlabel("Time")
plt.title("Train Time vs Date Schedule")
plt.xticks(range(len(xticks)), xticks)
plt.tight_layout()
plt.show()
If every single minute is not in the train1 array, you have to keep train1 data as an object and generate arrays representing xticks location and values to be used as plt.xticks() parameters.
date = []
train1 = []
for data in fileData:
ind = data.split(",")
date.append(datetime.strptime(ind[0],"%d/%m/%Y").date())
train1Time = datetime.strptime(ind[1],"%H:%M")
train1.append(train1Time)
plt.style.use("seaborn")
plt.figure(figsize = (10,10))
plt.plot_date(train1,date)
plt.gcf().autofmt_xdate()#gcf is get current figure - autofmt is auto format
dateformater = mpl_dates.DateFormatter("%b ,%d %Y")
# I think you wanted to format the y axis instead of xaxis
plt.gca().yaxis.set_major_formatter(dateformater) # to format the yaxis
plt.ylabel("Date")
plt.xlabel("Time")
plt.title("Train Time vs Date Schedule")
ax = plt.gca()
xticks_val = []
xticks_loc = []
distance = (ax.get_xticks()[-1] - ax.get_xticks()[0]) / 24
def to_hour_str(x):
x = str(x)
if len(x) < 2:
x = '0' + x
return x + ':00'
for h in range(25):
xticks_val.append(to_hour_str(h))
xticks_loc.append(ax.get_xticks()[0] + h * distance)
plt.xticks(xticks_loc, xticks_val, rotation=90, ha='left')
plt.tight_layout()
plt.show()
Here's the code output using dummy data I generated myself.
I have very simple code:
from matplotlib import dates
import matplotlib.ticker as ticker
my_plot=df_h.boxplot(by='Day',figsize=(12,5), showfliers=False, rot=90)
I've got:
but I would like to have fewer labels on X axis. To do this I've add:
my_plot.xaxis.set_major_locator(ticker.MaxNLocator(12))
It generates fewer labels but values of labels have wrong values (=first of few labels from whole list)
What am I doing wrong?
I have add additional information:
I've forgoten to show what is inside DataFrame.
I have three columns:
reg_Date - datetime64 (index)
temperature - float64
Day - date converted from reg_Date to string, it looks like '2017-10' (YYYY-MM)
Box plot group date by 'Day' and I would like to show values 'Day" as a label but not all values
, for example every third one.
You were almost there. Just set ticker.MultipleLocator.
The pandas.DataFrame.boxplot also returns axes, which is an object of class matplotlib.axes.Axes. So you can use this code snippet to customize your labels:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
center = np.random.randint(50,size=(10, 20))
spread = np.random.rand(10, 20) * 30
flier_high = np.random.rand(10, 20) * 30 + 30
flier_low = np.random.rand(10, 20) * -30
y = np.concatenate((spread, center, flier_high, flier_low))
fig, ax = plt.subplots(figsize=(10, 5))
ax.boxplot(y)
x = ['Label '+str(i) for i in range(20)]
ax.set_xticklabels(x)
ax.set_xlabel('Day')
# Set a tick on each integer multiple of a base within the view interval.
ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
plt.xticks(rotation=90)
I think there is a compatibility issue with Pandas plots and Matplotlib formatters.
With the following code:
df = pd.read_csv('lt_stream-1001-full.csv', header=0, encoding='utf8')
df['reg_date'] = pd.to_datetime(df['reg_date'] , format='%Y-%m-%d %H:%M:%S')
df.set_index('reg_date', inplace=True)
df_h = df.resample(rule='H').mean()
df_h['Day']=df_h.index.strftime('%Y-%m')
print(df_h)
f, ax = plt.subplots()
my_plot = df_h.boxplot(by='Day',figsize=(12,5), showfliers=False, rot=90, ax=ax)
locs, labels = plt.xticks()
i = 0
new_labels = list()
for l in labels:
if i % 3 == 0:
label = labels[i]
i += 1
new_labels.append(label)
else:
label = ''
i += 1
new_labels.append(label)
ax.set_xticklabels(new_labels)
plt.show()
You get this chart:
But I notice that this is grouped by month instead of by day. It may not be what you wanted.
Adding the day component to the string 'Day' messes up the chart as there seems to be too many boxes.
df = pd.read_csv('lt_stream-1001-full.csv', header=0, encoding='utf8')
df['reg_date'] = pd.to_datetime(df['reg_date'] , format='%Y-%m-%d %H:%M:%S')
df.set_index('reg_date', inplace=True)
df_h = df.resample(rule='H').mean()
df_h['Day']=df_h.index.strftime('%Y-%m-%d')
print(df_h)
f, ax = plt.subplots()
my_plot = df_h.boxplot(by='Day',figsize=(12,5), showfliers=False, rot=90, ax=ax)
locs, labels = plt.xticks()
i = 0
new_labels = list()
for l in labels:
if i % 15 == 0:
label = labels[i]
i += 1
new_labels.append(label)
else:
label = ''
i += 1
new_labels.append(label)
ax.set_xticklabels(new_labels)
plt.show()
The for loop creates the tick labels every as many periods as desired. In the first chart they were set every 3 months. In the second one, every 15 days.
If you would like to see less grid lines:
df = pd.read_csv('lt_stream-1001-full.csv', header=0, encoding='utf8')
df['reg_date'] = pd.to_datetime(df['reg_date'] , format='%Y-%m-%d %H:%M:%S')
df.set_index('reg_date', inplace=True)
df_h = df.resample(rule='H').mean()
df_h['Day']=df_h.index.strftime('%Y-%m-%d')
print(df_h)
f, ax = plt.subplots()
my_plot = df_h.boxplot(by='Day',figsize=(12,5), showfliers=False, rot=90, ax=ax)
locs, labels = plt.xticks()
i = 0
new_labels = list()
new_locs = list()
for l in labels:
if i % 3 == 0:
label = labels[i]
loc = locs[i]
i += 1
new_labels.append(label)
new_locs.append(loc)
else:
i += 1
ax.set_xticks(new_locs)
ax.set_xticklabels(new_labels)
ax.grid(axis='y')
plt.show()
I've read about x_compat in Pandas plot in order to apply Matplotlib formatters, but I get an error when trying to apply it. I'll give it another shot later.
Old unsuccesful answer
The tick labels seem to be dates. If they are set as datetime in your dataframe, you can:
months = mdates.MonthLocator(1,4,7,10) #Choose the months you like the most
ax.xaxis.set_major_locator(months)
Otherwise, you can let Matplotlib know they are dates by:
ax.xaxis_date()
Your comment:
I have add additional information:
I've forgoten to show what is inside DataFrame.
I have three columns:
reg_Date - datetime64 (index)
temperature - float64
Day - date converted from reg_Date to string, it looks like '2017-10' *(YYYY-MM) *
Box plot group date by 'Day' and I would like to show values 'Day" as a label but not all values
, for example every third one.
Based on your comment in italic above, I would use reg_Date as the input and the following lines:
days = mdates.DayLocator(interval=3)
daysFmt = mdates.DateFormatter('%Y-%m') #to format display
ax.xaxis.set_major_locator(days)
ax.xaxis.set_major_formatter(daysFmt)
I forgot to mention that you will need to:
import matplotlib.dates as mdates
Does this work?
I currently have a function that creates a time series graph from time/date data that is in MM-DD-YYYY HH-MM format. I am unsure as to how to change the x axis ticks such that it displays hours as well as the date as it currently only shows dates.
The set_major_locator line I included only returns ticks that have the year even though I have specified the hour_locator and the data is hourly.
def graph(region):
fig = plt.figure(num=None, figsize=(60, 20), dpi=100, facecolor='w', edgecolor='k')
df_da_region = df_da_abv_09[df_da_abv_09['Settlement Point'] == region]
df_rt_region = df_rt_abv_09[df_rt_abv_09['Settlement Point Name'] == region]
fig = plt.plot_date(x=list(df_da_region['DateTime']), y=list(df_da_region['Settlement Point Price']), xdate = True, fmt="r-", linewidth=0.7)
fig = plt.plot_date(x=list(df_rt_region['DateTime']), y=list(df_rt_region['Settlement Point Price']), xdate = True, fmt="g-", alpha=0.5, linewidth=0.7)
fig = plt.gca().xaxis.set_major_locator(mdates.HourLocator(interval=5))
plt.show()
Use matplotlib.dates.DateFormatter. First import it at the top
import matplotlib.dates as mdates
then replace this line
fig = plt.gca().xaxis.set_major_locator(mdates.HourLocator(interval=5))
by something like this
myFmt = mdates.DateFormatter('%y-%m-%d %H') # here you can format your datetick labels as desired
plt.gca().xaxis.set_major_formatter(myFmt)
In a example with random numbers (since you haven't provided sample data), it looks like this
Here, the formatter is chosen as you wanted: dates + hours. For further info about how to format the date on the axis, check here
I'm trying to plot some data but I'm getting stuck on plotting 2 plots on same figure. It looks like this:
The code is:
import re
import sqlite3
import matplotlib.pyplot as plt
from matplotlib.dates import datetime as dt
from matplotlib.dates import DateFormatter
...
for company in companies:
cursor.execute("select distinct url from t_surv_data where company = ? order by product_type", (company,))
urls = [r[0] for r in cursor.fetchall()]
for idx, url in enumerate(urls):
cursor.execute("select price, timestamp from t_surv_data where url = ? order by timestamp", (url,))
data = [[r[0], r[1]] for r in cursor.fetchall()]
price, date = zip(*data)
date = [dt.datetime.strptime(d, '%Y-%m-%d %H:%M:%S') for d in date]
f = plt.figure('''figsize=(3, 2)''')
ax = f.add_subplot(111)
ax.plot(date, price) # x, y
ax.xaxis.set_major_formatter(DateFormatter('%d\n%h\n%Y'))
#ax.set_ylim(ymin=0) # If I use this a break the plot
ax2 = f.add_subplot(211)
ax2.scatter(date, [1,1,-1])
ax2.xaxis.set_major_formatter(DateFormatter('%d\n%h\n%Y'))
#ax2.set_ylim(ymin=-1, ymax=1) # If I use this a break the plot
plt.savefig('plt/foo' + str(idx) + '.png')
plt.close()
How can I solve this questions:
1 - The plots looks like they are one above the other. How can I format this with a visual to look like independent plots on the same figure.
2 - I'm using this line of code to both plots "ax2.xaxis.set_major_formatter(DateFormatter('%d\n%h\n%Y'))" but there is no sync in the dates. The dates should be equal in the two plots.
Some one can give me a clue on this questions?
Best Regards,
You are not using add_subplot correctly:
ax = f.add_subplot(2,1,1)
ax2 = f.add_subplot(2,1,2)
The first number indicates the number of rows, the second the number of columns and the third the index of the plot.
If you want the plots to share the x axis (that is the axis with dates), you have to specify the sharex property.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
ax1.plot(...)
ax2.scatter(...)
ax1.xaxis.set_major_formatter(DateFormatter('%d\n%h\n%Y'))
You only have to set the major formatter once since they share the x axis.
I'm not managing to plot matplotlib.finance.candlestick without the weekends (blank spaces between every 5 candlesticks). The example from Matplotlib's website doesn't exclude weekends either and the way to exclude weekends on other plots doesn't seem to apply to CandleSticks.
Has anybody come across this before?
ps. as requested, here is the example:
#!/usr/bin/env python
from pylab import *
from matplotlib.dates import DateFormatter, WeekdayLocator, HourLocator, \
DayLocator, MONDAY
from matplotlib.finance import quotes_historical_yahoo, candlestick,\
plot_day_summary, candlestick2
# (Year, month, day) tuples suffice as args for quotes_historical_yahoo
date1 = ( 2004, 2, 1)
date2 = ( 2004, 4, 12 )
mondays = WeekdayLocator(MONDAY) # major ticks on the mondays
alldays = DayLocator() # minor ticks on the days
weekFormatter = DateFormatter('%b %d') # Eg, Jan 12
dayFormatter = DateFormatter('%d') # Eg, 12
quotes = quotes_historical_yahoo('INTC', date1, date2)
fig = figure()
fig.subplots_adjust(bottom=0.2)
ax = fig.add_subplot(111)
ax.xaxis.set_major_locator(mondays)
ax.xaxis.set_minor_locator(alldays)
ax.xaxis.set_major_formatter(weekFormatter)
#plot_day_summary(ax, quotes, ticksize=3)
candlestick(ax, quotes, width=0.6)
ax.xaxis_date()
ax.autoscale_view()
setp( gca().get_xticklabels(), rotation=45, horizontalalignment='right')
show()
After your 'quotes' line:
weekday_quotes = [tuple([i]+list(quote[1:])) for i,quote in enumerate(quotes)]
then
candlestick(ax, weekday_quotes, width=0.6)
This will plot the data without the gaps between weekdays, now you have to change the xticks back to dates, preferably mondays. Assuming your first quote was a monday:
import matplotlib.dates as mdates
ax.set_xticks(range(0,len(weekday_quotes),5))
ax.set_xticklabels([mdates.num2date(quotes[index][0]).strftime('%b-%d') for index in ax.get_xticks()])
This is pretty gross but seems to get the job done - good luck!
While #JMJR's answer works, I find this to be more robust:
def plot(x):
plt.figure()
plt.title("VIX curve")
def idx(val=[0]):
val[0] = val[0] + 1
return val[0]
d = collections.defaultdict(idx)
# give each date an index
[d[t] for t in sorted(x.index.get_level_values('baropen_datetime').unique())]
# use the index
x['idx'] = [d[t] for t in x.index.get_level_values('baropen_datetime')]
# plot using index
x.groupby('code').apply(lambda y: plt.plot(y.idx.values,
y.close.values,
label=y.index.get_level_values('code')[0]))
plt.legend()
plt.show()
plt.close()