TimeSeries in Bokeh using a dataframe with index - python

I'm trying to use Bokeh to plot a Pandas dataframe with a DateTime column containing years and a numeric one. If the DateTime is specified as x, the behaviour is the expected (years in the x-axis). However, if I use set_index to turn the DateTime column into the index of the dataframe and then only specify the y in the TimeSeries I get time in milliseconds in the x-axis. A minimal example
import pandas as pd
import numpy as np
from bokeh.charts import TimeSeries, output_file, show
output_file('fig.html')
test = pd.DataFrame({'datetime':pd.date_range('1/1/1880', periods=2000),'foo':np.arange(2000)})
fig = TimeSeries(test,x='datetime',y='foo')
show(fig)
output_file('fig2.html')
test = test.set_index('datetime')
fig2 = TimeSeries(test,y='foo')
show(fig2)
Is this the expected behaviour or a bug? I would expect the same picture with both approaches.
Cheers!!

Bokeh used to add an index for internal reasons but as of not-so-recent versions (>= 0.12.x) it no longer does this. Also it's worth noting that the bokeh.charts API has been deprecated and removed. The equivalent code using the stable bokeh.plotting API yields the expected result:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, output_file, show
from bokeh.layouts import row
output_file('fig.html')
test = pd.DataFrame({'datetime':pd.date_range('1/1/1880', periods=2000),'foo':np.arange(2000)})
fig = figure(x_axis_type="datetime")
fig.line(x='datetime',y='foo', source=test)
test = test.set_index('datetime')
fig2 = figure(x_axis_type="datetime")
fig2.line(x='datetime', y='foo', source=test)
show(row(fig, fig2))

Related

Formatting hover text when plotting with hvplot

I am trying to use hvplot.line to plot 2 y variables in a line chart. My goal is to format the hover text to some format I want (say 1 decimal). I used the standard method to format them in bokeh's hovertool and try to pass it with ".opts(tools=)".
But the formatting does not reflect in the plot. I specified the format should '0.0',but the hover text still shows 3 decimal. What did I do wrong?
My code looks like something below:
import pandas as pd
import numpy as np
import hvplot.pandas
hvplot.extension('bokeh')
from numpy import random
from bokeh.models import HoverTool
df=pd.DataFrame({'length':np.linspace(0,4000,6),
'slope':np.linspace(1.7,2.4,6),
'Direction':np.linspace(1.2,-0.5,6),
'clearance':random.rand(6),
'weight':random.rand(6)},)
hover=HoverTool(tooltips=[('clearance','#clearance{0.0}'),('weight','#weight{0.0}')])
df.hvplot.line(x='length',y=['slope','Direction'],invert=True,hover_cols=['clearance','weight']).opts(tools=[hover])
But if I reduce the number of y variable to just 1. It works fine.
Replace the last line of code to be:
df.hvplot.line(x='length',y=['Direction'],invert=True,hover_cols=['clearance','weight']).opts(tools=[hover])
You can pass the tools to the plot call as a keyword argument.
Change your code
# df.hvplot.line(x='length',y=['slope','Direction'], hover_cols=['clearance','weight'], invert=True).opts(tools=[hover])
df.hvplot.line(x='length',y=['slope','Direction'], hover_cols=['clearance','weight'], tools=[hover], invert=True)
and your hovertool with your formatter is applied.
Minimal Example
import hvplot.pandas
import numpy as np
import pandas as pd
from bokeh.models import HoverTool
hvplot.extension('bokeh')
df=pd.DataFrame({
'length':np.linspace(0,4000,6),
'slope':np.linspace(1.7,2.4,6),
'Direction':np.linspace(1.2,-0.5,6),
'clearance':np.random.rand(6),
'weight':np.random.rand(6)}
)
hover=HoverTool(tooltips=[('clearance','#clearance{0.0}'),('weight','#weight{0.0}')])
df.hvplot.line(
x='length',
y=['slope','Direction'],
hover_cols=['clearance','weight'],
tools=[hover],
invert=True
)
Output

How do I make a line graph from a dataframe in bokeh?

I'm reading a .csv file in bokeh which has two columns: one for date and one for the values corresponding to that date. I'm trying to make a line graph with the dates on the x axis and the values on y, but it isn't working. Any ideas?
CODE:
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
from datetime import datetime
from bokeh.palettes import Spectral3
output_file('output.html')
df = pd.read_csv('speedgraphak29.csv')
p = figure(x_axis_type="datetime")
p.line(x=df.dates, y=df.windspeed, line_width=2)
show(p)
It's returning an empty graph. What should I do?
Since you didn't provide an example of the input data I had to make something up. You probably forgot to specify that the dates column should be interpreted as datetime values as bigreddot noted. Here is a working example:
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
from datetime import datetime
from bokeh.palettes import Spectral3
output_file('output.html')
df = pd.DataFrame.from_dict({'dates': ["1-1-2019", "2-1-2019", "3-1-2019", "4-1-2019", "5-1-2019", "6-1-2019", "7-1-2019", "8-1-2019", "9-1-2019", "10-1-2019"], 'windspeed': [10, 15, 20,30 , 25, 5, 15, 30, 35, 25]})
df['dates'] = pd.to_datetime(df['dates'])
source = ColumnDataSource(df)
p = figure(x_axis_type="datetime")
p.line(x='dates', y='windspeed', line_width=2, source=source)
show(p)
You could use this. Say you have a CSV called sample_data.csv with columns Date and Amount. Just to add on to what Jasper had.
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
output_file('output.html')
df = pd.read_csv('sample_data.csv', parse_dates=['Date'])
source = ColumnDataSource(df)
p = figure(x_axis_type="datetime")
p.line(x='Date', y='Amount', line_width=2, source=source)
show(p)
In this case, read the CSV with the column as a date format. Using ColumnDataSource allows you to use advanced features like hovering over a plot to see more details if needed.
You may alternatively also use lists directly which would look like.
p.line(x='my_list_of_dates', y='my_list_of_counts', line_width=2)
This would mean reading each column and making a list from it. All in all, using ColumnDataSource would allow you to directly call a column by its name.

How to combine bar and line plots with x-axis as datetime in matplotlib

I have a dataFrame with datetimeIndex and two columns with int values. I would like to plot on the same graph Col1 as a bar plot, and Col2 as a line plot.
Important feature is to have correctly labeled x-axis as datetime, also when zooming in-out. I think solutions with DateFormatter would not work, since I want a dynamic xtick labeling.
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import numpy as np
startDate = dt.datetime(2018,1,1,0,0)
nrHours = 144
datetimeIndex = [startDate + dt.timedelta(hours=x) for x in range(0,nrHours)]
dF = pd.DataFrame(index=datetimeIndex)
dF['Col1'] = np.random.randint(1,3,nrHours)
dF['Col2'] = np.random.randint(3,6,nrHours)
axes = dF[['Col1']].plot(kind='bar')
dF[['Col2']].plot(ax=axes)
What seemed to be a simple task turns out being very challenging. Actually, after extensive search on the net, I still haven't found any clean solutions.
I have tried to use both pandas plot and matplotlib.
The main issue arises from the bar plot that seems to have difficulties handling datetime index (prefers integers, in some cases it plot dates but in Epoch 1970-1-1 style which is equivalent to 0).
I finally found a way using mdates and date2num. The solution is not very clean but provides an efficient solution to:
Combine bar and line plot on same graph
Using datetime on x-axis
Correctly and dynamically displaying x-ticks time labels (also when zooming in and out)
Working example :
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import datetime as dt
import numpy as np
startDate = dt.datetime(2018,1,1,0,0)
nrHours = 144
datetimeIndex = [startDate + dt.timedelta(hours=x) for x in range(0, nrHours)]
dF = pd.DataFrame(index=datetimeIndex)
dF['Col1'] = np.random.randint(1,3,nrHours)
dF['Col2'] = np.random.randint(3,6,nrHours)
fig,axes = plt.subplots()
axes.xaxis_date()
axes.plot(mdates.date2num(list(dF.index)),dF['Col2'])
axes.bar(mdates.date2num(list(dF.index)),dF['Col1'],align='center',width=0.02)
fig.autofmt_xdate()
Sample output:

Plotting from a pandas dataframe with a timedelta index

I'm trying to plot some data from a pandas dataframe with a timedelta index and I want to customize the time ticks and labels in my x-axis. This should be simple but it's proving to be a tough job.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as dates
## I have a df similar to this
timestamps = pd.date_range(start="2017-05-08", freq="10T", periods=6*6)
timedeltas = timestamps - pd.to_datetime("2017-05-08")
yy = np.random.random((len(timedeltas),10))
df = pd.DataFrame(data=yy, index=timedeltas) # Ok, this is what I have
## Now I want to plot this but I want detailed control of the plot so I use matplotlib instead of df.plot
fig,axes=plt.subplots()
axes.plot(df.index.values, df.values)
#axes.plot_date(df.index, df.values, '-')
axes.xaxis.set_major_locator(dates.HourLocator(byhour=range(0,24,2)))
axes.xaxis.set_minor_locator(dates.MinuteLocator(byminute=range(0,24*60,10)))
axes.xaxis.set_major_formatter(dates.DateFormatter('%H:%M'))
plt.show()
As you can see, the ticks are not even showing up. How can I add major ticks and labels every two hours and minor ticks every 10 minutes, for example?
Although I don't know exactly what the root issue is, it seems that it is related to the used package versions. When I run your example with an older python distribution (matplotlib 1.5.1, numpy 1.11.1, pandas 0.18.1 and python 2.7.12), then I get a plot without ticks just as you described.
However I can get a plot with the correct ticks
by running the code below with a recent python distribution (matplot 2.0.1, numpy 1.12.1, pandas 0.19.1 and python 3.6.1).
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as dates
timestamps = pd.date_range(start="2017-05-08", freq="10T", periods=6*6)
timedeltas = timestamps - pd.to_datetime("2017-05-08")
yy = np.random.random((len(timedeltas),10))
df = pd.DataFrame(data=yy, index=timedeltas)
fig,axes=plt.subplots()
axes.plot_date(df.index, df.values, '-')
axes.xaxis.set_major_locator(dates.HourLocator(byhour=range(0,24,2)))
axes.xaxis.set_minor_locator(dates.MinuteLocator(byminute=range(0,24*60,10)))
axes.xaxis.set_major_formatter(dates.DateFormatter('%H:%M'))
plt.show()

python bokeh plot how to format axis display

the y axis ticks seem to be formatting numbers like 500000000 to 5.000e+8. Is there a way to control the display so that it displays as 500000000?
using python 2.7, bokeh 0.5.2
i m trying out the timeseries example at bokeh tutorials page
The tutorial plots 'Adj Close' against 'Date' but i'm plotting with 'Volume' against 'Date'
You can also use NumeralTickFormatter as used in the toy plot below. The other possible values in place of '00' are listed here.
import pandas as pd
import numpy as np
from bokeh.plotting import figure, output_file, show
from bokeh.models import NumeralTickFormatter
df = pd.DataFrame(np.random.randint(0, 90000000000, (10,1)), columns=['my_int'])
p = figure(plot_width=700, plot_height=280, y_range=[0,100000000000])
output_file("toy_plot_with_commas")
for index, record in df.iterrows():
p.rect([index], [record['my_int']/2], 0.8, [record['my_int']], fill_color="red", line_color="black")
p.yaxis.formatter=NumeralTickFormatter(format="00")
show(p)
You have to add the option p.left[0].formatter.use_scientific = False to your code. In the timeseries tutorial, it'd be:
p1 = figure(title="Stocks")
p1.line(
AAPL['Date'],
AAPL['Adj Close'],
color='#A6CEE3',
legend='AAPL',
)
p1.left[0].formatter.use_scientific = False # <---- This forces showing 500000000 instead of 5.000e+8 as you want
show(VBox(p1, p2))

Categories