This code produces the figure I've attached. Notice that the sums are the totals over the whole df, but I need each column to show only the total for that particular month. What do you have to set in the `text = ...` assignment for this to occur?
# Work on the three columns used by the chart.
df = data[['Month', 'A', 'B']]
for X in df['A'].unique():
    # Rows whose 'A' value equals the current category.
    subset = df[df['A'] == X]
    # The label is the sum of 'B' over all rows of this category, i.e. it is
    # not broken down by month.
    trace = go.Bar(
        x=subset['Month'],
        y=subset['B'],
        text=str(subset['B'].sum()),
    )
    traces.append(trace)
# Aggregate with sum per (Month, TA) pair and turn the group keys back into
# ordinary columns.
df = data.groupby(['Month', 'TA']).sum().reset_index()
for TA in df['TA'].unique():
    # Rows of the current TA; each row's 'Studies' value is both the bar
    # height and its text label.
    rows = df[df['TA'] == TA]
    trace = go.Bar(
        x=rows['Month'],
        y=rows['Studies'],
        text=rows['Studies'],
        name=TA,
    )
    traces.append(trace)
As long as all values are already showing in your figure, the following will work regardless of how you've built your figure or grouped your data:
# Convert the text labels of every trace to floats: one inner list per trace.
numbers = [[float(nr) for nr in t.text] for t in fig.data]
# Add the labels up position by position across the traces.
sums = [sum(i) for i in zip(*numbers)]

# Put the totals on the last trace and blank the labels of all other traces.
last = len(fig.data) - 1
for i, d in enumerate(fig.data):
    d.text = sums if i == last else ''
fig.show()
Result:
Example of original figure:
Complete code:
# imports
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# data: first three columns of the sample stocks frame, last 25 rows
df = px.data.stocks()
df = df[df.columns[:3]].tail(25)
df['date'] = pd.to_datetime(df['date'])

# group by month: sum every column within each calendar month
monthly_groups = df.groupby(pd.Grouper(key='date', freq='M'))
dfm = monthly_groups.agg('sum').reset_index()

# figure setup: one stacked bar trace per column after 'date', labelled with
# the first three characters of each value
fig = go.Figure()
for col in dfm.columns[1:]:
    labels = [str(v)[:3] for v in dfm[col]]
    fig.add_trace(go.Bar(x=dfm.date, y=dfm[col], text=labels, textposition='auto'))
fig.update_layout(barmode='stack')

# grab the text labels of all bars and sum them position by position
numbers = [[float(nr) for nr in t.text] for t in fig.data]
sums = [sum(i) for i in zip(*numbers)]

# show the sums on the last trace only; blank the labels of the others
last = len(fig.data) - 1
for i, d in enumerate(fig.data):
    d.text = sums if i == last else ''
fig.show()
Related
I need help calculating levels based on OHLC data in Python. Please find the sample code below. The expected output is all of the price levels with their datetimes, which would help me plot ranges on the charts. More details are given in the comments in the code. Any help here would be really helpful.
import pandas as pd
import plotly.graph_objs as go
import plotly.express as px
# Load the sample OHLC data and parse the 'Date' column into datetimes.
df = pd.read_csv("https://raw.githubusercontent.com/IRPK16/SampleShare/5372e0c9fe07f6e31ac2729c86209684f6af69d1/CADCHF.csv")
df['Date'] = pd.to_datetime(df['Date'])

# Candlestick chart of the full data set, with the range slider hidden.
candles = go.Candlestick(
    x=df['Date'],
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
)
fig = go.Figure(data=candles)
fig.update(layout_xaxis_rangeslider_visible=False)
fig.update_layout(title='CADCHF', yaxis_title='Price', xaxis_title='Date')

# Scenario 1: Identify normal range, currently hardcoded to the last 22 rows
# (iloc[-22:]). Need help in getting array[range1, range2,...] programmatically
range_line = df.iloc[-22:].copy()
closes = range_line['Close']
range_line['min_line'] = closes.min()
range_line['mid_line'] = closes.mean()
range_line['max_line'] = closes.max()

# Draw the three horizontal lines in the order min, max, mid.
for column in ('min_line', 'max_line', 'mid_line'):
    fig.add_trace(
        go.Scatter(
            mode='lines',
            x=range_line['Date'],
            y=range_line[column],
            line={'color': 'black', 'width': 1},
        )
    )
print(df)

# Scenario 2: Identify squeeze range, currently hardcoded to row positions
# 53 to 61 (iloc[53:62]). Need help in getting such array[range1, range2,...]
# programmatically
range_line2 = df.iloc[53:62].copy()
closes2 = range_line2['Close']
range_line2['min_line'] = closes2.min()
range_line2['mid_line'] = closes2.mean()
range_line2['max_line'] = closes2.max()

# Only the mid line is drawn for this range.
fig.add_trace(
    go.Scatter(
        mode='lines',
        x=range_line2['Date'],
        y=range_line2['mid_line'],
        line={'color': 'black', 'width': 1},
    )
)
fig.update_traces(showlegend=False)
fig.show()
expected output is as below:
def detect_level_method_2(df):
    """Collect candidate price levels from windowed highs and lows.

    For every row position ``i`` (the first and last five rows are skipped)
    the highest 'High' and the lowest 'Low' of rows ``i-5 .. i+4`` are taken.
    An extreme that stays unchanged for five consecutive positions is recorded
    as a level, provided ``isFarFromLevel`` accepts it.

    Args:
        df: DataFrame with 'High' and 'Low' columns.

    Returns:
        A list of ``(index_label, price)`` tuples in the order the levels
        were found; highs and lows share the same list.
    """
    # NOTE(review): isFarFromLevel is not defined in this snippet; presumably
    # it reports whether a price is far enough from the already recorded
    # levels -- confirm against its definition.
    levels = []
    max_list = []
    min_list = []
    for i in range(5, len(df) - 5):
        # Both windows cover the same ten rows, selected by position. The
        # high window previously ended one row earlier than the low window.
        high_range = df['High'].iloc[i - 5:i + 5]
        current_max = high_range.max()
        # A new maximum restarts the run of identical maxima.
        if current_max not in max_list:
            max_list = []
        max_list.append(current_max)
        # Register the level exactly once, when the run reaches five.
        if len(max_list) == 5 and isFarFromLevel(current_max, levels, df):
            levels.append((high_range.idxmax(), current_max))

        low_range = df['Low'].iloc[i - 5:i + 5]
        current_min = low_range.min()
        # Same run logic for the minima.
        if current_min not in min_list:
            min_list = []
        min_list.append(current_min)
        if len(min_list) == 5 and isFarFromLevel(current_min, levels, df):
            levels.append((low_range.idxmin(), current_min))
    return levels
Update:
I am able to flag each bar based on the previous bar's values, but I am unable to group the values by their nearest closing range. Please help me solve this problem.
from statistics import median

# Load the sample OHLC data and parse the 'Date' column into datetimes.
df = pd.read_csv("https://raw.githubusercontent.com/IRPK16/SampleShare/5372e0c9fe07f6e31ac2729c86209684f6af69d1/CADCHF.csv")
df['Date'] = pd.to_datetime(df['Date'])

# Sorted by 'Date' in ascending order, so the final row is the latest bar.
dfOrderedByDateDesc = df.sort_values(['Date'], ascending=[True])
bar = dfOrderedByDateDesc.iloc[-1:].copy()  # Returns last bar

df.set_index('Date', inplace=True)

# High and low of the preceding row.
df['Prev_High'] = df['High'].shift()
df['Prev_Low'] = df['Low'].shift()

# True where the close lies strictly between the previous row's low and high.
below_prev_high = df['Close'] < df['Prev_High']
above_prev_low = df['Close'] > df['Prev_Low']
df['isInRange'] = below_prev_high & above_prev_low
print(df.tail(3))
I have a relatively simple issue, but cannot find any answer online that addresses it. Starting from a simple boxplot:
import plotly.express as px
# Sample iris data set from plotly express.
df = px.data.iris()

# One box per species, showing the distribution of sepal_length.
fig = px.box(df, x='species', y='sepal_length')

# Number of rows for each species value.
val_counts = df['species'].value_counts()
I would now like to add val_counts (in this dataset, 50 for each species) to the plots, preferably on either of the following places:
On top of the median line
On top of the max/min line
Inside the hoverbox
How can I achieve this?
The snippet below will set count = 50 for all unique values of df['species'] on top of the max line using fig.add_annotation like this:
for s in df.species.unique():
    # Rows of the current species.
    per_species = df[df['species'] == s]
    # Write the row count 10 px above the largest sepal_length of the species.
    fig.add_annotation(
        x=s,
        y=per_species['sepal_length'].max(),
        text=str(len(per_species['species'])),
        yshift=10,
        showarrow=False,
    )
Plot:
Complete code:
import plotly.express as px
# Sample iris data set and one box per species.
df = px.data.iris()
fig = px.box(df, x='species', y='sepal_length')

for s in df.species.unique():
    # Rows of the current species.
    per_species = df[df['species'] == s]
    # Write the row count 10 px above the largest sepal_length of the species.
    fig.add_annotation(
        x=s,
        y=per_species['sepal_length'].max(),
        text=str(len(per_species['species'])),
        yshift=10,
        showarrow=False,
    )

# Fully expanded figure; the result is not used further in this snippet.
f = fig.full_figure_for_development(warn=False)
fig.show()
Using same approach that I presented in this answer: Change Plotly Boxplot Hover Data
Calculate all the measures a box plot calculates, plus the additional measure you want (count).
Overlay bar traces over the box plot traces so that the hover shows all the required measures.
import plotly.express as px
df = px.data.iris()

# Measures to compute per species: a plain string names a built-in
# aggregation, a (name, function) pair supplies a custom one.
measures = [
    "max",
    ("q75", lambda s: s.quantile(0.75)),
    "median",
    ("q25", lambda s: s.quantile(0.25)),
    "min",
    "count",
]

# Turn the measures into named aggregations over 'sepal_length'.
named_aggs = {}
for m in measures:
    if isinstance(m, str):
        named_aggs[m] = ("sepal_length", m)
    else:
        named_aggs[m[0]] = ("sepal_length", m[1])

# summarize data as per same dimensions as boxplot; 'y' is the max-min span
df2 = df.groupby("species").agg(**named_aggs).reset_index()
df2 = df2.assign(y=lambda d: d["max"] - d["min"])

# overlay bar over boxplot: bars start at 'min' and are 'y' tall, and the
# hover lists every summary column except 'y' and 'species'
hover_columns = {c: c not in ["y", "species"] for c in df2.columns}
bars = px.bar(
    df2,
    x="species",
    y="y",
    base="min",
    hover_data=hover_columns,
    hover_name="species",
)
bars.update_traces(opacity=0.1).add_traces(px.box(df, x="species", y="sepal_length").data)
I want to change the hover text and hover data for a python plotly boxplot. Instead of 5 separate hover boxes for max, q3, median, q1, and min, I want one condensed hover box for Median, Mean, IQR and date. I have played around with every "hover" variable with no luck. My sample code is found below.
import numpy as np
import pandas as pd  # needed for pd.DataFrame / pd.concat / pd.merge below
import plotly.express as px

# Build a 'year' column: 20 rows of '2020' followed by 25 rows of '2021'.
lst = [['2020'], ['2021']]
numbers = [20 , 25]
r = [x for i, j in zip(lst, numbers) for x in i*j]
df = pd.DataFrame(r, columns=['year'])
# Observations 1..len(df), all scaled by one random factor.
df['obs'] = np.arange(1,len(df)+1) * np.random.random()

# Per-year summary statistics of 'obs'.
mean = df.groupby('year').mean()[['obs']]
median = df.groupby('year').median()[['obs']]
iqr = df.groupby('year').quantile(0.75)[['obs']] - df.groupby('year').quantile(0.25)[['obs']]
stats = pd.concat([mean,median,iqr], axis=1)
stats.columns = ['Mean','Median','IQR']

# Attach the yearly statistics to every observation row.
tot_df = pd.merge(df,stats, right_index=True, left_on='year', how = 'left')
fig = px.box(tot_df, x="year", y="obs", points=False, hover_data=['year','Mean','Median','IQR'])
fig.show()
In this case I tried to use "hover_data", which does not raise an error, but also does not change the plot, as shown above. I have tried both express and graph_objects with no luck. My plotly versions is 4.9.0. Thank you!
I have used the technique of overlaying a bar trace on top of the boxplot trace.
The bar trace can be configured to show the information you want.
For the sake of demonstration I have set the opacity to 0.05; it can be set to 0 to make the bars fully invisible.
I built this against plotly 5.2.1 and have not tested it against 4.9.0.
import numpy as np
import plotly.express as px
import pandas as pd
# Build a 'year' column: 20 rows of '2020' followed by 25 rows of '2021'.
lst = [['2020'], ['2021']]
numbers = [20 , 25]
r = [x for i, j in zip(lst, numbers) for x in i*j]
df = pd.DataFrame(r, columns=['year'])
# Observations 1..len(df), all scaled by one random factor.
df['obs'] = np.arange(1,len(df)+1) * np.random.random()

# Per-year summary statistics of 'obs'.
mean = df.groupby('year').mean()[['obs']]
median = df.groupby('year').median()[['obs']]
iqr = df.groupby('year').quantile(0.75)[['obs']] - df.groupby('year').quantile(0.25)[['obs']]
stats = pd.concat([mean,median,iqr], axis=1)
stats.columns = ['Mean','Median','IQR']

# Attach the yearly statistics to every observation row.
tot_df = pd.merge(df,stats, right_index=True, left_on='year', how = 'left')
fig = px.box(tot_df, x="year", y="obs", points=False)

# Columns to show in the hover: everything except 'year' and 'obs'.
extra_cols = [c for c in tot_df.columns if c not in ["year", "obs"]]

# Per year: where the bar starts (min) and how tall it is (max - min).
span_per_year = tot_df.groupby("year", as_index=False).agg(
    base=("obs", "min"),
    bar=("obs", lambda s: s.max() - s.min()),
)
# Per year: the first value of each statistics column.
first_per_year = tot_df.groupby("year", as_index=False).agg(
    {c: "first" for c in extra_cols}
)

# Show the statistics columns in the hover and hide the helper columns.
hover_columns = {c: True for c in extra_cols}
hover_columns.update({"base": False, "bar": False})

fig2 = px.bar(
    span_per_year.merge(first_per_year, on="year"),
    x="year",
    y="bar",
    base="base",
    hover_data=hover_columns,
).update_traces(opacity=0.05)
fig.add_traces(fig2.data)
fig2 without named aggregations
# Columns to show in the hover: everything except 'year' and 'obs'.
extra_cols = [c for c in tot_df.columns if c not in ["year", "obs"]]

# Per year: bar start (min) and bar height (max - min), built with apply
# instead of named aggregations.
span_per_year = tot_df.groupby("year", as_index=False)["obs"].apply(
    lambda s: pd.Series({"base": s.min(), "bar": s.max() - s.min()})
)
# Per year: the first value of each statistics column.
first_per_year = tot_df.groupby("year", as_index=False).agg(
    {c: "first" for c in extra_cols}
)

# Show the statistics columns in the hover and hide the helper columns.
hover_columns = {c: True for c in extra_cols}
hover_columns.update({"base": False, "bar": False})

fig2 = px.bar(
    span_per_year.merge(first_per_year, on="year"),
    x="year",
    y="bar",
    base="base",
    hover_data=hover_columns,
).update_traces(opacity=0.05)
I'm trying to add custom text inside of a plotly pie chart.
I want the largest share to be labeled Rank 1, second largest Rank 2, and so on...
Is there any way to do this using the texttemplate or some other method?
import plotly.graph_objects as go
# Four slices labelled A-D.
labels = list('ABCD')
values = [25, 45, 13, 78]

# The template expression evaluates to the single string "Rank 1".
pie = go.Pie(labels=labels, values=values, texttemplate="Rank %i" % 1)
fig = go.Figure(data=[pie])
fig.show()
Passing an array to texttemplate helps
import plotly.graph_objects as go
# Four slices labelled A-D.
labels = list('ABCD')
values = [25, 45, 13, 78]

# texttemplate is given as a list with one entry per slice.
slice_templates = [4, 2, 3, 'Rank 1']
pie = go.Pie(labels=labels, values=values, texttemplate=slice_templates)
fig = go.Figure(data=[pie])
fig.show()
You need this function:
def get_ranks(lst, begin_with_one=False):
    """Return the descending-order rank of each element of ``lst``.

    The largest element gets rank 0 (or 1 when ``begin_with_one`` is true).
    Equal elements share the rank of their first position in the descending
    order, so ``[5, 5, 1]`` yields ``[0, 0, 2]``.

    Args:
        lst: Sequence of mutually comparable values.
        begin_with_one: Start counting ranks at 1 instead of 0.

    Returns:
        A list of integer ranks, aligned with ``lst``.
    """
    from bisect import bisect_right

    ascending = sorted(lst)
    offset = 1 if begin_with_one else 0
    # The rank of x equals the number of elements strictly greater than x,
    # which a binary search finds without rescanning the list per element.
    return [len(ascending) - bisect_right(ascending, x) + offset for x in lst]
import plotly.graph_objects as go
import numpy as np
# Four slices labelled A-D.
labels = list('ABCD')
values = [25, 45, 13, 78]

# One "Rank N" label per slice, with ranks counted from 1.
rank_labels = [f"Rank {x}" for x in get_ranks(values, begin_with_one=True)]
pie = go.Pie(labels=labels, values=values, texttemplate=rank_labels)
fig = go.Figure(data=[pie])
fig.show()
Use f-strings as often as possible in Python 3 - they are super convenient!
Since the function above only yields distinct ranks when all list elements are unique (equal values receive the same rank), I created a better index mapper:
def map_to_index(lst1, lst2):
    """Return lst1 as indexes of lst2.

    Each element of ``lst1`` is replaced by the position of an equal element
    in ``lst2``. Repeated values consume successive positions, so duplicates
    in ``lst1`` map to distinct indexes of ``lst2``.

    Args:
        lst1: Values to look up (must be hashable).
        lst2: Sequence in which the positions are searched.

    Returns:
        A list of indexes into ``lst2``, aligned with ``lst1``.

    Raises:
        KeyError: If a value of ``lst1`` does not occur in ``lst2``.
        IndexError: If a value occurs more often in ``lst1`` than in ``lst2``.
    """
    from collections import deque

    # For every value, a queue of its positions in lst2 in ascending order.
    positions = {}
    for i, x in enumerate(lst2):
        positions.setdefault(x, deque()).append(i)

    # Taking from the left hands out the smallest unused position first.
    return [positions[x].popleft() for x in lst1]
And an improved get_ranks():
def get_ranks(lst, begin_with_one=False):
    """Return the position of each element of ``lst`` in descending order.

    The positions come from ``map_to_index`` applied to ``lst`` and its
    descending sort. They start at 0, or at 1 when ``begin_with_one`` is true.
    """
    descending = sorted(lst, reverse=True)
    ranks = map_to_index(lst, descending)
    if begin_with_one:
        return [rank + 1 for rank in ranks]
    return ranks
Then it works also with:
import plotly.graph_objects as go
import numpy as np
# Six slices labelled A-F.
labels = list('ABCDEF')
values = [25, 45, 13, 78, 45, 78]  # with elements of same value

# One "Rank N" label per slice, with ranks counted from 1.
rank_labels = [f"Rank {x}" for x in get_ranks(values, begin_with_one=True)]
pie = go.Pie(labels=labels, values=values, texttemplate=rank_labels)
fig = go.Figure(data=[pie])
fig.show()
I am trying to make a heat map like this one from bokeh:
Where all the code is here: http://docs.bokeh.org/en/latest/docs/gallery/unemployment.html
I got pretty close, but for some reason it is only printing the values in a diagonal order.
I tried to format my data the same way and just substitute it, but it got a little more complicated than that. Here is my data:
from collections import OrderedDict
import numpy as np
import pandas as pd
from bokeh.plotting import ColumnDataSource, figure, show, output_file
from bokeh.models import HoverTool
import pandas.util.testing as tm; tm.N = 3
# Read two columns of the input CSV and index the rows by parsed timestamp.
df = pd.read_csv('MYDATA.csv', usecols=[1, 16])
df = df.set_index('recvd_dttm')
df.index = pd.to_datetime(df.index, format='%m/%d/%Y %H:%M')

# Count the rows per (month of the index, CompanyName) pair.
result = df.groupby([lambda idx: idx.month, 'CompanyName']).agg(len).reset_index()
result.columns = ['Month', 'CompanyName', 'NumberCalls']

# Month x company table of counts; missing combinations are filled with 0.
pivot_table = result.pivot(
    index='Month', columns='CompanyName', values='NumberCalls'
).fillna(0)

# Order the companies by their column totals (descending), keep the first 46
# and put the companies on the rows.
s = pivot_table.sum().sort(ascending=False,inplace=False)
pivot_table = pivot_table.ix[:,s.index[:46]]
pivot_table = pivot_table.transpose()
pivot_table.to_csv('pivot_table.csv')

# Company names as plain strings; they also serve as the x-axis categories.
pivot_table = pivot_table.reset_index()
pivot_table['CompanyName'] = [str(x) for x in pivot_table['CompanyName']]
Companies = list(pivot_table['CompanyName'])
months = ["1","2","3","4","5","6","7","8","9","10","11","12"]
pivot_table = pivot_table.set_index('CompanyName')

# this is the colormap from the original plot
colors = [
    "#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce",
    "#ddb7b1", "#cc7878", "#933b41", "#550b1d",
]

# Build one entry per (company, month) cell of the table: its month, its
# company, its count and the color the count maps to.
month, company, color, rate = [], [], [], []
for y in pivot_table.index:
    for m in pivot_table.columns:
        month.append(m)
        company.append(y)
        num_calls = pivot_table.loc[y,m]
        rate.append(num_calls)
        # Counts of 10 or more share the last color.
        color_index = min(int(num_calls)-2, 8)
        color.append(colors[color_index])

# NOTE(review): the source is given the label lists `months` and `Companies`,
# not the per-cell `month` and `company` lists built in the loop above.
source = ColumnDataSource(
    data=dict(months=months, Companies=Companies, color=color, rate=rate)
)

output_file('heatmap.html')

TOOLS = "resize,hover,save,pan,box_zoom,wheel_zoom"
p = figure(
    title="Customer Calls This Year",
    x_range=Companies,
    y_range=list(reversed(months)),
    x_axis_location="above",
    plot_width=1400,
    plot_height=900,
    toolbar_location="left",
    tools=TOOLS,
)

# One 1x1 rectangle per source row, filled with the row's color.
p.rect("Companies", "months", 1, 1, source=source,
       color="color", line_color=None)

# Strip grid lines, axis lines and ticks; tilt the x-axis labels.
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = np.pi/3

# Tooltip rows for the hover tool.
hover = p.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
    ('Company Name', '#Companies'),
    ('Number of Calls', '#rate'),
])

show(p)  # show the plot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Simulated data following the previous post: 10000 random hourly timestamps
# from the first half of 2015, each paired with one of five company names.
np.random.seed(0)
hourly = pd.date_range('2015-01-01 00:00:00', '2015-06-30 00:00:00', freq='1h')
dates = np.random.choice(hourly, 10000)
company = np.random.choice(['company' + x for x in '1 2 3 4 5'.split()], 10000)
df = pd.DataFrame(dict(recvd_dttm=dates, CompanyName=company))
df = df.set_index('recvd_dttm').sort_index()

# Helper column of ones so that summing counts the rows; it is then renamed
# to the empty string.
df['C'] = 1
df.columns = ['CompanyName', '']

# Sum the helper column per (month of the index, CompanyName) pair.
grouped = df.groupby([lambda idx: idx.month, 'CompanyName'])
result = grouped.agg({df.columns[1]: sum}).reset_index()
result.columns = ['Month', 'CompanyName', 'counts']

# Company x month table of counts.
pivot_table = result.pivot(index='CompanyName', columns='Month', values='counts')
x_labels = ['Month'+str(x) for x in pivot_table.columns.values]
y_labels = pivot_table.index.values

# Draw the table as an image with a colorbar and label both axes.
fig, ax = plt.subplots()
x = ax.imshow(pivot_table, cmap=plt.cm.winter)
plt.colorbar(mappable=x, ax=ax)
ax.set_xticks(np.arange(len(x_labels)))
ax.set_yticks(np.arange(len(y_labels)))
ax.set_xticklabels(x_labels)
ax.set_yticklabels(y_labels)
ax.set_xlabel('Month')
ax.set_ylabel('Company')
ax.set_title('Customer Calls This Year')
The answer was in this line:
# Call as written in the question: `months` and `Companies` are the lists
# that are also used for the axis ranges of the figure.
source = ColumnDataSource(
data=dict(months=months, Companies=Companies, color=color, rate=rate)
)
It should have been:
# Replacement call from the answer: `company` is the per-cell list filled in
# the question's nested loop.
# NOTE(review): `month=months` still passes the 12-entry label list; confirm
# whether the per-cell `month` list was intended here.
source = ColumnDataSource(
data=dict(month=months, company=company, color=color, rate=rate)
)