Related
I would like to make a dropdown menu that shows each column name from the dataset, and the user can choose a column to visualize the data.
With Python this code is:
# Draw one histogram per DataFrame column, each on its own figure.
for column in df.columns:
    plt.figure(figsize=(10, 6))
    plt.title("Count vs " + column)
    sns.histplot(data=df, x=column, element='bars')
I have tried the following code, which does not account for each column name, nor does it output charts similar to the above code.
def f(x):
    """Return the argument unchanged.

    Note: this definition is replaced by the ``f(age)`` defined right below.
    """
    return x


def f(age):
    """Call ``iplot`` on ``df.loc[age]`` with fixed axis titles and chart title."""
    selection = df.loc[age]
    selection.iplot(
        xTitle='Year',
        yTitle='Count of {}'.format(age),
        title='Count per Age',
    )


# Hand the index of ``df`` to ``interact`` as the values for ``age``.
interact(f, age=df.index)
I am working on a charting module to which I can pass a DataFrame; the module then creates reports from plots generated by calling a few functions, as shown below.
I am using Altair for plotting and "Datapane" for creating the report, the documentation of the same can be found here : https://datapane.github.io/datapane/
My DataFrame looks like this
# Sample data: one record per date / country / channel combination.
d = {
    'Date': ['2021-01-01'] * 4 + ['2021-01-02', '2021-01-03'],
    'country': ['IND'] * 6,
    'channel': ['Organic', 'CRM', 'Facebook', 'referral', 'CRM', 'CRM'],
    'sessions': [10000, 8000, 4000, 2000, 7000, 6000],
    'conversion': [0.1, 0.2, 0.1, 0.05, 0.12, 0.11],
}
country_channel = pd.DataFrame(data=d)
Plotting functions :
def plot_chart(source, Y_axis_1, Y_axis_2, chart_caption):
    """Layer two line series over a shared ``Date`` axis and wrap them in ``dp.Plot``.

    Args:
        source: Data handed to ``alt.Chart``; it is encoded on ``'Date:T'``.
        Y_axis_1: Altair encoding for the first (opaque, blue) line.
        Y_axis_2: Altair encoding for the second (translucent, green) line.
        chart_caption: Caption passed to ``dp.Plot``.

    Returns:
        The ``dp.Plot`` wrapping the interactive layered chart.
    """
    first_colour = '#5276A7'
    second_colour = '#57A44C'

    date_axis = alt.X('Date:T', axis=alt.Axis(title="Date"))
    base = alt.Chart(source).encode(date_axis)

    first_y = alt.Y(Y_axis_1, axis=alt.Axis(titleColor=first_colour))
    line_1 = base.mark_line(opacity=1, color=first_colour).encode(first_y)

    second_y = alt.Y(Y_axis_2, axis=alt.Axis(titleColor=second_colour))
    line_2 = base.mark_line(
        opacity=0.3, color=second_colour, interpolate='monotone'
    ).encode(second_y)

    # Each line keeps its own y scale so differently sized measures stay readable.
    layered = alt.layer(line_1, line_2).resolve_scale(y='independent')
    return dp.Plot(layered.interactive(), caption=chart_caption)
def channel_plot_split(filter_1, filter_2, country, channel):
    """Build the titled sessions/conversion plot for one country and channel.

    Filters the module-level ``country_channel`` frame, sorts the remaining
    rows by ``Date`` and passes them to ``plot_chart``.

    Args:
        filter_1: Column compared against ``country.upper()`` (exact match).
        filter_2: Column whose upper-cased values are compared against
            ``channel.upper()``.
        country: Country value to keep.
        channel: Channel value to keep (matched case-insensitively).

    Returns:
        A two-row ``dp.Group``: an HTML heading followed by the plot.
    """
    country_label = country.upper()
    channel_label = channel.upper()

    # NOTE: the country column is compared as stored (no upper-casing of the
    # data), while the channel column is upper-cased before comparing.
    channel_split_data = country_channel[country_channel[filter_1] == country_label]
    channel_split_data = channel_split_data[
        channel_split_data[filter_2].str.upper() == channel_label
    ]
    channel_split_data = (
        channel_split_data
        .sort_values(by='Date', ascending=True)
        .reset_index(drop=True)
    )

    plot_channel_split = plot_chart(
        source=channel_split_data,
        Y_axis_1='sessions:Q',
        Y_axis_2='conversion:Q',
        chart_caption="Sessions-Conversion Plot for Country " + country_label + " and channel :" + channel,
    )
    heading = dp.HTML(
        "<div class='center'> <h3> Country : " + country_label
        + " & Channel : " + channel_label + "</h3></div>"
    )
    return dp.Group(heading, plot_channel_split, rows=2)
def grpplot(plot_1, plot_2):
    """Return a ``dp.Group`` that places ``plot_1`` and ``plot_2`` in two columns."""
    return dp.Group(plot_1, plot_2, columns=2)
When called, the above functions filter the DataFrame, create a plot for each filter combination, and group two plots in a row.
# First report row: Organic and CRM side by side.
row_1 = grpplot(
    channel_plot_split('country', 'channel', 'IND', 'Organic'),
    channel_plot_split('country', 'channel', 'IND', 'CRM'),
)
# Second report row: Facebook and referral side by side.
row_2 = grpplot(
    channel_plot_split('country', 'channel', 'IND', 'Facebook'),
    channel_plot_split('country', 'channel', 'IND', 'referral'),
)
I can now generate a report by calling datapane.Report() function as follows
# Each positional argument becomes one block of the report.
r = dp.Report(row_1, row_2)
Problem: This works fine when I know how many channels are present, but my channel list is dynamic. I am thinking of using a "for" loop to generate the rows, but I am not sure how to pass these rows as arguments to the dp.Report() function. For example, if I have 10 channels, I need to pass 10 rows dynamically.
I had a similar problem and solved it as follows
Create a list to store the pages or elements of the report, such as
# Collect the report elements in one list
# (dp.Page, dp.Table and dp.Plot presumably stand in for the blocks you build).
report_pages = [dp.Page, dp.Table, dp.Plot]
At the end, just generate the report by unpacking the list into the call
# Unpack the list built above so every element is passed as its own argument.
dp.Report(*report_pages)
In your case, I think you can do the following
create a list
# Start with an empty list of report rows.
rows = []
add the rows to the list
# Add both rows, keeping their order.
rows.extend((row_1, row_2))
and then create the report with
# Unpack the list so each row is passed as a separate positional argument.
r = dp.Report(*rows)
I found this solution on datapane's GitHub and in this notebook in the last line of code.
So here is how I solved this problem.
# Build one dp.Plot per channel and pass the resulting list straight to
# dp.Select. No dynamically named variables, string-built code or exec() are
# needed, so this also works inside a function and for channel names that
# contain quote characters.
channel_graph_list = []
for channel_1_name in unique_channels:
    filtered_data = filter_the_data(source=channel_data, filter_1='channel', fv_1=channel_1_name)
    get_chart = plot_chart(
        filtered_data,
        Y_axis_1='sessions:Q',
        Y_axis_2='conversion:Q',
        chart_title='Session & Conv. Chart for ' + channel_1_name,
    )
    # The label becomes the tab title for this channel.
    channel_graph_list.append(dp.Plot(get_chart, label=channel_1_name))

# A list can be passed wherever a function expects a list argument;
# use *channel_graph_list instead when it expects separate positional arguments.
channel_graph = dp.Select(blocks=channel_graph_list, type=dp.SelectType.TABS)
Hope the above solution helps others looking to pass dynamically generated parameters into any function.
I have two dictionaries, and both contain a number of keys. I want to plot their data side by side. For example, both dictionaries have the key '1', so I want to plot the data of key 1 from both dictionaries side by side.
# Sample data: both dictionaries share the keys 1 and 2.
dict_a = {
    1: [
        10.60626299560636,
        9.808507783184758,
        9.80184985166152,
        9.820483229791137,
        9.822087257017674,
    ],
    2: [
        10.60626299560636,
        9.808507783184758,
        9.80184985166152,
        9.820483229791137,
        9.822087257017674,
    ],
}
dict_b = {
    1: [
        14.420548834522766,
        13.886147271592971,
        14.522980401561725,
        14.876615652026173,
        13.379224382776899,
    ],
    2: [
        14.650926514851816,
        13.984378530820885,
        14.566825972585173,
        16.434690726796628,
        15.24108978696146,
    ],
}
After some searching I came across the following code, but both snippets only draw the data of one dict at a time.
# One box per key of dict_a, labelled with the key.
fig, ax = plt.subplots()
ax.boxplot(dict_a.values())
ax.set_xticklabels(dict_a.keys())
Another snippet I found is the following, but it still does not give me what I want.
labels = dict_a.keys()
data = dict_a.values()
plt.boxplot(data)
# Tick positions are numbered from 1, one per label.
tick_positions = range(1, len(labels) + 1)
plt.xticks(tick_positions, labels)
plt.show()
Is there a way to get the plot I want?
Try this
# Compare the data stored under one key in both dictionaries.
key = 1
values = [dict_a[key], dict_b[key]]

fig, ax = plt.subplots()
ax.boxplot(values)
# One tick label per dictionary, in the same order as ``values``.
ax.set_xticklabels(['dict_a', 'dict_b'])
ax.set_title('value: %s' % key)
I am trying to produce a box plot using matplotlib with data from nested dictionaries. Below is a rough outline of the structure of dictionary in question.
m_data = {scenario: {variable: {'model_name': value, 'model_name': value, ...}}}
One issue is that I want to look at the change in the models output between the two different scenarios ( scenario 1 [VAR1] - scenario 2 [VAR2]) and then plot this difference in a box plot.
I have managed to do this, however, I want to be able to label the outliers with the model name. My current method separates the keys from the values, therefore the outlier data point has no name associated with it anymore.
# BOXPLOT
# Per-model values for each scenario/variable, still keyed by model name.
future_rain_by_model = m_data[FUTURE_SCENARIO][VAR1]
future_temp_by_model = m_data[FUTURE_SCENARIO][VAR2]
past_rain_by_model = m_data['historical'][VAR1]
past_temp_by_model = m_data['historical'][VAR2]

# plain value lists, one entry per model
future_rain = list(future_rain_by_model.values())
future_temp = list(future_temp_by_model.values())
past_rain = list(past_rain_by_model.values())
past_temp = list(past_temp_by_model.values())

# Change (future - historical) per model. The two scenarios are matched on the
# model name rather than on list position, so a different key order or a model
# missing from one scenario cannot pair values of two different models.
# Keeping the name next to each difference also lets an outlier be traced back
# to its model.
rain_change = {
    model: future_value - past_rain_by_model[model]
    for model, future_value in future_rain_by_model.items()
    if model in past_rain_by_model
}
temp_change = {
    model: future_value - past_temp_by_model[model]
    for model, future_value in future_temp_by_model.items()
    if model in past_temp_by_model
}

# final data for plotting
bx_plt_rain = list(rain_change.values())
bx_plt_temp = list(temp_change.values())

# colour outliers red
c = 'red'
outlier_col = {'flierprops': dict(color=c, markeredgecolor=c)}

# plot
bp = plt.boxplot(
    bx_plt_rain,
    patch_artist=True,
    showmeans=True,
    vert=False,
    meanline=True,
    **outlier_col,
)
bp['boxes'][0].set(facecolor='lightgrey')
plt.show()
If anyone knows of a workaround for this I would be extremely grateful.
As a bit of a hack you could create a function that looks through the dict for the outlier value and returns the key.
def outlier_name(outlier_val, inner_dict):
    """Return the first key of ``inner_dict`` whose value equals ``outlier_val``.

    Args:
        outlier_val: The value to look for (compared with ``==``).
        inner_dict: Mapping of name -> value to search, in iteration order.

    Returns:
        The first matching key, or ``None`` when no value matches.
    """
    matching_keys = (
        key for key, value in inner_dict.items() if value == outlier_val
    )
    # Stop at the first hit; fall back to None when nothing matches.
    return next(matching_keys, None)
This could be pretty intensive if your data sets are large.
I need to display two separate charts side by side including their legends in Jupyterlab and the only way I managed to do that was using hconcat.
I've gotten this far:
However even with .resolve_legend(color='independent') I get the entries from both charts displayed in both legends at the top - which is mighty confusing.
The result should look like this:
How can I remove the unwanted legend entries?
Or, if anyone knows a good alternative way to show two charts side by side in a single JupyterLab cell, I would be happy to take a different route.
My code looks like this:
import altair as alt
import pandas as pd
from altair.expr import datum
# Load the sample data shared by both charts.
df_test = pd.read_csv("test_df.csv")


def _normalized_grade_bar(metric, grade_domain, legend_title):
    """Build the normalized stacked bar chart of ``df_test`` for one metric.

    ``grade_domain`` lists the grade labels in colour order (green, orange,
    red) and ``legend_title`` is the title shown on the chart's legend.
    """
    grade_colour = alt.Color(
        'grade:N',
        sort=alt.EncodingSortField('sort:Q', order='ascending'),
        scale=alt.Scale(domain=grade_domain, range=['#0cce6b', '#ffa400', '#ff4e42']),
        legend=alt.Legend(title=legend_title, orient='top'),
    )
    bars = alt.Chart(df_test).mark_bar().encode(
        x=alt.X('counts:Q', stack="normalize", axis=None),
        y=alt.Y('category:N', sort=['A', 'B', 'C'], title=None),
        color=grade_colour,
        order='sort:Q',
        tooltip=['category:N', 'grade:N', 'counts:Q'],
    )
    # Keep only the rows that belong to the requested metric.
    return bars.transform_filter(datum.metric == metric).properties(height=50, width=150)


chart_m1 = _normalized_grade_bar(
    'metric1', ['good <10', 'average 10-20', 'bad >20'], "Metric1"
)
chart_m2 = _normalized_grade_bar(
    'metric2', ['good <100', 'average 100-350', 'bad >350'], "Metric2"
)

alt.hconcat(chart_m1, chart_m2).resolve_legend(color='independent').configure_view(stroke=None)
The test_df.csv I used is this:
category,metric,sort,grade,counts
A,metric1,1,good <10,345
B,metric1,1,good <10,123
C,metric1,1,good <10,567
A,metric1,2,average 10-20,567
B,metric1,2,average 10-20,678
C,metric1,2,average 10-20,789
A,metric1,3,bad >20,900
B,metric1,3,bad >20,1011
C,metric1,3,bad >20,1122
A,metric2,1,good <100,1122
B,metric2,1,good <100,1011
C,metric2,1,good <100,900
A,metric2,2,average 100-350,789
B,metric2,2,average 100-350,678
C,metric2,2,average 100-350,567
A,metric2,3,bad >350,567
B,metric2,3,bad >350,345
C,metric2,3,bad >350,123
Use resolve_scale(color='independent')
# Place the charts side by side, giving each its own colour scale.
side_by_side = alt.hconcat(chart_m1, chart_m2)
side_by_side.resolve_scale(color='independent').configure_view(stroke=None)
More information at https://altair-viz.github.io/user_guide/scale_resolve.html