Creating new df from series of widget boxes - python

I have created an "input form" with several ipywidget boxes. I want to be able to reference all the values to create a new dataframe.
I'm currently doing this in a horrible way.
# First row: child 0 of every VBox seeds the frame and fixes the column names.
portfolio_df = pd.DataFrame(
    [[VBox1.children[0].value, VBox2.children[0].value,
      VBox3.children[0].value, VBox4.children[0].value]],
    columns=['Product Name', 'Units', 'Price', 'Invested Amount'])
# Every further row reads the SAME child index from all four VBoxes and is
# appended at the next free integer label (len(portfolio_df)).
row_2 = [VBox1.children[1].value, VBox2.children[1].value, VBox3.children[1].value, VBox4.children[1].value]
portfolio_df.loc[len(portfolio_df)] = row_2
row_3 = [VBox1.children[2].value, VBox2.children[2].value, VBox3.children[2].value, VBox4.children[2].value]
portfolio_df.loc[len(portfolio_df)] = row_3
row_4 = [VBox1.children[3].value, VBox2.children[3].value, VBox3.children[3].value, VBox4.children[3].value]
portfolio_df.loc[len(portfolio_df)] = row_4
and so on up to row 23 in this instance! (The length will vary, up to the number of children within a VBox.)
I suspect I can do this more pythonically using a for loop, but I can't figure it out.
Full code as per requests (I've edited columns so my live data is different but this is exact replica of the set up)
import pandas as pd
import numpy as np
import datetime as dt
import ipywidgets as ipw
from ipywidgets import *
# Dummy product table: 24 rows of random numbers, then cast to strings.
barrier_list = pd.DataFrame(np.random.randn(24, 4),
                            columns=('Product Name', 'ISIN', 'A', 'B'))
barrier_list = barrier_list.astype(str)
row_count = len(barrier_list)

# Column 1: one box per table row, labelled with that row's 'ISIN' value and
# pre-filled with its 'Product Name' value.
dd_list = []
for isin, product_name in zip(barrier_list['ISIN'], barrier_list['Product Name']):
    dropdown = ipw.FloatText(description=isin,
                             value=product_name,
                             disabled=False,
                             layout={'width': '350px'})
    # Let the description take as much width as its text needs.
    dropdown.style.description_width = 'initial'
    dd_list.append(dropdown)

# Columns 2-4: one zero-initialised numeric input per table row.
dd_list1 = [ipw.FloatText(description='Units', value=0, layout={'width': '200px'})
            for _ in range(row_count)]
dd_list2 = [ipw.FloatText(description='Price', value=0, layout={'width': '200px'})
            for _ in range(row_count)]
dd_list3 = [ipw.FloatText(description='Value', value=0, layout={'width': '200px'})
            for _ in range(row_count)]

# Stack each column vertically, then lay the four columns side by side.
VBox1 = ipw.VBox(dd_list)
VBox2 = ipw.VBox(dd_list1)
VBox3 = ipw.VBox(dd_list2)
VBox4 = ipw.VBox(dd_list3)
# Built through the `ipw` alias, like every other widget in this snippet.
HBox = ipw.HBox([VBox1, VBox2, VBox3, VBox4])

I solved this one by looping through the VBoxes one by one and then concatenating the resulting dataframes into one main dataframe.
# Build one single-column frame per VBox by walking its children in order.
# Each frame gets a default 0..n-1 index, so they line up row by row without
# any reset_index call. (DataFrame.append, used originally, no longer exists
# in pandas 2.x.)
product_df = pd.DataFrame(
    {'Product Name': [child.value for child in VBox1.children]})
unit_df = pd.DataFrame(
    {'Units': [child.value for child in VBox2.children]})
price_df = pd.DataFrame(
    {'Price': [child.value for child in VBox3.children]})
value_df = pd.DataFrame(
    {'Value': [child.value for child in VBox4.children]})

# Concatenate the four columns side by side into the final frame.
df_list = [product_df, unit_df, price_df, value_df]
portfolio_df = pd.concat(df_list, axis=1)
portfolio_df

Related

Plotly scatter_mapbox python: How to update the displayed scatter_mapbox

I want to plot OpenStreetMap points in a scatter_mapbox. Plotting the data is no problem.
But when I try to update the data, the new data is not shown in the plot.
First, when I call the .show function again, the new result is displayed in a new Firefox tab. How can I get the update in the same window?
I want to create dropdown menus and input boxes for the customer, and when they change the data, it should be shown in the same window.
Improvements to the rest of the code would also be nice to have.
`
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from osm.osm_reader import OSMHandler, calcDistanceBetweenTwoGPS
# Read the OSM extract; the handler's osm_ways / osm_nodes / traffic_nodes
# attributes are turned into data frames below.
osmhandler = OSMHandler()
osmhandler.apply_file("data/osm/hamburg.osm")

data_colnames_ways = ['type', 'id', 'nodeId', 'version', 'visible', 'name', 'ts', 'uid',
                      'user', 'chgset']
data_colnames_nodes = ['type', 'id', 'version', 'visible', 'ts', 'uid',
                       'user', 'chgset', 'lat', 'lon']
sort_keys = ['type', 'id', 'ts']

df_osm_ways = pd.DataFrame(
    osmhandler.osm_ways, columns=data_colnames_ways).sort_values(by=sort_keys)
df_osm_nodes = pd.DataFrame(
    osmhandler.osm_nodes, columns=data_colnames_nodes).sort_values(by=sort_keys)
df_traffic_nodes = pd.DataFrame(
    osmhandler.traffic_nodes, columns=data_colnames_nodes).sort_values(by=sort_keys)

from accident_atlas.csv_reader import CsvReader

# Load accident records, limited to the lat/lon bounding box of the OSM nodes.
csv_reader = CsvReader()
csv_reader.set_data_path("data/aatlas/Unfallorte2021_LinRef.csv")
lat_bounds = (min(df_osm_nodes["lat"]), max(df_osm_nodes["lat"]))
lon_bounds = (min(df_osm_nodes["lon"]), max(df_osm_nodes["lon"]))
accident_data = csv_reader.get_data(lat_limits=lat_bounds, lon_limits=lon_bounds)

accident_data["CloseToNode"] = False
max_distance = 50
df_traffic_nodes["CloseAccidents"] = 0

# Compare every accident with every traffic node: count accidents per node
# and flag each accident that lies within max_distance of at least one node.
for acc_idx, accident in accident_data.iterrows():
    for node_idx, node in df_traffic_nodes.iterrows():
        distance = calcDistanceBetweenTwoGPS(
            accident["YGCSWGS84"], node["lat"], accident["XGCSWGS84"], node["lon"])
        if not (max_distance > distance):
            continue
        df_traffic_nodes.loc[node_idx, "CloseAccidents"] += 1
        if not accident_data["CloseToNode"][acc_idx]:
            accident_data.loc[acc_idx, "CloseToNode"] = True

df_acdata_filtered = accident_data[accident_data["CloseToNode"] == True]
df_traffic_nodes_filtered = df_traffic_nodes[df_traffic_nodes["CloseAccidents"] >= 0]

# Base layer: traffic nodes coloured by their accident count.
fig = px.scatter_mapbox(
    data_frame=df_traffic_nodes_filtered,
    lat=df_traffic_nodes_filtered["lat"],
    lon=df_traffic_nodes_filtered["lon"],
    color="CloseAccidents",
    zoom=12,
    height=800,
    size_max=50,
    hover_name="CloseAccidents",
    color_continuous_scale="bluered",
)
# Second layer: the flagged accidents, drawn in black on the same map.
fig2 = px.scatter_mapbox(
    data_frame=None,
    lat=df_acdata_filtered["YGCSWGS84"],
    lon=df_acdata_filtered["XGCSWGS84"],
    color_discrete_sequence=["Black"],
    zoom=12,
    height=800,
    size_max=50,
)
fig.add_trace(fig2.data[0])
fig.update_layout(
    title="Traffic lights on roads with number of accidents.",
    mapbox_style="open-street-map",
)

f = go.FigureWidget(fig)
f.show()
f.data[0]["legendgroup"] = "test"
f.data[1]["legendgroup"] = "test2"
# NOTE(review): this update is applied to `fig`, not to the widget `f` that was shown.
fig.update_traces(
    lat=fig.data[1].lat[-1],
    lon=fig.data[1].lon[-1],
    selector=dict(legendgroup='test'),
)
print("Test")
`
I tried to plot data to a scatter_mapbox and change the data after the show.

How to overwrite Python Jupyter display output

I have created a small form with ipywidgets. The sample code can be run in Jupyter or Google colab.
Each time the form is filled and the button is clicked a row gets added to a dataframe. Subsequently the dataframe gets displayed.
My problem is that the output displays the new updated dataframe on top of the old one. What I want is that the new display output overwrites the old one. See image description here.
import ipywidgets as widgets
from ipywidgets import HBox, Label
from ipywidgets import Layout, Button, Box, FloatText, Textarea, Dropdown, Label, IntSlider
import time
import pandas as pd
# Create the (initially empty) frame that collects the submitted rows.
df = pd.DataFrame(columns=['Dropdown_column', 'Float_column'])
df

# Layouts: form rows spread their children apart, the button row centres them.
form_item_layout = Layout(display='flex', flex_flow='row', justify_content='space-between')
button_item_layout = Layout(display='flex', flex_flow='row', justify_content='center', padding='5%')
# Dropdown item: its latest selection is mirrored into a module-level variable.
drop_down_input = 'Dropdown_input_1'
drop_down = widgets.Dropdown(options=[
    ('Dropdown_input_1', 'Dropdown_input_1'),
    ('Dropdown_input_2', 'Dropdown_input_2'),
    ('Dropdown_input_3', 'Dropdown_input_3'),
])


def dropdown_handler(change):
    """Echo the newly selected dropdown value and remember it globally."""
    global drop_down_input
    selected = change.new
    print('\r', 'Dropdown: ' + str(selected), end='')
    drop_down_input = selected


drop_down.observe(dropdown_handler, names='value')

# FloatText item: handled the same way as the dropdown.
float_input = 0
FloatText = widgets.FloatText()


def IntText_handler(change):
    """Echo the newly entered float value and remember it globally."""
    global float_input
    entered = change.new
    print('\r', 'Float text:' + str(entered), end='')
    float_input = entered


FloatText.observe(IntText_handler, names='value')
# Button
button = widgets.Button(description='Add row to dataframe')
out = widgets.Output()


def on_button_clicked(b):
    """Append the current form values to `df` and render it inside `out`.

    `b` is the clicked button instance passed in by `on_click`; it is unused.
    """
    global df
    # Brief visual feedback on the button before the row is added.
    button.description = 'Row added'
    time.sleep(1)
    with out:
        new_row = {'Dropdown_column': drop_down_input, 'Float_column': float_input}
        # DataFrame.append no longer exists in pandas 2.x; pd.concat with a
        # one-row frame is the supported way to add the row.
        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
        button.description = 'Add row to dataframe'
        # Nothing is cleared here, so each click adds another rendering to `out`.
        display(df)


button.on_click(on_button_clicked)
# Form items: two labelled input rows followed by the centred button row.
dropdown_row = Box([Label(value='Dropdown'), drop_down], layout=form_item_layout)
float_row = Box([Label(value='FloatText'), FloatText], layout=form_item_layout)
button_row = Box([Label(value=''), button], layout=button_item_layout)
form_items = [dropdown_row, float_row, button_row]

# Outer container: a bordered column holding the rows above.
form_layout = Layout(
    display='flex',
    flex_flow='column',
    border='solid 1px',
    align_items='stretch',
    width='30%',
    padding='1%',
)
form = Box(form_items, layout=form_layout)

display(form)
display(out)
I have tried using the print() function in combination with '\r' and changing the # Button part of my code.
Change:
display(df)
to
print('\r',str(df), end='')
or
print(str(df), end='\r')
But this does not work either.
Does somebody have any idea what to do?
\r works only for a single line of plain text, but df is not displayed as plain text (and it is not a single line); it is rendered as HTML.
You have to use out.clear_output() to remove previous content.
with out:
    new_row = {'Dropdown_column': drop_down_input, 'Float_column': float_input}
    # DataFrame.append no longer exists in pandas 2.x; use pd.concat instead.
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    button.description = 'Add row to dataframe'
    out.clear_output()  # <--- remove the previously displayed frame first
    display(df)
You can see more about out.clear_output() in documentation:
Output widgets: leveraging Jupyter’s display system

Bokeh soo slow when updating plot in the browser

I am trying to create a dashboard in bokeh. I am fairly new to bokeh. The plots work alright but when I try to update it using bokeh server, it gets very slow, it takes more than a minute to update the plots.
I don't know if I'm doing anything wrong. Below is the code I'm using:
import pandas as pd
from bokeh.io import curdoc
from bokeh.plotting import figure
from bokeh.models import (
Div, SingleIntervalTicker, DatetimeTickFormatter, NumeralTickFormatter, DateRangeSlider, ColumnDataSource
)
from bokeh.layouts import layout
def _get_data(path, name):
df = pd.read_csv(path)
df.drop(columns='Province/State', inplace=True)
df.rename(columns={'Country/Region': 'country', 'Lat': 'lat', 'Long': 'long'}, inplace=True)
df = df.melt(var_name='date', value_name=name, id_vars=['country', 'lat', 'long'])
df = df.groupby(by=['country', 'date'], as_index=False, sort=False, dropna=False).sum()
df['id'] = df.country + df.date
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%y', infer_datetime_format=True)
return df
def _merged_data():
    """Return confirmed, deaths and recovered figures joined into one frame.

    Each CSV is loaded with `_get_data`; deaths and recovered are joined onto
    the confirmed frame through the `id` key (validated as one-to-one), and
    the key column is dropped from the result.
    """
    confirmed = _get_data('data/time_series_covid19_confirmed_global.csv', 'confirmed')
    deaths = _get_data('data/time_series_covid19_deaths_global.csv', 'deaths')
    recovered = _get_data('data/time_series_covid19_recovered_global.csv', 'recovered')

    merged = confirmed
    for extra, column in ((deaths, 'deaths'), (recovered, 'recovered')):
        merged = merged.merge(extra[['id', column]], on='id', validate='1:1')
    return merged.drop(columns='id')
def line_fig(label, interval, **kwargs):
    """Create a dark-styled 400x250 figure with a date x-axis.

    Args:
        label: Text used as the y-axis label.
        interval: Tick spacing handed to the y-axis SingleIntervalTicker.
        **kwargs: Extra keyword arguments forwarded to `figure`.

    Returns:
        The configured figure, with no toolbar and no tools.
    """
    backdrop = '#222222'
    line_grey = '#5c5c5c'
    text_grey = '#bdbdbd'

    fig = figure(
        plot_width=400,
        plot_height=250,
        background_fill_color=backdrop,
        y_axis_label=label,
        border_fill_color=backdrop,
        outline_line_color=backdrop,
        **kwargs
    )

    # Figure chrome: hide the toolbar and remove all tools.
    fig.toolbar_location = None
    fig.tools = []

    # Styling shared by both axes.
    axes = fig.axis
    axes.major_label_text_color = text_grey
    axes.major_tick_line_color = line_grey
    axes.major_tick_in = 0
    axes.minor_tick_line_color = None
    axes.axis_label_text_color = text_grey
    axes.axis_line_color = line_grey

    # Both grids get the same faint dashed lines.
    for grid in (fig.xgrid, fig.ygrid):
        grid.grid_line_color = line_grey
        grid.grid_line_width = 1
        grid.grid_line_alpha = 0.4
        grid.grid_line_dash = [3, 9]

    # Axis-specific tick formatting.
    fig.xaxis.formatter = DatetimeTickFormatter(months="%b %Y")
    fig.yaxis.ticker = SingleIntervalTicker(interval=interval)
    fig.yaxis.formatter = NumeralTickFormatter(format='0a')
    return fig
# Full data set, plus the ColumnDataSource that is re-filled on slider changes.
source = _merged_data()
datacache = ColumnDataSource(source)

# The slider spans the full date range present in the data.
start_date = datacache.data['date'].min()
end_date = datacache.data['date'].max()
date_slider = DateRangeSlider(
    start=start_date,
    end=end_date,
    value=(start_date, end_date),
    step=1,
    show_value=False,
    default_size=400,
)


def date_slider_callback(attr, old, new):
    """Replace datacache's data with the rows inside the selected date range.

    The callback arguments are ignored; the range is read from the slider.
    """
    range_start = pd.to_datetime(date_slider.value_as_date[0], infer_datetime_format=True)
    range_end = pd.to_datetime(date_slider.value_as_date[1], infer_datetime_format=True)
    in_range = (source['date'] >= range_start) & (source['date'] <= range_end)
    datacache.data = ColumnDataSource.from_df(source[in_range])


date_slider.on_change('value', date_slider_callback)

# NOTE(review): make_chart is not defined in the code shown here; confirm where it comes from.
confirmed_chart = make_chart('Confirmed Cases', 'date', 'confirmed', interval=10000000.00, color='#D83020')
death_chart = make_chart('Confirmed Deaths', 'date', 'deaths', interval=300000.00, color='#eaeaea')
recovery_chart = make_chart('Confirmed Recovery', 'date', 'recovered', interval=10000000.00, color='#35ac46')

# One widget or chart per row, slider on top.
lay_out = layout(
    children=[
        [date_slider],
        [confirmed_chart],
        [death_chart],
        [recovery_chart],
    ]
)
document = curdoc()
document.add_root(lay_out)
Right now, I don't know what I'm doing wrong, maybe there's some kind of best practice I should follow, I don't really know what's making the plot so slow.

DashTable not updating with DatePickerSingle input in Callback

I am pretty new to dash and I have tried to read as much as I can to understand what the issue might be. In a nutshell I have a single datepicker which is an input to the DataTable and Graph callback. The graph callback is working fine so it is just the DataTable which is causing problems. I also tried the single input to multiple output callback but didnt work. My code is as below:
app = JupyterDash()
folder = os.getcwd()
# parse_dates is a read_csv option; it must not be passed to Path().
portfolio_returns_table = pd.read_csv(
    Path(folder, 'portfolioreturns_maria.csv'), parse_dates=[0])
portfolio_returns_table = portfolio_returns_table.set_index('Unnamed: 0')
name_portfolioID_table = pd.read_csv(Path(folder, 'name_portfolioID.csv'))

# Calculate portfolio cumulative returns
df_cumret = (portfolio_returns_table + 1).cumprod().round(5)
df_cumret.index = pd.to_datetime(df_cumret.index)

# Layout: date picker, a text container, the leaderboard table and the graph.
app.layout = html.Div(html.Div([
    dcc.DatePickerSingle(
        id='my-date-picker-single',
        min_date_allowed=dt.date(df_cumret.index.min()),
        max_date_allowed=dt.date(df_cumret.index.max()),
        initial_visible_month=dt.date(df_cumret.index.max()),
        date=dt.date(df_cumret.index.max()),
        display_format='Y-MM-DD',
        clearable=True),
    html.Div(id='output-container-date-picker-single'),
    html.Div(dash_table.DataTable(
        id='data_table',
        # DataTable rows are a list of dicts, so "no rows yet" is an empty
        # list rather than an empty dict.
        data=[],
        fixed_rows={'headers': True},
        style_cell={'textAlign': 'left'},
        style_table={'height': 400})),
    html.Div(dcc.Graph('my_graph')),
]))
@app.callback([Output('data_table', 'data'), Output('data_table', 'columns')],
              [Input('my-date-picker-single', 'date')])
def update_leader_table(date):
    """Build the top-10 leaderboard rows and columns for the picked date.

    Args:
        date: Value of the date picker's `date` property; None when the
            (clearable) picker has been emptied.

    Returns:
        Tuple (data, columns) for the DataTable: a list of row dicts and a
        list of column definitions.
    """
    if date is None:
        # The picker was cleared: leave the table as it is instead of failing.
        from dash.exceptions import PreventUpdate
        raise PreventUpdate
    # Get data for the selected date and transpose
    df_T = df_cumret.loc[[date]].T
    # Sort the table to reveal the top leaders
    df_Top = df_T.sort_values(df_T.columns[0], ascending=False)[:10]
    # Convert the index to an integer
    df_Top.index = df_Top.index.astype(int)
    # Generate the leaderboard for the given date
    df_leader = pd.merge(df_Top, name_portfolioID_table,
                         left_index=True, right_index=True, how='left')
    # Create the rank column
    df_leader['Rank'] = range(1, len(df_leader) + 1)
    # assumes name_portfolioID_table contributes exactly one column — TODO confirm
    df_leader.columns = ['Cum Return', 'Investor', 'Rank']
    df_leader.reset_index(drop=True, inplace=True)
    data = df_leader.to_dict('records')
    columns = [{'id': c, 'name': c, "selectable": True}
               for c in df_leader.columns]
    return (data, columns)
# Callback to link the calendar to the graph
@app.callback(Output('my_graph', 'figure'), [Input('my-date-picker-single', 'date')])
def update_graph(date):
    """Plot cumulative returns up to `date` for the ten leaders on that date.

    Args:
        date: Value of the date picker's `date` property; None when the
            (clearable) picker has been emptied.

    Returns:
        A plotly express line figure with one line per leading portfolio.
    """
    if date is None:
        # The picker was cleared: keep the current figure (the title below
        # would otherwise fail on str + None).
        from dash.exceptions import PreventUpdate
        raise PreventUpdate
    # Date filter
    df_T = df_cumret.loc[:date].T
    # Sort the table to reveal the top leaders & filter for leaderboard
    df_Top = df_T.sort_values(df_T.columns[-1], ascending=False)[:10]
    # Transpose to have date as index
    df_top_graph = df_Top.T
    # Set the columns as an int
    df_top_graph.columns = df_top_graph.columns.astype(int)
    # Rename columns
    df_top_graph.rename(
        columns=dict(zip(name_portfolioID_table.index, name_portfolioID_table.name)),
        inplace=True)
    # Generate graph
    fig = px.line(df_top_graph, x=df_top_graph.index, y=df_top_graph.columns,
                  title='ETF LEADERBOARD PERFORMANCE: ' + date,
                  labels={'Unnamed: 0': 'Date', 'value': 'Cumulative Returns'})
    fig.update_layout(hovermode='x unified')
    fig.update_traces(hovertemplate='Return: %{y} <br>Date: %{x}')
    fig.update_layout(legend_title_text='Investor')
    return fig
# Start the app inline only when this file is executed directly.
if __name__ == '__main__':
    app.run_server(
        mode='inline',
        debug=True,
        port=65398,
    )

How to update Pretext in Bokeh with a Select tool

I have a bokeh plot that updates my plot through a select tool. The select tool contains subjects that update the plot where the values are x='Polarity'and y='Subjectivity'.
Here is a dummy data for what I want:
import pandas as pd
import random
list_type = [
    'All', 'Compliment', 'Sport', 'Remaining', 'Finance', 'Infrastructure',
    'Complaint', 'Authority', 'Danger', 'Health', 'English',
]

# One 110-row frame per subject with random Polarity / Subjectivity values,
# stacked on top of each other (each frame keeps its own 0..109 index).
subject_frames = []
for subject in list_type:
    subject_frames.append(pd.DataFrame({
        'Subject': [subject] * 110,
        'Polarity': [random.random() for _ in range(110)],
        'Subjectivity': [random.random() for _ in range(110)],
    }))
df = pd.concat(subject_frames, axis=0)
My code for updating the plot looks like this:
# 'All' is the catch-all choice. The dummy data also uses 'All' as a Subject
# value, so duplicates are dropped (dict.fromkeys keeps first-seen order).
options = list(dict.fromkeys(['All', *df['Subject'].unique().tolist()]))

source = ColumnDataSource(df)
p = figure()
r = p.circle(x='Polarity', y='Subjectivity', source=source)
select = Select(title="Subject", options=options, value="All")
output_notebook()


def update_plot(attr, old, new):
    """Show only the rows of the selected Subject ('All' shows every row).

    The callback arguments are ignored; the choice is read from `select`.
    """
    if select.value == "All":
        df_filter = df.copy()
    else:
        df_filter = df[df['Subject'] == select.value]
    source1 = ColumnDataSource(df_filter)
    r.data_source.data = source1.data


select.on_change('value', update_plot)
layout = column(row(select, width=400), p)
#show(layout)
curdoc().add_root(layout)
I want to add a 'PreText' widget that shows df.describe() and updates together with the plot through the select tool. I tried this by adding the following code, but it displays nothing:
# Text pane intended to show summary statistics below the plot; starts empty.
stats = PreText(text='', width=500)
t1 = select.value
# NOTE(review): update_plot in this file takes (attr, old, new); this function's (df, t1) signature differs — confirm what on_change passes to its callbacks.
def update_stats(df, t1):
# NOTE(review): the dummy df above only has Subject / Polarity / Subjectivity columns, so neither t1 nor select.value+'_returns' appears to name a column — confirm.
stats.text = str(df[[t1, select.value+'_returns']].describe())
# Registers both callbacks for changes of the select widget's value.
select.on_change('value', update_plot, update_stats)
layout = column(row(select, width=400), p, stats)
curdoc().add_root(layout)
show(layout)
Anyone know a solution? Thanks!
You don't need two separate functions for that. You can simply extend your original function update_plot with a statement that sets the PreText content: stats.text = str(df_filter.describe()). The function will look as below -
def update_plot(attr, old, new):
    """Refresh the glyph data and the statistics text for the chosen Subject.

    The callback arguments are ignored; the choice is read from `select`.
    'All' keeps every row, any other value keeps only the matching rows.
    """
    chosen = select.value
    df_filter = df.copy() if chosen == "All" else df[df['Subject'] == chosen]
    r.data_source.data = ColumnDataSource(df_filter).data
    stats.text = str(df_filter.describe())
Entire code
from bokeh.models.widgets import Select, PreText
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, curdoc
from bokeh.plotting import figure, show
import pandas as pd
import random
list_type = ['All', 'Compliment', 'Sport', 'Remaining', 'Finance', 'Infrastructure', 'Complaint', 'Authority',
             'Danger', 'Health', 'English']
# Dummy data: 110 random (Polarity, Subjectivity) rows for every subject.
df = pd.concat([pd.DataFrame({'Subject': [list_type[i] for t in range(110)],
                              'Polarity': [random.random() for t in range(110)],
                              'Subjectivity': [random.random() for t in range(110)]})
                for i in range(len(list_type))], axis=0)

# 'All' is the catch-all choice. The dummy data also uses 'All' as a Subject
# value, so duplicates are dropped (dict.fromkeys keeps first-seen order).
options = list(dict.fromkeys(['All', *df['Subject'].unique().tolist()]))

source = ColumnDataSource(df)
p = figure()
r = p.circle(x='Polarity', y='Subjectivity', source=source)
select = Select(title="Subject", options=options, value="All")
#output_notebook()
# Start with the statistics of the full data set, matching the initial "All".
stats = PreText(text=str(df.describe()), width=500)


def update_plot(attr, old, new):
    """Refresh the glyph data and the statistics text for the chosen Subject.

    The callback arguments are ignored; the choice is read from `select`.
    """
    if select.value == "All":
        df_filter = df.copy()
    else:
        df_filter = df[df['Subject'] == select.value]
    source1 = ColumnDataSource(df_filter)
    r.data_source.data = source1.data
    stats.text = str(df_filter.describe())


select.on_change('value', update_plot)
layout = column(row(select, width=400), p, stats)
#show(layout)
curdoc().add_root(layout)

Categories