Multiselect/Selectbox doesn’t wait after first selection - Streamlit - python

All,
I have used multiselect successfully before, but in this specific example that I was trying as a proof of concept, the behavior is very strange. Essentially, I am trying to use multiselect to make the app wait for user input at an intermediate step. However, multiselect does not wait for me to finish selecting: as soon as I select one item, the script runs on and doesn't even execute correctly. Can someone guide me as to what I am doing wrong? I am on version 0.82.
I also tested the same using selectbox and am seeing the same behavior.
So, here is what I have:
import streamlit as st
import pandas as pd

def basic_skeleton() -> tuple:
    """Prepare the basic UI for the app"""
    st.sidebar.title('User Inputs')
    beta_expander = st.sidebar.beta_expander("Upload csv")
    with beta_expander:
        user_file_path = st.sidebar.file_uploader(
            label='Random Data',
            type='csv'
        )
    return user_file_path

def get_filtered_dataframe(df) -> pd.DataFrame:
    columns_list = df.columns
    with st.form(key='Selecting Columns'):
        columns_to_aggregate = st.selectbox(
            label='Select columns to summarize',
            options=columns_list
        )
        submit_button = st.form_submit_button(label='Submit')
        if submit_button:
            df1 = df[columns_to_aggregate]
            return df1

def main():
    """Central wrapper to control the UI"""
    # add title
    st.header('Streamlit Testing')
    # add high level site inputs
    user_file_path = basic_skeleton()
    load = st.sidebar.button(label='Load Data')
    if load:
        df = pd.read_csv(user_file_path)
        st.dataframe(df)
        clean_df = get_filtered_dataframe(df)
        run = st.button("Aggregate Selected columns")
        if run:
            result = clean_df.describe(include='all')
            st.dataframe(result)

main()

A user on the Streamlit community forum helped answer this question. I am posting the answer here so that anybody who comes looking can find it:
import streamlit as st
import pandas as pd

def basic_skeleton() -> tuple:
    """Prepare the basic UI for the app"""
    st.sidebar.title('User Inputs')
    beta_expander = st.sidebar.beta_expander("Upload csv")
    with beta_expander:
        user_file_path = st.sidebar.file_uploader(
            label='Random Data',
            type='csv'
        )
    return user_file_path

def get_filtered_dataframe(df):
    columns_list = df.columns
    with st.form(key='Selecting Columns'):
        columns_to_aggregate = st.multiselect(
            label='Select columns to summarize',
            options=columns_list
        )
        submit_button = st.form_submit_button(label='Submit')
        if submit_button:
            df1 = df[columns_to_aggregate]
            return df1

def main():
    """Central wrapper to control the UI"""
    # add title
    st.header('Streamlit Testing')
    # add high level site inputs
    user_file_path = basic_skeleton()
    if user_file_path:
        load = st.sidebar.checkbox(label='Load Data')
        if load:
            df = pd.read_csv(user_file_path)
            st.dataframe(df)
            clean_df = get_filtered_dataframe(df)
            if clean_df is not None:
                result = clean_df.describe()
                st.dataframe(result)

main()
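For anyone wondering why the original version didn't wait: st.button only returns True on the single rerun triggered by the click, so the next interaction (for example submitting the form) reruns the script with the button back at False and everything gated behind it disappears, while st.checkbox keeps its state across reruns. A minimal sketch illustrating the difference (not part of the original app):

import streamlit as st

# st.button is True only on the rerun caused by the click itself;
# interacting with any other widget reruns the script with it back to False.
clicked = st.button("Load Data (button)")
st.write("button value on this rerun:", clicked)

# st.checkbox persists its state, so widgets gated behind it stay on screen
# across reruns while the user keeps interacting with the app.
checked = st.checkbox("Load Data (checkbox)")
st.write("checkbox value on this rerun:", checked)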

Related

How do I compile and bring in multiple outputs from the same worker?

I'm developing a kubeflow pipeline that takes in a data set, splits that dataset into two different datasets based on a filter inside the code, and outputs both datasets. That function looks like the following:
from kfp.v2.dsl import Dataset, Input, OutputPath  # import assumed from the type annotations below

def merge_promo_sales(input_data: Input[Dataset],
                      output_data_hd: OutputPath("Dataset"),
                      output_data_shop: OutputPath("Dataset")):
    import pandas as pd
    pd.set_option('display.max_rows', 100)
    pd.set_option('display.max_columns', 500)
    import numpy as np
    from google.cloud import bigquery
    from utils import google_bucket

    client = bigquery.Client("gcp-sc-demand-plan-analytics")
    print("Client creating using default project: {}".format(client.project), "Pulling Data")

    query = """
    SELECT * FROM `gcp-sc-demand-plan-analytics.Modeling_Input.monthly_delivery_type_sales` a
    Left Join `gcp-sc-demand-plan-analytics.Modeling_Input.monthly_promotion` b
    on a.ship_base7 = b.item_no
    and a.oper_cntry_id = b.corp_cd
    and a.dmand_mo_yr = b.dates
    """
    query_job = client.query(
        query,
        # Location must match that of the dataset(s) referenced in the query.
        location="US",
    )  # API request - starts the query

    df = query_job.to_dataframe()
    df.drop(['corp_cd', 'item_no', 'dates'], axis=1, inplace=True)
    df.loc[:, 'promo_objective_increase_margin':] = df.loc[:, 'promo_objective_increase_margin':].fillna(0)

    items = df['ship_base7'].unique()  # was df_ in the original, presumably a typo for df
    df = df[df['ship_base7'].isin(items)]

    df_hd = df[df['location_type'] == 'home_delivery']
    df_shop = df[df['location_type'] != 'home_delivery']

    df_hd.to_pickle(output_data_hd)
    df_shop.to_pickle(output_data_shop)
That part works fine. When I try to feed those two data sets into the next function with the compiler, I hit errors.
I tried the following:
@kfp.v2.dsl.pipeline(name=PIPELINE_NAME)
def my_pipeline():
    merge_promo_sales_nl = merge_promo_sales(input_data=new_launch.output)
    rule_3_hd = rule_3(input_data=merge_promo_sales_nl.output_data_hd)
    rule_3_shop = rule_3(input_data=merge_promo_sales_nl.output_data_shop)
The error I get is the following:
AttributeError: 'ContainerOp' object has no attribute 'output_data_hd'
output_data_hd is the parameter I write that dataset out to, but apparently it's not the attribute name Kubeflow is looking for.
I just figured this out.
When a component has multiple outputs, you use the following in the pipeline definition that you compile:
rule_3_hd = rule_3(input_data = merge_promo_sales_nl.outputs['output_data_hd'])
rule_3_shop = rule_3(input_data = merge_promo_sales_nl.outputs['output_data_shop'])
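Putting that fix back into the pipeline from the question, the wiring would look roughly like this (a sketch; new_launch, rule_3, and PIPELINE_NAME are the names used in the question): single-output steps are consumed via .output, multi-output steps via .outputs['<parameter_name>'].

import kfp

@kfp.v2.dsl.pipeline(name=PIPELINE_NAME)
def my_pipeline():
    merge_promo_sales_nl = merge_promo_sales(input_data=new_launch.output)
    # Named outputs of a multi-output component are addressed through .outputs[...]
    rule_3_hd = rule_3(input_data=merge_promo_sales_nl.outputs['output_data_hd'])
    rule_3_shop = rule_3(input_data=merge_promo_sales_nl.outputs['output_data_shop'])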

Parameterized FileSelector / FileInput read excel file

I am attempting to set up a Panel app that includes a FileInput button. I cannot manage to get the file uploaded via pn.widgets.FileInput read properly.
import param
import panel as pn  # imports assumed from usage

class try_upload(param.Parameterized):
    fileselector = param.FileSelector()
    file_input_df = param.DataFrame()

    def __init__(self, **params):
        super().__init__(**params)
        self.fileselector_widget = pn.Param(self.param.fileselector),
        self.file_input_df_widget = pn.Param(self.param.file_input_df),
        self._layout = pn.Row(
            pn.Param(self, parameters=['fileselector', 'file_input_df']),
            pn.Param(widgets={'fileselector': pn.widgets.FileInput(accept='.xlsx,.xls', multiple=True)}),
            self.file_input_df_widget,
            width=1,
            sizing_mode='fixed'
        )

    @pn.depends('fileselector', watch=True)
    def _updateinput(self):
        if self.fileselector is not None:
            # decode = base64.b64decode(self.fileselector)
            # df = pd.read_excel(io.StringIO(decode.decode('utf-8')))
            # self.input_data = pd.read_excel(io.BytesIO(decode))
            # self.file_input_df = df
            self.file_input_df = self.fileselector
            self.file_input_df_widget = self.file_input_df

    @pn.depends('file_input_df', watch=True)
    def _update_input_widget(self):
        if self.file_input_df is not None:
            self.file_input_df_widget = self.file_input_df
            return pn.widgets.Tabulator(self.file_input_widget_df, width=100)

    def __panel__(self):
        return self._layout

try_upload_class = try_upload(name='upload')
pn.extension()
app = pn.Row(
    pn.Column(
        pn.Param(
            try_upload_class.param,
            widgets={'fileselector': pn.widgets.FileInput(accept='.xlsx,.xls', multiple=True)}
        )
    ),
    pn.Column(try_upload_class._updateinput)
)
app
Uploading a file with the button gives:
ValueError: DataFrame parameter 'file_input_df' value must be an instance of DataFrame, not [b'PK\x03\x04\x14\x00\x06\x00\x08\x00\x00\x00!\x00b\xee\x9dh^\x01\x00\x00\x90\x04\x00\x00\x13\x00\x08\x02[Content_Types].xml ... (raw xlsx bytes truncated)
which is essentially whatever Tornado sends back from the backend, if I am not mistaken. Attempting to decode it with base64 also fails, and using [0] to get the object out of the list does not work. Thanks for any help in advance.
I expected the file to be read into the param.DataFrame; I can't decode it properly and the various methods I have tried fail.
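For reference, the error arises because with multiple=True the FileInput value is a list of raw bytes (one entry per uploaded file), which cannot be assigned directly to a param.DataFrame. A minimal sketch, assuming the usual approach of wrapping the bytes in io.BytesIO and reading them with pd.read_excel (this is not the original class-based app):

import io
import pandas as pd
import panel as pn

pn.extension('tabulator')

file_input = pn.widgets.FileInput(accept='.xlsx,.xls', multiple=True)
table = pn.widgets.Tabulator(pd.DataFrame(), width=400)

def _load(event):
    # With multiple=True the value is a list of bytes objects, one per file.
    if file_input.value:
        raw = file_input.value[0]                     # first uploaded file
        table.value = pd.read_excel(io.BytesIO(raw))  # needs openpyxl for .xlsx

file_input.param.watch(_load, 'value')

pn.Column(file_input, table).servable()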

Streamlit AgGrid, output table does not update values after being changed

I am building a table that writes the values of an edited output DataFrame into a csv file (or whatever output is defined).
I defined a generate_agrid(df) function that returns a grid-response object whose data attribute is a pd.DataFrame. When I run grid_table = generate_agrid(df), the generated grid_table contains the original df even if I modify it in the UI; I noticed this when I checked the input that my update function received.
What I want is to:
graph the data in df -> update the data in the UI -> save the new data into a csv every time I press the update button
Why does my generate_agrid method always return the initial DF used as input? How can I update it?
My code
import pandas as pd
import streamlit as st
from st_aggrid import AgGrid, GridOptionsBuilder, DataReturnMode, AgGridTheme  # imports assumed from usage
from metrics.get_metrics import get_data
from metrics.config import PATH_SAMPLES

filename: str = 'updated_sample.csv'
save_path = PATH_SAMPLES.joinpath(filename)

def generate_agrid(data: pd.DataFrame):
    gb = GridOptionsBuilder.from_dataframe(data)
    gb.configure_default_column(editable=True)  # Make columns editable
    gb.configure_pagination(paginationAutoPageSize=True)  # Add pagination
    gb.configure_side_bar()  # Add a sidebar
    gb.configure_selection('multiple', use_checkbox=True,
                           groupSelectsChildren="Group checkbox select children")  # Enable multi-row selection
    gridOptions = gb.build()

    grid_response = AgGrid(
        data,
        gridOptions=gridOptions,
        data_return_mode=DataReturnMode.AS_INPUT,
        update_on='MANUAL',  # <- Should it let me update before returning?
        fit_columns_on_grid_load=False,
        theme=AgGridTheme.STREAMLIT,  # Add theme color to the table
        enable_enterprise_modules=True,
        height=350,
        width='100%',
        reload_data=True
    )

    data = grid_response['data']
    selected = grid_response['selected_rows']
    df = pd.DataFrame(selected)  # Pass the selected rows to a new dataframe df
    return grid_response

def update(grid_table: classmethod, filename: str = 'updated_sample.csv'):
    save_path = PATH_SAMPLES.joinpath(filename)
    grid_table_df = pd.DataFrame(grid_table['data'])
    grid_table_df.to_csv(save_path, index=False)

# First data gather
df = get_data()

if __name__ == '__main__':
    # Start graphing
    grid_table = generate_agrid(df)
    # Update
    st.sidebar.button("Update", on_click=update, args=[grid_table])
Found the issue; it was just one small parameter that was enabled.
When instantiating the AgGrid I had to remove the reload_data=True parameter. With that change, everything worked as expected and the data could be successfully updated after manually editing and pressing "Update".
This is how AgGrid must be instantiated
grid_response = AgGrid(
    data,
    gridOptions=gridOptions,
    data_return_mode=DataReturnMode.AS_INPUT,
    update_on='MANUAL',
    fit_columns_on_grid_load=False,
    theme=AgGridTheme.STREAMLIT,  # Add theme color to the table
    enable_enterprise_modules=True,
    height=350,
    width='100%',
)
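For reference, a stripped-down sketch of the corrected flow (assumes the streamlit-aggrid package; the dataframe and file name here are placeholders): once reload_data is left at its default, the edited values come back through grid_response['data'].

import pandas as pd
import streamlit as st
from st_aggrid import AgGrid, GridOptionsBuilder, DataReturnMode

df = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})  # placeholder data

gb = GridOptionsBuilder.from_dataframe(df)
gb.configure_default_column(editable=True)

grid_response = AgGrid(
    df,
    gridOptions=gb.build(),
    data_return_mode=DataReturnMode.AS_INPUT,
    update_on='MANUAL',  # press Ctrl+Enter in the grid to send edits back
)

if st.button("Update"):
    # The edited values are available under the 'data' key of the response.
    pd.DataFrame(grid_response['data']).to_csv('updated_sample.csv', index=False)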

Python Dash - How to Download pandas dataframe (csv) used in the output chart

I am building a web app where I get user input -> connect to api -> clean data and generate csv -> plot the csv using Dash.
I am able to update the graph without a problem, but I am not sure how to implement a download function for the dataframe that is used for the graph. Does it require two functions (one for the graph, the other for the download button) after the @app.callback()? Or can this be done by wrapping the download function within the graph function?
Any pointers would be appreciated.
@app.callback(
    Output('dash_graph', 'figure'),
    [Input('button', 'n_clicks'),
     State('ticker_input', 'value'),
     State('start_date_input', 'date'),
     State('end_date_input', 'date'),
     State('entry_price_input', 'value'),
     State('spread_input', 'value')
     ]
)
def update_result(n_clicks, ticker_input, start_date_input, end_date_input, entry_price_input, spread_input):
    if n_clicks is None:
        return dash.no_update
    else:
        API_TOKEN = "demo"
        url = *some url*
        response = urllib.request.urlopen(url)
        eod_data = json.loads(response.read())
        eod_data_df = pd.json_normalize(eod_data)
        full_list = prep_df(eod_data_df)[0]
        date_list = prep_df(eod_data_df)[1]
        eod_data_df = prep_graph(start_date_input, end_date_input, full_list, date_list, entry_price_input, entry_price_input, spread_input)
        df = eod_data_df.copy()
        # todo: download this df here
        chart = px.bar(
            data_frame=df,
            x='date',
            y='count',
            title='Time Series Chart' + '<br>' + '<span style="font-size: 10px;">{} | {} to {} | ${:.2f} to ${:.2f} | Spread ${}</span>'.format(ticker_input, start_date_input, end_date_input, entry_price_input, entry_price_input + spread_input, spread_input),
            # title=url
        )
        return chart

# -------------------------------------------------------------------------------
if __name__ == '__main__':
    app.run_server(debug=True, dev_tools_ui=False)
Not sure how your app looks; I usually leave a button reserved for downloading data.
Do you want a separate button for it, or should it be triggered by the button you already have ('button')?
In any case, you will need to add a dcc.Download(id="download-dataframe") to your layout, and your callback should then have an output Output(component_id="download-dataframe", component_property='data'); the callback creates the csv file and returns send_file('your_file.csv').
Whether you want it on the same button or a different one determines whether you need to create a new callback or not.
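A minimal sketch of that suggestion with a separate download button (the component ids and the placeholder dataframe are assumptions, not from the original app); with a pandas dataframe, dcc.send_data_frame is a convenient alternative to writing the csv to disk and calling send_file:

from dash import Dash, dcc, html, Input, Output
import pandas as pd

app = Dash(__name__)
app.layout = html.Div([
    html.Button("Download CSV", id="download-button"),
    dcc.Download(id="download-dataframe"),
])

@app.callback(
    Output("download-dataframe", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True,
)
def download_csv(n_clicks):
    # In the real app this would be the same df used for the chart.
    df = pd.DataFrame({"date": ["2024-01-01"], "count": [1]})
    return dcc.send_data_frame(df.to_csv, "eod_data.csv", index=False)

if __name__ == "__main__":
    app.run_server(debug=True)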

How to append DataFrame data when using Streamlit

Streamlit with a DataFrame: add rows to the table and download it as csv via a button, while still showing the dataframe on screen.
Option 1: Using session_state (the best way to manage state, and it generalizes to more cases)
import pandas as pd
import streamlit as st

# "st.session_state object:", st.session_state
if "df_result" not in st.session_state:
    st.session_state['df_result'] = pd.DataFrame(columns=['h1', 'h2'])
# st.write(st.session_state)

def onAddRow():
    data = {
        'h1': "something",
        'h2': "something",
    }
    st.session_state['df_result'] = st.session_state['df_result'].append(data, ignore_index=True)

st.button("Add row", on_click=onAddRow)

@st.cache
def convert_df(df):
    return df.to_csv().encode('utf-8')

st.download_button(
    "Press to Download",
    convert_df(st.session_state.df_result),
    "file.csv",
    "text/csv",
    key='download-csv'
)

st.dataframe(st.session_state['df_result'])
Option 2: Basic append on global variables, but the view resets when the download button is used because the script reruns.
import pandas as pd
import streamlit as st

df_log = pd.DataFrame(columns=['h1', 'h2'])
df_log_process = st.dataframe(df_log)

for x in range(10):
    df_log = df_log.append(
        {
            'h1': "something",
            'h2': "something",
        }, ignore_index=True)
    df_log_process = df_log_process.dataframe(df_log)
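Note that DataFrame.append was removed in pandas 2.0; with newer pandas, the same session_state pattern from Option 1 works with pd.concat. A minimal sketch:

import pandas as pd
import streamlit as st

if "df_result" not in st.session_state:
    st.session_state["df_result"] = pd.DataFrame(columns=["h1", "h2"])

def on_add_row():
    new_row = pd.DataFrame([{"h1": "something", "h2": "something"}])
    # pd.concat replaces the removed DataFrame.append
    st.session_state["df_result"] = pd.concat(
        [st.session_state["df_result"], new_row], ignore_index=True
    )

st.button("Add row", on_click=on_add_row)
st.dataframe(st.session_state["df_result"])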
