All,
I have used multiselect successfully before, but in this specific example I was trying as a POC, the behavior is very odd. Essentially, I am trying to use multiselect to make the app wait for user input at an intermediate step. However, multiselect does not wait for me to finish selecting: as soon as I select one item, the script reruns and does not even execute correctly. Can someone guide me as to what I am doing wrong? I am on version 0.82.
I also tested the same using selectbox and am seeing the same behavior.
So, here is what I have:
import streamlit as st
import pandas as pd

def basic_skeleton() -> tuple:
    """Prepare the basic UI for the app"""
    st.sidebar.title('User Inputs')
    beta_expander = st.sidebar.beta_expander("Upload csv")
    with beta_expander:
        user_file_path = st.sidebar.file_uploader(
            label='Random Data',
            type='csv'
        )
    return user_file_path

def get_filtered_dataframe(df) -> pd.DataFrame:
    columns_list = df.columns
    with st.form(key='Selecting Columns'):
        columns_to_aggregate = st.selectbox(
            label='Select columns to summarize',
            options=columns_list
        )
        submit_button = st.form_submit_button(label='Submit')
        if submit_button:
            df1 = df[columns_to_aggregate]
            return df1

def main():
    """Central wrapper to control the UI"""
    # add title
    st.header('Streamlit Testing')
    # add high level site inputs
    user_file_path = basic_skeleton()
    load = st.sidebar.button(label='Load Data')
    if load:
        df = pd.read_csv(user_file_path)
        st.dataframe(df)
        clean_df = get_filtered_dataframe(df)
        run = st.button("Aggregate Selected columns")
        if run:
            result = clean_df.describe(include='all')
            st.dataframe(result)

main()
A user on the Streamlit community forum helped answer this question. I am posting the answer here so anybody who comes looking can find it:
import streamlit as st
import pandas as pd

def basic_skeleton() -> tuple:
    """Prepare the basic UI for the app"""
    st.sidebar.title('User Inputs')
    beta_expander = st.sidebar.beta_expander("Upload csv")
    with beta_expander:
        user_file_path = st.sidebar.file_uploader(
            label='Random Data',
            type='csv'
        )
    return user_file_path

def get_filtered_dataframe(df):
    columns_list = df.columns
    with st.form(key='Selecting Columns'):
        columns_to_aggregate = st.multiselect(
            label='Select columns to summarize',
            options=columns_list
        )
        submit_button = st.form_submit_button(label='Submit')
        if submit_button:
            df1 = df[columns_to_aggregate]
            return df1

def main():
    """Central wrapper to control the UI"""
    # add title
    st.header('Streamlit Testing')
    # add high level site inputs
    user_file_path = basic_skeleton()
    if user_file_path:
        load = st.sidebar.checkbox(label='Load Data')
        if load:
            df = pd.read_csv(user_file_path)
            st.dataframe(df)
            clean_df = get_filtered_dataframe(df)
            if clean_df is not None:
                result = clean_df.describe()
                st.dataframe(result)

main()
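Why this works (a short note, not from the original thread): st.button returns True only for the single rerun triggered by its click, so after the form's Submit triggers another rerun, load flips back to False and everything gated behind it disappears; st.checkbox keeps its value across reruns. A minimal sketch of the difference (widget labels are illustrative):

import streamlit as st

clicked = st.button("Load (button)")      # True only on the rerun right after the click
checked = st.checkbox("Load (checkbox)")  # keeps its state until unticked

st.write("button:", clicked, "checkbox:", checked)
# Interact with any other widget to trigger a rerun: `clicked` falls back
# to False, while `checked` retains whatever the user set.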
Related
I'm developing a Kubeflow pipeline that takes in a dataset, splits it into two datasets based on a filter inside the code, and outputs both. That function looks like the following:
def merge_promo_sales(input_data: Input[Dataset],
                      output_data_hd: OutputPath("Dataset"),
                      output_data_shop: OutputPath("Dataset")):
    import pandas as pd
    pd.set_option('display.max_rows', 100)
    pd.set_option('display.max_columns', 500)
    import numpy as np
    from google.cloud import bigquery
    from utils import google_bucket

    client = bigquery.Client("gcp-sc-demand-plan-analytics")
    print("Client creating using default project: {}".format(client.project), "Pulling Data")
    query = """
        SELECT * FROM `gcp-sc-demand-plan-analytics.Modeling_Input.monthly_delivery_type_sales` a
        Left Join `gcp-sc-demand-plan-analytics.Modeling_Input.monthly_promotion` b
        on a.ship_base7 = b.item_no
        and a.oper_cntry_id = b.corp_cd
        and a.dmand_mo_yr = b.dates
    """
    query_job = client.query(
        query,
        # Location must match that of the dataset(s) referenced in the query.
        location="US",
    )  # API request - starts the query
    df = query_job.to_dataframe()
    df.drop(['corp_cd', 'item_no', 'dates'], axis=1, inplace=True)
    df.loc[:, 'promo_objective_increase_margin':] = df.loc[:, 'promo_objective_increase_margin':].fillna(0)
    items = df['ship_base7'].unique()
    df = df[df['ship_base7'].isin(items)]
    df_hd = df[df['location_type'] == 'home_delivery']
    df_shop = df[df['location_type'] != 'home_delivery']
    df_hd.to_pickle(output_data_hd)
    df_shop.to_pickle(output_data_shop)
That part works fine. When I try to feed those two data sets into the next function with the compiler, I hit errors.
I tried the following:
@kfp.v2.dsl.pipeline(name=PIPELINE_NAME)
def my_pipeline():
    merge_promo_sales_nl = merge_promo_sales(input_data=new_launch.output)
    rule_3_hd = rule_3(input_data=merge_promo_sales_nl.output_data_hd)
    rule_3_shop = rule_3(input_data=merge_promo_sales_nl.output_data_shop)
The error I get is the following:
AttributeError: 'ContainerOp' object has no attribute 'output_data_hd'
output_data_hd is the parameter I write that dataset out to, but apparently it is not the attribute name Kubeflow is looking for.
I just figured this out.
When a component has multiple outputs, you use the following in the pipeline definition:

rule_3_hd = rule_3(input_data=merge_promo_sales_nl.outputs['output_data_hd'])
rule_3_shop = rule_3(input_data=merge_promo_sales_nl.outputs['output_data_shop'])
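For anyone hitting the same error: in the KFP v2 SDK, a task exposes a single unnamed output as .output, while multiple named outputs are only reachable through the .outputs dictionary, keyed by the output parameter names in the component signature (output_data_hd and output_data_shop here). A minimal sketch of the corrected pipeline, reusing the decorator and names from the question:

@kfp.v2.dsl.pipeline(name=PIPELINE_NAME)
def my_pipeline():
    merge_promo_sales_nl = merge_promo_sales(input_data=new_launch.output)
    # Named outputs are looked up in the `.outputs` dict, not as attributes:
    rule_3_hd = rule_3(input_data=merge_promo_sales_nl.outputs['output_data_hd'])
    rule_3_shop = rule_3(input_data=merge_promo_sales_nl.outputs['output_data_shop'])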
I am attempting to set up a Panel app that includes a FileInput button, but I cannot manage to get the uploaded file read properly by pn.widgets.FileInput.
import io
import base64
import param
import panel as pn
import pandas as pd

class try_upload(param.Parameterized):
    fileselector = param.FileSelector()
    file_input_df = param.DataFrame()

    def __init__(self, **params):
        super().__init__(**params)
        self.fileselector_widget = pn.Param(self.param.fileselector)
        self.file_input_df_widget = pn.Param(self.param.file_input_df)
        self._layout = pn.Row(
            pn.Param(self, parameters=['fileselector', 'file_input_df']),
            pn.Param(widgets={'fileselector': pn.widgets.FileInput(accept='.xlsx,.xls', multiple=True)}),
            self.file_input_df_widget,
            width=1,
            sizing_mode='fixed'
        )

    @pn.depends('fileselector', watch=True)
    def _updateinput(self):
        if self.fileselector is not None:
            # decode = base64.b64decode(self.fileselector)
            # df = pd.read_excel(io.StringIO(decode.decode('utf-8')))
            # self.input_data = pd.read_excel(io.BytesIO(decode))
            # self.file_input_df = df
            self.file_input_df = self.fileselector
            self.file_input_df_widget = self.file_input_df

    @pn.depends('file_input_df', watch=True)
    def _update_input_widget(self):
        if self.file_input_df is not None:
            self.file_input_df_widget = self.file_input_df
            return pn.widgets.Tabulator(self.file_input_df_widget, width=100)

    def __panel__(self):
        return self._layout

try_upload_class = try_upload(name='upload')
pn.extension()
app = pn.Row(
    pn.Column(
        pn.Param(try_upload_class.param,
                 widgets={'fileselector': pn.widgets.FileInput(accept='.xlsx,.xls', multiple=True)})
    ),
    pn.Column(try_upload_class._updateinput)
)
app
Uploading a file with the button gives:
ValueError: DataFrame parameter 'file_input_df' value must be an instance of DataFrame, not [b'PK\x03\x04\x14\x00\x06\x00\x08\x00\x00\x00!\x00b\xee\x9dh^\x01\x00\x00\x90\x04\x00\x00\x13\x00\x08\x02[Content_Types].xml ...']
which is essentially whatever Tornado sends back from the backend, if I am not mistaken. Attempting to decode it with base64 also fails, and using [0] to get the object out of the list does not work either. Thanks for any help in advance.
I expected the file to be read into the param.DataFrame, but I can't decode it properly and the various methods I tried all fail.
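One plausible direction (a sketch, not a verified fix): the b'PK\x03\x04' prefix is the zip magic number of a raw .xlsx payload, so the value is already plain bytes rather than base64, and with multiple=True FileInput supplies a list of bytes objects. Each element can then be parsed with pandas before being assigned to the param.DataFrame parameter:

import io
import pandas as pd

@pn.depends('fileselector', watch=True)
def _updateinput(self):
    if self.fileselector:
        # Parse each uploaded payload (raw xlsx bytes) into a frame, then combine.
        frames = [pd.read_excel(io.BytesIO(raw)) for raw in self.fileselector]
        self.file_input_df = pd.concat(frames, ignore_index=True)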
I am building a table that writes the values of an edited DataFrame out to a csv file (or whatever output is defined).
I defined a generate_agrid(df) function that returns an object whose data attribute is a pd.DataFrame. When I run grid_table = generate_agrid(df), the resulting grid_table still contains the original df, even after I modify it in the UI; I noticed this when I checked the input my update function received.
What I want is to:
Graph the data in df -> update the data in the UI and return it -> save the new data into a csv every time I press the update button.
Why does my generate_agrid function always return the initial df it was given as input? How can I update it?
My code
import pandas as pd
import streamlit as st
from st_aggrid import AgGrid, AgGridTheme, DataReturnMode, GridOptionsBuilder
from metrics.get_metrics import get_data
from metrics.config import PATH_SAMPLES

filename: str = 'updated_sample.csv'
save_path = PATH_SAMPLES.joinpath(filename)

def generate_agrid(data: pd.DataFrame):
    gb = GridOptionsBuilder.from_dataframe(data)
    gb.configure_default_column(editable=True)  # Make columns editable
    gb.configure_pagination(paginationAutoPageSize=True)  # Add pagination
    gb.configure_side_bar()  # Add a sidebar
    gb.configure_selection('multiple', use_checkbox=True,
                           groupSelectsChildren="Group checkbox select children")  # Enable multi-row selection
    gridOptions = gb.build()
    grid_response = AgGrid(
        data,
        gridOptions=gridOptions,
        data_return_mode=DataReturnMode.AS_INPUT,
        update_on='MANUAL',  # <- Should it let me update before returning?
        fit_columns_on_grid_load=False,
        theme=AgGridTheme.STREAMLIT,  # Add theme color to the table
        enable_enterprise_modules=True,
        height=350,
        width='100%',
        reload_data=True
    )
    data = grid_response['data']
    selected = grid_response['selected_rows']
    df = pd.DataFrame(selected)  # Pass the selected rows to a new dataframe df
    return grid_response

def update(grid_table: classmethod, filename: str = 'updated_sample.csv'):
    save_path = PATH_SAMPLES.joinpath(filename)
    grid_table_df = pd.DataFrame(grid_table['data'])
    grid_table_df.to_csv(save_path, index=False)

# First data gather
df = get_data()

if __name__ == '__main__':
    # Start graphing
    grid_table = generate_agrid(df)
    # Update
    st.sidebar.button("Update", on_click=update, args=[grid_table])
Found the issue: it was just a single parameter that was activated.
While instantiating the AgGrid, I had to remove the reload_data=True parameter. After that, everything worked as expected and the data could be successfully updated after manually editing it and pressing "Update".
This is how AgGrid must be instantiated:
grid_response = AgGrid(
    data,
    gridOptions=gridOptions,
    data_return_mode=DataReturnMode.AS_INPUT,
    update_on='MANUAL',
    fit_columns_on_grid_load=False,
    theme=AgGridTheme.STREAMLIT,  # Add theme color to the table
    enable_enterprise_modules=True,
    height=350,
    width='100%',
)
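The likely reason, for anyone wondering: with reload_data=True, the component re-reads the original data argument on every Streamlit rerun, so each manual edit was immediately overwritten by the initial df. Once the flag is dropped, the grid keeps its edited state and grid_response['data'] reflects the edits.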
I am building a web app where I get user input -> connect to an API -> clean the data and generate a csv -> plot the csv using Dash.
I am able to update the graph without a problem, but I am not sure how to implement a download function for the dataframe that is used for the graph. Does it require two def() functions (one for the graph, the other for the download button) after the @app.callback()? Or can this be done by wrapping the download function inside the graph function?
Any pointers would be appreciated.
@app.callback(
    Output('dash_graph', 'figure'),
    [Input('button', 'n_clicks'),
     State('ticker_input', 'value'),
     State('start_date_input', 'date'),
     State('end_date_input', 'date'),
     State('entry_price_input', 'value'),
     State('spread_input', 'value')
     ]
)
def update_result(n_clicks, ticker_input, start_date_input, end_date_input, entry_price_input, spread_input):
    if n_clicks is None:
        return dash.no_update
    else:
        API_TOKEN = "demo"
        url = *some url*
        response = urllib.request.urlopen(url)
        eod_data = json.loads(response.read())
        eod_data_df = pd.json_normalize(eod_data)
        full_list = prep_df(eod_data_df)[0]
        date_list = prep_df(eod_data_df)[1]
        eod_data_df = prep_graph(start_date_input, end_date_input, full_list, date_list, entry_price_input, entry_price_input, spread_input)
        df = eod_data_df.copy()
        # todo: download this df here
        chart = px.bar(
            data_frame=df,
            x='date',
            y='count',
            title='Time Series Chart' + '<br>' + '<span style="font-size: 10px;">{} | {} to {} | ${:.2f} to ${:.2f} | Spread ${}</span>'.format(ticker_input, start_date_input, end_date_input, entry_price_input, entry_price_input + spread_input, spread_input),
            # title=url
        )
        return chart

# -------------------------------------------------------------------------------
if __name__ == '__main__':
    app.run_server(debug=True, dev_tools_ui=False)
Not sure how your app looks; I usually leave a button reserved for downloading data.
Do you want a separate button for it, or should it be triggered by the button that you already have ('button')?
In any case, you will need to add a dcc.Download(id="download-dataframe") to your layout, and your callback should then have an output Output(component_id="download-dataframe", component_property='data'); the callback should create the csv file and return send_file('your_file.csv').
Whether you want it on the same button or a different one determines whether you need to create a new callback or not.
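A minimal sketch of the separate-button variant (assumes Dash 2.x; the ids download-btn and download-dataframe, and the use of dcc.send_data_frame rather than send_file, are illustrative choices, not from the original answer):

from dash import Dash, dcc, html, Input, Output
import pandas as pd

app = Dash(__name__)
app.layout = html.Div([
    html.Button("Download CSV", id="download-btn"),
    dcc.Download(id="download-dataframe"),
])

@app.callback(
    Output("download-dataframe", "data"),
    Input("download-btn", "n_clicks"),
    prevent_initial_call=True,
)
def download_csv(n_clicks):
    # In the real app this would be the same df used for the chart.
    df = pd.DataFrame({"date": ["2021-01-01"], "count": [1]})
    # send_data_frame streams the csv to the browser without writing to disk.
    return dcc.send_data_frame(df.to_csv, "eod_data.csv", index=False)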
Streamlit with a dataframe: an "add row into the table" action, plus a download-as-csv button, while still showing the dataframe on screen.
Option 1: Using session_state (the best way to manage this, and it generalizes to more cases):
import pandas as pd
import streamlit as st

# "st.session_state object:", st.session_state
if "df_result" not in st.session_state:
    st.session_state['df_result'] = pd.DataFrame(columns=['h1', 'h2'])
# st.write(st.session_state)

def onAddRow():
    data = {
        'h1': "something",
        'h2': "something",
    }
    st.session_state['df_result'] = st.session_state['df_result'].append(data, ignore_index=True)

st.button("Add row", on_click=onAddRow)

@st.cache
def convert_df(df):
    return df.to_csv().encode('utf-8')

st.download_button(
    "Press to Download",
    convert_df(st.session_state.df_result),
    "file.csv",
    "text/csv",
    key='download-csv'
)

st.dataframe(st.session_state['df_result'])
Option 2: A basic append on a global variable; but the view will reset when you click the download button, because the click reruns the script:
import pandas as pd
import streamlit as st

df_log = pd.DataFrame(columns=['h1', 'h2'])
df_log_process = st.dataframe(df_log)

for x in range(10):
    df_log = df_log.append(
        {
            'h1': "something",
            'h2': "something",
        }, ignore_index=True)
    df_log_process = df_log_process.dataframe(df_log)
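One caveat on both options: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so on a current stack the append calls above need pd.concat instead. A sketch of the session_state callback under that assumption:

import pandas as pd
import streamlit as st

def onAddRow():
    # Build a one-row frame and concatenate it onto the stored result.
    new_row = pd.DataFrame([{'h1': "something", 'h2': "something"}])
    st.session_state['df_result'] = pd.concat(
        [st.session_state['df_result'], new_row], ignore_index=True
    )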