Streamlit: add rows to a dataframe with a button and download it as CSV with another button, while still showing the dataframe on screen.
Option 1: Use session_state (the best way to manage state, and it generalizes to other cases)
import pandas as pd
import streamlit as st

# Initialize the dataframe once; session_state persists across reruns.
if "df_result" not in st.session_state:
    st.session_state['df_result'] = pd.DataFrame(columns=['h1', 'h2'])

def onAddRow():
    data = {
        'h1': "something",
        'h2': "something",
    }
    # DataFrame.append was removed in pandas 2.0; concat a one-row frame instead.
    st.session_state['df_result'] = pd.concat(
        [st.session_state['df_result'], pd.DataFrame([data])], ignore_index=True
    )

st.button("Add row", on_click=onAddRow)

@st.cache  # use @st.cache_data on newer Streamlit versions
def convert_df(df):
    return df.to_csv().encode('utf-8')

st.download_button(
    "Press to Download",
    convert_df(st.session_state.df_result),
    "file.csv",
    "text/csv",
    key='download-csv'
)

st.dataframe(st.session_state['df_result'])
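Because the rows live in st.session_state, they survive the rerun that the download button triggers, which is why the table stays populated on screen.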
Option 2: Basic append on a global variable; note the view resets when the download button is clicked, because Streamlit reruns the whole script.
import pandas as pd
import streamlit as st

df_log = pd.DataFrame(columns=['h1', 'h2'])
df_log_process = st.dataframe(df_log)

for x in range(10):
    # DataFrame.append was removed in pandas 2.0; concat a one-row frame instead.
    df_log = pd.concat(
        [df_log, pd.DataFrame([{'h1': "something", 'h2': "something"}])],
        ignore_index=True
    )
    df_log_process.dataframe(df_log)  # redraw the existing element in place
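Since Streamlit reruns the entire script on every widget interaction, df_log here is rebuilt from scratch whenever the download button fires; Option 1's session_state approach is what preserves the rows across reruns.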
Related
I'm pulling data from the NHL API for player stats based on individual games. I'm trying to write a loop that calls the API, parses the JSON, and creates dicts from which I can build a data frame for an entire team. The code before my looping looks like this:
API_URL = "https://statsapi.web.nhl.com/api/v1"
response = requests.get(API_URL + "/people/8477956/stats?stats=gameLog", params={"Content-Type": "application/json"})
data = json.loads(response.text)
df_list_dict = []
for game in data['stats'][0]['splits']:
    curr_dict = game['stat']
    curr_dict['date'] = game['date']
    curr_dict['isHome'] = game['isHome']
    curr_dict['isWin'] = game['isWin']
    curr_dict['isOT'] = game['isOT']
    curr_dict['team'] = game['team']['name']
    curr_dict['opponent'] = game['opponent']['name']
    df_list_dict.append(curr_dict)
df = pd.DataFrame.from_dict(df_list_dict)
print(df)
This gives me a digestible data frame for a single player (/people/{player}/...).
I want to iterate through a list (the list being an NHL team), adding a column that identifies the player and concatenating the created data frames. My attempt thus far looks like this:
import requests
import json
import pandas as pd
Rangers = ['8478550', '8476459', '8479323', '8476389', '8475184', '8480817', '8480078', '8476624', '8481554', '8482109', '8476918', '8476885', '8479324',
           '8482073', '8479328', '8480833', '8478104', '8477846', '8477380', '8477380', '8477433', '8479333', '8479991']
def callapi(player):
    response = (requests.get(f'https://statsapi.web.nhl.com/api/v1/people/{player}/stats?stats=gameLog', params={"Content-Type": "application/json"}))
    data = json.loads(response.text)
    df_list_dict = []
    for game in data['stats'][0]['splits']:
        curr_dict = game['stat']
        curr_dict['date'] = game['date']
        curr_dict['isHome'] = game['isHome']
        curr_dict['isWin'] = game['isWin']
        curr_dict['isOT'] = game['isOT']
        curr_dict['team'] = game['team']['name']
        curr_dict['opponent'] = game['opponent']['name']
        df_list_dict.append(curr_dict)
    df = pd.DataFrame.from_dict(df_list_dict)
    print(df)
for player in Rangers:
    callapi(player)
print(callapi)
When this is printed I can see all the data frames that were created. I cannot use curr_dict[] to add a column based on the list position (the player ID) because the index must be a slice or integer, not a string.
What I'm hoping to do is make this one data frame in which the stats are identified by a player id column.
My python knowledge is very scattered, I feel as if with the progress I've made I should know how to complete this but I've simply hit a wall. Any help would be appreciated.
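For the direct, non-parallel route, the minimal change is to tag each row with the player's id inside the loop and return the frame instead of printing it, then concatenate. A sketch along those lines, reusing the question's own names:
import json
import pandas as pd
import requests

def callapi(player):
    response = requests.get(f'https://statsapi.web.nhl.com/api/v1/people/{player}/stats?stats=gameLog')
    data = json.loads(response.text)
    df_list_dict = []
    for game in data['stats'][0]['splits']:
        curr_dict = game['stat']
        curr_dict['date'] = game['date']
        curr_dict['player_id'] = player  # tag every row with this player's id
        df_list_dict.append(curr_dict)
    return pd.DataFrame.from_dict(df_list_dict)

# stack the per-player frames into one team-level frame
team_df = pd.concat([callapi(p) for p in Rangers], ignore_index=True)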
You can use concurrent.futures to parallelize the requests before concatenating them all together, and json_normalize to parse the JSON.
import concurrent.futures
import json
import os
import pandas as pd
import requests
class Scrape:

    def main(self) -> pd.DataFrame:
        rangers = ["8478550", "8476459", "8479323", "8476389", "8475184", "8480817", "8480078",
                   "8476624", "8481554", "8482109", "8476918", "8476885", "8479324", "8482073",
                   "8479328", "8480833", "8478104", "8477846", "8477380", "8477380", "8477433",
                   "8479333", "8479991"]
        with concurrent.futures.ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
            return pd.concat(executor.map(self.get_stats, rangers)).reset_index(drop=True).fillna(0)

    @staticmethod
    def get_stats(player: str) -> pd.DataFrame:
        url = f"https://statsapi.web.nhl.com/api/v1/people/{player}/stats?stats=gameLog"
        with requests.Session() as request:
            response = request.get(url, timeout=30)
        if response.status_code != 200:
            print(response.raise_for_status())
        data = json.loads(response.text)
        df = (pd
              .json_normalize(data=data, record_path=["stats", "splits"])
              .rename(columns={"team.id": "team_id", "team.name": "team_name",
                               "opponent.id": "opponent_id", "opponent.name": "opponent_name"})
              ).assign(player_id=player)
        df = df[df.columns.drop(list(df.filter(regex="link|gamePk")))]
        df.columns = df.columns.str.split(".").str[-1]
        if "faceOffPct" not in df.columns:
            df["faceOffPct"] = 0
        return df

if __name__ == "__main__":
    stats = Scrape().main()
    print(stats)
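One note on the executor choice: these calls are network-bound rather than CPU-bound, so a concurrent.futures.ThreadPoolExecutor would usually be the lighter option; the ProcessPoolExecutor version shown works the same way through executor.map.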
I am building a table that saves the values of an edited output DF into a csv file (or whatever output is defined).
I defined a generate_agrid(df) function that returns the grid response object, whose 'data' entry is a pd.DataFrame. When I run grid_table = generate_agrid(df), the grid_table returned still contains the original df even if I modify it in the UI; I noticed this when I checked the input my update function received.
What I want is to:
Graph the data in df -> update the DF data in the UI and return it -> save the new df data into a csv every time I press the update button
Why does my generate_agrid method always return the initial DF it was given as input? How can I update it?
My code
import pandas as pd
import streamlit as st
from st_aggrid import AgGrid, DataReturnMode, GridOptionsBuilder
from st_aggrid.shared import AgGridTheme
from metrics.get_metrics import get_data
from metrics.config import PATH_SAMPLES

filename: str = 'updated_sample.csv'
save_path = PATH_SAMPLES.joinpath(filename)

def generate_agrid(data: pd.DataFrame):
    gb = GridOptionsBuilder.from_dataframe(data)
    gb.configure_default_column(editable=True)  # Make columns editable
    gb.configure_pagination(paginationAutoPageSize=True)  # Add pagination
    gb.configure_side_bar()  # Add a sidebar
    gb.configure_selection('multiple', use_checkbox=True,
                           groupSelectsChildren="Group checkbox select children")  # Enable multi-row selection
    gridOptions = gb.build()

    grid_response = AgGrid(
        data,
        gridOptions=gridOptions,
        data_return_mode=DataReturnMode.AS_INPUT,
        update_on='MANUAL',  # <- Should it let me update before returning?
        fit_columns_on_grid_load=False,
        theme=AgGridTheme.STREAMLIT,  # Add theme color to the table
        enable_enterprise_modules=True,
        height=350,
        width='100%',
        reload_data=True
    )

    data = grid_response['data']
    selected = grid_response['selected_rows']
    df = pd.DataFrame(selected)  # Pass the selected rows to a new dataframe df

    return grid_response

def update(grid_table, filename: str = 'updated_sample.csv'):
    save_path = PATH_SAMPLES.joinpath(filename)
    grid_table_df = pd.DataFrame(grid_table['data'])
    grid_table_df.to_csv(save_path, index=False)

# First data gather
df = get_data()

if __name__ == '__main__':
    # Start graphing
    grid_table = generate_agrid(df)
    # Update
    st.sidebar.button("Update", on_click=update, args=[grid_table])
Found the issue: it was just a single parameter that was enabled.
While instantiating the AgGrid, I had to remove the reload_data=True parameter. Once I did, everything worked as expected and the data could be successfully updated after manually editing it and pressing "Update".
This is how AgGrid must be instantiated:
grid_response = AgGrid(
    data,
    gridOptions=gridOptions,
    data_return_mode=DataReturnMode.AS_INPUT,
    update_on='MANUAL',
    fit_columns_on_grid_load=False,
    theme=AgGridTheme.STREAMLIT,  # Add theme color to the table
    enable_enterprise_modules=True,
    height=350,
    width='100%',
)
When I click on the add row button I am able to add a new row, but how do I add data to that new row? I am using data frames here.
Please find the executable code below:
import ipysheet
import ipywidgets as wg  # the original 'from ipywidgets import widgets' never defined wg
import numpy as np
import pandas as pd
from IPython.display import display

# qgrid.show_grid(df, show_toolbar=True)
random_data = [['ABC', 'DEF', 'GHI'],
               [0.69494459, 0.90502388, 0.13374771],
               [0.89609645, 0.66166011, 0.12123602],
               [0.51229257, 0.57240808, 0.25482961],
               [0.25478973, 0.08438657, 0.76977642]]
df = pd.DataFrame(random_data, columns=["Col1", "Col2", "Col3"])
sheet = ipysheet.from_dataframe(df)
btn = wg.Button(description='Add Row')

def on_bttn_clicked(b):
    sheet.rows += 1

btn.on_click(on_bttn_clicked)
display(btn, sheet)
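Growing sheet.rows only adds an empty grid line; the data goes in by creating cells for that row. A sketch of one way to do it, assuming ipysheet's row() helper and that the sheet is made "current" first (the values here are just placeholders):
# replaces on_bttn_clicked above
def on_bttn_clicked(b):
    sheet.rows += 1                      # one new, empty row at the bottom
    ipysheet.sheet(sheet)                # make this sheet "current" for the helper below
    ipysheet.row(sheet.rows - 1, ['new', 0.0, 0.0])  # fill the new last row

btn.on_click(on_bttn_clicked)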
All,
I have used multiselect successfully before, but when I try this specific example as a POC, the behavior is very weird. Essentially, what I am trying to do is use multiselect to make the app wait for user input at an intermediate step. However, multiselect does not wait for me to select the inputs I want; as soon as I select one thing, it just runs and doesn't even execute correctly. Can someone guide me as to what I am doing wrong? I am on version 0.82.
I also tested the same using selectbox and am seeing the same behavior.
So, here is what I have:
import streamlit as st
import pandas as pd

def basic_skeleton() -> tuple:
    """Prepare the basic UI for the app"""
    st.sidebar.title('User Inputs')
    beta_expander = st.sidebar.beta_expander("Upload csv")
    with beta_expander:
        user_file_path = st.sidebar.file_uploader(
            label='Random Data',
            type='csv'
        )
    return user_file_path

def get_filtered_dataframe(df) -> pd.DataFrame:
    columns_list = df.columns
    with st.form(key='Selecting Columns'):
        columns_to_aggregate = st.selectbox(
            label='Select columns to summarize',
            options=columns_list
        )
        submit_button = st.form_submit_button(label='Submit')
        if submit_button:
            df1 = df[columns_to_aggregate]
            return df1

def main():
    """Central wrapper to control the UI"""
    # add title
    st.header('Streamlit Testing')
    # add high level site inputs
    user_file_path = basic_skeleton()
    load = st.sidebar.button(label='Load Data')
    if load:
        df = pd.read_csv(user_file_path)
        st.dataframe(df)
        clean_df = get_filtered_dataframe(df)
        run = st.button("Aggregate Selected columns")
        if run:
            result = clean_df.describe(include='all')
            st.dataframe(result)

main()
A user on the Streamlit community forum helped answer this question; I am providing the answer here so anybody who comes looking finds it too. The key change is replacing the Load Data button with a checkbox (a button is only True on the single rerun in which it is clicked, while a checkbox keeps its state across reruns) and guarding on clean_df before describing it:
import streamlit as st
import pandas as pd

def basic_skeleton() -> tuple:
    """Prepare the basic UI for the app"""
    st.sidebar.title('User Inputs')
    beta_expander = st.sidebar.beta_expander("Upload csv")
    with beta_expander:
        user_file_path = st.sidebar.file_uploader(
            label='Random Data',
            type='csv'
        )
    return user_file_path

def get_filtered_dataframe(df):
    columns_list = df.columns
    with st.form(key='Selecting Columns'):
        columns_to_aggregate = st.multiselect(
            label='Select columns to summarize',
            options=columns_list
        )
        submit_button = st.form_submit_button(label='Submit')
        if submit_button:
            df1 = df[columns_to_aggregate]
            return df1

def main():
    """Central wrapper to control the UI"""
    # add title
    st.header('Streamlit Testing')
    # add high level site inputs
    user_file_path = basic_skeleton()
    if user_file_path:
        load = st.sidebar.checkbox(label='Load Data')
        if load:
            df = pd.read_csv(user_file_path)
            st.dataframe(df)
            clean_df = get_filtered_dataframe(df)
            if clean_df is not None:
                result = clean_df.describe()
                st.dataframe(result)

main()
I am trying to create a Python web application to show a time series graph of gold price distribution. I have added the app layout, the app callback, and an update_graph function. After that, I tried to take a copy of my data frame and save it to a new data frame called 'dff', but it throws an error. Also, the 'return fig' at the end of the code throws an error as well. I am fairly new to Python and need help figuring out what's wrong with my code. Below is the entire code.
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from urllib.request import urlopen, Request

url = "http://goldpricez.com/gold/history/lkr/years-3"
req = Request(url=url)
html = urlopen(req).read()
df = pd.read_html(url)  # this will give you a list of dataframes from html
df1 = df[3]
first_val = df1.iloc[0][0]
date = df1[0]
price = df1[1]
data = [df1[0], df1[1]]
headers = ["Date", "Price"]
df3 = pd.concat(data, axis=1, keys=headers)

from datetime import datetime
df3['Date'] = df3['Date'].apply(lambda x: datetime.strptime(x, '%d-%m-%Y'))
df3['Year'] = pd.DatetimeIndex(df3['Date']).year
df3['Month'] = pd.DatetimeIndex(df3['Date']).month
df3['Day'] = pd.DatetimeIndex(df3['Date']).day
df3['WDay'] = df3['Date'].dt.dayofweek
df3['WeekDayName'] = pd.DatetimeIndex(df3['Date']).day_name()
print(df3['WDay'])

writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
# Convert the dataframe to an XlsxWriter Excel object.
df3.to_excel(writer, sheet_name='Sheet1')
# Close the Pandas Excel writer and output the Excel file.
writer.save()

app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Gold Price Analyst", style={'text-align': 'center'}),
    dcc.Graph(id='this_year_graph', figure={})
])

@app.callback(
    [Output(component_id='this_year_graph', component_property='figure')]
)
def update_graph():
    dff = df3.copy()
    dff = [df4['Date'], df4['Price']]
    fig = px.line(dff, x=dff['Date'], y=dff['Price'])
    return fig

if __name__ == '__main__':
    app.run_server(debug=True)
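Two problems stand out here. First, html = urlopen(req).read() overwrites the dash_html_components module imported as html, so html.Div fails later; rename that variable (e.g. page = urlopen(req).read()). Second, update_graph references an undefined df4 where dff was intended. A sketch of a corrected callback, assuming the goal is a Date-vs-Price line chart: since the Output is wrapped in a list, the callback must return a list, and a Dash callback needs at least one Input to fire, so a dummy Input on the graph's own id property is used here as a page-load trigger.
@app.callback(
    [Output(component_id='this_year_graph', component_property='figure')],
    [Input(component_id='this_year_graph', component_property='id')]  # dummy trigger on page load
)
def update_graph(_):
    dff = df3.copy()                       # work on a copy, and keep the name 'dff' throughout
    fig = px.line(dff, x='Date', y='Price')
    return [fig]                           # list-wrapped Output -> list-wrapped return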