I am new to pandas and Streamlit. I am trying to filter a dataframe using a Streamlit selectbox.
Unfortunately, everything works except that changing the filter value is not reflected in the displayed table:
as you can see, the name selected in the filter does not update the table.
Here is the code I have used:
import xlrd
import pandas as pd
import os
from datetime import datetime
import streamlit as st
# import plotly_express as px
# to refer to the file
# change the current directory
# Folder holding one Excel workbook per sheikh, and the workbook the merged
# data is written to.
LISTS_DIR = 'C:\\Users\\user\\Desktop\\lists'
OUTPUT_PATH = 'C:\\users\\user\\Desktop\\dataOut.xlsx'

# os.chdir() returns None, so its result must not be reused as a path; the
# folder is listed through the explicit path instead.
os.chdir(LISTS_DIR)
files = os.listdir(LISTS_DIR)
columns = ['Name', 'status', 'memorize-from-surah', 'memorize-from-ayah',
           'memorize-to-surah', 'memorize-to-ayah', 'memorization-grade',
           'words-meaning-grade', 'revision-from-surah', 'revision-from-ayah',
           'revision-to-surah', 'revision-to-ayah', 'revision-grade']

# One frame per processed sheet; they are concatenated once at the end.
sheet_frames = []
for file in files:
    # The file name without its extension is used as the sheikh name.
    sheikh = os.path.splitext(file)[0]
    # Open the workbook once and parse every sheet from the same handle; the
    # context manager closes the file afterwards.
    with pd.ExcelFile(os.path.join(LISTS_DIR, file)) as workbook:
        sheets_numbers = len(workbook.sheet_names)
        print(sheets_numbers)
        # The first and the last sheet are not processed.
        for i in range(1, sheets_numbers - 1):
            # The date is read from a fixed cell of the unskipped sheet.
            current_sheet = workbook.parse(sheet_name=i, header=None, index_col=1)
            date = current_sheet.iloc[6, 10]

            # The data rows start after the first 11 rows of the sheet.
            sheets = workbook.parse(sheet_name=i, skiprows=11, header=None, index_col=1)
            # Remove the first column, then drop rows whose first remaining
            # cell is empty.
            df = sheets.iloc[:, 1:]
            df = df[df.iloc[:, 0].notna()].copy()
            df.columns = columns

            # Scalars are broadcast to every row by DataFrame.insert.
            df.insert(1, "halka", i)
            df.insert(2, "sheikh", sheikh)
            df.insert(3, "date", date)
            sheet_frames.append(df)

folderDF = pd.concat(sheet_frames, axis=0) if sheet_frames else pd.DataFrame()
# DataFrame.to_excel opens, writes and closes the target file itself, so no
# explicit ExcelWriter / save() call is needed.
folderDF.to_excel(OUTPUT_PATH)

# setting up the streamlit page
st.set_page_config(page_title='makraa reports', layout='wide')

# Sidebar widgets offering the distinct values of each filterable column.
st.sidebar.header("make filtration criteria")
nameFilter = folderDF['Name'].unique()
halkaFilter = folderDF['halka'].unique()
sheikhFilter = folderDF['sheikh'].unique()
student_choice = st.sidebar.selectbox("select the student Name", nameFilter)
halka_choice = st.sidebar.selectbox("select the halka Number", halkaFilter)
# NOTE(review): sheikh_choice is collected but not applied to the filter
# below - confirm whether it should be.
sheikh_choice = st.sidebar.selectbox("select the sheikh Number", sheikhFilter)

# Keep only the rows matching both the selected student and the selected halka.
filteredDf = folderDF[(folderDF["Name"] == student_choice) & (folderDF["halka"] == halka_choice)]
st.write(filteredDf)
Note: st.dataframe(filteredDf) does not make any difference.
The Streamlit version I used is 0.75, since the most recent version gave me a StreamlitAPIException like the one described here: enter link description here
Could you give me a hand with this?
Here is a sample code with example data.
Code
import streamlit as st
import pandas as pd
# Example data: three students, each in a different halka.
data = {'Name': ['a', 'b', 'c'], 'halka': [1, 2, 3]}

st.set_page_config(page_title='makraa reports', layout='wide')
folderDF = pd.DataFrame(data)

# Sidebar: one selectbox per filterable column, listing its distinct values.
st.sidebar.header("make filtration criteria")
nameFilter = folderDF['Name'].unique()
halkaFilter = folderDF['halka'].unique()
student_choice = st.sidebar.selectbox("select the student Name", nameFilter)
halka_choice = st.sidebar.selectbox("select the halka Number", halkaFilter)

# A row is shown only when it matches both selections.
name_matches = folderDF["Name"] == student_choice
halka_matches = folderDF["halka"] == halka_choice
filteredDf = folderDF[name_matches & halka_matches]
st.write(filteredDf)
Output
Related
I have tried doing this:
import pandas as pd
import streamlit as st
import plotly.express as px
from PIL import Image
# Browser-tab title first, then the on-page heading and sub-heading.
st.set_page_config(page_title=' Sales Report ')
st.header('Sales Report')
st.subheader('Results')

# Load the file named 'data' as CSV and render it as an interactive table.
csv_source = 'data'
df = pd.read_csv(csv_source)
st.dataframe(df)
I got the table to show up on the website, but how can I add a filter checkbox?
Explanation in the code
import streamlit as st
import pandas as pd
import numpy as np
np.random.seed(0)


# toy data
def get_data():
    """Build the toy dataframe used to demonstrate the sidebar filter.

    Returns:
        A 10-row DataFrame with the columns "Category", "A" and "B".
        "A" and "B" hold standard-normal samples; "Category" holds labels
        drawn from 'Apple', 'Banana' and 'Grapes'.
    """
    # All three columns are sampled as floats first; "Category" is then
    # overwritten with labels. The draw order is kept as-is so a given seed
    # keeps producing the same frame.
    df = pd.DataFrame(np.random.randn(10, 3), columns=["Category", "A", "B"])
    df["Category"] = np.random.choice(['Apple', 'Banana', 'Grapes'], 10)
    return df
df = get_data()

st.subheader("Filtered Dataframe")
st.sidebar.write('Select Filter')

cat_list = df.Category.unique()
# One sidebar checkbox per category; value=True pre-selects it on the first
# render. val[i] records whether cat_list[i] is currently ticked. The
# comprehension still calls st.sidebar.checkbox for every category.
val = [st.sidebar.checkbox(cat, value=True) for cat in cat_list]

# filter data based on selection: indexing the category array with the list of
# booleans keeps only the ticked categories.
df_flt = df[df.Category.isin(cat_list[val])].reset_index(drop=True)
if df_flt.empty:
    st.write("Empty Dataframe")
else:
    st.dataframe(df_flt)
Edit with @RJ Adriaansen's update:
I'm trying to pull two or more dataframes from a module so that I can use the data in the main script.
I only get 4 empty dataframes returned from the df_make module.
The main and df_make codes are below.
Any advice would be great thanks.
import pandas as pd
import df_make
# Empty module-level placeholders; they are only passed to jky_trn as arguments.
df_trn = pd.DataFrame()
df_trn_trk = pd.DataFrame()
df_jky = pd.DataFrame()
df_jky_code = pd.DataFrame()


def main():
    """Build the jockey/trainer tables via df_make and print them."""
    # jky_trn rebinds its parameters to new frames, which is invisible to the
    # caller, so the results must be taken from its return value. It returns
    # (jockeys, jockey/course, trainers, trainer/course) in that order.
    # Distinct local names are used so the module-level placeholders can still
    # be read as arguments in the same statement.
    jky, jky_code, trn, trn_trk = df_make.jky_trn(df_trn, df_trn_trk, df_jky, df_jky_code)
    print(trn)
    print(trn_trk)
    print(jky)
    print(jky_code)


if __name__ == '__main__':
    main()
import pandas as pd
#def jky_trn(df_trn):
def jky_trn(df_trn=None, df_trn_trk=None, df_jky=None, df_jky_code=None):
    """Build the jockey and trainer lookup tables from the racecards workbook.

    Args:
        df_trn, df_trn_trk, df_jky, df_jky_code: Accepted only so existing
            four-argument calls keep working; their values are ignored. The
            results are delivered through the return value, because rebinding
            a parameter inside the function does not affect the caller.

    Returns:
        tuple: ``(df_jky, df_jky_code, df_trn, df_trn_trk)`` where
            df_jky is the distinct jockeys, df_jky_code the distinct
            (Jockey, Course) pairs, df_trn the distinct trainers and
            df_trn_trk the distinct (Trainer, Course) pairs.
    """
    path = r"C:\Users\chris\Documents\UKHR\PythonSand\PY_Scripts\StackOF"
    # Raw string: "\R" is not a valid escape sequence in a normal literal.
    xls_link = path + r"\Racecards.xlsx"

    # Read the workbook once and slice both views out of the same frame.
    cards = pd.read_excel(xls_link, usecols=["Jockey", "Trainer", "Course", "RaceDesc"])

    df1 = cards[["Jockey", "Course", "RaceDesc"]].drop_duplicates()
    df1 = df1.dropna().copy()  # Remove rows with NaN
    df2 = cards[["Trainer", "Course", "RaceDesc"]].copy()

    # Strip the " (AW)" suffix from Course. The pattern is matched literally
    # (regex=False), so the result does not depend on the pandas version's
    # default for `regex`.
    df1['Course'] = df1['Course'].str.replace(' (AW)', '', regex=False)
    df2['Course'] = df2['Course'].str.replace(' (AW)', '', regex=False)

    df_jky = df1[['Jockey']].drop_duplicates()
    df_jky_code = df1[['Jockey', 'Course']].drop_duplicates()

    df_trn = df2[['Trainer']].drop_duplicates()
    df_trn_trk = df2[['Trainer', 'Course']].drop_duplicates()

    return df_jky, df_jky_code, df_trn, df_trn_trk
So, it turns out that I needed to capture the function's return value in the main script: the dataframes come back as the items of a tuple, e.g. df_jt = df_make.jky_trn()
The new main script code is:
import pandas as pd
import df_make
def main():
    """Fetch the four lookup tables from df_make and print each of them."""
    # jky_trn returns a 4-tuple: jockeys, jockey/course pairs, trainers and
    # trainer/course pairs, in that order.
    df_jky, df_jky_code, df_trn, df_trn_trk = df_make.jky_trn()
    print(df_jky)
    print(df_jky_code)
    print(df_trn)
    print(df_trn_trk)


# The double underscores are required: a bare `name` is an undefined variable.
if __name__ == '__main__':
    main()
I am pretty new to Dash and have read as much as I can to understand what the issue might be. In a nutshell, I have a single date picker which is the input to both the DataTable callback and the Graph callback. The graph callback works fine, so it is only the DataTable that is causing problems. I also tried a single-input, multiple-output callback, but that didn't work. My code is below:
app = JupyterDash()

folder = os.getcwd()
# parse_dates is a read_csv option; passed to Path() it never reaches the CSV
# parser.
portfolio_returns_table = pd.read_csv(Path(folder, 'portfolioreturns_maria.csv'), parse_dates=[0])
portfolio_returns_table = portfolio_returns_table.set_index('Unnamed: 0')
name_portfolioID_table = pd.read_csv(Path(folder, 'name_portfolioID.csv'))

# Calculate portfolio cumulative returns: running product of (1 + return),
# rounded to 5 decimals.
df_cumret = (portfolio_returns_table + 1).cumprod().round(5)
# Ensure the index is datetime-typed for the date lookups done in the callbacks.
df_cumret.index = pd.to_datetime(df_cumret.index)
# Page layout: a date picker, an output container, the leaderboard table and
# the performance graph.
app.layout = html.Div(html.Div([
    dcc.DatePickerSingle(
        id='my-date-picker-single',
        # The selectable range spans the dates available in df_cumret; the
        # latest date is selected initially.
        min_date_allowed=dt.date(df_cumret.index.min()),
        max_date_allowed=dt.date(df_cumret.index.max()),
        initial_visible_month=dt.date(df_cumret.index.max()),
        date=dt.date(df_cumret.index.max()),
        display_format='Y-MM-DD',
        clearable=True),
    html.Div(id='output-container-date-picker-single'),
    html.Div(dash_table.DataTable(
        id='data_table',
        # DataTable rows are a list of record dicts, so the empty initial
        # value is a list, not a dict.
        data=[],
        fixed_rows={'headers': True},
        style_cell={'textAlign': 'left'},
        style_table={'height': 400})),
    html.Div(dcc.Graph('my_graph')),
]))
@app.callback([Output('data_table', 'data'), Output('data_table', 'columns')],
              [Input('my-date-picker-single', 'date')])
def update_leader_table(date):
    """Rebuild the leaderboard table for the date chosen in the picker.

    Args:
        date: The picker's current ``date`` value, or None when the picker
            has been cleared.

    Returns:
        tuple: ``(data, columns)`` for the DataTable - a list of row records
            and a list of column definitions. Both are empty when no date is
            selected.
    """
    # The picker is clearable, so the callback can fire with no date at all.
    if date is None:
        return [], []

    # Get data for the selected date and transpose
    df_T = df_cumret.loc[[date]].T
    # Sort the table to reveal the top leaders (first 10 rows after sorting)
    df_Top = df_T.sort_values(df_T.columns[0], ascending=False)[:10]
    # Convert the index to an integer so it can be matched against the index
    # of name_portfolioID_table
    df_Top.index = df_Top.index.astype(int)
    # Generate the leaderboard for the given date
    df_leader = pd.merge(df_Top, name_portfolioID_table,
                         left_index=True, right_index=True, how='left')
    # Create the rank column (1 = highest cumulative return)
    df_leader['Rank'] = range(1, len(df_leader) + 1)
    df_leader.columns = ['Cum Return', 'Investor', 'Rank']
    df_leader.reset_index(drop=True, inplace=True)

    data = df_leader.to_dict('records')
    columns = [{'id': c, 'name': c, "selectable": True} for c in df_leader.columns]
    return (data, columns)
# callback to link calendar to graph
@app.callback(Output('my_graph', 'figure'), [Input('my-date-picker-single', 'date')])
def update_graph(date):
    """Plot cumulative returns up to ``date`` for the ten leading portfolios.

    Args:
        date: The picker's current ``date`` value, or None when the picker
            has been cleared.

    Returns:
        The line figure for the graph component, or an empty figure
        description when no date is selected.
    """
    # The picker is clearable; without a date there is nothing to slice and
    # the title concatenation below would fail.
    if date is None:
        return {'data': [], 'layout': {}}

    # date filter: everything up to and including the selected date
    df_T = df_cumret.loc[:date].T
    # Sort by the most recent date to reveal the top leaders & keep the first 10
    df_Top = df_T.sort_values(df_T.columns[-1], ascending=False)[:10]
    # Transpose to have date as index
    df_top_graph = df_Top.T
    # set the columns as int so they can be matched against the index of
    # name_portfolioID_table
    df_top_graph.columns = df_top_graph.columns.astype(int)
    # Rename columns using the `name` column of name_portfolioID_table
    df_top_graph.rename(columns=dict(zip(name_portfolioID_table.index,
                                         name_portfolioID_table.name)),
                        inplace=True)
    # Generate graph
    fig = px.line(df_top_graph, x=df_top_graph.index, y=df_top_graph.columns,
                  title='ETF LEADERBOARD PERFORMANCE: ' + date,
                  labels={'Unnamed: 0': 'Date', 'value': 'Cumulative Returns'})
    fig.update_layout(hovermode='x unified')
    fig.update_traces(hovertemplate='Return: %{y} <br>Date: %{x}')
    fig.update_layout(legend_title_text='Investor')
    return fig
# Start the app only when this file is executed directly.
if __name__ == '__main__':
    app.run_server(mode='inline', debug=True, port=65398)
I would like to retrieve information from NewsApi and ran into an issue. The code is enclosed below:
from NewsApi import NewsApi
import pandas as pd
import os
import datetime as dt
from datetime import date
def CreateDF(JsonArray, columns):
    """Turn a list of JSON objects into a DataFrame restricted to ``columns``.

    Args:
        JsonArray: Iterable of dict-like items (one per record).
        columns: Keys to copy from every item; they also become the column
            names, in this order.

    Returns:
        A DataFrame with one row per item in ``JsonArray``. An empty input
        yields an empty frame that still carries ``columns``.

    Raises:
        KeyError: If an item lacks one of the requested keys.
    """
    # Build one record per item so every article is kept, not just the last
    # one visited by the loop.
    records = [{cunColumn: item[cunColumn] for cunColumn in columns}
               for item in JsonArray]
    return pd.DataFrame(records, columns=columns)
def main():
    """Download headlines for one search term in 2-hour windows and save them.

    Starting at 2020-03-01 08:00, consecutive two-hour windows are requested
    from the API until the request budget is used up or the API stops
    returning articles. The collected rows are written to
    'Headlines_symbol.csv'.
    """
    # access_token_NewsAPI.txt must contain your personal access token
    with open("access_token_NewsAPI.txt", "r") as f:
        # strip() removes a trailing newline without cutting off the last
        # character of the key when the file has no newline.
        myKey = f.read().strip()

    api = NewsApi(myKey)

    # get news for specific symbol
    symbol = "coronavirus"
    sources = 'bbc.co.uk'
    columns = ['author', 'publishedAt', 'title', 'description', 'content', 'source']
    limit = 500  # maximum requests per day
    startDate = dt.datetime(2020, 3, 1, 8)

    frames = []
    # Same number of requests as counting i from 1 while i < limit.
    for _ in range(1, limit):
        endDate = startDate + dt.timedelta(hours=2)
        rst_symbol = api.GetEverything(symbol, 'en', startDate, endDate, sources)

        # NOTE(review): presumably an error reply (e.g. request limit reached)
        # carries no 'articles' key - confirm against the NewsApi wrapper.
        # Stop and keep what was collected instead of failing with KeyError.
        articles = rst_symbol.get('articles')
        if articles is None:
            print("No 'articles' in response, stopping:", rst_symbol)
            break

        if articles:
            rst = CreateDF(articles, columns)
            # Accept either a single record (dict) or a ready-made DataFrame.
            frames.append(pd.DataFrame([rst]) if isinstance(rst, dict) else rst)

        startDate = endDate

    # A single concat replaces per-request DataFrame.append, which no longer
    # exists in pandas 2.x.
    if frames:
        df = pd.concat(frames, ignore_index=True)
    else:
        df = pd.DataFrame(columns=columns)
    df.to_csv('Headlines_symbol.csv')


main()
I got following error:
rst = CreateDF(rst_symbol['articles'], columns)
KeyError: 'articles'
In this line:
rst = CreateDF(rst_symbol['articles'], columns)
I think there is a problem with the key not being found or defined. Does anyone have an idea how to fix that? I'm thankful for every hint!
MAiniak
EDIT:
I found the solution after I tried a few of your hints. Apparently, the error occurred when the NewsAPI API key ran into a request limit. This happened every time, until I changed the limit = 500 to limit = 20. For some reason, there is no error with a new API Key and reduced limit.
Thanks for your help guys!
Probably 'articles' is not one of the keys in your rst_symbol object.
The python documentation [2] [3] doesn't mention any method named NewsApi() or GetEverything(), but rather NewsApiClient() and get_everything(), i.e.:
from newsapi import NewsApiClient
# Init
newsapi = NewsApiClient(api_key='xxx')

# /v2/top-headlines
# NOTE: on this endpoint `sources` cannot be combined with `country` or
# `category`, so the source list is left out of this call.
top_headlines = newsapi.get_top_headlines(q='bitcoin',
                                          category='business',
                                          language='en',
                                          country='us')

# /v2/everything
all_articles = newsapi.get_everything(q='bitcoin',
                                      sources='bbc-news,the-verge',
                                      domains='bbc.co.uk,techcrunch.com',
                                      from_param='2017-12-01',
                                      to='2017-12-12',
                                      language='en',
                                      sort_by='relevancy',
                                      page=2)

# /v2/sources
sources = newsapi.get_sources()
I am trying to get some information from a webshop's website with Python.
I tried this:
def proba():
    """Fetch the webshop product list and write it to a CSV file.

    One row (name, gross price, URL) is written per product found in the
    JSON response.
    """
    my_url = requests.get('https://www.telekom.hu/shop/categoryresults/?N=10994&contractType=list_price&instock_products=1&Ns=sku.sortingPrice%7C0%7C%7Cproduct.displayName%7C0&No=0&Nrpp=9&paymentType=FULL')
    data = my_url.json()
    results = []
    products = data['MainContent'][0]['contents'][0]['productList']['products']
    for product in products:
        name = product['productModel']['displayName']
        try:
            priceGross = product['priceInfo']['priceItemSale']['gross']
        except (LookupError, TypeError):
            # No usable sale price on this product: fall back to the base price.
            priceGross = product['priceInfo']['priceItemToBase']['gross']
        url = product['productModel']['url']
        # Must run once per product, i.e. inside the loop; appended after the
        # loop, only the last product would reach the CSV.
        results.append([name, priceGross, url])
    df = pd.DataFrame(results, columns=['Name', 'Price', 'Url'])
    df.to_csv(r'/usr/src/Python-2.7.13/test.csv', sep=',', encoding='utf-8-sig', index=False)
# Poll the webshop continuously; the inner loop runs while the wall-clock time
# (compared as an "HH:MM:SS" string) is before 23:59:59, and the outer loop
# restarts it afterwards.
while True:
    mytime = datetime.now().strftime("%H:%M:%S")
    while mytime < "23:59:59":
        # Function-call form prints the same text on Python 2 and Python 3.
        print(mytime)
        proba()
        mytime = datetime.now().strftime("%H:%M:%S")
In this webshop there are 9 items, but I see only 1 row in the CSV file.
I'm not entirely sure what you intend as the end result. Do you want to update an existing file, or get the data and write it all out in one go? An example of the latter is shown below: I add each new dataframe to an overall dataframe, and the function uses a return statement to hand back each new dataframe.
import requests
from datetime import datetime
import pandas as pd
def proba():
    """Fetch the webshop product list as a DataFrame.

    Returns:
        A DataFrame with the columns 'Name', 'Price' and 'Url', one row per
        product found in the JSON response.
    """
    my_url = requests.get('https://www.telekom.hu/shop/categoryresults/?N=10994&contractType=list_price&instock_products=1&Ns=sku.sortingPrice%7C0%7C%7Cproduct.displayName%7C0&No=0&Nrpp=9&paymentType=FULL')
    data = my_url.json()
    results = []
    products = data['MainContent'][0]['contents'][0]['productList']['products']
    for product in products:
        name = product['productModel']['displayName']
        try:
            priceGross = product['priceInfo']['priceItemSale']['gross']
        except (LookupError, TypeError):
            # No usable sale price on this product: fall back to the base price.
            priceGross = product['priceInfo']['priceItemToBase']['gross']
        url = product['productModel']['url']
        # One row per product, so the append stays inside the loop.
        results.append([name, priceGross, url])
    df = pd.DataFrame(results, columns=['Name', 'Price', 'Url'])
    return df
headers = ['Name', 'Price', 'Url']
# Overall frame that every freshly fetched product list is appended to.
df = pd.DataFrame(columns=headers)

while True:
    mytime = datetime.now().strftime("%H:%M:%S")
    while mytime < "23:59:59":
        print(mytime)
        dfCurrent = proba()
        mytime = datetime.now().strftime("%H:%M:%S")
        df = pd.concat([df, dfCurrent])
        # Written inside the loop: the outer `while True` never exits, so a
        # write placed after it would never run.
        df.to_csv(r"C:\Users\User\Desktop\test.csv", encoding='utf-8')