I have created a small form with ipywidgets. The sample code can be run in Jupyter or Google colab.
Each time the form is filled and the button is clicked a row gets added to a dataframe. Subsequently the dataframe gets displayed.
My problem is that the output displays the new updated dataframe on top of the old one. What I want is that the new display output overwrites the old one. See image description here.
import ipywidgets as widgets
from ipywidgets import HBox, Label
from ipywidgets import Layout, Button, Box, FloatText, Textarea, Dropdown, Label, IntSlider
import time
import pandas as pd
# Create the (initially empty) DataFrame that the form adds rows to.
df = pd.DataFrame(columns=['Dropdown_column', 'Float_column'])
df  # last expression of the notebook cell: renders the empty frame
# Layouts: both are flex rows; they differ only in alignment and padding.
form_item_layout = Layout(display='flex', flex_flow='row', justify_content='space-between')
button_item_layout = Layout(display='flex', flex_flow='row', justify_content='center', padding='5%')
# Dropdown item
drop_down_input = 'Dropdown_input_1'  # last value reported by dropdown_handler
drop_down = widgets.Dropdown(
    options=[
        (name, name)
        for name in ('Dropdown_input_1', 'Dropdown_input_2', 'Dropdown_input_3')
    ]
)


def dropdown_handler(change):
    """Echo the newly selected option and remember it in ``drop_down_input``.

    Args:
        change: The change notification; only ``change.new`` is read.
    """
    global drop_down_input
    drop_down_input = change.new
    print('\r', 'Dropdown: ' + str(change.new), end='')


# Only react to changes of the widget's ``value``.
drop_down.observe(dropdown_handler, names='value')
# FloatText item
float_input = 0  # last value reported by IntText_handler
# NOTE: this rebinds the name of the FloatText class imported above.
FloatText = widgets.FloatText()


def IntText_handler(change):
    """Echo the newly entered number and remember it in ``float_input``.

    Args:
        change: The change notification; only ``change.new`` is read.
    """
    global float_input
    float_input = change.new
    print('\r', 'Float text:' + str(change.new), end='')


# Only react to changes of the widget's ``value``.
FloatText.observe(IntText_handler, names='value')
# Button
button = widgets.Button(description='Add row to dataframe')
out = widgets.Output()


def on_button_clicked(b):
    """Add the current form values to ``df`` as a new row and render it in ``out``.

    Args:
        b: The clicked button (unused; supplied by ``Button.on_click``).
    """
    button.description = 'Row added'
    time.sleep(1)  # keep the 'Row added' caption visible for a moment
    with out:
        new_row = {'Dropdown_column': drop_down_input, 'Float_column': float_input}
        # DataFrame.append was removed in pandas 2.0; enlarge the frame at the
        # next integer label instead (the Series aligns on column names).
        df.loc[len(df)] = pd.Series(new_row)
        button.description = 'Add row to dataframe'
        # `out` is not cleared before displaying, so every click adds another
        # rendering of the frame next to the earlier ones.
        display(df)


button.on_click(on_button_clicked)
# Form items: one labelled row per input, then the button row.
form_items = [
    Box([Label(value='Dropdown'), drop_down], layout=form_item_layout),
    Box([Label(value='FloatText'), FloatText], layout=form_item_layout),
    Box([Label(value=''), button], layout=button_item_layout),
]
# Stack the rows vertically inside a bordered box.
form = Box(
    form_items,
    layout=Layout(
        display='flex',
        flex_flow='column',
        border='solid 1px',
        align_items='stretch',
        width='30%',
        padding='1%',
    ),
)
display(form)
display(out)
I have tried using the print() function in combination with '\r' and changing the #Button part of my code.
Change:
display(df)
to
print('\r',str(df), end='')
or
print(str(df), end='\r')
But this does not work either.
Does somebody have any idea what to do?
\r works only for a single line of plain text, but df is not displayed as plain text (and it is not a single line); it is rendered as HTML.
You have to use out.clear_output() to remove previous content.
with out:
    new_row = {'Dropdown_column': drop_down_input, 'Float_column': float_input}
    # DataFrame.append was removed in pandas 2.0; enlarge the frame at the
    # next integer label instead (the Series aligns on column names).
    df.loc[len(df)] = pd.Series(new_row)
    button.description = 'Add row to dataframe'
    # wait=True keeps the old table until the new one is ready (no flicker).
    out.clear_output(wait=True)  # <---
    display(df)
You can see more about out.clear_output() in documentation:
Output widgets: leveraging Jupyter’s display system
Related
I have created an "input form" with several ipywidget boxes. I want to be able to reference all the values to create a new dataframe.
I'm currently doing this in a horrible way.
# The first widget of every VBox seeds the frame with its first row ...
portfolio_df = pd.DataFrame(
    [[VBox1.children[0].value, VBox2.children[0].value, VBox3.children[0].value, VBox4.children[0].value]],
    columns=['Product Name', 'Units', 'Price', 'Invested Amount'])
# ... and each later row is added at the next integer label.
# Fixed: the last cell read VBox4.children[21], which breaks the row pattern.
row_2 = [VBox1.children[1].value, VBox2.children[1].value, VBox3.children[1].value, VBox4.children[1].value]
portfolio_df.loc[len(portfolio_df)] = row_2
row_3 = [VBox1.children[2].value, VBox2.children[2].value, VBox3.children[2].value, VBox4.children[2].value]
portfolio_df.loc[len(portfolio_df)] = row_3
row_4 = [VBox1.children[3].value, VBox2.children[3].value, VBox3.children[3].value, VBox4.children[3].value]
portfolio_df.loc[len(portfolio_df)] = row_4
and so on up till row 23 in this instance !! (but the length will vary up to the number of children within a VBox)
I suspect I can do this more pythonically using a for loop, but I can't figure it out.
Full code as per requests (I've edited columns so my live data is different but this is exact replica of the set up)
import pandas as pd
import numpy as np
import datetime as dt
import ipywidgets as ipw
from ipywidgets import *
# 24 rows of random placeholder data, converted to strings.
# Fixed: the 'Product Name' literal was split across two lines (syntax error).
barrier_list = pd.DataFrame(
    np.random.randn(24, 4),
    columns=['Product Name', 'ISIN', 'A', 'B'],
)
barrier_list = barrier_list.astype(str)
def _float_column(description, count):
    """Return ``count`` zero-valued FloatText widgets sharing one caption."""
    return [
        ipw.FloatText(description=description, value=0, layout={'width': '200px'})
        for _ in range(count)
    ]


# First column: one widget per product, captioned with the ISIN and
# pre-filled from 'Product Name'.
# NOTE(review): the value passed here is a string; presumably FloatText
# coerces it to a number -- confirm.
dd_list = []
for isin, product in zip(barrier_list['ISIN'], barrier_list['Product Name']):
    dropdown = ipw.FloatText(description=isin,
                             value=product,
                             disabled=False,
                             layout={'width': '350px'})
    dropdown.style.description_width = 'initial'
    dd_list.append(dropdown)

# Remaining columns: identical numeric inputs, one per product row.
dd_list1 = _float_column('Units', len(dd_list))
dd_list2 = _float_column('Price', len(dd_list))
dd_list3 = _float_column('Value', len(dd_list))

VBox1 = ipw.VBox(dd_list)
VBox2 = ipw.VBox(dd_list1)
VBox3 = ipw.VBox(dd_list2)
VBox4 = ipw.VBox(dd_list3)
# Uses the `ipw` alias like the rest of the snippet (`widgets` is never
# imported explicitly). NOTE: this rebinds the star-imported HBox class name.
HBox = ipw.HBox([VBox1, VBox2, VBox3, VBox4])
Solved this one by looping through the VBoxes one by one and then concatenating the dataframes into one main one.
def _column_frame(box, column):
    """Collect the ``value`` of every child of ``box`` into a one-column frame."""
    return pd.DataFrame({column: [child.value for child in box.children]})


# DataFrame.append was removed in pandas 2.0, so each column is built in one
# step from its VBox instead of being appended to row by row.
product_df = _column_frame(VBox1, 'Product Name')
unit_df = _column_frame(VBox2, 'Units')
price_df = _column_frame(VBox3, 'Price')
value_df = _column_frame(VBox4, 'Value')

# All four frames share the default 0..n-1 index, so a column-wise concat
# lines the rows up.
df_list = [product_df, unit_df, price_df, value_df]
portfolio_df = pd.concat(df_list, axis=1)
portfolio_df
I'm aware that my code isn't very clean; my primary focus at the moment is to make the program work.
I’m working with Tkinter and I created a search- and listbox based on a column in Excel. The Excelfile is imported by pandas, as a dataframe. The idea is that people can search for something (for example ‘Eiffel Tower’), that the value (‘Eiffel Tower’) is selected and that Python gives the construction date as output (so for example the year 1889) in the interface.
You search and make sure that the value is visible in the entrybox, and then you click on a button. After clicking on the button, you will see ‘1889’.
Both the buildings as the construction dates are listed in an Excelfile. Column A contains the buildings, column B contains the construction dates.
The search and listbox work. But I’m not able to connect column A to column B, or to get an output based on the input that the user gives.
The 'output_Startdate' was to test if the if-statement worked (what it does). The 'def connectie()' is me trying to find a solution.
My code:
import tkinter as tk
from tkinter import *
from tkinter import Listbox
from tkinter import ttk
import pandas as pd
# Main window
interface = tk.Tk()
interface.configure(bg="#60c1c9")
interface.geometry('1500x750')
interface.title('Construction Dates')

# Section heading
title = Label(interface, text='1. BUILDINGS')
title.configure(bg="#60c1c9", fg="#000000", font=("Calibri", 20, "bold"))
title.place(relx=0.15, rely=0, anchor=N)

# Load the "Buildings" sheet and pull the three columns out as plain lists.
# Fixed: the string literals used curly quotes, which is a syntax error.
file_name = "List_Buildings.xlsx"
xl_workbook = pd.ExcelFile(file_name)
df = xl_workbook.parse("Buildings")
alist = df['MONUMENT'].tolist()
Startdate = df['Startdate'].tolist()
Enddate = df['Enddate'].tolist()

# Instruction text (no parent is given, so the default root window is used).
Label(
    text="Select what you see on the picture.",
    bg="#60c1c9",
    fg="#000000",
    font=("Calibri", 12)
).place(relx=0.29, rely=0.05, anchor=N)
def update(data):
    """Replace the listbox contents with the entries of ``data``.

    Args:
        data: Iterable of items to show, in order.
    """
    my_list_1.delete(0, END)
    # insert() accepts any number of elements after the index.
    my_list_1.insert(END, *data)
def check(e):
    """Show only the monuments whose name contains the typed text.

    The match ignores case; an empty entry box shows the full list.

    Args:
        e: The key-release event (unused).
    """
    needle = entry_1.get()
    if needle == '':
        matches = alist
    else:
        matches = [name for name in alist if needle.lower() in name.lower()]
    update(matches)
def fillout(e):
    """Copy the listbox's active item into the entry box.

    Args:
        e: The listbox-select event (unused).
    """
    chosen = my_list_1.get(ACTIVE)
    entry_1.delete(0, END)
    entry_1.insert(0, chosen)
# Search box: every key release re-filters the list.
entry_1 = Entry(interface, width=53)
entry_1.place(relx=0.205, rely=0.12, anchor=N)
entry_1.bind('<KeyRelease>', check)

# Result list: selecting an item copies it into the search box.
my_list_1: Listbox = Listbox(interface, height=20, width=50)
my_list_1.place(relx=0.2, rely=0.15, anchor=N)
my_list_1.bind("<<ListboxSelect>>", fillout)

# Scrollbars drive the listbox view ...
scrollbar_v = Scrollbar(interface, orient=VERTICAL, command=my_list_1.yview)
scrollbar_v.place(relx=0.301, rely=0.151, height=324)
scrollbar_h = Scrollbar(interface, orient=HORIZONTAL, command=my_list_1.xview)
scrollbar_h.place(relx=0.0985, rely=0.583, width=320.5)
# ... and the listbox must report its position back, otherwise the scrollbar
# thumbs never reflect what is currently visible.
my_list_1.configure(yscrollcommand=scrollbar_v.set,
                    xscrollcommand=scrollbar_h.set)
#alist = df['MONUMENT'].tolist()
#output = df['Startdate'].tolist()
#df2 = pd.DataFrame(columns=['MONUMENT', 'Startdate', 'Enddate'])
#df2 = df.apply(lambda x: df['MONUMENT'] == df['Startdate'])
#print(df2)
def connectie():
    """Publish the start date of the monument named in the entry box.

    Does nothing when the entry text matches no monument exactly.
    """
    value = entry_1.get()
    # alist and Startdate are taken from the same frame, so they pair up
    # row by row. (The original compared `value` with the whole list and
    # referenced an undefined `output`, so it could never match.)
    for monument, start_date in zip(alist, Startdate):
        if monument == value:
            # NOTE(review): BLOCK_NAME is not defined in the visible code;
            # presumably a tk.StringVar bound to an output label -- confirm.
            BLOCK_NAME.set(start_date)
            return
def output_Startdate():
    """Test hook: report whether the entry box holds the hard-coded name.

    Places a label reading "good" on a match and an error text otherwise.
    """
    # Fixed: the literal used curly quotes, which is a syntax error.
    if entry_1.get() == 'Eiffeltower':
        message = "good"
    else:
        message = "this value doesn't excist"
    # Each call places a new label at the same spot.
    tekst = tk.Label(interface, text=message)
    tekst.place(relx=0.3, rely=0.8)
# Button that runs output_Startdate when clicked.
button = Button(interface, text='click here', command=output_Startdate)
button.place(relx=0.29, rely=0.7)
# Hand control to the Tk event loop.
interface.mainloop()
I'm not sure what your data looks like (you didn't hand us a sample), so I hope I did it right. There are two parts to my answer, the first is for loading the file and setting the index column (I hope the names are all unique), and the second part is how to loc for the data you are looking for.
file_name = 'List_Buildings.xlsx'  # workbook holding the monuments
# Load 'Sheet1' with the 'MONUMENT' column as the index, so that a row can
# be looked up by monument name.
df = pd.read_excel(file_name, sheet_name='Sheet1', index_col='MONUMENT')
# The index labels double as the list of searchable names.
alist = df.index.tolist()
def output_Startdate():
    """Show the 'Startdate' of the monument named in the entry box.

    A name that is not in the frame's index produces a "not found" message
    instead of an uncaught ``KeyError``.
    """
    monument = entry_1.get()
    try:
        # Row label = monument name, column = 'Startdate'.
        start_date = df.loc[monument, 'Startdate']
    except KeyError:
        message = f"No start date found for {monument!r}"
    else:
        message = f"Start date: {start_date}"
    tekst = tk.Label(interface, text=message)
    tekst.place(relx=0.3, rely=0.8)
I wrote a python script that should have a data frame as output, but it does not show any output. Below is the python code:
import pandas as pd
import numpy as np
import ipywidgets as widgets
import datetime
from ipywidgets import interactive
from IPython.display import display, Javascript
from datetime import date, timedelta
from random import choices
# Sample data: 100 random (book, counterparty, last trading date) rows.
books = ["Book_1", "Book_2", "Book_3", "Book_4", "Book_5"]
counterparties = ["Counterparty_1", "Counterparty_2", "Counterparty_3", "Counterparty_4", "Counterparty_5"]
book = choices(books, k=100)
counterparty = choices(counterparties, k=100)

date1, date2 = date(2018, 8, 1), date(2023, 8, 3)
# Every calendar day from date1 through date2 inclusive. Computing the day
# count up front avoids a `while date1 != date2` loop, which would never
# terminate if the two dates were swapped.
res_dates = [
    date1 + timedelta(days=offset)
    for offset in range((date2 - date1).days + 1)
]
ldd = choices(res_dates, k=100)

# Named `data` rather than `dict` so the builtin is not shadowed.
data = {'book': book, 'counterparty': counterparty, 'last_trading_date': ldd}
df = pd.DataFrame(data)
# Distinct book codes, taken from the column's categories.
books = pd.Categorical(df['book']).categories
books_dropdown = widgets.Dropdown(options=books, value=books[0], description='Book:', disabled=False)

# Distinct counterparty codes, taken the same way.
counterparty = pd.Categorical(df['counterparty']).categories
counter_dropdown = widgets.Dropdown(options=counterparty, value=counterparty[0], description='Counterparty:', disabled=False)

# Date input used as the cut-off for the filter.
date_picker = widgets.DatePicker(description='Pick a Date', disabled=False)
# Tag the picker with a CSS class so the script below can find its <input>.
date_picker.add_class("start-date")
# Browser-side snippet that sets the min/max attributes on the tagged <input>.
# NOTE(review): `script` is only constructed here; nothing in the visible
# code displays it, so it may never run -- confirm.
script = Javascript("\
const query = '.start-date > input:first-of-type'; \
document.querySelector(query).setAttribute('min', '2020-12-01'); \
document.querySelector(query).setAttribute('max', '2025-01-01'); \
")
# Stack the three inputs vertically and show them.
box = widgets.VBox([books_dropdown, counter_dropdown, date_picker])
display(box)
def filter_function(bookcode, cpartycode, datecode):
    """Show the rows for one book and counterparty that trade before a date.

    Args:
        bookcode: Value compared against the 'book' column.
        cpartycode: Value compared against the 'counterparty' column.
        datecode: Source of the cut-off date.
            NOTE(review): the code reads ``datecode.value``, i.e. it presumes
            a widget-like object is passed in -- confirm what the caller
            actually supplies.
    """
    book_match = df['book'] == bookcode
    cparty_match = df['counterparty'] == cpartycode
    selection = df[book_match & cparty_match]
    cutoff = datetime.date(datecode.value)
    selection = selection[selection['last_trading_date'] < cutoff]
    # Replace whatever the output area showed before with the new selection.
    with report_output:
        report_output.clear_output()
        display(selection)
# Bind the three widgets to filter_function's parameters.
# NOTE(review): the returned object is neither stored nor displayed here --
# confirm this is intended.
interactive(filter_function, bookcode=books_dropdown, cpartycode=counter_dropdown, datecode=date_picker)
# Output area that filter_function writes into; it is created after the
# binding above.
report_output = widgets.Output()
display(report_output)
What this does is basically take a data frame, subset the said data frame into a smaller data frame based on categories of two variables, and truncate the resulting data frame based on a date selected by the user.
Did I make a mistake somewhere? If so, can someone point to me where? Thank you in advance.
Edit:
After many attempts I came to the conclusion that the problem is related to the DatePicker widget. So you can focus on that when trying to solve the problem.
Here is the code I used to reproduce the issue if I understand it correctly:
from datetime import date, timedelta
from random import choices
import pandas as pd
import ipywidgets as widgets
import datetime
from ipywidgets import interactive
from IPython.display import display, Javascript
# Sample data: 100 random (book, counterparty, last trading date) rows.
books = ["Book_1", "Book_2", "Book_3", "Book_4", "Book_5"]
counterparties = ["Counterparty_1", "Counterparty_2", "Counterparty_3", "Counterparty_4", "Counterparty_5"]
book = choices(books, k=100)
counterparty = choices(counterparties, k=100)

date1, date2 = date(2018, 8, 1), date(2023, 8, 3)
# Every calendar day from date1 through date2 inclusive. Computing the day
# count up front avoids a `while date1 != date2` loop, which would never
# terminate if the two dates were swapped.
res_dates = [
    date1 + timedelta(days=offset)
    for offset in range((date2 - date1).days + 1)
]
ldd = choices(res_dates, k=100)

# Named `data` rather than `dict` so the builtin is not shadowed.
data = {'book': book, 'counterparty': counterparty, 'last_trading_date': ldd}
df = pd.DataFrame(data)
# Round-trip through to_datetime, then keep plain date objects (.dt.date) so
# the column can be compared with the date supplied to the filter.
df['last_trading_date'] = pd.to_datetime(df['last_trading_date'], format='%Y-%m-%d').dt.date
# Distinct book codes, taken from the column's categories.
books = pd.Categorical(df['book']).categories
books_dropdown = widgets.Dropdown(options=books, value=books[0], description='Book:', disabled=False)

# Distinct counterparty codes, taken the same way.
counterparty = pd.Categorical(df['counterparty']).categories
counter_dropdown = widgets.Dropdown(options=counterparty, value=counterparty[0], description='Counterparty:', disabled=False)

# Date input used as the cut-off for the filter.
date_picker = widgets.DatePicker(description='Pick a Date', disabled=False)
# Tag the picker with a CSS class so the script below can find its <input>.
date_picker.add_class("start-date")
# Browser-side snippet that sets the min/max attributes on the tagged <input>.
# NOTE(review): `script` is only constructed here; nothing in the visible
# code displays it, so it may never run -- confirm.
script = Javascript("\
const query = '.start-date > input:first-of-type'; \
document.querySelector(query).setAttribute('min', '2020-12-01'); \
document.querySelector(query).setAttribute('max', '2025-01-01'); \
")
def filter_function(bookcode, cpartycode, datecode):
    """Show the rows for one book and counterparty that trade before a date.

    Args:
        bookcode: Value compared against the 'book' column.
        cpartycode: Value compared against the 'counterparty' column.
        datecode: Cut-off date; only rows strictly before it are kept.
            ``None`` (no date chosen yet) skips the date filter instead of
            failing on the comparison.
    """
    filtered = df[(df['book'] == bookcode) & (df['counterparty'] == cpartycode)]
    if datecode is not None:
        filtered = filtered[filtered['last_trading_date'] < datecode]
    with report_output:
        # wait=True keeps the old table until the new one is ready.
        report_output.clear_output(wait=True)
        display(filtered)
# Create the output area first: filter_function writes into report_output,
# and it may be invoked as soon as the interactive widget is shown.
report_output = widgets.Output()
# Bind the three widgets to filter_function's parameters and show them.
w = interactive(filter_function, bookcode=books_dropdown, cpartycode=counter_dropdown, datecode=date_picker)
display(w)
# The filtered table appears below the controls.
display(report_output)
Using the widget that's displayed when the code is run in Jupyter Notebook, I get the following output:
Only changes that I made in the code provided by you are:
Remove the code for VBox.
Store interactive widget as a variable and use display() to display it.
Directly use datecode argument to filtered_function for creating filtered instead of using datetime.date(datecode.value).
I am using ipywidgets in a class to clean the dataset. Using the ipywidgets output tab, I am able to print the cleaned dataframe, but I am unable to access the returned dataframe variable (df_clean) in the next code cells. I am not sure what I am missing here; I spent a day exploring...
Notebook colab file
code_query_pic
from ipywidgets import Button
from IPython.display import display,clear_output
import pandas as pd
# Load the credit-risk training data straight from GitHub.
train = pd.read_csv('https://raw.githubusercontent.com/taknev83/datasets/master/credit_risk_train.csv')
class Clean_data():
    """Interactive helper that imputes missing values in a copy of a DataFrame."""

    def __init__(self, df):
        """Remember the DataFrame to clean.

        Args:
            df: The source DataFrame; ``missing_value`` works on a deep copy.
        """
        self.df = df

    @staticmethod
    def _impute(frame):
        """Fill the gaps in ``frame`` in place.

        Text columns get their most frequent value, float64/int64 columns
        their mean.
        """
        is_object = pd.api.types.is_object_dtype
        is_string = pd.api.types.is_string_dtype
        text_cols = [c for c in frame.columns
                     if is_object(frame[c]) or is_string(frame[c])]
        for col in text_cols:
            modes = frame[col].mode()
            if not modes.empty:  # an all-missing column has no mode
                # Assign back instead of `frame[col].fillna(..., inplace=True)`:
                # the chained form only updates a temporary under Copy-on-Write.
                frame[col] = frame[col].fillna(modes.iloc[0])
        for col in frame.select_dtypes(['float64', 'int64']).columns:
            frame[col] = frame[col].fillna(frame[col].mean())

    def missing_value(self):
        """Show a Yes/No dropdown and a button that imputes a copy of ``self.df``.

        The copy (``df_clean``) lives only inside this method and its click
        handler; ``self.df`` is left untouched.
        """
        # The `widgets` alias is not imported at file level in this snippet.
        import ipywidgets as widgets

        df_clean = self.df.copy(deep=True)
        mis_val = widgets.Dropdown(
            options=[('Yes', '1'), ('No', '2')],
            value='1',
            description='Impute Missing Value',
            style={'description_width': 'initial'},
            disabled=False)
        display(mis_val)
        button = widgets.Button(description='Run Impute')
        out = widgets.Output()

        def on_button_clicked(_):
            """Impute when 'Yes' is selected; the click argument is unused."""
            with out:
                clear_output()
                if mis_val.value == '1':
                    self._impute(df_clean)
                    # print(self.df) print is working
                    print('Completed imputation')
                    return df_clean  # where to access this return dataframe?
                if mis_val.value == '2':
                    return df_clean

        button.on_click(on_button_clicked)
        a = widgets.VBox([button, out])
        display(a)
I would like to access df_clean in the next code cells for further use...
Here you go,
Following you can put in your utils.py or whatever you want to call this module.
from ipywidgets import Button
import ipywidgets as widgets
from IPython.display import display,clear_output
import pandas as pd
# Load the credit-risk training data straight from GitHub.
train = pd.read_csv('https://raw.githubusercontent.com/taknev83/datasets/master/credit_risk_train.csv')
# Blank out the first row so that there is something to impute.
train.iloc[0, :] = pd.NA
# Total number of missing cells (only shown when run as a notebook cell).
train.isna().sum().sum()
class Clean_data():
    """Interactive helper that imputes missing values and keeps the result in ``self.df``."""

    def __init__(self, df):
        """Remember the DataFrame to clean.

        Args:
            df: The source DataFrame; ``missing_value`` replaces ``self.df``
                with a deep copy before anything is changed.
        """
        self.df = df

    def _impute_missing(self):
        """Fill the gaps in ``self.df`` in place.

        Text columns get their most frequent value, float64/int64 columns
        their mean.
        """
        frame = self.df
        is_object = pd.api.types.is_object_dtype
        is_string = pd.api.types.is_string_dtype
        text_cols = [c for c in frame.columns
                     if is_object(frame[c]) or is_string(frame[c])]
        for col in text_cols:
            modes = frame[col].mode()
            if not modes.empty:  # an all-missing column has no mode
                # Assign back instead of `frame[col].fillna(..., inplace=True)`:
                # the chained form only updates a temporary under Copy-on-Write.
                frame[col] = frame[col].fillna(modes.iloc[0])
        for col in frame.select_dtypes(['float64', 'int64']).columns:
            frame[col] = frame[col].fillna(frame[col].mean())

    def missing_value(self):
        """Show a Yes/No dropdown and a button that imputes ``self.df``.

        After the button has been clicked with 'Yes' selected, the cleaned
        frame is available to later cells as ``<instance>.df``.
        """
        # Work on a private copy so the caller's frame is not modified.
        self.df = self.df.copy(deep=True)
        mis_val = widgets.Dropdown(
            options=[('Yes', '1'), ('No', '2')],
            value='1',
            description='Impute Missing Value',
            style={'description_width': 'initial'},
            disabled=False)
        display(mis_val)
        button = widgets.Button(description='Run Impute')
        out = widgets.Output()

        def on_button_clicked(_):
            """Impute when 'Yes' is selected; the click argument is unused."""
            with out:
                clear_output()
                if mis_val.value == '1':
                    self._impute_missing()
                    print('Completed imputation')
                    return self.df
                if mis_val.value == '2':
                    return self.df

        button.on_click(on_button_clicked)
        a = widgets.VBox([button, out])
        display(a)
In the notebook where you want to import, you can do as follows:
from utils import *
# Blank out the first row so there are missing values to impute
# (`train` and `pd` arrive through the star import from utils).
train.iloc[0, :] = pd.NA
print(train.isna().sum().sum()) # should print 21
# Build the cleaner and show its widgets; click 'Run Impute' before going on.
c = Clean_data(train)
c.missing_value()
# The cleaned frame is kept on the instance as c.df.
print(c.df.isna().sum().sum()) # should be 0
Hope this solves your problem.
Sorry for the potentially confusing phrasing of my question. Essentially, I am trying to make it so that every time I press the 'Add Data' command button there is only one DataFrame displayed. The one that should be displayed is the DF that is modified when the button is pressed. Currently, though, it will append the output with the recently modified DF, on top of the older versions that were created from earlier clicks of the button.
I'm using this code as part of a larger program for performing Monte Carlo simulations and back testing. My goal for these widgets is to input all the option positions I take on certain assets. That way, I can have a consolidated DF of my positions to speed up my analysis in later sections of this program and be available for other programs. The 'Add Data' button will input the values of the other widgets into a dictionary and concat that dictionary with the existing portfolio DF (which is saved in a CSV file).
I believe my problem is caused by me not properly utilizing the ipywidget Output() function, but have not been able to find a workable solution to my problem.
Also, I am writing in a Jupyter Notebook.
import pandas as pd
import datetime
from datetime import *
import ipywidgets as widgets
from ipywidgets import *
############################################## Usually kept in a separate cell so the
# portfolio can be refreshed every day while rows are still added during the day.
# Start from an empty frame that only defines the columns, and save it.
df = pd.DataFrame({
    'Datetime': [],
    'Expire': [],
    'Type': [],
    'Quantity': [],
    'Strike': [],
    'Spot': [],
})
df.to_csv("portfolio.csv", index=False)
##############################################
def _fit_content():
    """Return fresh layout/style keyword arguments shared by all inputs."""
    return {
        'layout': {'width': 'max-content'},
        'style': {'description_width': 'max-content'},
    }


# Option type selector
Type = widgets.Dropdown(options=['Call', 'Put'], value='Call',
                        description='Select Binary Type', disabled=False,
                        **_fit_content())
# Number of contracts (1 to 10)
Quantity = widgets.BoundedIntText(value=1, min=1, max=10, step=1,
                                  description='Quantity:', disabled=False,
                                  **_fit_content())
# Strike and spot share the same 1500 to 3500 range
Strike = widgets.BoundedIntText(min=1500, max=3500, step=1,
                                description='Strike:', disabled=False,
                                **_fit_content())
Spot = widgets.BoundedIntText(min=1500, max=3500, step=1,
                              description='Spot:', disabled=False,
                              **_fit_content())
Add = widgets.Button(description="Add Data")
out = Output()
def add_on_click(b):
    """Add the current widget values to portfolio.csv as a row and show the result.

    Args:
        b: The clicked button (unused).
    """
    stamp = datetime.now()
    portfolio = pd.read_csv("portfolio.csv")
    today = datetime.now()
    new_row = pd.DataFrame(
        {
            'Datetime': stamp,
            # Same calendar day as `today`, at a fixed time of day.
            'Expire': datetime(today.year, today.month, today.day, 14, 15, 0, 1),
            'Type': Type.value,
            'Quantity': Quantity.value,
            'Strike': Strike.value,
            'Spot': Spot.value,
        },
        index=[0],
    )
    portfolio = pd.concat([portfolio, new_row], sort=True)  # ignore_index=True)
    portfolio.to_csv("portfolio.csv", index=False)
    # Both the frame and the `out` widget are handed to display() here;
    # `out` is not used as a context manager and is never cleared.
    display(portfolio, out)
# Run add_on_click on every press of the Add button.
Add.on_click(add_on_click)
# Put the four inputs and the button into one flex row.
items = [Type, Quantity, Strike, Spot, Add]
box_layout = Layout(display='flex',
flex_flow='row',
align_items='stretch',
width='100%')
box_auto = Box(children=items, layout=box_layout)
display_widgets = VBox([box_auto])
# Last expression of the cell: renders the form.
display_widgets
Change your last lines of add_on_click to:
# Drop the previous table; wait=True holds it until the new one is ready,
# which avoids flicker.
out.clear_output(wait=True)
with out:
    display(df)
You can try
def add_on_click(b):
    """Rebuild the portfolio frame and show it as the only content of ``out``.

    Args:
        b: The clicked button (unused).
    """
    # rest of the code goes here (it has to build `df` before it is shown)
    with out:
        # Clear through the widget itself: a bare clear_output() would need
        # `from IPython.display import clear_output`, which the question's
        # code does not import.
        out.clear_output(wait=True)
        display(df)