I am developing a data app using Dash. It performs a file copy task, and I am showing a progress bar for it.
I wrote this code using the Dash website as a reference. This is a pseudo-code structure:
import dash_bootstrap_components as dbc
from dash import Input, Output, dcc, html, ctx
# Layout pieces for the copy-progress demo: a timer that drives the
# progress callback, the progress bar itself, and the button that starts
# the copy.
progress = html.Div(
    [
        # Ticks every 2000 ms, at most 500 times; n_intervals is the tick count.
        dcc.Interval(
            id="progress_interval",
            max_intervals=500,
            n_intervals=0,
            interval=2000,
        ),
        dbc.Progress(id="progress"),
    ]
)
# No trailing comma after the call: a trailing comma would bind a 1-tuple
# to copy_button instead of the Button component.
copy_button = dbc.Button(id="btn_copy", children="Copy", className="btn btn-success")
@app.callback(
    [Output("progress", "value"), Output("progress", "label")],
    [
        Input("progress_interval", "n_intervals"),
        Input("btn_copy", "n_clicks"),
    ],
)
def update_copy_progress(n, n_clicks=None):
    """Update the progress bar on timer ticks and start the copy on click.

    Args:
        n: Current ``n_intervals`` of the ``progress_interval`` timer.
        n_clicks: Current ``n_clicks`` of the ``btn_copy`` button. The
            callback declares two Inputs, so it receives two arguments.

    Returns:
        A ``(value, label)`` pair for the progress bar, or a pair of
        ``no_update`` markers when the bar should be left untouched.
    """
    from dash import no_update

    triggered_id = ctx.triggered_id
    if triggered_id == "progress_interval":
        # NOTE: the value is derived from the timer tick count only, not
        # from the amount of data actually copied; it wraps around every
        # 110 ticks and is clamped to 100.
        percent = min(n % 110, 100)
        # only add text after 5% progress to ensure text isn't squashed too much
        return percent, f"{percent} %" if percent >= 5 else ""
    if triggered_id == "btn_copy":
        # start some background process
        long_running_file_copy_task()
    # A two-output callback must always return two values; leave the bar
    # unchanged on a button click or on the initial call.
    return no_update, no_update
The progress bar starts running when the button is clicked and displays 5%, 6%, 7%, ...
say I am copying 100MB file size.
Issues, I noticed:
The terminal shows that 10 MB of the file has been copied, but the progress bar in the UI displays 40%.
The file copy is still in progress, yet once the progress bar reaches 100% it resets and starts again from 1%.
How is a progress bar handled in a real web application, say for a file-copy task with different file sizes, or for any long-running task? How can the progress bar be generalized?
Please suggest some direction.
Related
I need to load a big file into a TextInput in a python GUI app built with kivy. What is the most efficient way to do this without blocking the UI?
For the sake of this example, I'm using a file named big_file.txt. War And Peace (the book) is big: 3.3 MB in plaintext, available from Project Gutenberg here:
https://www.gutenberg.org/ebooks/2600.txt.utf-8
Save that file to some directory:
wget -O big_file.txt https://www.gutenberg.org/ebooks/2600.txt.utf-8
Consider the following simplified application named main.py and located in the same directory as big_file.txt
import time
import kivy
from kivy.app import App
from kivy.uix.label import Label
from kivy.uix.button import Button
from kivy.uix.textinput import TextInput
from kivy.uix.boxlayout import BoxLayout
class MyApp(App):
    """Demo app with a counter label, an increment button, a text box and a file loader."""

    def build(self):
        """Create the widgets, place them in a BoxLayout and return it."""
        self.count = 0

        # Widgets are created in the same order in which they are displayed.
        self.label1 = Label(id='label1', text='0')
        increment_button = Button(text='Increment', on_release=self.increment)
        self.textinput1 = TextInput(text='Click button to load file.')
        load_button = Button(text='Load File', on_release=self.load_file)

        root = BoxLayout()
        for widget in (self.label1, increment_button, self.textinput1, load_button):
            root.add_widget(widget)
        return root

    def load_file(self, *args):
        """Read big_file.txt into the text box and print how long it took."""
        started = time.time()
        print("loading file")
        with open('big_file.txt') as big_file:
            self.textinput1.text = big_file.read()
        elapsed = time.time() - started
        print(f"finished loading file in {elapsed} seconds")

    def increment(self, *args):
        """Bump the counter and show the new value in the label."""
        self.count = self.count + 1
        self.label1.text = f"{self.count}"
if __name__ == "__main__":
MyApp().run()
This kivy application has:
An "Increment" Label
An "Increment" Button
A TextInput
A "Load File" Button
The problem is that it takes ~15 seconds to read() the 3.3 MB big_file.txt. And during that time, the UI is blocked.
For example, after clicking the Load File button, the user can't click the Increment button for ~15 seconds -- until the file is finished being read by the load_file() function.
How can I efficiently load the contents of the kivy TextInput with big_file.txt without blocking the UI?
Try using a RecycleView, which is designed for handling large amounts of data. Here is a modified version of your code that uses a RecycleView:
import threading
import time
from kivy.app import App
from kivy.lang import Builder
from kivy.uix.label import Label
from kivy.uix.button import Button
from kivy.uix.recycleview import RecycleView
from kivy.uix.boxlayout import BoxLayout
class RV(RecycleView):
    """RecycleView subclass; its appearance is defined by the ``<RV>`` kv rule below."""


# kv rules are indentation-sensitive, and a dynamic class is declared with
# ``<Name@Base>``. MyLabel is the per-row view used by RV.
Builder.load_string('''
<MyLabel@Label>:
    halign: 'center'
    size_hint: 1, None
    height: dp(25)
    text_size: self.size

<RV>:
    viewclass: 'MyLabel'
    RecycleBoxLayout:
        default_size: None, dp(25)
        default_size_hint: 1, None
        size_hint_y: None
        height: self.minimum_height
        orientation: 'vertical'
''')
class MyApp(App):
    """Demo app that shows a large text file line by line in a RecycleView."""

    def build(self):
        """Create the counter label, the two buttons and the RecycleView."""
        self.count = 0
        layout = BoxLayout()
        # add the label
        self.label1 = Label(text='0', size_hint_x=0.1)
        layout.add_widget(self.label1)
        # add the button
        layout.add_widget(Button(text='Increment', on_release=self.increment, size_hint_x=0.1))
        # add the RecycleView
        self.rv = RV(size_hint_x=0.7)
        layout.add_widget(self.rv)
        # add the button
        layout.add_widget(Button(text='Load File', on_release=self.load_file, size_hint_x=0.1))
        return layout

    def load_file(self, *args):
        """Start loading the file on a daemon thread so the UI stays responsive."""
        threading.Thread(target=self.actual_load, daemon=True).start()

    def actual_load(self):
        """Read big_file.txt, split it into rows and hand them to the RecycleView.

        Runs on the worker thread started by ``load_file``. Only the file
        read and the row construction happen here; the widget itself is
        updated from the main thread.
        """
        from kivy.clock import Clock

        start = time.time()
        print("loading file")
        with open('big_file.txt') as big_file:
            text = big_file.read()
        end = time.time()
        print("finished loading file in " + str(end - start) + " seconds")

        start = time.time()
        lines = [{'text': line} for line in text.splitlines(keepends=False)]
        end = time.time()
        print("finished loading lines in " + str(end - start) + " seconds")

        # Widgets should only be modified from the main thread, so schedule
        # the assignment on the Kivy clock instead of doing it here.
        Clock.schedule_once(lambda dt: self._show_lines(lines))

    def _show_lines(self, lines):
        """Assign the prepared rows to the RecycleView."""
        self.rv.data = lines

    def increment(self, *args):
        """Increase the counter and display it in the label."""
        self.count += 1
        self.label1.text = str(self.count)
if __name__ == "__main__":
MyApp().run()
This uses a Label for each line of text. The RecycleView creates a fixed number of Labels, and just reuses them as you scroll.
The read() isn't actually what's slow. What's slow is the call to update the TextInput.text with a lot of data.
Threaded Solution
You can avoid locking up the UI by doing the update in a background thread that runs asynchronously:
import time, threading
from kivy.app import App
from kivy.clock import Clock
from kivy.uix.label import Label
from kivy.uix.button import Button
from kivy.uix.textinput import TextInput
from kivy.uix.boxlayout import BoxLayout
class MyApp(App):
    """Demo app that fills a TextInput from a background thread.

    A clock callback polls the thread once per second and reports how long
    the widget update took once the thread has finished.
    """

    def build(self):
        """Create the counter label, the two buttons and the read-only TextInput."""
        self.count = 0
        # the thread currently updating the TextInput, or None when idle
        self.update_textinput1_bg = None
        layout = BoxLayout()
        # add the label
        self.label1 = Label(text='0')
        layout.add_widget(self.label1)
        # add the button
        layout.add_widget(
            Button(text='Increment', on_release=self.increment)
        )
        # add the textinput
        self.textinput1 = TextInput(
            text='Click button to load file.',
            readonly=True
        )
        layout.add_widget(self.textinput1)
        # add the button
        layout.add_widget(
            Button(text='Load File', on_release=self.load_file)
        )
        return layout

    # function that's called when the "Load File" button is pressed
    def load_file(self, *args):
        """Read big_file.txt and start a thread that puts it into the TextInput."""
        # is there already a thread running attempting to update the textinput?
        if self.update_textinput1_bg is not None:
            # a thread is already running; tell the user to be patient
            self.textinput1.text = "Still Loading File. Please be patient."
        else:
            # no background thread is running yet; start one now
            start = time.time()
            print("loading file")
            self.textinput1.text = "Loading File. Please wait.."
            with open('big_file.txt') as big_file:
                file_contents = big_file.read()
            end = time.time()
            print(f"finished loading file in {end - start} seconds")
            # we use threading.Thread() instead of multiprocessing.Process
            # because it can update the widget's contents directly without
            # us having to pass data in-memory between the child process.
            # Con: We can't kill threads, so it should only be used for
            # short-running background tasks that won't get stuck
            self.update_textinput1_bg = threading.Thread(
                target=self.update_textinput1,
                args=(file_contents,)
            )
            self.start = time.time()
            self.update_textinput1_bg.start()
            # Register the update_textinput1_tick() function as a callback to be
            # executed every second, and we'll use that to update the UI with a
            # status message from the update_textinput1() thread and check to see
            # if it finished running
            Clock.schedule_interval(self.update_textinput1_tick, 1)

    # very simple function that updates the contents of the TextInput
    # this is intended to be called in a background Thread so the UI doesn't
    # get locked when "contents" is very large
    def update_textinput1(self, contents, *args):
        """Set the TextInput text to ``contents``."""
        self.textinput1.text = contents

    # callback function that's executed every ~1 second after being scheduled
    # by load_file()
    def update_textinput1_tick(self, dt):
        """Check whether the background thread is done; if so, report and unschedule."""
        print("called update_textinput1_tick()")
        # is the background thread still running?
        if not self.update_textinput1_bg.is_alive():
            # the background thread finished running; calculate runtime and
            # unschedule this callback function from being called every second
            end = time.time()
            print(f"finished udpdating widget text in {end - self.start} seconds")
            self.update_textinput1_bg = None
            Clock.unschedule(self.update_textinput1_tick)

    # increase the integer displayed in the text of the label widget by one
    def increment(self, *args):
        """Increase the counter and display it in the label."""
        self.count += 1
        self.label1.text = str(self.count)
if __name__ == "__main__":
MyApp().run()
Here's an example execution. Note that it takes:
less than 0.2 seconds to read from the file, and
over 18 seconds to update the TextInput
user@buskill:~/tmp/kivy_file_textinput$ /tmp/kivy_appdir/opt/python3.7/bin/python3.7 main.py
...
[INFO ] [Kivy ] v1.11.1
...
[INFO ] [Python ] v3.7.8 (default, Jul 4 2020, 10:00:57)
[GCC 9.3.1 20200408 (Red Hat 9.3.1-2)]
...
[INFO ] [Base ] Start application main loop
loading file
finished loading file in 0.01690673828125 seconds
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
called update_textinput1_tick()
finished udpdating widget text in 18.336608171463013 seconds
Kivy v2.1.0 TextInput Improvements
Note that last year (in 2021), Kivy made some improvements to the slow loading of large data into a TextInput. These were released earlier this year (2022-03-06) in Kivy v2.1.0.
Kivy v2.1.0 Release Notes
Issue #7642: TextInput loading time optimisation for large texts
That said, these optimizations appear to be pretty limited. I tested the original code example in both Kivy v1.11.1 (~15 seconds to load the data into the TextInput) and v2.1.0 (~12 seconds)
Why is it so slow?
I mentioned this issue to the Kivy developers in the link above, and Gabriel Pettier (tshirtman) provided some insight.
Using py-spy top -- python main.py, you can profile the code and get something like this
%Own %Total OwnTime TotalTime Function (filename)
0.00% 0.00% 3.53s 16.71s _create_line_label (kivy/uix/textinput.py)
0.00% 0.00% 3.10s 22.70s _refresh_text (kivy/uix/textinput.py)
0.00% 0.00% 2.75s 10.12s refresh (kivy/core/text/__init__.py)
0.00% 0.00% 2.23s 6.13s render (kivy/core/text/__init__.py)
0.00% 0.00% 1.47s 2.32s _get_font_id (kivy/core/text/text_sdl2.py)
0.00% 0.00% 1.39s 3.71s get_extents (kivy/core/text/text_sdl2.py)
0.00% 0.00% 1.22s 1.64s __init__ (kivy/core/text/__init__.py)
0.00% 0.00% 1.11s 1.11s __init__ (kivy/weakmethod.py)
Or produce a flame graph with py-spy record -o profile.svg -- python main.py
This shows that the majority of the time is actually spent trying to figure out how wide each line needs to be, which is especially evident at how much the UI locks-up when you resize the window.
so almost all the time is spent in _refresh_text unsurprisingly, and _create_line_label in it.
My guess, looking at the code from _create_line_label, is that most of the time is spent looking for the ideal length of the line: we start with the logical line length; if it's too long, we cut it in half until it fits, and then grow it again (by half the length) until it doesn't fit anymore, etc., following a bisection until the stop is less than 2. Any improvement to the initial guess of the line's length could make a significant difference; for example, just by enlarging the window of the example, so most logical lines fit in one display line, the rendering time was cut by more than 50%.
Another idea could be to be able to compute how many words of the line can fit, without rendering the full line, that could be achieved by rendering + caching each unique word encountered, with line options as well (font, etc), so the words texture could be looked up from cache, adding their width until we don't fit the current line anymore, allowing us to either render the line from these words at this point, or reworking the rendering system so a line can just use these textures directly through multiple rectangle instructions, in this technique it would be important to properly keep track of the space between words as well, which depends on the specific character(s) used.
I'm sure there is research about how to do these things efficiently as well, that could be worth looking into (famously, Knuth spent a lot of time on efficient and correct ways to layout text).
I am currently creating a dashboard using dash with figures using plotly. I am using the bootswatch themes (https://bootswatch.com/) with bootstrap components to style the dashboard and I was wondering if there was a way to retrieve the primary, secondary, success etc. colours into either RGB or HEX so that I don't have to hard code it in and can then parse these into plotly functions to style the graphs in the same theme.
As of writing this there isn't a way to do it with Python only.
A dash bootstrap components theme is just a uri pointing to a stylesheet.
>>> dbc.themes.BOOTSTRAP
https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css
For the stylesheets in external_stylesheets link elements are generated and added to the document.
So there isn't a property on a theme or on the app that holds the value of the css variables.
A workaround solution is to use clientside callbacks:
import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Output, Input
import dash_bootstrap_components as dbc
# App with the Bootstrap theme stylesheet and two stores: one written by
# the clientside callback, one written by the server-side callback.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.layout = html.Div(
    [
        dcc.Store(id="intermediate-store"),
        dcc.Store(id="color-store"),
    ]
)
# The JavaScript below runs in the browser and reads the theme's CSS
# variables from the computed style of <body>.
app.clientside_callback(
    """
function(val) {
const bodyStyles = window.getComputedStyle(document.body);
return {
primary: bodyStyles.getPropertyValue("--primary"),
secondary: bodyStyles.getPropertyValue("--secondary"),
success: bodyStyles.getPropertyValue("--success"),
};
}
""",
    Output("intermediate-store", "data"),
    Input("intermediate-store", "data"),
)
@app.callback(
    Output("color-store", "data"),
    Input("intermediate-store", "data"),
)
def update_output_div(data):
    """Copy the colours collected in ``intermediate-store`` into ``color-store``.

    Args:
        data: The dictionary stored in ``intermediate-store``.

    Returns:
        The same dictionary, unchanged.
    """
    # data equals: {'primary': '#007bff', 'secondary': '#6c757d', 'success': '#28a745'}
    return data
if __name__ == "__main__":
app.run_server(debug=True)
So the idea in the code above is to use Javascript in clientside callbacks to retrieve the css variables and store them in an intermediate Store. When it's stored we can use another callback to retrieve the value as a dictionary and do something with it.
You could use it by setting a variable when the second callback is triggered or you could use State("color-store", "data") in other callbacks.
I have a table with a large number of rows, and I want to show everything inside my app.
By default, dash_table.DataTable adds a scrollbar when the table is too long. I would like to remove that scrollbar.
Let's say you have the following application, where the height is set to 300px and overflow is automatic:
import dash
import dash_table
import dash_html_components as html
import pandas as pd
# Sample dataset loaded directly from the plotly datasets repository.
df = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/datasets/master/1962_2006_walmart_store_openings.csv"
)
app = dash.Dash(__name__)
# Table limited to 300px in height, scrolling vertically when it overflows.
table = dash_table.DataTable(
    id="table",
    columns=[{"name": column, "id": column} for column in df.columns],
    data=df.to_dict("records"),
    style_table=dict(height="300px", overflowY="auto"),
)
app.layout = html.Div(
    children=[html.H1("Header"), table, html.Button("Click here")]
)
if __name__ == "__main__":
app.run_server(debug=True)
You will get this resulting app:
In your case, you want to hide the scroll bar. You might be tempted to change the style_table to:
style_table={"height": "300px", "overflowY": "show"}
Although the entire table will be shown, this unfortunately means the button will be hidden since the table is overflowing beyond the designated height:
So the correct change is to simply set the height of the table to be unlimited:
style_table={"height": None}
And the button will show up correctly:
Controlling the height of the table is thoroughly documented in Dash Table docs. It will show you different ways to use overflowY.
add this argument into dash_table.DataTable:
virtualization=False
and you don't need to touch the height
Following this part of the docs: https://dash-bootstrap-components.opensource.faculty.ai/l/components/modal I've created a modal in my Dash app. The trigger for the modal will be dynamically rendered thumbnails. When any of them is clicked, the modal should open and display the image from the thumbnail as its body.
Is it possible, inside Dash, to have multiple buttons (I don't know how many there will be; it depends on how many thumbnails are in the database) that all open the same modal dialog and pass some of their data to the modal (such as the img src in my case)?
The input in the example above is simple:
[
Input("open", "n_clicks"), Input("close", "n_clicks")
],
but in reality I don't know how many will there be and can't hardcode an ID.
Any suggestions?
Yes, you can have multiple buttons open a modal. Just as you showed, the callback would have an Input for each one. No, you cannot create them dynamically. Dash does not play well with any ID that is not in the layout at the start of running the app.
Create a set of buttons dynamically using the below list comprehension:
[html.Button(x, id={'type': 'thumbnail_button', 'index': x}) for x in thumbnail_list]
Use the pattern-matching callback to open modal when any of these buttons are clicked:
@app.callback(
    Output('your-modal', 'is_open'),
    [Input({'type': 'thumbnail_button', 'index': ALL}, 'n_clicks')],
    [dash.dependencies.State('your-modal', 'is_open')],
)
def handle_button_click(n_clicks, is_open=False):
    """Toggle the modal when one of the thumbnail buttons is clicked.

    Args:
        n_clicks: List with the ``n_clicks`` of every button matched by the
            ``thumbnail_button`` pattern.
        is_open: Current ``is_open`` state of the modal.

    Returns:
        The new ``is_open`` value for the modal.
    """
    import json

    triggered = dash.callback_context.triggered
    if not triggered:
        return is_open

    trigger = triggered[0]
    # prop_id has the form "<component id>.<property>"; split on the last
    # dot only, because a dict id serialised as JSON may contain dots itself.
    component_id = trigger['prop_id'].rsplit('.', 1)[0]
    if not component_id or not trigger['value']:
        # Nothing was actually clicked (for example the buttons were just
        # added to the layout), so keep the modal as it is.
        return is_open

    invoker_type = json.loads(component_id)['type']
    if invoker_type == "thumbnail_button":
        return not is_open
    return is_open
Lastly the imports:
from dash.dependencies import Input, Output, ALL
In my Dash app I have a set of tabs, each containing one graph. I have a 'clickData' callback that runs a function every time you click on one of the graphs, and it works as long as I don't change the tab. But once I change the tab, the 'clickData' callback on the graphs stops working. Any idea?
In case it helps, this is the structure of my code:
app = dash.Dash(__name__)
app.layout = html.Div([
... #deleted code
html.Button(id='Calculate_button',
n_clicks=0,
children='Calculate',
style={'fontSize':18,
'width':'100%'}),
html.Div([
dcc.Tabs(id="tabs",
value='tab-1',
children=[dcc.Tab([dcc.Graph(id='LapLabels',
style={'height':1000,
'paddingTop':30})],
label='Lap Labels',
value='tab-1',
id='tab-1'),
dcc.Tab([dcc.Graph(id='RacePlot',
style={'height':1000,
'paddingTop':30})],
label='Raceplot',
value='tab-2',
id='tab-2'),])])])
@app.callback(Output('LapLabels', 'figure'),
              [Input('Calculate_button', 'n_clicks')],
              [State('input1', 'value'),
               State('input2', 'value'),
               State('csvcontainer', 'value')])
def update_Labels(n_clicks, Traffic_Trigger, Slow_Percent2best, path):
    """Rebuild the LapLabels figure whenever the Calculate button's n_clicks changes.

    Args:
        n_clicks: ``n_clicks`` of ``Calculate_button``; only used as the trigger.
        Traffic_Trigger: Value of the ``input1`` component.
        Slow_Percent2best: Value of the ``input2`` component.
        path: Value of the ``csvcontainer`` component.

    Returns:
        The figure produced by ``LapLabels``.
    """
    return LapLabels(Traffic_Trigger, Slow_Percent2best, path)  # this function returns a figure
@app.callback(Output('Calculate_button', 'n_clicks'),
              [Input('LapLabels', 'clickData'),
               Input('RacePlot', 'clickData')],
              [State('csvcontainer', 'value')])
def modsc_Labels(hoverData, hoverDataRplot, path):
    """Handle a click on either graph and write the result to the button's n_clicks.

    Args:
        hoverData: ``clickData`` of the ``LapLabels`` graph (despite the name).
        hoverDataRplot: ``clickData`` of the ``RacePlot`` graph (despite the name).
        path: Value of the ``csvcontainer`` component.

    Returns:
        Whatever ``myfunc`` returns; it is assigned to
        ``Calculate_button.n_clicks``.
    """
    return myfunc(hoverData, hoverDataRplot, path)  # this function updates the file that LapLabels reads