How to make a racing Bar Chart Visualization in Python - python

Animated bar chart race in Python: how can I make a bar change its position automatically? For example, in the code below, as countries like the USA accumulate higher values, their bars should gradually move up.
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
import numpy as np
url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
def read_file(url) -> pd.DataFrame:
    """Load a CSV file into a DataFrame.

    Args:
        url: Anything ``pandas.read_csv`` accepts (URL, local path or
            file-like object).

    Returns:
        The parsed DataFrame.
    """
    return pd.read_csv(url)
def filter_specific_country(df, selected_countries):
    """Return per-country totals for the selected countries.

    Args:
        df: Frame with a 'Country/Region' column, 'Lat' and 'Long' columns
            and one numeric column per value to be summed.
        selected_countries: Collection of 'Country/Region' values to keep.

    Returns:
        A DataFrame indexed by 'Country/Region' holding the column-wise sums
        of the numeric columns, without 'Lat' and 'Long'.

    Raises:
        KeyError: If 'Country/Region', 'Lat' or 'Long' is missing.
    """
    selected = df[df['Country/Region'].isin(selected_countries)]
    # Group by the column *name* (not a Series taken from the unfiltered
    # frame) and restrict the sum to numeric columns, so text columns are
    # not concatenated into the result.
    return (
        selected.groupby('Country/Region')
        .sum(numeric_only=True)
        .drop(columns=['Lat', 'Long'])
    )
def transpose_and_reformat_data(df):
    """Transpose ``df`` and expose its former column labels as a 'Dates' column.

    Args:
        df: Frame to transpose. The rename to 'Dates' only takes effect when
            the transposed index is unnamed, because ``reset_index`` then
            calls the new column 'index'.

    Returns:
        A new DataFrame whose first column holds the former column labels and
        whose remaining columns are the former rows. ``df`` is not modified.
    """
    transposed = df.transpose().reset_index()
    return transposed.rename(
        columns={'Country/Region': 'Index_Col', 'index': 'Dates'}
    )
# Countries to show in the chart.
selected_countries = [
    'India', 'China', 'Italy', 'Spain', 'France', 'Australia', 'Germany',
    'Japan', 'Korea, South', 'Pakistan', 'Russia', 'United Kingdom',
    'Canada', 'Iran', 'Brazil', 'Singapore', 'South Africa', 'US',
]

# Load the data, aggregate it per country and reshape it to long format
# (one row per date/country pair).
confirmed_dataset = read_file(url)
ds = filter_specific_country(confirmed_dataset, selected_countries)
data = transpose_and_reformat_data(ds).melt(
    id_vars=["Dates"],
    var_name="Country",
    value_name="Confirmed_Count",
)

# Alternative title:
# plot_title="Global Spread of Covid-19 : (Selected Countries)"
plot_title = 'Visualizing the spread of Novel Coronavirus COVID-19 (2019-nCoV) - Created by Dibyendu Banerjee'

# Horizontal bar chart with one animation frame per date.
fig = px.bar(
    data,
    y="Country",
    x="Confirmed_Count",
    color="Country",
    animation_frame="Dates",
    range_x=[1, 14000000],
    orientation='h',
)
fig.update_layout(
    title=plot_title,
    yaxis_title='Countries',
    xaxis_tickangle=90,
    font=dict(family="Arial", size=10, color="#7f7f7f"),
)
fig.show()

As far as I know, a bar chart race is not feasible with Plotly alone. There is already a dedicated library (raceplotly) that I will use to answer your question. Since the data is at the daily level, it will take a long time to play back, so I will need to resample or summarize the data into years.
from raceplotly.plots import barplot
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
def read_file(url) -> pd.DataFrame:
    """Read the CSV at ``url`` and return it as a DataFrame.

    Args:
        url: URL, path or file-like object understood by ``pandas.read_csv``.

    Returns:
        The parsed DataFrame.
    """
    return pd.read_csv(url)
def filter_specific_country(df, selected_countries):
    """Sum the numeric columns per country for the selected countries.

    Args:
        df: Frame containing 'Country/Region', 'Lat' and 'Long' columns plus
            the numeric columns to aggregate.
        selected_countries: 'Country/Region' values to keep.

    Returns:
        A DataFrame indexed by 'Country/Region' with the summed numeric
        columns; 'Lat' and 'Long' are removed.

    Raises:
        KeyError: If 'Country/Region', 'Lat' or 'Long' is missing.
    """
    mask = df['Country/Region'].isin(selected_countries)
    # numeric_only keeps text columns out of the sum; grouping by column
    # name keeps the key out of the aggregated columns.
    totals = df[mask].groupby('Country/Region').sum(numeric_only=True)
    return totals.drop(columns=['Lat', 'Long'])
def transpose_and_reformat_data(df):
    """Return ``df`` transposed, with its old column labels in a 'Dates' column.

    Args:
        df: Frame to transpose; its column labels become the values of the
            first output column. The column is only renamed to 'Dates' when
            ``reset_index`` names it 'index', i.e. the transposed index is
            unnamed.

    Returns:
        A new DataFrame; the input is left untouched.
    """
    df_t = df.transpose()
    df_t = df_t.reset_index()
    df_t = df_t.rename(columns={'Country/Region': 'Index_Col', 'index': 'Dates'})
    return df_t
# Load the data, aggregate it per country and reshape it to long format
# (one row per date/country pair).
confirmed_dataset = read_file(url)
selected_countries = [
    'India', 'China', 'Italy', 'Spain', 'France', 'Australia', 'Germany',
    'Japan', 'Korea, South', 'Pakistan', 'Russia', 'United Kingdom',
    'Canada', 'Iran', 'Brazil', 'Singapore', 'South Africa', 'US',
]
ds = filter_specific_country(confirmed_dataset, selected_countries)
data = transpose_and_reformat_data(ds).melt(
    id_vars=["Dates"], var_name="Country", value_name="Confirmed_Count"
)

# Build the bar chart race: one bar per country, ranked by confirmed count.
covid_race = barplot(
    data,
    item_column='Country',
    value_column='Confirmed_Count',
    time_column='Dates',
)
# The ranked items are countries, so label that axis accordingly.
covid_race.plot(
    item_label='Top 10 countries',
    value_label='Covid Confirmed Count',
    date_format='%Y/%m/%d',
    frame_duration=800,
)

Related

Mixing Plotly/ipywidgets to modify the x axis of a scatter plot

I want to mix Plotly with a dropdown widget, the idea being to make some scatter plots and modify the x axis through the widget. Let's say that my dataset is the following :
import plotly.graph_objects as go
import pandas as pd
import ipywidgets as widgets
import seaborn as sns

# Example dataset used for the scatter plots.
df = sns.load_dataset('diamonds')
And my target is the column carat. What I tried so far is to create the scatters, include them into the widget and display it :
# Every column except the target is a candidate x axis.
predictors = df.columns.tolist()
predictors.remove("carat")
target = df["carat"]

# Build one scatter figure per predictor, stored as (name, figure) pairs.
data = []
for predictor in predictors:
    chart = go.Scatter(x=df[predictor], y=target, mode="markers")
    fig = go.Figure(data=chart)
    data.append((predictor, fig))

# NOTE: nothing observes this dropdown, so changing its value does not
# display any of the figures stored in `data`.
widgets.Dropdown(
    options=[item[0] for item in data],
    value=[item[0] for item in data][0],
    description="Select :",
    disabled=False,
)
Yet, I am new to ipywidgets/plotly and don't understand what is not working here, since it displays the widget but not the charts even when I change its value. How can I modify the code so that it finally displays the charts when selecting a predictor ?
You can use interact to read the values from the DropDown and plot your graph:
import plotly.graph_objects as go
import pandas as pd
import seaborn as sns
from ipywidgets import widgets
from ipywidgets import interact
import plotly.express as px
# Example dataset; "carat" is plotted on the y axis.
df = sns.load_dataset('diamonds')
target = df["carat"]

# All remaining columns can be chosen as the x axis.
predictors = df.columns.tolist()
predictors.remove("carat")
@interact
def read_values(
    predictor=widgets.Dropdown(
        description="Select :", value="clarity", options=predictors
    )
):
    """Draw a scatter plot of ``target`` against the selected predictor.

    Decorated with ``interact`` so that it is called with the dropdown's
    current value.

    Args:
        predictor: Name of the ``df`` column to put on the x axis.
    """
    fig = px.scatter(df, x=predictor, y=target)
    go.FigureWidget(fig.to_dict()).show()

Interactive plotly boxplot with ipywidgets

I am trying to create an interactive boxplot with ipywidgets and Plotly.
I started by looking at this example
While this is fine, I'd like to change the groupings of the boxplot based on a dropdown input.
With interact I can do this:
import datetime
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from ipywidgets import widgets
# Load the flights sample and discard its first column.
df = pd.read_csv(
    'https://raw.githubusercontent.com/yankev/testing/master/datasets/nycflights.csv'
)
df = df.drop(columns=df.columns[[0]])
from ipywidgets import interact
def view_image(col):
    """Show box plots of ``distance`` with one box per distinct value of ``col``.

    Args:
        col: Name of the ``df`` column used to group the rows.
    """
    fig = go.FigureWidget()
    for val in df[col].unique():
        groupData = df.query(f'{col} == "{val}"')
        fig.add_trace(go.Box(y=groupData['distance'], name=val))
    fig.show()


# Let interact build a selector for `col` from the list of options.
interact(view_image, col=['origin', 'carrier'])
And the result is that I can change the column based on which the data is grouped.
However, I would like to have more control on the widgets, like in the official example.
This is what I am trying (and failing):
# Build the initial figure widget with one box trace per origin.
gdata = []
for origin in df.origin.unique():
    groupData = df.query(f'origin == "{origin}"')
    gdata.append(go.Box(y=groupData['distance'], name=origin))

g = go.FigureWidget(
    data=gdata,
    layout=go.Layout(
        title=dict(text='NYC FlightDatabase'),
        barmode='overlay',
    ),
)
def response_box(change):
    """Rebuild the box traces of ``g`` for the column selected in ``column``.

    Args:
        change: Change notification passed by the widget observer; unused,
            the current selection is read from ``column.value`` instead.
    """
    col = column.value
    with g.batch_update():
        gdata = []
        for val in df[col].unique():
            groupData = df.query(f'{col} == "{val}"')
            gdata.append(go.Box(y=groupData['distance'], name=val))
        # NOTE: this assignment is the statement the question reports as
        # failing with a "you cannot update data directly" error.
        g.data = gdata
# Dropdown selecting the grouping column; response_box runs on every change.
column = widgets.Dropdown(options=['origin', 'carrier'])
column.observe(response_box, 'value')

# Lay out the dropdown above the figure widget.
container2 = widgets.HBox([column])
widgets.VBox([container2, g])
Note that since I have new groupings, I cannot just go into g.data[index].y and change per index, but I have to re-generate the figure as in the interact function.
This particular iteration gives me a "you cannot update data directly" error. I tried in a few different ways, but I don't seem to find one that works.
Any idea?
It's not clear how you want to interact with the dimensions of the data, so I've gone with defining the x and color of the figure, plus filtering by origin, dest, or carrier.
Box plots are far simpler to create using Plotly Express, so I have used that.
It then really simplifies to passing parameters. I have used the interact decorator described at https://ipywidgets.readthedocs.io/en/latest/examples/Using%20Interact.html
import datetime
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from ipywidgets import widgets
from ipywidgets import interact
# Load the flights sample and discard its first column.
df = pd.read_csv(
    "https://raw.githubusercontent.com/yankev/testing/master/datasets/nycflights.csv"
)
df = df.drop(columns=df.columns[[0]])
@interact
def view_image(
    col=widgets.Dropdown(
        description="Plot:", value="carrier", options=["origin", "carrier"]
    ),
    filtercol=widgets.Dropdown(
        description="Filter by:", value="carrier", options=["origin", "dest", "carrier"]
    ),
    filter=widgets.Text(
        description="Filter:", value=""
    ),
):
    """Show a box plot of ``distance`` grouped and colored by ``col``.

    Decorated with ``interact`` so that it is called with the current widget
    values.

    Args:
        col: Column used for the x axis and the color.
        filtercol: Column the filter text is compared against.
        filter: Exact value to keep in ``filtercol``; when no row matches,
            the full data set is plotted.
    """
    matches = df[filtercol].eq(filter)
    # Fall back to all rows when the filter text matches nothing.
    dfp = df.loc[matches] if matches.any() else df
    fig = px.box(dfp, x=col, y="distance", color=col)
    go.FigureWidget(fig.to_dict()).show()

Plotly Animated Bar Graph Showing 1 subgroup only in Jupyter

Issue: When I run my code, only one status (subgroup) shows. The data set is very simple: create date, status, and count. I can only think that something might be wrong with my data set at this point. Why does it show only one of the three statuses I have? Or would it possibly work better with a hosted file? It seems to just iterate through the list and not keep each data point intact until the end. The other code block works fine on GitHub.
Sample of my data set:
Status,Create Date,Count
None,17-Apr-12,8
None,30-Apr-12,9
None,23-Aug-12,10
None,3-Oct-12,11
None,9-Jan-13,12
None,29-Jan-13,13
QBOS,31-Jan-13,1
QBDS,1-Feb-13,1
My code:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
# Load the status counts and keep only the three statuses of interest.
df = pd.read_csv('qb7.csv')
df.columns = ['Status', 'Create Date', 'Count']
includes = ['None', 'QBDS', 'QBOS']
df = df[df['Status'].isin(includes)]

# Normalize the dates to ISO strings; they label the animation frames.
create_dates = pd.to_datetime(df['Create Date'])
df['Create Date'] = create_dates.dt.strftime('%Y-%m-%d')

fig = px.bar(
    df,
    x="Status",
    y="Count",
    color="Status",
    animation_frame="Create Date",
    hover_name="Status",
    range_y=[0, 8000],
)
fig.show()
Sample of what I want to make:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
# Load the hosted data set. The URL must be a single unbroken string literal.
df = pd.read_csv(
    'https://raw.githubusercontent.com/shinokada/covid-19-stats/master/data/'
    'daily-new-confirmed-cases-of-covid-19-tests-per-case.csv'
)
df.columns = ['Country', 'Code', 'Date', 'Confirmed', 'Days since confirmed']

# Keep only the countries to compare.
includes = ['United States', 'Russia', 'India', 'Brazil']
df = df[df['Country'].isin(includes)]

# Normalize the dates to ISO strings; they label the animation frames.
df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%Y-%m-%d')

fig = px.bar(
    df,
    x="Country",
    y="Confirmed",
    color="Country",
    animation_frame="Date",
    animation_group="Country",
    range_y=[0, 35000],
)
fig.show()
I think the reason it doesn't show the intended graph is that the statuses have different numbers of data points. The intended result is achieved when the data is aligned, i.e. every status has a value for every date.
import pandas as pd
import numpy as np
import io
# Inline sample in which every status has a row for every date.
data = '''
Status,Create Date,Count
None,17-Apr-12,8
None,30-Apr-12,9
None,23-Aug-12,10
None,3-Oct-12,11
None,9-Jan-13,12
None,29-Jan-13,13
QBOS,17-Apr-12,8
QBOS,30-Apr-12,9
QBOS,23-Aug-12,10
QBOS,3-Oct-12,11
QBOS,9-Jan-13,12
QBOS,29-Jan-13,13
QBDS,17-Apr-12,8
QBDS,30-Apr-12,9
QBDS,23-Aug-12,10
QBDS,3-Oct-12,11
QBDS,9-Jan-13,12
QBDS,29-Jan-13,13
'''
# keep_default_na=False: "None" is a real status here; with the default NA
# handling it can be parsed as a missing value and would then be dropped by
# a later filter on the status name.
df = pd.read_csv(io.StringIO(data), sep=',', keep_default_na=False)
import plotly.graph_objects as go
import plotly.express as px
# The original file load, replaced by the inline sample:
# df = pd.read_csv('qb7.csv')
df.columns = ['Status', 'Create Date', 'Count']

# Keep only the three statuses of interest.
includes = ['None', 'QBDS', 'QBOS']
df = df[df['Status'].isin(includes)]

# Normalize the dates to ISO strings; they label the animation frames.
create_dates = pd.to_datetime(df['Create Date'])
df['Create Date'] = create_dates.dt.strftime('%Y-%m-%d')

fig = px.bar(
    df,
    x="Status",
    y="Count",
    color="Status",
    animation_frame="Create Date",
    hover_name="Status",
    range_y=[0, 30],
)
fig.show()

Setting the range for the x axis value for different time values - Plotly Python

Even when the range is set to a specific time value, all the time values are getting displayed on the graph. The code snippet is available below.
import datetime as dt
import cufflinks as cf
import pandas as pd # dataframe/matrix library
from pandas_summary import DataFrameSummary # nice summaries for pandas dataframes
import numpy as np # matrix math
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot # interactive charts
import plotly.graph_objs as go
import plotly.figure_factory as ff
# 48 hourly samples (two days) of random values scaled by x.
x = 10
rng = pd.date_range('1/1/2012', periods=48, freq='H')
time = rng.strftime('%H:%M:%S')
df = pd.DataFrame({'Date1': rng, 'Val': x * np.random.randn(len(rng))})
df.set_index('Val')  # result is discarded; df itself is unchanged

# Split the timestamp into separate time-of-day and calendar-date columns.
df['time'] = df['Date1'].dt.time
df['date'] = df['Date1'].dt.date

# Requested x-axis window: time of day of the rows at positions 5 and 10.
startTime = df.iloc[5]['time']
endTime = df.iloc[10]['time']

# One trace per calendar date, plotted against time of day.
data = [
    go.Scatter(
        x=df[df['date'] == date]['time'],
        y=df[df['date'] == date]['Val'],
        name=str(date),
    )
    for date in df.date.unique()
]
Layout = go.Layout(
    title="Test VS Time",
    xaxis=dict(title='Time', range=[startTime, endTime]),
    yaxis=dict(title='Value'),
)
df.iplot(layout=Layout.to_plotly_json(), data=data)
The following images show the output graph and the data frame.
Plotly graph
Dataframe

Matplotlib plot is a no-show

When I run this code
import pandas as pd
import numpy as np
def add_prop(group):
    """Add a 'prop' column holding each row's share of the group's births.

    Args:
        group: DataFrame with a ``births`` column. It is modified in place.

    Returns:
        The same ``group`` object with the new 'prop' column.
    """
    births = group.births.astype(float)
    group['prop'] = births / births.sum()
    return group
# Read one file per year and tag every row with its year.
pieces = []
columns = ['name', 'sex', 'births']
for year in range(1880, 2012):
    path = 'yob%d.txt' % year
    frame = pd.read_csv(path, names=columns)
    frame['year'] = year
    pieces.append(frame)

# Stack all years into a single frame.
names = pd.concat(pieces, ignore_index=True)

# Total births per year and sex. pivot_table takes `index`/`columns`; the
# older `rows`/`cols` keywords are no longer accepted by pandas.
total_births = names.pivot_table(
    'births', index='year', columns='sex', aggfunc='sum'
)
total_births.plot(title='Total Births by sex and year')
I get no plot. This is from Wes McKinney's book on using Python for data analysis.
Can anyone point me in the right direction?
Put
import matplotlib.pyplot as plt
at the top, and
plt.show()
at the end.
In the IPython notebook you could also use %matplotlib inline at the top of the notebook to automatically display the created plots in the output cells.
your code is correct.
just put:
import matplotlib.pyplot as plt
for displaying your plot:
plt.show()

Categories