Plotly line, changing colour to NaN segments - python

I have the following code
import plotly.express as px
import pandas as pd
import numpy as np
# Sample series: None marks a missing reading; "completed" is 1 where y is
# present and 0 where it is missing.
df = pd.DataFrame(
    {
        "y": [1, None, None, 4, 6, None],
        "x": [1, 2, 3, 4, 5, 6],
        "completed": [1, 0, 0, 1, 1, 0],
    }
)

# One line per distinct "completed" value, with a marker on every point.
fig = px.line(df, x="x", y="y", color="completed", markers=True)
fig.show()
That results in the following plot
But I need to highlight the places where the dataframe has NaN values (change the line color to red and add a marker point there), as in the following plot
Is there any way to do that easily? I have been looking for it but I'm not able to find a suitable solution.
Thanks in advance!

Found a solution using this https://community.plotly.com/t/change-color-of-continuous-line-based-on-value/68938/2
import plotly.express as px
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import itertools as it
# Sample data: None marks a missing reading that should be highlighted.
df = pd.DataFrame([1, None, None, 4, 6, None, 2, 1], columns=["y"])
df["x"] = [1, 2, 3, 4, 5, 6, 7, 8]
df["completed"] = [1, 0, 0, 1, 1, 0, 1, 1]
fig = go.Figure()

# Record which points are missing *before* interpolation overwrites the NaNs.
missing = df["y"].isna()

# Per-point style: red/dashed where the value was missing, black/solid otherwise.
df.loc[missing, "line_color"] = "red"
df.loc[missing, "line_type"] = "dash"
df.loc[~missing, "line_color"] = "black"
df.loc[~missing, "line_type"] = "solid"

# Fill the gaps so every segment has two finite endpoints to draw between.
df["y"] = df["y"].interpolate(method="index")

# Draw one trace per pair of consecutive points. A segment is highlighted when
# EITHER of its endpoints was missing: taking the style from the start point
# alone would draw the segment that enters a gap as if it were ordinary data.
segments = zip(
    it.pairwise(df["x"]),
    it.pairwise(df["y"]),
    it.pairwise(missing),
)
for x, y, endpoint_missing in segments:
    highlight = any(endpoint_missing)
    fig.add_trace(
        go.Scatter(
            x=x,
            y=y,
            mode="lines",
            line=dict(
                color="red" if highlight else "black",
                dash="dash" if highlight else "solid",
            ),
        )
    )
fig.show()
This is the new output for the plot.

Related

Plot number of persons in each car

I have a pandas dataframe which looks like this:
car,id
1,1
1,2
2,3
2,4
2,5
and so on
What I want to do is make a lineplot in seaborn that shows how many ids there are in each car (I don't care which ids are in the car). So on the x-axis I want the unique car numbers (here [1,2]) and on the y-axis the number of times each car is repeated, i.e. the number of ids in it (here [2,3]). I would like to use seaborn to plot.
What I have tried now is:
import seaborn as sns
# `df` is the car/id DataFrame shown above.
# This attempt fails: 'car'.count() calls str.count on the literal string 'car'
# (with no argument) instead of counting the rows of the 'car' column.
sns.lineplot(x='car', y='car'.count(), data=df) #which is not working for obvious reasons
Any tips to do this?
If you specifically need a lineplot then this would work:
import pandas as pd
import seaborn as sns
# Example frame: each row pairs a car number with an id.
data = {
    "car": [1, 1, 2, 2, 2],
    "id": [1, 2, 3, 4, 5],
}
df = pd.DataFrame(data)

# Count the distinct ids per car; "car" becomes the index of the result.
ids_per_car = df.groupby('car').nunique()
sns.lineplot(data=ids_per_car, x="car", y="id")
Or could use value_counts() too:
# Occurrences of each car number: index = car, values = number of rows.
car_count = df['car'].value_counts()
cars_on_x = car_count.index
rows_on_y = car_count.values
sns.lineplot(x=cars_on_x, y=rows_on_y)
import pandas as pd
# Example frame: each row pairs a car number with an id (car 3 has a single id).
MyDic = {"car": [1, 1, 2, 2, 2, 3], "id": [1, 2, 3, 4, 5, 6]}
MyDf = pd.DataFrame(MyDic)
# Show the frame so the counts produced below can be checked by eye.
print(MyDf)
>> car id
>> 0 1 1
>> 1 1 2
>> 2 2 3
>> 3 2 4
>> 4 2 5
>> 5 3 6
from collections import Counter

# Tally how many rows each car number has.
carCounter = Counter(MyDf["car"])
# Keys (car numbers) and their counts, in first-seen order.
x = list(carCounter)
y = list(carCounter.values())
print(f"{x=}, {y=}")
>>x=[1, 2, 3], y=[2, 3, 1]
A line plot in seaborn needs data for both axes. The code below will run fine.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Example frame: each row pairs an id with the car it belongs to.
cars = {
    "id": [1, 2, 3, 4, 5, 6],
    "car": [1, 1, 2, 2, 2, 3],
}
dataset = pd.DataFrame(cars)

# Rows per car, ordered by car number so the line runs left to right.
car_counts = dataset["car"].value_counts().sort_index()

sns.lineplot(x=car_counts.index, y=car_counts)
plt.show()

Creating subplots with a for loop Pandas Python

I want to create 3 subplots below with the subplot with the coordinates stated in the for loop parameters as add_plot. The format of add_plot is nrows, ncols ,cells. But I get an error when I try to implement it. How can I modify the contents of the for loop within Graphing() to achieve this?
Error:
ValueError: Single argument to subplot must be a three-digit integer, not (2, 2, 1)
Code:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Example input: two numeric columns, a constant label and timestamp strings.
data = pd.DataFrame({'col1': [4, 5, 2, 2, 3, 5, 1, 1, 6], 'col2': [6, 2, 1, 7, 3, 5, 3, 3, 9],
'label':['Old','Old','Old','Old','Old','Old','Old','Old','Old'],
'date': ['2022-01-24 10:07:02', '2022-01-27 01:55:03', '2022-01-30 19:09:03', '2022-02-02 14:34:06',
'2022-02-08 12:37:03', '2022-02-10 03:07:02', '2022-02-10 14:02:03', '2022-02-11 00:32:25',
'2022-02-12 21:42:03']})
# Builds a figure and tries to add three subplots to it, then shows it.
def Graphing():
#Size of the figure
fig = plt.figure(figsize=(12, 7))
#Creating the dataframe
# NOTE(review): `datetime` is not defined or imported anywhere in this snippet - confirm where it is meant to come from.
df = pd.DataFrame({
'date' : datetime,
'col1': data['col1']
})
# Pair each placeholder name with a subplot position tuple.
for subplot_,add_plot in (zip(
['sub1','sub2','sub3'],
[(2,2,1), (2,2,1), (2,1,2)])):
# The whole tuple is passed as ONE positional argument here; this is the call behind the ValueError quoted above.
subplot_ = fig.add_subplot(add_plot)
# Show Graph
plt.show()
Graphing()
This still raises a NameError: name 'datetime' is not defined as you didn't define datetime in your code, but shouldn't raise any error for the subplots.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Example input: two numeric columns, a constant label and timestamp strings.
data = pd.DataFrame({
    'col1': [4, 5, 2, 2, 3, 5, 1, 1, 6],
    'col2': [6, 2, 1, 7, 3, 5, 3, 3, 9],
    'label': ['Old', 'Old', 'Old', 'Old', 'Old', 'Old', 'Old', 'Old', 'Old'],
    'date': ['2022-01-24 10:07:02', '2022-01-27 01:55:03', '2022-01-30 19:09:03',
             '2022-02-02 14:34:06', '2022-02-08 12:37:03', '2022-02-10 03:07:02',
             '2022-02-10 14:02:03', '2022-02-11 00:32:25', '2022-02-12 21:42:03'],
})


def Graphing():
    """Create a 12x7 figure, add the subplots listed in the loop, and show it."""
    # Size of the figure
    fig = plt.figure(figsize=(12, 7))
    # Creating the dataframe
    # NOTE(review): `datetime` is still not defined in this snippet, so this
    # statement raises NameError until a value is supplied for it.
    df = pd.DataFrame({
        'date': datetime,
        'col1': data['col1'],
    })
    for subplot_, add_plot in zip(
            ['sub1', 'sub2', 'sub3'],
            [(2, 2, 1), (2, 2, 1), (2, 1, 2)]):
        # Unpack the tuple so add_subplot receives three separate positional
        # arguments instead of a single tuple.
        subplot_ = fig.add_subplot(*add_plot)
    # Show Graph
    plt.show()


Graphing()

How to plot a two-dimensional chart with a boolean attribute defining colors

I have the following DataFrame:
LATITUDE LONGITUDE STATE
... ... True
With the code below I can plot the graph with coordinates
import matplotlib.pyplot as plt
# Plot every coordinate pair as a point, all in the default colour.
lat = df['LAT']
long = df['LONG']
plt.scatter(x=lat, y=long)
plt.show()
graph
However, I want to define two different colors for each point according to the 'state' attribute
How to do this?
What you're looking for is the c parameter, taking your example and adding the STATUS column
import matplotlib.pyplot as plt
# Example data as a plain mapping of column name -> values.
df = {
    'LAT': [1, 2, 3, 4, 5],
    'LONG': [3, 2, 4, 5, 3],
    'STATUS': [0, 1, 0, 0, 1],
}
# `c` takes one value per point; here the two STATUS values give two colours.
point_colors = df['STATUS']
plt.scatter(x=df['LAT'], y=df['LONG'], c=point_colors)
plt.show()
it shows a bicoloured chart

Plotting multiple 3d lines in one figure using plotly

I have many 2d sequences with variable length, i.e. lists of list where each sublist is a sequence. I want to project these sequences/lines/sublists in a 3d visualisation adding time-step as another dimension. So far I am failing to plot all the 3d lines using plotly.express.
import plotly.express as px
import pandas as pd  # `pd` is used below but was not imported in this snippet

# `features` and `labels` are the parallel lists defined in the MCVE below:
# one sequence of (x0, x1) pairs and one list of per-point labels per line.
# Time step (1-based) for every point of every sequence.
t = [[ii+1 for ii in range(len(features[i]))] for i in range(len(labels))]
# First and second coordinate of every point of every sequence.
x0 = [[x[0] for x in features[i]] for i in range(len(labels))]
x1 = [[x[1] for x in features[i]] for i in range(len(labels))]
# Flatten everything into one long-format frame, one row per point.
df = pd.DataFrame(dict(
    X=[tii for ti in t for tii in ti],
    Y=[xii for xi in x0 for xii in xi],
    Z=[xii for xi in x1 for xii in xi],
    color=[aa for a in labels for aa in a]
))
fig = px.line_3d(df, x="X", y="Y", z="Z", color="color")
# Call the method: a bare `fig.show` only references it and displays nothing.
fig.show()
This is what I get, which is not really what I want. It is treating all the cases/sublists with a common label as one single sequence, so at the end of each line it goes back to where it starts. I have looked into plotting this iteratively in a for-loop (just like matplotlib), basically creating a new pandas dataframe at each iteration and plotting it, but with no success. Does anyone have any experience with this? Much appreciated!
A mcve is as below:
import plotly.express as px
import numpy as np
import pandas as pd
# Five random sequences of (x0, x1) pairs with lengths 4, 5, 6, 5 and 9.
features = [np.random.rand(n_points, 2).tolist() for n_points in (4, 5, 6, 5, 9)]
# One label per point; sequences 0/1 share label 1 and sequences 2/3 share label 2.
labels = [
    [1, 1, 1, 1],
    [1, 1, 1, 1, 1],
    [2, 2, 2, 2, 2, 2],
    [2, 2, 2, 2, 2],
    [0, 0, 0, 0, 0, 0, 0, 0, 0],
]

# Per-sequence time steps (1-based) and the two coordinates of every point.
t = [list(range(1, len(seq) + 1)) for seq in features]
x0 = [[point[0] for point in seq] for seq in features]
x1 = [[point[1] for point in seq] for seq in features]

# Flatten into one long-format frame, one row per point.
flat_t = [step for steps in t for step in steps]
flat_x0 = [value for seq in x0 for value in seq]
flat_x1 = [value for seq in x1 for value in seq]
flat_labels = [label for seq in labels for label in seq]
df2 = pd.DataFrame(dict(X=flat_t, Y=flat_x0, Z=flat_x1, color=flat_labels))

# Lines are grouped by the "color" column only.
fig1 = px.line_3d(df2, x="X", y="Y", z="Z", color="color")
fig1.show()
You see basically 3 lines instead of 5.
Your problem is that you are using the same label for different traces. Here is a workaround with a loop
import numpy as np
import plotly.graph_objs as go
# Five random sequences of (x0, x1) pairs with different lengths.
features = [np.random.rand(4, 2).tolist(),
            np.random.rand(5, 2).tolist(),
            np.random.rand(6, 2).tolist(),
            np.random.rand(5, 2).tolist(),
            np.random.rand(9, 2).tolist()]
# One label per point of the matching sequence.
labels = [[1, 1, 1, 1],
          [1, 1, 1, 1, 1],
          [2, 2, 2, 2, 2, 2],
          [2, 2, 2, 2, 2],
          [0, 0, 0, 0, 0, 0, 0, 0, 0]]

fig = go.Figure()
# Add one trace per sequence so sequences that share a label are not joined
# into a single line.
for i, feat in enumerate(features):
    feat = np.array(feat)
    fig.add_trace(
        go.Scatter3d(
            x=np.arange(len(feat)),  # time step along the sequence
            y=feat[:, 0],
            z=feat[:, 1],
            mode='lines',
            hovertext=labels[i],
        )
    )
fig.show()
You might need to play with trace names.
Update
Hopefully it's not too overcomplicated, but it is meant to be as generic as possible
import numpy as np
import plotly.graph_objs as go
from itertools import cycle
def plotly_color_map(names):
    """Map each name to a colour from a fixed ten-colour palette.

    Colours are handed out in iteration order of ``names`` and the palette
    repeats from the start once all ten colours have been used.

    Args:
        names: Iterable of hashable names.

    Returns:
        dict mapping every name to a hex colour string.
    """
    # From https://stackoverflow.com/a/44727682
    plotly_colors = cycle(['#1f77b4',  # muted blue
                           '#ff7f0e',  # safety orange
                           '#2ca02c',  # cooked asparagus green
                           '#d62728',  # brick red
                           '#9467bd',  # muted purple
                           '#8c564b',  # chestnut brown
                           '#e377c2',  # raspberry yogurt pink
                           '#7f7f7f',  # middle gray
                           '#bcbd22',  # curry yellow-green
                           '#17becf',  # blue-teal
                           ])
    # zip stops at the end of `names`, so the endless cycle is safe here.
    return dict(zip(names, plotly_colors))
# Five random sequences of (x0, x1) pairs with different lengths.
features = [np.random.rand(4, 2).tolist(),
            np.random.rand(5, 2).tolist(),
            np.random.rand(6, 2).tolist(),
            np.random.rand(5, 2).tolist(),
            np.random.rand(9, 2).tolist()]
# One label per point of the matching sequence.
labels = [[1, 1, 1, 1],
          [1, 1, 1, 1, 1],
          [2, 2, 2, 2, 2, 2],
          [2, 2, 2, 2, 2],
          [0, 0, 0, 0, 0, 0, 0, 0, 0]]

# The first label of each sequence is used as that sequence's legend group.
legend_groups = [l[0] for l in labels]
# Only the first trace of each group gets a legend entry.
traces = [l not in legend_groups[:i] for i, l in enumerate(legend_groups)]
# Sort the distinct groups so the group -> colour assignment does not depend
# on set iteration order.
cm = plotly_color_map(sorted(set(legend_groups)))

fig = go.Figure()
# One trace per sequence; traces of the same group share colour and legend entry.
for i, feat in enumerate(features):
    feat = np.array(feat)
    fig.add_trace(
        go.Scatter3d(
            x=np.arange(len(feat)),  # time step along the sequence
            y=feat[:, 0],
            z=feat[:, 1],
            mode='lines',
            line={"color": cm[legend_groups[i]]},
            legendgroup=legend_groups[i],
            hovertext=labels[i],
            showlegend=traces[i],
            name="label_{}".format(legend_groups[i]),
        )
    )
fig.show()

Creating a series of pie charts from a dataframe with color linked to indexes's values

This is an example dataframe:
import pandas as pd
import numpy as np
# Row and column labels of the example frame.
indexes = ('a', 'b', 'c', 'd')
columns = ('ab', 'bc', 'cd', 'de', 'ef', 'fg')

# One row per index label, one column per column label.
values = np.array(
    [
        [0, 1, 2, 0, 0, 4],
        [1, 0, 0, 1, 1, 0],
        [0, 4, 0, 0, 2, 1],
        [2, 0, 2, 0, 4, 0],
    ]
)

df = pd.DataFrame(values, index=indexes, columns=columns)
print(df)
from this dataframe I need to create a series of pie charts, one for every column, shown on the same figure, where the slices dimension is fixed (equal to 100/len(indexes)) and the color of the slices depends on the value of the index, in particular: white if 0, green if 1, yellow if 2, red if 4.
What suggestions can you give me?
I found that:
# One pie per column, laid out side by side: 2 units wide per column, 2 high.
pie_figsize = (len(columns) * 2, 2)
df.plot(kind='pie', subplots=True, figsize=pie_figsize)
it creates a series, but I can't control the input values...
I've created a pie for a column, but then I wasn't able to link the color to the value of index:
# One wedge per index label, all wedges the same size.
labels = indexes
equal_share = 100/len(labels)
sizes = np.linspace(equal_share, equal_share, num=len(labels))

fig1, ax1 = plt.subplots()
ax1.pie(sizes, labels=labels)
# Equal aspect ratio keeps the pie circular.
ax1.axis('equal')
plt.show()
ImportanceOfBeingErnest answer has helped me giving to the piechart the wanted look:
fig1, ax1 = plt.subplots()

# One wedge per index label, all wedges the same size.
labels = indexes
equal_share = 100/len(labels)
sizes = np.linspace(equal_share, equal_share, num=len(labels))

# Value -> colour lookup; each wedge is coloured from the first column's value.
coldic = {0 : "w", 1 : "g", 2 : "y", 4 : "r" }
first_column = values[:,0]
colors = [coldic[v] for v in first_column]

ax1.pie(sizes, labels=labels, colors=colors, startangle=90, counterclock=False)
ax1.axis('equal')
plt.show()
Now the colors are linked to the values, and the dimensions of the slices are fixed. I just need to have the same pie chart for all the columns and in the same image.
The importance of these charts is given by the colors, not the dimensions of the slices, which I want to be always equal.
Thanks for your time!
Not relying on pandas' internal plotting functions (which are of course limited), one can use matplotlib's pie function to plot the diagrams.
The colors can be set as a list, which is generated from the values according to some mapping dictionary.
import numpy as np
import matplotlib.pyplot as plt
# Value -> wedge colour lookup.
coldic = {0: "w", 1: "g", 2: "y", 4: "r"}
# One row per label, one column per pie chart.
values = np.array([
    [0, 1, 2, 0, 0, 4],
    [1, 0, 0, 1, 1, 0],
    [0, 4, 0, 0, 2, 1],
    [2, 0, 2, 0, 4, 0],
])
labels = ['a', 'b', 'c', 'd']

# One axes per column of `values`, laid out in a single row.
fig1, axes = plt.subplots(ncols=values.shape[1])
for ax, column in zip(axes, values.T):
    colors = [coldic[v] for v in column]
    # Blank out the label of zero-valued entries.
    labs = [l if v > 0 else "" for l, v in zip(labels, column)]
    ax.pie(column, labels=labs, colors=colors)
    ax.set_aspect("equal")
plt.show()
For fixed wedge sizes you just use a fixed array to supply to pie.
import numpy as np
import matplotlib.pyplot as plt
# Value -> wedge colour lookup.
coldic = {0: "w", 1: "g", 2: "y", 4: "r"}
# One row per label, one column per pie chart.
values = np.array([
    [0, 1, 2, 0, 0, 4],
    [1, 0, 0, 1, 1, 0],
    [0, 4, 0, 0, 2, 1],
    [2, 0, 2, 0, 4, 0],
])
labels = ['a', 'b', 'c', 'd']

# One axes per column of `values`, laid out in a single row.
fig1, axes = plt.subplots(ncols=values.shape[1])
for ax, column in zip(axes, values.T):
    colors = [coldic[v] for v in column]
    # Equal weights give equal wedges; the value only selects the colour.
    ax.pie(np.ones(values.shape[0]), labels=labels, colors=colors,
           wedgeprops=dict(linewidth=1, edgecolor="k"))
    ax.set_aspect("equal")
    # Title each pie with its column's values, concatenated.
    ax.set_title("".join(map(str, column)))
plt.show()

Categories