Creating subplots with a for loop Pandas Python - python

I want to create the 3 subplots below, placing each one at the coordinates given in the for loop as add_plot. Each add_plot has the format (nrows, ncols, index). But I get an error when I try to implement it. How can I modify the contents of the for loop within Graphing() to achieve this?
Error:
ValueError: Single argument to subplot must be a three-digit integer, not (2, 2, 1)
Code:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
data = pd.DataFrame({'col1': [4, 5, 2, 2, 3, 5, 1, 1, 6], 'col2': [6, 2, 1, 7, 3, 5, 3, 3, 9],
'label':['Old','Old','Old','Old','Old','Old','Old','Old','Old'],
'date': ['2022-01-24 10:07:02', '2022-01-27 01:55:03', '2022-01-30 19:09:03', '2022-02-02 14:34:06',
'2022-02-08 12:37:03', '2022-02-10 03:07:02', '2022-02-10 14:02:03', '2022-02-11 00:32:25',
'2022-02-12 21:42:03']})
def Graphing():
    """Build a figure and try to add three subplots from (nrows, ncols, index) tuples.

    This is the version from the question: each tuple is passed to
    ``fig.add_subplot`` as ONE positional argument, which is what produces the
    reported ``ValueError: Single argument to subplot must be a three-digit
    integer``. The call is kept as asked; only the lost indentation is restored.
    """
    #Size of the figure
    fig = plt.figure(figsize=(12, 7))
    #Creating the dataframe
    # NOTE: `datetime` is not defined anywhere in this snippet.
    df = pd.DataFrame({
        'date' : datetime,
        'col1': data['col1']
    })
    for subplot_,add_plot in (zip(
            ['sub1','sub2','sub3'],
            [(2,2,1), (2,2,1), (2,1,2)])):
        # The whole tuple is handed over as a single argument here.
        subplot_ = fig.add_subplot(add_plot)
    # Show Graph
    plt.show()
Graphing()

This still raises a NameError: name 'datetime' is not defined as you didn't define datetime in your code, but shouldn't raise any error for the subplots.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
data = pd.DataFrame({'col1': [4, 5, 2, 2, 3, 5, 1, 1, 6], 'col2': [6, 2, 1, 7, 3, 5, 3, 3, 9],
'label':['Old','Old','Old','Old','Old','Old','Old','Old','Old'],
'date': ['2022-01-24 10:07:02', '2022-01-27 01:55:03', '2022-01-30 19:09:03', '2022-02-02 14:34:06',
'2022-02-08 12:37:03', '2022-02-10 03:07:02', '2022-02-10 14:02:03', '2022-02-11 00:32:25',
'2022-02-12 21:42:03']})
def Graphing():
    """Build a figure and add one subplot per (nrows, ncols, index) tuple.

    Each tuple is unpacked with ``*`` so that ``fig.add_subplot`` receives three
    separate positional arguments instead of one tuple.
    """
    # Size of the figure
    fig = plt.figure(figsize=(12, 7))
    # Creating the dataframe
    # NOTE: `datetime` is not defined in this snippet, so this line raises
    # NameError until the caller supplies it.
    df = pd.DataFrame({
        'date': datetime,
        'col1': data['col1']
    })
    for subplot_, add_plot in zip(
            ['sub1', 'sub2', 'sub3'],
            [(2, 2, 1), (2, 2, 1), (2, 1, 2)]):
        # Unpack (nrows, ncols, index) into three arguments.
        subplot_ = fig.add_subplot(*add_plot)
    # Show Graph
    plt.show()
Graphing()

Related

Plotly line, changing colour to NaN segments

I have the following code
import plotly.express as px
import pandas as pd
import numpy as np
df = pd.DataFrame([1, None, None, 4, 6, None], columns=["y"])
df["x"] = [1, 2, 3, 4, 5, 6]
df["completed"] = [1, 0, 0, 1, 1, 0]
fig = px.line(df, x="x", y="y", markers=True, color="completed")
fig.show()
That results in the following plot
But I have to highlight (change the color line to red and add a dot point) in the cases that the dataframe has NaN value like in the following plot
Is there any way to do that easily? I have been looking for it but I'm not able to find a suitable solution.
Thanks in advance!
Found a solution using this https://community.plotly.com/t/change-color-of-continuous-line-based-on-value/68938/2
import plotly.express as px
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import itertools as it
df = pd.DataFrame([1, None, None, 4, 6, None, 2, 1], columns=["y"])
df["x"] = [1, 2, 3, 4, 5, 6, 7, 8]
df["completed"] = [1, 0, 0, 1, 1, 0, 1, 1]
fig = go.Figure()
# generate color list: rows whose y is missing are styled red/dashed,
# all other rows black/solid
df.loc[df["y"].isna(), "line_color"] = "red"
df.loc[df["y"].isna(), "line_type"] = "dash"
df.loc[df["y"].notna(), "line_color"] = "black"
df.loc[df["y"].notna(), "line_type"] = "solid"
# Fill the missing y values only after the styles were derived from them.
df["y"] = df["y"].interpolate(method="index")
# create coordinate pairs (one pair of consecutive points per line segment)
x_pairs = it.pairwise(df["x"])
y_pairs = it.pairwise(df["y"])
# zip stops at the shortest input, so segment i (row i -> row i + 1) takes
# the style of row i and the style of the last row is never used.
for x, y, line_color, line_type in zip(
    x_pairs,
    y_pairs,
    df["line_color"].values,
    df["line_type"].values,
):
    # create trace: one Scatter trace per segment
    fig.add_trace(
        go.Scatter(
            x=x,
            y=y,
            mode="lines",
            line=dict(color=line_color, dash=line_type),
        )
    )
fig.show()
This is the new output for the plot.

Plot number of persons in each car

I have a pandas dataframe which looks like this:
car,id
1,1
1,2
2,3
2,4
2,5
and so on
What I want to do is make a lineplot in seaborn that shows how many ids there are in each car (I don't care which ids are in the car). So on the x-axis I want the unique car numbers (here [1,2]) and on the y-axis the number of times each car is repeated (here [2,3]). I would like to use seaborn to plot.
What I have tried now is:
import seaborn as sns
#the df is the one above
sns.lineplot(x='car', y='car'.count(), data=df) #which is not working for obvious reasons
Any tips to do this?
If you specifically need a lineplot then this would work:
import pandas as pd
import seaborn as sns
# Example data matching the table in the question.
data = {"car": [1, 1, 2, 2, 2], "id": [1, 2, 3, 4, 5]}
df = pd.DataFrame(data)
# groupby('car').nunique() gives one row per car holding the number of distinct
# ids; 'car' becomes the index of that frame.
# NOTE(review): x="car" therefore has to be resolved from the index name --
# confirm this works with the seaborn version in use.
sns.lineplot(x="car", y="id", data=df.groupby('car').nunique())
Or could use value_counts() too:
car_count = df['car'].value_counts()
sns.lineplot(x=car_count.index, y=car_count.values)
import pandas as pd
MyDic = {"car": [1, 1, 2, 2, 2, 3], "id": [1, 2, 3, 4, 5, 6]}
MyDf = pd.DataFrame(MyDic)
print(MyDf)
>> car id
>> 0 1 1
>> 1 1 2
>> 2 2 3
>> 3 2 4
>> 4 2 5
>> 5 3 6
from collections import Counter
carCounter = Counter(MyDf["car"])
x, y = list(carCounter.keys()), list(carCounter.values())
print(f"{x=}, {y=}")
>>x=[1, 2, 3], y=[2, 3, 1]
A line plot in seaborn needs data for both axes. The code below will run fine.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Example data: one row per id, with the car that id belongs to.
cars = {"id": [1, 2, 3, 4, 5, 6], "car": [1, 1, 2, 2, 2, 3]}
dataset = pd.DataFrame(cars)
# Number of rows per car, ordered by car number.
car_counts = dataset["car"].value_counts().sort_index()
# x: the car numbers, y: how many ids each car has.
sns.lineplot(x=car_counts.index, y=car_counts)
plt.show()

How to plot a two-dimensional chart with a boolean attribute defining colors

I have the following DataFrame:
LATITUDE LONGITUDE STATE
... ... True
With the code below I can plot the graph with coordinates
import matplotlib.pyplot as plt
plt.scatter(x=df['LAT'], y=df['LONG'])
plt.show()
graph
However, I want to define two different colors for each point according to the 'state' attribute
How to do this?
What you're looking for is the c parameter, taking your example and adding the STATUS column
import matplotlib.pyplot as plt
# Example data; note that `df` is a plain dict of lists here, not a DataFrame.
df = {'LAT': [1, 2, 3, 4, 5], 'LONG': [3, 2, 4, 5, 3], 'STATUS': [0, 1, 0, 0, 1] }
# `c` receives one value per point; the 0/1 STATUS values select the colour.
plt.scatter(x=df['LAT'], y=df['LONG'], c=df['STATUS'])
plt.show()
it shows a bicoloured chart

Plot Delaunay triangulation grouped by value

I'm trying to plot a delaunay triangulation from a pandas df. I'm hoping to group the points by Time. At present, I'm getting an error when attempting to plot the point from the first time point.
QhullError: QH6214 qhull input error: not enough points(2) to construct initial simplex (need 6)
While executing: | qhull d Q12 Qt Qc Qz Qbb
Options selected for Qhull 2019.1.r 2019/06/21:
run-id 768388270 delaunay Q12-allow-wide Qtriangulate Qcoplanar-keep
Qz-infinity-point Qbbound-last _pre-merge _zero-centrum Qinterior-keep
_maxoutside 0
It appears each of those two arrays is being passed as a single point.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import Delaunay
# Example data: four (A_X, A_Y) points for each of the two Time values.
df = pd.DataFrame({
'Time' : [1,1,1,1,2,2,2,2],
'A_X' : [5, 5, 6, 6, 4, 3, 3, 4],
'A_Y' : [5, 6, 6, 5, 5, 6, 5, 6],
})
fig, ax = plt.subplots(figsize = (6,6))
ax.set_xlim(0,10)
ax.set_ylim(0,10)
ax.grid(False)
# One list of x values and one list of y values per Time group.
points_x1 = df.groupby("Time")["A_X"].agg(list).tolist()
points_y1 = df.groupby("Time")["A_Y"].agg(list).tolist()
# Each element of `points` is an (x_list, y_list) pair for one Time value,
# not a list of (x, y) coordinates.
points = list(zip(points_x1, points_y1))
# points[0] is therefore two sequences of four numbers, which matches the
# reported "not enough points(2)" QhullError.
tri = Delaunay(points[0])
#plot triangulation
# NOTE(review): `points` is a Python list, so the NumPy-style `[:,0]` indexing
# below would not work either -- it needs an array of shape (n, 2).
plt.triplot(points[:,0], points[:,1], tri.simplices)
plt.plot(points[:,0], points[:,1], 'o')
You can take advantage of the apply method, which lets you run a function on each group.
def make_points(x):
    """Return the (A_X, A_Y) pairs of *x* as a 2-D NumPy array.

    *x* must support ``x['A_X']`` and ``x['A_Y']`` (for example one group of a
    ``groupby``). Row ``i`` of the result is ``[A_X[i], A_Y[i]]``.
    """
    return np.array(list(zip(x['A_X'], x['A_Y'])))
c = df.groupby("Time").apply(make_points)
Result is properly shaped array of points for each time bucket:
Time
1 [[5, 5], [5, 6], [6, 6], [6, 5]]
2 [[4, 5], [3, 6], [3, 5], [4, 6]]
dtype: object
Finally it suffices to compute the Delaunay triangulation for each time bucket and plot it:
fig, axe = plt.subplots()
# `c` holds one array of points per Time group; triangulate and draw each.
for p in c:
    tri = Delaunay(p)
    # p.T unpacks into the x row and the y row expected by triplot.
    axe.triplot(*p.T, tri.simplices)
You can even make it in a single call:
def make_triangulation(x):
    """Return the Delaunay triangulation of the (A_X, A_Y) points in *x*.

    *x* must support ``x['A_X']`` and ``x['A_Y']`` (for example one group of a
    ``groupby``). The points are stacked into a 2-D array, one row per point,
    before being passed to ``scipy.spatial.Delaunay``.
    """
    return Delaunay(np.array(list(zip(x['A_X'], x['A_Y']))))
# One Delaunay object per Time group.
c = df.groupby("Time").apply(make_triangulation)
fig, axe = plt.subplots()
for tri in c:
    # tri.points.T unpacks into the x row and the y row expected by triplot.
    axe.triplot(*tri.points.T, tri.simplices)

Animation multiple columns as dots with matplotlib very slow for large dataset with networkx graph as background

In my previous question, (How to Animate multiple columns as dots with matplotlib from pandas dataframe with NaN in python), I managed to animate multiple dots from a dataframe as an animation.
However, I wanted to set a background for the animation as a network graph, so that it seems that the dots are moving on the lines of the network.
Using the code from How to Animate multiple columns as dots with matplotlib from pandas dataframe with NaN in python
I've created a new MCVE (minimal, complete, verifiable example);
the code is listed below:
import random
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import math
import pandas as pd
from matplotlib import animation
#from JSAnimation import IPython_display
%matplotlib inline
# initialise graph object
G = nx.Graph()
# One 'r' entry is appended per node; nothing in this snippet reads color_map.
color_map =[]
# Nodes 1..8, each with a fixed (x, y) drawing position stored under 'pos'.
G.add_node(1, pos=(1, 0)); color_map.append('r')
G.add_node(2, pos=(2, 0)); color_map.append('r')
G.add_node(3, pos=(3, -1)); color_map.append('r')
G.add_node(4, pos=(3, 1)); color_map.append('r')
G.add_node(5, pos=(4, -1)) ;color_map.append('r')
G.add_node(6, pos=(4, 1)); color_map.append('r')
G.add_node(7, pos=(5, 0)); color_map.append('r')
G.add_node(8, pos=(6, 0)); color_map.append('r')
# Edges as (node_a, node_b, weight) triples.
e = [(1, 2, 1),
(2, 3, 1),
(2, 4, 2),
(3, 5, 5),
(4, 6, 2),
(5, 7, 1),
(6, 7, 2),
(7, 8, 1)]
G.add_weighted_edges_from(e)
# Edge weights are used as the edge labels.
labels = nx.get_edge_attributes(G,'weight')
# Draw the graph, then the edge labels and node labels, all at the stored positions.
nx.draw(G,nx.get_node_attributes(G, 'pos'))
nx.draw_networkx_edge_labels(G,nx.get_node_attributes(G, 'pos'),edge_labels=labels)
nx.draw_networkx_labels(G,nx.get_node_attributes(G, 'pos'))
# x coordinates: one row per animation step (the index), one column per dot.
# np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
# Presumably NaN means the dot has no position at that step.
df_x = pd.DataFrame(
    data=np.array(
        [[np.nan, np.nan, np.nan, np.nan],
         [1, np.nan, np.nan, np.nan],
         [1.5, 4, np.nan, np.nan],
         [2, 5, 3, 4]]
    ),
    index=[1, 2, 3, 4],
    columns=[1, 2, 3, 4],
)
print(df_x)
# y coordinates, laid out exactly like df_x.
df_y = pd.DataFrame(
    data=np.array(
        [[np.nan, np.nan, np.nan, np.nan],
         [0, np.nan, np.nan, np.nan],
         [0, -1, np.nan, np.nan],
         [0, 0, 1, 1]]
    ),
    index=[1, 2, 3, 4],
    columns=[1, 2, 3, 4],
)
%matplotlib notebook
from matplotlib import animation
#from JSAnimation import IPython_display
#from IPython.display import HTML
# Figure and axes that hold both the network drawing and the moving dots.
fig = plt.figure(figsize=(10,10))
ax = plt.axes()
# Draw the network once, at the stored node positions.
nx.draw(G,nx.get_node_attributes(G, 'pos'),node_size = 10)
# The index labels of df_x are used as the animation frames.
n_steps = df_x.index
# Empty marker-only line; animate() fills in its data for every frame.
graph, = plt.plot([],[],'o')
def get_data_x(i):
    """Return the row of ``df_x`` labelled *i* (the x coordinates for that frame)."""
    return df_x.loc[i]
def get_data_y(i):
    """Return the row of ``df_y`` labelled *i* (the y coordinates for that frame)."""
    return df_y.loc[i]
def animate(i):
    """Update the dots for frame *i* and return the changed artists.

    Looks up the x and y rows for *i*, pushes them into the module-level
    ``graph`` line, and returns it as a one-element tuple.
    """
    x = get_data_x(i)
    y = get_data_y(i)
    graph.set_data(x, y)
    return graph,
animation.FuncAnimation(fig, animate, frames=n_steps, repeat=True, blit = True)
This creates a working animation. However, when I use a very large dataset (the pandas dataframe is ~8000 rows * 800 columns instead of the example dataset I posted), the animation takes very long (an hour or so) to render and most of the time my browser (Google Chrome) crashes.
So I thought it may be because it needs to redraw the network graph each frame? How can I set the networkx graph as the background? From there on it is just plotting points, right? My actual graph is a bit larger (~5000 nodes, ~6000 edges).
I hope someone can help me speed up the rendering of the animation!
After some digging around, I found no 'easy' solution to this problem when trying to animate large datasets with matplotlib in a Jupyter notebook. I just decided to write everything to an mp4 file, which works just as well for animations.
My code for this, including the MCVE:
import random
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import math
import pandas as pd
from matplotlib import animation
#from JSAnimation import IPython_display
%matplotlib inline
# initialise graph object
G = nx.Graph()
# Nodes 1..8 in ascending order, each carrying its (x, y) drawing position.
G.add_nodes_from(
    (node_id, {'pos': xy})
    for node_id, xy in enumerate(
        [(1, 0), (2, 0), (3, -1), (3, 1), (4, -1), (4, 1), (5, 0), (6, 0)],
        start=1,
    )
)
# One 'r' entry per node.
color_map = ['r'] * 8
# Edges as (node_a, node_b, weight) triples.
e = [
    (1, 2, 1),
    (2, 3, 1),
    (2, 4, 2),
    (3, 5, 5),
    (4, 6, 2),
    (5, 7, 1),
    (6, 7, 2),
    (7, 8, 1),
]
G.add_weighted_edges_from(e)
# Edge weights double as edge labels.
labels = nx.get_edge_attributes(G, 'weight')
# Draw the graph first, then the edge labels and node labels on top of it.
nx.draw(G, nx.get_node_attributes(G, 'pos'))
nx.draw_networkx_edge_labels(
    G,
    nx.get_node_attributes(G, 'pos'),
    edge_labels=labels,
)
nx.draw_networkx_labels(G, nx.get_node_attributes(G, 'pos'))
# x coordinates: one row per animation step (the index), one column per dot.
# np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
# Presumably NaN means the dot has no position at that step.
df_x = pd.DataFrame(
    data=np.array(
        [[np.nan, np.nan, np.nan, np.nan],
         [1, np.nan, np.nan, np.nan],
         [1.5, 4, np.nan, np.nan],
         [2, 5, 3, 4]]
    ),
    index=[1, 2, 3, 4],
    columns=[1, 2, 3, 4],
)
print(df_x)
# y coordinates, laid out exactly like df_x.
df_y = pd.DataFrame(
    data=np.array(
        [[np.nan, np.nan, np.nan, np.nan],
         [0, np.nan, np.nan, np.nan],
         [0, -1, np.nan, np.nan],
         [0, 0, 1, 1]]
    ),
    index=[1, 2, 3, 4],
    columns=[1, 2, 3, 4],
)
def get_data_x(i):
    """Return the row of ``df_x`` labelled *i* (the x coordinates for that frame)."""
    return df_x.loc[i]
def get_data_y(i):
    """Return the row of ``df_y`` labelled *i* (the y coordinates for that frame)."""
    # Reads df_y; the earlier `sdf_y` was a typo for a name that does not exist.
    return df_y.loc[i]
def animate(i):
    """Update the dots for frame *i* and return the changed artists.

    Looks up the x and y rows for *i*, pushes them into the module-level
    ``graph`` line, and returns it as a one-element tuple.
    """
    x = get_data_x(i)
    y = get_data_y(i)
    graph.set_data(x, y)
    return graph,
# Set up formatting for the movie files
# Look up the writer class registered under 'ffmpeg' and configure it.
Writer = animation.writers['ffmpeg']
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
# Figure and axes that hold both the network drawing and the moving dots.
fig = plt.figure(figsize=(20,20))
ax = plt.axes()
# Draw the network once, at the stored node positions.
nx.draw(G,nx.get_node_attributes(G, 'pos'),node_size = 1)
# The index labels of df_x are used as the animation frames.
n_steps = df_x.index
# Empty marker-only line; animate() fills in its data for every frame.
graph, = plt.plot([],[],'o')
# Keep a reference to the animation so it can be saved below.
ani = animation.FuncAnimation(fig, animate, frames= n_steps, interval=1, repeat=True, blit = True)
# Write the animation to disk with the writer configured above.
ani.save('path/file.mp4', writer=writer)

Categories