Related
This question already has answers here:
Color by Column Values in Matplotlib
(6 answers)
Closed 1 year ago.
I am trying to make a simple scatter plot in pyplot using a Pandas DataFrame object, but want an efficient way of plotting two variables but have the symbols dictated by a third column (key). I have tried various ways using df.groupby, but not successfully. A sample df script is below. This colours the markers according to 'key1', but Id like to see a legend with 'key1' categories. Am I close? Thanks.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3), index = pd.date_range('2010-01-01', freq = 'M', periods = 10), columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
fig1 = plt.figure(1)
ax1 = fig1.add_subplot(111)
ax1.scatter(df['one'], df['two'], marker = 'o', c = df['key1'], alpha = 0.8)
plt.show()
You can use scatter for this, but that requires having numerical values for your key1, and you won't have a legend, as you noticed.
It's better to just use plot for discrete categories like this. For example:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(1974)
# Generate Data
num = 20
x, y = np.random.random((2, num))
labels = np.random.choice(['a', 'b', 'c'], num)
df = pd.DataFrame(dict(x=x, y=y, label=labels))
groups = df.groupby('label')
# Plot
fig, ax = plt.subplots()
ax.margins(0.05) # Optional, just adds 5% padding to the autoscaling
for name, group in groups:
ax.plot(group.x, group.y, marker='o', linestyle='', ms=12, label=name)
ax.legend()
plt.show()
If you'd like things to look like the default pandas style, then just update the rcParams with the pandas stylesheet and use its color generator. (I'm also tweaking the legend slightly):
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(1974)
# Generate Data
num = 20
x, y = np.random.random((2, num))
labels = np.random.choice(['a', 'b', 'c'], num)
df = pd.DataFrame(dict(x=x, y=y, label=labels))
groups = df.groupby('label')
# Plot
plt.rcParams.update(pd.tools.plotting.mpl_stylesheet)
colors = pd.tools.plotting._get_standard_colors(len(groups), color_type='random')
fig, ax = plt.subplots()
ax.set_color_cycle(colors)
ax.margins(0.05)
for name, group in groups:
ax.plot(group.x, group.y, marker='o', linestyle='', ms=12, label=name)
ax.legend(numpoints=1, loc='upper left')
plt.show()
This is simple to do with Seaborn (pip install seaborn) as a oneliner
sns.scatterplot(x_vars="one", y_vars="two", data=df, hue="key1")
:
import seaborn as sns
import pandas as pd
import numpy as np
np.random.seed(1974)
df = pd.DataFrame(
np.random.normal(10, 1, 30).reshape(10, 3),
index=pd.date_range('2010-01-01', freq='M', periods=10),
columns=('one', 'two', 'three'))
df['key1'] = (4, 4, 4, 6, 6, 6, 8, 8, 8, 8)
sns.scatterplot(x="one", y="two", data=df, hue="key1")
Here is the dataframe for reference:
Since you have three variable columns in your data, you may want to plot all pairwise dimensions with:
sns.pairplot(vars=["one","two","three"], data=df, hue="key1")
https://rasbt.github.io/mlxtend/user_guide/plotting/category_scatter/ is another option.
With plt.scatter, I can only think of one: to use a proxy artist:
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3), index = pd.date_range('2010-01-01', freq = 'M', periods = 10), columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
fig1 = plt.figure(1)
ax1 = fig1.add_subplot(111)
x=ax1.scatter(df['one'], df['two'], marker = 'o', c = df['key1'], alpha = 0.8)
ccm=x.get_cmap()
circles=[Line2D(range(1), range(1), color='w', marker='o', markersize=10, markerfacecolor=item) for item in ccm((array([4,6,8])-4.0)/4)]
leg = plt.legend(circles, ['4','6','8'], loc = "center left", bbox_to_anchor = (1, 0.5), numpoints = 1)
And the result is:
You can use df.plot.scatter, and pass an array to c= argument defining the color of each point:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3), index = pd.date_range('2010-01-01', freq = 'M', periods = 10), columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
colors = np.where(df["key1"]==4,'r','-')
colors[df["key1"]==6] = 'g'
colors[df["key1"]==8] = 'b'
print(colors)
df.plot.scatter(x="one",y="two",c=colors)
plt.show()
From matplotlib 3.1 onwards you can use .legend_elements(). An example is shown in Automated legend creation. The advantage is that a single scatter call can be used.
In this case:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3),
index = pd.date_range('2010-01-01', freq = 'M', periods = 10),
columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
fig, ax = plt.subplots()
sc = ax.scatter(df['one'], df['two'], marker = 'o', c = df['key1'], alpha = 0.8)
ax.legend(*sc.legend_elements())
plt.show()
In case the keys were not directly given as numbers, it would look as
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3),
index = pd.date_range('2010-01-01', freq = 'M', periods = 10),
columns = ('one', 'two', 'three'))
df['key1'] = list("AAABBBCCCC")
labels, index = np.unique(df["key1"], return_inverse=True)
fig, ax = plt.subplots()
sc = ax.scatter(df['one'], df['two'], marker = 'o', c = index, alpha = 0.8)
ax.legend(sc.legend_elements()[0], labels)
plt.show()
You can also try Altair or ggpot which are focused on declarative visualisations.
import numpy as np
import pandas as pd
np.random.seed(1974)
# Generate Data
num = 20
x, y = np.random.random((2, num))
labels = np.random.choice(['a', 'b', 'c'], num)
df = pd.DataFrame(dict(x=x, y=y, label=labels))
Altair code
from altair import Chart
c = Chart(df)
c.mark_circle().encode(x='x', y='y', color='label')
ggplot code
from ggplot import *
ggplot(aes(x='x', y='y', color='label'), data=df) +\
geom_point(size=50) +\
theme_bw()
It's rather hacky, but you could use one1 as a Float64Index to do everything in one go:
df.set_index('one').sort_index().groupby('key1')['two'].plot(style='--o', legend=True)
Note that as of 0.20.3, sorting the index is necessary, and the legend is a bit wonky.
seaborn has a wrapper function scatterplot that does it more efficiently.
sns.scatterplot(data = df, x = 'one', y = 'two', data = 'key1'])
I would like to plot points to Walls Tab 3D Scatter Plots with Matplotlib, something like below.
May I know, how can the code below can be modified or if someone can share with me any good resource to achieve the aforementioned objective
import re, seaborn as sns
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import ListedColormap
data = {'th': [1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2],
'pdvalue': [0.5, 0.5, 0.5, 0.5, 0.2,0.2,0.2,0.2,0.3,0.3,0.4,0.1,1,1.1,3,1],
'my_val': [1.2,3.2,4,5.1,1,2,5.1,1,2,4,1,3,6,6,2,3],
'name':['a','b','c','d','a','b','c','d','a','b','c','d','a','b','c','d']}
df = pd.DataFrame(data)
# convert unique str into unique int
order_dict = {k: i for i, k in enumerate ( df ['name'])}
df ['name_int'] = df ['name'].map ( order_dict )
data_np=df.to_numpy()
# generate data
x = data_np[:,0]
y = data_np[:,1]
z = data_np[:,2]
# axes instance
fig = plt.figure(figsize=(10,6))
ax = Axes3D(fig)
# get colormap from seaborn
cmap = ListedColormap(sns.color_palette("husl", 256).as_hex())
# plot
sc = ax.scatter(x, y, z, s=40, c=data_np[:,4], marker='o', cmap=cmap, alpha=1)
ax.set_xlabel('th')
ax.set_ylabel('pdvalue')
ax.set_zlabel('my_val')
# legend
plt.legend(*sc.legend_elements(), bbox_to_anchor=(1.05, 1), loc=2)
plt.show()
I created a simple example below. It will give you some hints.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
data={'x':[2,3,4,5],
'y':[4,6,3,2],
'z':[5,12,4,7]
}
df = pd.DataFrame(data)
data_np=df.to_numpy()
x = data_np[:,0]
y = data_np[:,1]
z = data_np[:,2]
z2=np.ones(shape=x.shape)*min(z)
fig = plt.figure(figsize=(10,6))
ax = Axes3D(fig)
#scatter
sc = ax.scatter(x, y, z, s=40, marker='o',color='red', alpha=1)
#lines
for i,j,k,h in zip(x,y,z,z2):
ax.plot([i,i],[j,j],[k,h], color='red')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
Extension to the suggestion made by JohanC and by #Akroma, these two suggestion can be combine as below.
import seaborn as sns
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Create an example dataframe
data = {'th': [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2],
'pdvalue': [0.5, 0.5, 0.5, 0.5, 0.2, 0.2, 0.2, 0.2, 0.3, 0.3, 0.4, 0.1, 1, 1.1, 3, 1],
'my_val': [1.2, 3.2, 4, 5.1, 1, 2, 5.1, 1, 2, 4, 1, 3, 6, 6, 2, 3],
'name': ['a', 'b', 'c', 'd', 'a', 'b', 'c', 'd', 'a', 'b', 'c', 'd', 'a', 'b', 'c', 'd']}
df = pd.DataFrame ( data )
# axes instance
fig = plt.figure ( figsize=(10, 6) )
ax = Axes3D ( fig, auto_add_to_figure=False )
fig.add_axes ( ax )
# find all the unique labels in the 'name' column
labels = np.unique ( df['name'])
# get palette from seaborn
palette = sns.color_palette ( "husl", len ( labels ) )
# plot
for label, color in zip ( labels, palette ):
df1 = df[df['name'] == label]
ax.scatter ( df1['th'], df1['pdvalue'], df1['my_val'],
s = 40, marker = 'o', color = color, alpha = 1, label = label)
z2 = np.ones ( shape=df1['my_val'].shape[0] ) *min ( df1['my_val'] )
for i, j, k, h in zip ( df1 ['th'], df1 ['pdvalue'], df1 ['my_val'], z2 ):
ax.plot ( [i, i], [j, j], [k, h], color = color )
ax.set_xlabel ( 'th' )
ax.set_ylabel ( 'pdvalue' )
ax.set_zlabel ( 'my_val' )
# legend
plt.legend ( bbox_to_anchor=(1.05, 1), loc=2 )
plt.show ()
I created this tree map using Matplotlib and Squarify. Is there a way to display information about each axes when the mouse hovers over the axis?
The mplcursors library can be used to create custom annotations while hovering. Here is an example with a tree map:
import matplotlib.pyplot as plt
import matplotlib as mpl
import squarify
import mplcursors
sizes = [5, 20, 30, 25, 10, 12]
sumsizes = sum(sizes)
labels = ['A', 'B', 'C', 'D', 'E', 'F']
cmap = plt.cm.get_cmap('Greens')
norm = plt.Normalize(vmin=min(sizes), vmax=max(sizes))
colors = [cmap(norm(s)) for s in sizes]
squarify.plot(sizes=sizes, label=labels, color=colors)
plt.colorbar(plt.cm.ScalarMappable(cmap=cmap, norm=norm))
cursor = mplcursors.cursor(hover=True)
cursor.connect("add", lambda sel: sel.annotation.set_text(
f"ID:{sel.target.index} '{labels[sel.target.index]}'\nSize:{sizes[sel.target.index]} ({sizes[sel.target.index] * 100.0 / sumsizes:.1f} %)"))
plt.show()
This question already has answers here:
Color by Column Values in Matplotlib
(6 answers)
Closed 1 year ago.
I am trying to make a simple scatter plot in pyplot using a Pandas DataFrame object, but want an efficient way of plotting two variables but have the symbols dictated by a third column (key). I have tried various ways using df.groupby, but not successfully. A sample df script is below. This colours the markers according to 'key1', but Id like to see a legend with 'key1' categories. Am I close? Thanks.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3), index = pd.date_range('2010-01-01', freq = 'M', periods = 10), columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
fig1 = plt.figure(1)
ax1 = fig1.add_subplot(111)
ax1.scatter(df['one'], df['two'], marker = 'o', c = df['key1'], alpha = 0.8)
plt.show()
You can use scatter for this, but that requires having numerical values for your key1, and you won't have a legend, as you noticed.
It's better to just use plot for discrete categories like this. For example:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(1974)
# Generate Data
num = 20
x, y = np.random.random((2, num))
labels = np.random.choice(['a', 'b', 'c'], num)
df = pd.DataFrame(dict(x=x, y=y, label=labels))
groups = df.groupby('label')
# Plot
fig, ax = plt.subplots()
ax.margins(0.05) # Optional, just adds 5% padding to the autoscaling
for name, group in groups:
ax.plot(group.x, group.y, marker='o', linestyle='', ms=12, label=name)
ax.legend()
plt.show()
If you'd like things to look like the default pandas style, then just update the rcParams with the pandas stylesheet and use its color generator. (I'm also tweaking the legend slightly):
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(1974)
# Generate Data
num = 20
x, y = np.random.random((2, num))
labels = np.random.choice(['a', 'b', 'c'], num)
df = pd.DataFrame(dict(x=x, y=y, label=labels))
groups = df.groupby('label')
# Plot
plt.rcParams.update(pd.tools.plotting.mpl_stylesheet)
colors = pd.tools.plotting._get_standard_colors(len(groups), color_type='random')
fig, ax = plt.subplots()
ax.set_color_cycle(colors)
ax.margins(0.05)
for name, group in groups:
ax.plot(group.x, group.y, marker='o', linestyle='', ms=12, label=name)
ax.legend(numpoints=1, loc='upper left')
plt.show()
This is simple to do with Seaborn (pip install seaborn) as a oneliner
sns.scatterplot(x_vars="one", y_vars="two", data=df, hue="key1")
:
import seaborn as sns
import pandas as pd
import numpy as np
np.random.seed(1974)
df = pd.DataFrame(
np.random.normal(10, 1, 30).reshape(10, 3),
index=pd.date_range('2010-01-01', freq='M', periods=10),
columns=('one', 'two', 'three'))
df['key1'] = (4, 4, 4, 6, 6, 6, 8, 8, 8, 8)
sns.scatterplot(x="one", y="two", data=df, hue="key1")
Here is the dataframe for reference:
Since you have three variable columns in your data, you may want to plot all pairwise dimensions with:
sns.pairplot(vars=["one","two","three"], data=df, hue="key1")
https://rasbt.github.io/mlxtend/user_guide/plotting/category_scatter/ is another option.
With plt.scatter, I can only think of one: to use a proxy artist:
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3), index = pd.date_range('2010-01-01', freq = 'M', periods = 10), columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
fig1 = plt.figure(1)
ax1 = fig1.add_subplot(111)
x=ax1.scatter(df['one'], df['two'], marker = 'o', c = df['key1'], alpha = 0.8)
ccm=x.get_cmap()
circles=[Line2D(range(1), range(1), color='w', marker='o', markersize=10, markerfacecolor=item) for item in ccm((array([4,6,8])-4.0)/4)]
leg = plt.legend(circles, ['4','6','8'], loc = "center left", bbox_to_anchor = (1, 0.5), numpoints = 1)
And the result is:
You can use df.plot.scatter, and pass an array to c= argument defining the color of each point:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3), index = pd.date_range('2010-01-01', freq = 'M', periods = 10), columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
colors = np.where(df["key1"]==4,'r','-')
colors[df["key1"]==6] = 'g'
colors[df["key1"]==8] = 'b'
print(colors)
df.plot.scatter(x="one",y="two",c=colors)
plt.show()
From matplotlib 3.1 onwards you can use .legend_elements(). An example is shown in Automated legend creation. The advantage is that a single scatter call can be used.
In this case:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3),
index = pd.date_range('2010-01-01', freq = 'M', periods = 10),
columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
fig, ax = plt.subplots()
sc = ax.scatter(df['one'], df['two'], marker = 'o', c = df['key1'], alpha = 0.8)
ax.legend(*sc.legend_elements())
plt.show()
In case the keys were not directly given as numbers, it would look as
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3),
index = pd.date_range('2010-01-01', freq = 'M', periods = 10),
columns = ('one', 'two', 'three'))
df['key1'] = list("AAABBBCCCC")
labels, index = np.unique(df["key1"], return_inverse=True)
fig, ax = plt.subplots()
sc = ax.scatter(df['one'], df['two'], marker = 'o', c = index, alpha = 0.8)
ax.legend(sc.legend_elements()[0], labels)
plt.show()
You can also try Altair or ggpot which are focused on declarative visualisations.
import numpy as np
import pandas as pd
np.random.seed(1974)
# Generate Data
num = 20
x, y = np.random.random((2, num))
labels = np.random.choice(['a', 'b', 'c'], num)
df = pd.DataFrame(dict(x=x, y=y, label=labels))
Altair code
from altair import Chart
c = Chart(df)
c.mark_circle().encode(x='x', y='y', color='label')
ggplot code
from ggplot import *
ggplot(aes(x='x', y='y', color='label'), data=df) +\
geom_point(size=50) +\
theme_bw()
It's rather hacky, but you could use one1 as a Float64Index to do everything in one go:
df.set_index('one').sort_index().groupby('key1')['two'].plot(style='--o', legend=True)
Note that as of 0.20.3, sorting the index is necessary, and the legend is a bit wonky.
seaborn has a wrapper function scatterplot that does it more efficiently.
sns.scatterplot(data = df, x = 'one', y = 'two', data = 'key1'])
I have a Pandas dataframe, and i want to plot it as matplotlib table. So far i have that part working with following code:
import numpy as np
randn = np.random.randn
from pandas import *
idx = Index(arange(1,11))
df = DataFrame(randn(10, 5), index=idx, columns=['A', 'B', 'C', 'D', 'E'])
vals = np.around(df.values,2)
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(111, frameon=True, xticks=[], yticks=[])
the_table=plt.table(cellText=vals, rowLabels=df.index, colLabels=df.columns,
colWidths = [0.03]*vals.shape[1], loc='center')
table_props = the_table.properties()
table_cells = table_props['child_artists']
clm = cm.hot(vals)
for cell in table_cells:
cell.set_height(0.04)
# now i would like to set the backgroundcolor of the cell
At the end of this i would like to set the background-color of the cell according to the colormap - but how do i look it up in the clm array without an index?
Another question: can i somehow pass a format string to the table, so that it formats the text to 2 decimal places?
Any hints appreciated,
Andy
You can use plt.Normalize() to normalize your data, and the pass the normalized data to a Colormap object, for example plt.cm.hot().
plt.table() has an argument cellColours, which will be used to set the cells' background color accordingly.
Because cm.hot maps black to the minimal value, I increased the value range when creating the normalization object.
Here is the code:
from matplotlib import pyplot as plt
import numpy as np
randn = np.random.randn
from pandas import *
idx = Index(np.arange(1,11))
df = DataFrame(randn(10, 5), index=idx, columns=['A', 'B', 'C', 'D', 'E'])
vals = np.around(df.values,2)
norm = plt.Normalize(vals.min()-1, vals.max()+1)
colours = plt.cm.hot(normal(vals))
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(111, frameon=True, xticks=[], yticks=[])
the_table=plt.table(cellText=vals, rowLabels=df.index, colLabels=df.columns,
colWidths = [0.03]*vals.shape[1], loc='center',
cellColours=colours)
plt.show()
The Andy's code working:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# sudo apt-get install python-pandas
# sudo apt-get install python-matplotlib
#
# python teste.py
from matplotlib import pyplot
from matplotlib import cm
import numpy
from pandas import *
idx = Index(numpy.arange(1, 11))
df = DataFrame(
numpy.random.randn(10, 5),
index=idx,
columns=['A', 'B', 'C', 'D', 'E']
)
vals = numpy.around(df.values, 2)
normal = pyplot.normalize(vals.min()-1, vals.max()+1)
fig = pyplot.figure(figsize=(15, 8))
ax = fig.add_subplot(111, frameon=True, xticks=[], yticks=[])
the_table = pyplot.table(
cellText=vals,
rowLabels=df.index,
colLabels=df.columns,
colWidths = [0.03]*vals.shape[1],
loc='center',
cellColours=pyplot.cm.hot(normal(vals))
)
pyplot.show()