Python: making column/row labels of matplotlib table bold

Python: making column/row labels of matplotlib table bold - python

I'm trying to figure out how to bold the column and row labels for a matplotlib table I'm making.
I've gone through the different table properties, and I can figure out how to style the individual cells, but not the actual columns or row labels.
Further, I'm not able to find out how to bold anything.. just font size, actual font, and color.
Any help?
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
fig, axs =plt.subplots(figsize = (10,6))
clust_data = np.random.random((10,3))
collabel=("col 1", "col 2", "col 3")
axs.axis('tight')
axs.axis('off')
df = pd.DataFrame(np.random.randn(10, 4),
columns=['a','b','c','d'],
index = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'])
table = axs.table(cellText=df.values, colLabels = df.columns, rowLabels = df.index, loc='center')
plt.show()
EDIT:
Figured it out, though it's kind of clunky. You can find the column/row labels in the "celld" property. You can then set them to bold using .set_text_props(fontproperties=FontProperties(weight='bold')), i.e.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import pandas as pd
fig, axs =plt.subplots(figsize = (10,6))
clust_data = np.random.random((10,3))
collabel=("col 1", "col 2", "col 3")
axs.axis('tight')
axs.axis('off')
df = pd.DataFrame(np.random.randn(10, 4),
columns=['a','b','c','d'],
index = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'])
table = axs.table(cellText=df.values, colLabels = df.columns, rowLabels = df.index, loc='center')
# Collect every table cell in the order the table stores them.
P = list(table.get_celld().values())
# NOTE(review): the slice assumes the 40 data cells (10 rows x 4 columns) come
# first and the column/row label cells follow - confirm for your matplotlib version.
for x in P[40:]:
    x.set_text_props(fontproperties=FontProperties(weight='bold'))
plt.show()

A slightly better method, following the documentation:
from matplotlib.font_manager import FontProperties
# Cells are keyed by (row, col): row 0 holds the column labels and
# column -1 holds the row labels, so only those cells are made bold.
for (row, col), cell in table.get_celld().items():
    if (row == 0) or (col == -1):
        cell.set_text_props(fontproperties=FontProperties(weight='bold'))

Related

How to create a grouped scatter plot in python [duplicate]

This question already has answers here:
Color by Column Values in Matplotlib
(6 answers)
Closed 1 year ago.
I am trying to make a simple scatter plot in pyplot using a Pandas DataFrame object, but want an efficient way of plotting two variables but have the symbols dictated by a third column (key). I have tried various ways using df.groupby, but not successfully. A sample df script is below. This colours the markers according to 'key1', but I'd like to see a legend with 'key1' categories. Am I close? Thanks.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3), index = pd.date_range('2010-01-01', freq = 'M', periods = 10), columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
fig1 = plt.figure(1)
ax1 = fig1.add_subplot(111)
ax1.scatter(df['one'], df['two'], marker = 'o', c = df['key1'], alpha = 0.8)
plt.show()

You can use scatter for this, but that requires having numerical values for your key1, and you won't have a legend, as you noticed.
It's better to just use plot for discrete categories like this. For example:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(1974)
# Generate Data
num = 20
x, y = np.random.random((2, num))
labels = np.random.choice(['a', 'b', 'c'], num)
df = pd.DataFrame(dict(x=x, y=y, label=labels))
groups = df.groupby('label')
# Plot
fig, ax = plt.subplots()
ax.margins(0.05) # Optional, just adds 5% padding to the autoscaling
# One plot() call per category so that each group gets its own legend entry.
for name, group in groups:
    ax.plot(group.x, group.y, marker='o', linestyle='', ms=12, label=name)
ax.legend()
plt.show()
If you'd like things to look like the default pandas style, then just update the rcParams with the pandas stylesheet and use its color generator. (I'm also tweaking the legend slightly):
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(1974)
# Generate Data
num = 20
x, y = np.random.random((2, num))
labels = np.random.choice(['a', 'b', 'c'], num)
df = pd.DataFrame(dict(x=x, y=y, label=labels))
groups = df.groupby('label')
# Plot
# NOTE(review): pd.tools.plotting only exists in old pandas releases - confirm
# that the installed version still provides it.
plt.rcParams.update(pd.tools.plotting.mpl_stylesheet)
colors = pd.tools.plotting._get_standard_colors(len(groups), color_type='random')
fig, ax = plt.subplots()
# set_prop_cycle(color=...) is the supported replacement for set_color_cycle.
ax.set_prop_cycle(color=colors)
ax.margins(0.05)
# One plot() call per category so that each group gets its own legend entry.
for name, group in groups:
    ax.plot(group.x, group.y, marker='o', linestyle='', ms=12, label=name)
ax.legend(numpoints=1, loc='upper left')
plt.show()

This is simple to do with Seaborn (pip install seaborn) as a oneliner
sns.scatterplot(x="one", y="two", data=df, hue="key1")
:
import seaborn as sns
import pandas as pd
import numpy as np
np.random.seed(1974)
df = pd.DataFrame(
np.random.normal(10, 1, 30).reshape(10, 3),
index=pd.date_range('2010-01-01', freq='M', periods=10),
columns=('one', 'two', 'three'))
df['key1'] = (4, 4, 4, 6, 6, 6, 8, 8, 8, 8)
sns.scatterplot(x="one", y="two", data=df, hue="key1")
Here is the dataframe for reference:
Since you have three variable columns in your data, you may want to plot all pairwise dimensions with:
sns.pairplot(vars=["one","two","three"], data=df, hue="key1")
https://rasbt.github.io/mlxtend/user_guide/plotting/category_scatter/ is another option.

With plt.scatter, I can only think of one: to use a proxy artist:
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3), index = pd.date_range('2010-01-01', freq = 'M', periods = 10), columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
fig1 = plt.figure(1)
ax1 = fig1.add_subplot(111)
x=ax1.scatter(df['one'], df['two'], marker = 'o', c = df['key1'], alpha = 0.8)
ccm=x.get_cmap()
# Line2D is needed for the proxy artists that stand in for each key in the legend.
from matplotlib.lines import Line2D
# Map the keys 4, 6, 8 onto the 0..1 range and look up their colours in the scatter's colormap.
circles=[Line2D(range(1), range(1), color='w', marker='o', markersize=10, markerfacecolor=item) for item in ccm((np.array([4,6,8])-4.0)/4)]
leg = plt.legend(circles, ['4','6','8'], loc = "center left", bbox_to_anchor = (1, 0.5), numpoints = 1)
And the result is:

You can use df.plot.scatter, and pass an array to c= argument defining the color of each point:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3), index = pd.date_range('2010-01-01', freq = 'M', periods = 10), columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
colors = np.where(df["key1"]==4,'r','-')
colors[df["key1"]==6] = 'g'
colors[df["key1"]==8] = 'b'
print(colors)
df.plot.scatter(x="one",y="two",c=colors)
plt.show()

From matplotlib 3.1 onwards you can use .legend_elements(). An example is shown in Automated legend creation. The advantage is that a single scatter call can be used.
In this case:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3),
index = pd.date_range('2010-01-01', freq = 'M', periods = 10),
columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
fig, ax = plt.subplots()
sc = ax.scatter(df['one'], df['two'], marker = 'o', c = df['key1'], alpha = 0.8)
ax.legend(*sc.legend_elements())
plt.show()
In case the keys were not directly given as numbers, it would look as
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3),
index = pd.date_range('2010-01-01', freq = 'M', periods = 10),
columns = ('one', 'two', 'three'))
df['key1'] = list("AAABBBCCCC")
labels, index = np.unique(df["key1"], return_inverse=True)
fig, ax = plt.subplots()
sc = ax.scatter(df['one'], df['two'], marker = 'o', c = index, alpha = 0.8)
ax.legend(sc.legend_elements()[0], labels)
plt.show()

You can also try Altair or ggplot, which are focused on declarative visualisations.
import numpy as np
import pandas as pd
np.random.seed(1974)
# Generate Data
num = 20
x, y = np.random.random((2, num))
labels = np.random.choice(['a', 'b', 'c'], num)
df = pd.DataFrame(dict(x=x, y=y, label=labels))
Altair code
from altair import Chart
c = Chart(df)
c.mark_circle().encode(x='x', y='y', color='label')
ggplot code
from ggplot import *
ggplot(aes(x='x', y='y', color='label'), data=df) +\
geom_point(size=50) +\
theme_bw()

It's rather hacky, but you could use the 'one' column as a Float64Index to do everything in one go:
df.set_index('one').sort_index().groupby('key1')['two'].plot(style='--o', legend=True)
Note that as of 0.20.3, sorting the index is necessary, and the legend is a bit wonky.

seaborn has a wrapper function scatterplot that does it more efficiently.
sns.scatterplot(data=df, x='one', y='two', hue='key1')

How to generate back to back chart in D3?

I am using Matplotlib for a back to back plot:
import numpy as np
import matplotlib.pyplot as plt
# create data
A = np.array([3,6,9,4,2,5])
B = np.array([2,8,1,9,7,3])
X = np.arange(6)
# plot the bars
plt.barh(X, A, align='center',
alpha=0.9, color = 'y')
plt.barh(X, -B, align='center',
alpha=0.6, color = 'c')
plt.yticks([0, 1, 2,3,4,5], ['A', 'B', 'C', 'D', 'E', 'F'])
plt.xticks([], [])
plt.show()
I am wondering how to generate d3 code using python?
Is Altair the right option?

Here is an example of generating a similar chart with Altair:
import pandas as pd
import altair as alt
# Plain lists are used so the snippet does not depend on numpy, which it never imports.
df = pd.DataFrame({
    "A": [3, 6, 9, 4, 2, 5],
    "B": [2, 8, 1, 9, 7, 3],
    "X": ['A', 'B', 'C', 'D', 'E', 'F'],
})
alt.Chart(df).transform_calculate(
A=-alt.datum.A
).transform_fold(
["A", "B"], as_=["key", "value"]
).mark_bar().encode(
x=alt.X("value:Q", axis=None),
y='X:N',
color="key:N"
).properties(
width=300,
height=200
)

Missing labels in matplotlib pie chart

I try to plot a pie chart using Python 3 Matplotlib v2.2.2-4build1 on Ubuntu 18.10. Everything seems to be ok except labels - they are missing. Tried to add it according to official documentation (https://matplotlib.org/api/_as_gen/matplotlib.pyplot.pie.html), tried to use an example from the web (https://pythonspot.com/matplotlib-pie-chart/) - same result, no labels.
Here is a simplified version of my code:
import numpy as np
import matplotlib.pyplot as plt
import sys
headers = ['a', 'b', 'c', 'd', 'e']
values = [5, 4, 3, 2, 1]
sum = sum(values)
labels = []
for v in values:
labels.append('{:.1f}%'.format(100 * v / sum))
fig, ax = plt.subplots(figsize=(6, 3), subplot_kw=dict(aspect="equal"))
wedges, texts = ax.pie(values, labels=labels, textprops=dict(color="w"))
plt.show()
Here is what I see - no labels:
Tried to use a tuple instead of a list - same thing.
Could anybody help me?

You might want to make the color of your labels non-white on a white background :)
Also, using sum as a variable name overwrites the built-in function, so you're better off choosing something else.
import numpy as np
import matplotlib.pyplot as plt
import sys
headers = ['a', 'b', 'c', 'd', 'e']
values = [5, 4, 3, 2, 1]
sumT = sum(values)
# Build one percentage label per value, e.g. '33.3%'.
labels = []
for v in values:
    labels.append('{:.1f}%'.format(100 * v / sumT))
fig, ax = plt.subplots(figsize=(6, 3), subplot_kw=dict(aspect="equal"))
wedges, texts = ax.pie(values, labels=labels, textprops=dict(color="k"))
plt.show()
Or if you want the labels to be inside:
import numpy as np
import matplotlib.pyplot as plt
import sys
def func(pct, allvals):
    """Return the wedge percentage formatted with one decimal, e.g. '33.3%'.

    ``pct`` is the percentage value to format. ``allvals`` is accepted so the
    existing call sites keep working, but it is not needed for the label.
    """
    return "{:.1f}%".format(pct)
headers = ['a', 'b', 'c', 'd', 'e']
values = [5, 4, 3, 2, 1]
sumT = sum(values)
labels = []
for v in values:
    labels.append('{:.1f}%'.format(100 * v / sumT))
fig, ax = plt.subplots(figsize=(6, 3), subplot_kw=dict(aspect="equal"))
# With autopct set, pie() returns a third list holding the in-wedge percentage texts.
wedges, texts, autotexts = ax.pie(values, autopct=lambda pct: func(pct, values),
                                  textprops=dict(color="w"))
plt.show()

import numpy as np
import matplotlib.pyplot as plt
import sys
headers = ['a', 'b', 'c', 'd', 'e']
values = [5, 4, 3, 2, 1]
colors=['yellow','blue','red','pink','green']
plt.pie(values,labels=headers,
colors=colors,autopct='%1.2f%%',
shadow=True,startangle=90)
plt.title('pie chart')
plt.show()

Adding the plt.legend() statement before the plt.show() will do the job.
import numpy as np
import matplotlib.pyplot as plt
import sys
headers = ['a', 'b', 'c', 'd', 'e']
values = [5, 4, 3, 2, 1]
# Compute the total under its own name; dividing by the built-in sum function raises TypeError.
total = sum(values)
labels = []
for v in values:
    labels.append('{:.1f}%'.format(100 * v / total))
fig, ax = plt.subplots(figsize=(6, 3), subplot_kw=dict(aspect="equal"))
wedges, texts = ax.pie(values, labels=labels, textprops=dict(color="w"))
plt.legend()
plt.show()

matplotlib subplot - colors are alternate

I have a code here which prints various subplots. However, the second subplot is always alternate in color. How do I fix this such that all colors are consistent?
As you can see, the second subplot has its colors opposite of the first and third. This is consistent for every column
hrlist = [hrdata2015, hrdata2016, hrdata2017]
titles = ["2015", "2016", "2017"]
columns = ["Sex","Education Level","Salary Plan","Grade",
"Contract Type","Citizenship", "Division"]
# NOTE(review): the nesting below is reconstructed from flattened text - confirm
# it matches the original layout (one figure per column, one subplot per year).
for h in columns:
    plt.figure(figsize=(40,40))
    for j, (data, title) in enumerate(zip(hrlist, titles), start=1):
        plt.subplot(2,2,j)
        ax1 = sns.countplot(data=data, x=h, hue="HR Status",
                            order=data[h].value_counts().index)
        ax1.set_title(title)
        ax1.legend(loc = "upper right", prop={'size': 12})
        # Tick label size/rotation depends on how many categories the column has.
        if(h=="Education Level" or h=="Grade"):
            plt.xticks(fontsize = 9)
        elif (h == "Division"):
            plt.xticks(rotation = 60, fontsize = 8)
        else:
            plt.xticks(fontsize = 12)
        # Write each bar's count just above the bar.
        for p in ax1.patches:
            height = p.get_height()
            ax1.text(p.get_x()+p.get_width()/2,
                     height + 1,
                     '{:1.0f}'.format(height),
                     ha="center",rotation=0)
    plt.tight_layout()
    plt.subplots_adjust(top=0.948,
                        bottom=0.115,
                        left=0.052,
                        right=0.986,
                        hspace=0.533,
                        wspace=0.128)
    plt.show()

My guess is your hrdata2016 happens to have the first row being "Inactive", while your hrdata2015 and hrdata2017 both have the first row being "Active". Since you didn't define the hue (color) order, the order in the DataFrame was used. Define the hue (color) order with the hue_order argument like this:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
df1 = pd.DataFrame([['F', 'Active'],
['M', 'Inactive'],
['F', 'Inactive'],
['M', 'Active'],
['F', 'Inactive'],
['M', 'Inactive'],
['F', 'Active']], columns=['Sex', 'HR Status'])
df2 = df1.drop(0)
df3 = df2.drop(1)
hrlist = [df1, df2, df3]
h = 'Sex'
# Use one explicit hue order for every subplot so each status keeps the same colour;
# an order derived from each frame's value_counts() can differ between frames.
hue_order = ["Active", "Inactive"]
for i, frame in enumerate(hrlist):
    plt.subplot(2,2,i+1)
    # Without hue_order the colours follow the order of appearance in each frame:
    # ax1 = sns.countplot(data=frame, x=h, hue="HR Status", order=frame[h].value_counts().index)
    ax1 = sns.countplot(data=frame, x=h, hue="HR Status",
                        order=frame[h].value_counts().index,
                        hue_order=hue_order)
plt.show()

Scatter plots in Pandas/Pyplot: How to plot by category [duplicate]

This question already has answers here:
Color by Column Values in Matplotlib
(6 answers)
Closed 1 year ago.
I am trying to make a simple scatter plot in pyplot using a Pandas DataFrame object, but want an efficient way of plotting two variables but have the symbols dictated by a third column (key). I have tried various ways using df.groupby, but not successfully. A sample df script is below. This colours the markers according to 'key1', but I'd like to see a legend with 'key1' categories. Am I close? Thanks.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3), index = pd.date_range('2010-01-01', freq = 'M', periods = 10), columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
fig1 = plt.figure(1)
ax1 = fig1.add_subplot(111)
ax1.scatter(df['one'], df['two'], marker = 'o', c = df['key1'], alpha = 0.8)
plt.show()

You can use scatter for this, but that requires having numerical values for your key1, and you won't have a legend, as you noticed.
It's better to just use plot for discrete categories like this. For example:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(1974)
# Generate Data
num = 20
x, y = np.random.random((2, num))
labels = np.random.choice(['a', 'b', 'c'], num)
df = pd.DataFrame(dict(x=x, y=y, label=labels))
groups = df.groupby('label')
# Plot
fig, ax = plt.subplots()
ax.margins(0.05) # Optional, just adds 5% padding to the autoscaling
# One plot() call per category so that each group gets its own legend entry.
for name, group in groups:
    ax.plot(group.x, group.y, marker='o', linestyle='', ms=12, label=name)
ax.legend()
plt.show()
If you'd like things to look like the default pandas style, then just update the rcParams with the pandas stylesheet and use its color generator. (I'm also tweaking the legend slightly):
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(1974)
# Generate Data
num = 20
x, y = np.random.random((2, num))
labels = np.random.choice(['a', 'b', 'c'], num)
df = pd.DataFrame(dict(x=x, y=y, label=labels))
groups = df.groupby('label')
# Plot
# NOTE(review): pd.tools.plotting only exists in old pandas releases - confirm
# that the installed version still provides it.
plt.rcParams.update(pd.tools.plotting.mpl_stylesheet)
colors = pd.tools.plotting._get_standard_colors(len(groups), color_type='random')
fig, ax = plt.subplots()
# set_prop_cycle(color=...) is the supported replacement for set_color_cycle.
ax.set_prop_cycle(color=colors)
ax.margins(0.05)
# One plot() call per category so that each group gets its own legend entry.
for name, group in groups:
    ax.plot(group.x, group.y, marker='o', linestyle='', ms=12, label=name)
ax.legend(numpoints=1, loc='upper left')
plt.show()

This is simple to do with Seaborn (pip install seaborn) as a oneliner
sns.scatterplot(x="one", y="two", data=df, hue="key1")
:
import seaborn as sns
import pandas as pd
import numpy as np
np.random.seed(1974)
df = pd.DataFrame(
np.random.normal(10, 1, 30).reshape(10, 3),
index=pd.date_range('2010-01-01', freq='M', periods=10),
columns=('one', 'two', 'three'))
df['key1'] = (4, 4, 4, 6, 6, 6, 8, 8, 8, 8)
sns.scatterplot(x="one", y="two", data=df, hue="key1")
Here is the dataframe for reference:
Since you have three variable columns in your data, you may want to plot all pairwise dimensions with:
sns.pairplot(vars=["one","two","three"], data=df, hue="key1")
https://rasbt.github.io/mlxtend/user_guide/plotting/category_scatter/ is another option.

With plt.scatter, I can only think of one: to use a proxy artist:
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3), index = pd.date_range('2010-01-01', freq = 'M', periods = 10), columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
fig1 = plt.figure(1)
ax1 = fig1.add_subplot(111)
x=ax1.scatter(df['one'], df['two'], marker = 'o', c = df['key1'], alpha = 0.8)
ccm=x.get_cmap()
# Line2D is needed for the proxy artists that stand in for each key in the legend.
from matplotlib.lines import Line2D
# Map the keys 4, 6, 8 onto the 0..1 range and look up their colours in the scatter's colormap.
circles=[Line2D(range(1), range(1), color='w', marker='o', markersize=10, markerfacecolor=item) for item in ccm((np.array([4,6,8])-4.0)/4)]
leg = plt.legend(circles, ['4','6','8'], loc = "center left", bbox_to_anchor = (1, 0.5), numpoints = 1)
And the result is:

You can use df.plot.scatter, and pass an array to c= argument defining the color of each point:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3), index = pd.date_range('2010-01-01', freq = 'M', periods = 10), columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
colors = np.where(df["key1"]==4,'r','-')
colors[df["key1"]==6] = 'g'
colors[df["key1"]==8] = 'b'
print(colors)
df.plot.scatter(x="one",y="two",c=colors)
plt.show()

From matplotlib 3.1 onwards you can use .legend_elements(). An example is shown in Automated legend creation. The advantage is that a single scatter call can be used.
In this case:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3),
index = pd.date_range('2010-01-01', freq = 'M', periods = 10),
columns = ('one', 'two', 'three'))
df['key1'] = (4,4,4,6,6,6,8,8,8,8)
fig, ax = plt.subplots()
sc = ax.scatter(df['one'], df['two'], marker = 'o', c = df['key1'], alpha = 0.8)
ax.legend(*sc.legend_elements())
plt.show()
In case the keys were not directly given as numbers, it would look as
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.normal(10,1,30).reshape(10,3),
index = pd.date_range('2010-01-01', freq = 'M', periods = 10),
columns = ('one', 'two', 'three'))
df['key1'] = list("AAABBBCCCC")
labels, index = np.unique(df["key1"], return_inverse=True)
fig, ax = plt.subplots()
sc = ax.scatter(df['one'], df['two'], marker = 'o', c = index, alpha = 0.8)
ax.legend(sc.legend_elements()[0], labels)
plt.show()

You can also try Altair or ggplot, which are focused on declarative visualisations.
import numpy as np
import pandas as pd
np.random.seed(1974)
# Generate Data
num = 20
x, y = np.random.random((2, num))
labels = np.random.choice(['a', 'b', 'c'], num)
df = pd.DataFrame(dict(x=x, y=y, label=labels))
Altair code
from altair import Chart
c = Chart(df)
c.mark_circle().encode(x='x', y='y', color='label')
ggplot code
from ggplot import *
ggplot(aes(x='x', y='y', color='label'), data=df) +\
geom_point(size=50) +\
theme_bw()

It's rather hacky, but you could use the 'one' column as a Float64Index to do everything in one go:
df.set_index('one').sort_index().groupby('key1')['two'].plot(style='--o', legend=True)
Note that as of 0.20.3, sorting the index is necessary, and the legend is a bit wonky.

seaborn has a wrapper function scatterplot that does it more efficiently.
sns.scatterplot(data=df, x='one', y='two', hue='key1')

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python: making column/row labels of matplotlib table bold - python

A slightly better method, following the documentation: from matplotlib.font_manager import FontProperties for (row, col), cell in table.get_celld().items(): if (row == 0) or (col == -1): cell.set_text_props(fontproperties=FontProperties(weight='bold'))

Related

How to create a grouped scatter plot in python [duplicate]

How to generate back to back chart in D3?

Missing labels in matplotlib pie chart

matplotlib subplot - colors are alternate

Scatter plots in Pandas/Pyplot: How to plot by category [duplicate]

Categories

Resources