I have been trying to modify my plot so that the x-axis labels are wrapped.
I have looked at a few suggestions from similar questions,
but I am unable to apply them to this plot.
The ax.set_xticklabels code does not wrap the labels.
The plt.xticks code throws an error -
AttributeError: 'Text' object has no attribute 'expandtabs'
# Question snippet: attempt to wrap the x tick labels of a seaborn countplot.
plt.figure(figsize = (7,5))
ax = sns.countplot(data = df3, x = df3.PaymentMethod, hue = df3.Churn)
#ax.set_xticklabels(ax.get_xticklabels(), ha="right", horizontalalignment = 'center', wrap = True)
# NOTE: the labels from ax.get_xticklabels() are passed to textwrap.fill as-is;
# per the AttributeError quoted above they are Text objects, not strings.
# The answer further down wraps label.get_text() instead.
plt.xticks([textwrap.fill(label, 10) for label in ax.get_xticklabels()],
rotation = 10, fontsize=8, horizontalalignment="center")
Image of plot with overlapping xlabels
textwrap works as expected with the code suggested in the comments:
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
import seaborn as sns # v 0.11.0
import textwrap
# Sample data: four category names of increasing length, each repeated a
# random number of times and paired with a random 'True'/'False' flag
rng = np.random.default_rng(seed=1)
cat_names = [
    'Short name',
    'Slightly longer name',
    'Rather much longer name',
    'Longest name of them all by far',
]
counts = rng.integers(10, 100, len(cat_names))
var_cat = np.repeat(cat_names, counts)
var_bool = rng.choice(['True', 'False'], size=len(var_cat))
df = pd.DataFrame({'vcat': var_cat, 'vbool': var_bool})

# Draw the countplot, then re-apply its tick labels wrapped at 12 characters
ax = sns.countplot(data=df, x='vcat', hue='vbool')
tick_texts = (label.get_text() for label in ax.get_xticklabels())
labels = [textwrap.fill(text, 12) for text in tick_texts]
ax.set_xticklabels(labels);
Related
I have made the following timeline chart in Python, where the data is in quarterly format with a DatetimeIndex. However, I need to translate the graph into my local language and therefore replace "Q1", "Q2", "Q3", "Q4" with "kv1", "kv2", "kv3", "kv4". Is this possible? That is, I need the x-axis to read kv3, kv4, kv1 2022, kv2 instead of Q3, Q4, Q1 2022, Q2, and so forth.
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import pandas
plt.style.use('seaborn-whitegrid')
matplotlib.rcParams['font.sans-serif'] = "Arial"
matplotlib.rcParams['font.family'] = "Arial"

# Map each category to a 1-based y position, and each position back to its name
categories = ['Car', 'Train', 'Boat', 'Plane', 'Walk']
cat_dict = {name: pos for pos, name in enumerate(categories, start=1)}
val_dict = {pos: name for name, pos in cat_dict.items()}

# Quarterly dates between 2021-09-30 and 2023-12-31. date_range replaces the
# DatetimeIndex(start=..., end=..., freq=...) constructor form, which current
# pandas no longer accepts.
dates = pandas.date_range(start='2021-09-30', end='2023-12-31', freq='Q')
values = [random.choice(categories) for _ in range(len(dates))]
df = pandas.DataFrame(data=values, index=dates, columns=['category'])
df['plotval'] = [float('NaN'), 1, 1, 3, 1, float('NaN'), 5, 2, 1, float('NaN')]
# One .loc assignment instead of the chained df['plotval'][0] = ..., which may
# write to a temporary copy rather than to df
df.loc[df.index[0], 'plotval'] = np.nan

plt.rcParams["figure.figsize"] = 4, 3.5
# Apply the dpi to the figure that is actually drawn on; a separate
# plt.figure(dpi=1000) call only creates an extra, empty figure
fig, ax = plt.subplots(dpi=1000)
df['plotval'].plot(ax=ax, style='^', color='darkblue', label="Renteheving", markersize=12)
ax.margins(0.2)
ax.spines['top'].set_visible(False)
# Show the category name instead of its numeric y position
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, pos: val_dict.get(x)))
plt.yticks(weight='bold')
I tried to add
# Attempt from the question.
# NOTE(review): plt.xlabel presumably sets the axis label text rather than the
# individual tick labels, so it would not rename the quarters - confirm.
plt.xlabel(["kv1", "kv2", "kv3", "kv4"])
Which gave me
Help is as always highly appreciated.
Try to add this to your code:
# Tick label texts are only filled in once the figure has been drawn
plt.draw()

# Major ticks: rewrite each label's text in place, then hand the labels back
new_major_labels = list(ax.get_xticklabels(minor=False))
for label in new_major_labels:
    label.set_text(label.get_text().replace('Q', 'kv'))
ax.set_xticklabels(new_major_labels, minor=False)

# Minor ticks: same treatment
new_minor_labels = list(ax.get_xticklabels(minor=True))
for label in new_minor_labels:
    label.set_text(label.get_text().replace('Q', 'kv'))
ax.set_xticklabels(new_minor_labels, minor=True)
It throws a warning which I don't understand, but I think it does what you want.
I could not test it because I can't reproduce your graph, but this should work:
# Map each English quarter prefix to its local-language replacement
D = {'Q1': 'kv1', 'Q2': 'kv2', 'Q3': 'kv3', 'Q4': 'kv4'}

# Tick label texts are only populated after the figure has been drawn
plt.draw()
labels = [i.get_text() for i in ax.get_xticklabels()]  # current label texts

# Replace inside each text instead of requiring an exact key match, so labels
# that also carry a year (e.g. "Q1 2022") are translated as well
for old, new in D.items():
    labels = [text.replace(old, new) for text in labels]
ax.set_xticklabels(labels)  # apply the new labels
I would like to plot two dataframes with a 'long' representation, and differing axes, in one plot using sns.lineplot(). Yet, I am failing to plot them with a single legend containing the elements of both lineplots.
The issue is similar to this: Secondary axis with twinx(): how to add to legend?, though I'd like to use seaborn.
A minimal working example up to the point I got stuck is given below.
import pandas as pd
import seaborn as sns
import numpy as np
import itertools
# Mock data: df1 covers steps 1-10 and df2 only steps 6-10; every step occurs
# four times and the labels alternate between two values
lst = range(1, 11)
steps1 = [x for x in lst for _ in range(4)]
labels1 = ['A', 'B'] * 20
values1 = list(np.random.uniform(0, 1, 40))
df1 = pd.DataFrame({'steps': steps1, 'lab': labels1, 'vals': values1})

lst = range(6, 11)
steps2 = [x for x in lst for _ in range(4)]
labels2 = ['C', 'D'] * 10
values2 = list(np.random.uniform(10, 20, 20))
df2 = pd.DataFrame({'steps': steps2, 'lab2': labels2, 'others': values2})

# Plotting: the first lineplot is drawn before the twin axis is created, the
# second one after; fig and fig2 hold the return values of sns.lineplot
fig, ax = plt.subplots()
fig = sns.lineplot(
    data=df1, x='steps', y='vals', hue='lab', palette='bright', legend='brief'
)
ax2 = ax.twinx()
fig2 = sns.lineplot(
    data=df2, x='steps', y='others', hue='lab2', palette='dark', legend='brief'
)

# How do I merge the legends into one?
# The approach below gives me one merged and one separate legend
h1, l1 = fig.get_legend_handles_labels()
h2, l2 = fig2.get_legend_handles_labels()
ax.legend(handles=h1 + h2, labels=l1 + l2, loc=3)
I just resolved it by removing the obsolete legend by ax2.get_legend().remove().
I want seaborn heatmap to display multiple values in each cell of the heatmap. Here is a manual example of what I want to see, just to be clear:
# Manual example: a 3x2 heatmap whose cells are annotated with custom strings;
# the "\n" in the first label puts its text on two lines
data = np.array([
    [0.0, 0.0],
    [-0.231049, 0.0],
    [-0.231049, 0.0],
])
labels = np.array([
    ['A\nExtra Stuff', 'B'],
    ['C', 'D'],
    ['E', 'F'],
])
fig, ax = plt.subplots()
# fmt='' so the annotation strings are shown without numeric formatting
ax = sns.heatmap(data, annot=labels, fmt='')
Here is an example that gets seaborn.heatmap to display the flightsRoundUp values in the cells.
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
def RoundUp(x):
    """Return *x* rounded up to the next multiple of 10, as an int.

    Values that are already a multiple of 10 are returned unchanged.
    """
    return int(np.ceil(x / 10) * 10)
# Load the example flights dataset and convert it from long-form to a
# month x year table of passenger counts
flights_long = sns.load_dataset("flights")
# Keyword arguments: current pandas no longer accepts pivot's arguments
# positionally, and the keyword form also works on older versions
flights = flights_long.pivot(index="month", columns="year", values="passengers")
flightsRoundUp = flights.applymap(RoundUp)

# Draw a heatmap coloured by the raw values and annotated with the rounded ones
f, ax = plt.subplots(figsize=(9, 6))
sns.heatmap(flights, annot=flightsRoundUp, fmt="", linewidths=.5, ax=ax)
What is the best way to display both flightsRoundUp and flights in all cells? Something like the first manual example above, but for all the cells in a vectorized-like way...
Rotail's answer didn't work for me, I got an error when applying that lambda function.
However, I found a solution that exploits the fact that seaborn plots sequential figures on top of each other. All you have to do is use one call to heatmap to establish the figure, and then a subsequent call for each of the annotations. Use the annot_kws argument to make sure the texts aren't written over each other.
# X supplies the cell values; Y and Z each supply one line of annotation text
X = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
Y = pd.DataFrame({'A': list('ABC'), 'B': list('EFG')})
Z = pd.DataFrame({'A': ['(Extra Stuff)'] * 3, 'B': ['(Extra Stuff)'] * 3})

# The first call draws the heatmap itself without annotations; each following
# call adds one annotation layer with a different vertical alignment
sns.heatmap(X, annot=False)
for annotation, anchor in ((Y, 'bottom'), (Z, 'top')):
    sns.heatmap(X, annot=annotation, annot_kws={'va': anchor}, fmt="", cbar=False)
Following works for me too:
# Build a table of annotation strings by gluing each X value, '\n(', the
# matching Y value and ')' together.
X = pd.DataFrame({'a':[1, 2, np.nan], 'b':[10, 20, 30]})
Y = pd.DataFrame({'A':[11, 222, np.nan], 'B':[110, np.nan, 330]})
# convert to string
# NOTE(review): '|S5' looks like a 5-byte string dtype, so longer values would
# presumably be cut off - confirm. reset_index() adds the index as an extra
# leading column.
X_value_ann = (X).astype('|S5').reset_index()
Y_value_ann = (Y).astype('|S5').reset_index()
# define () and new line to glue on later
# (constant tables with the same index and columns as the value tables)
br = np.char.array(pd.DataFrame('\n(', index=X_value_ann.index, columns=X_value_ann.columns))
cl = np.char.array(pd.DataFrame(')', index=X_value_ann.index, columns=X_value_ann.columns))
# convert to chararray
X_value_ann = np.char.array(X_value_ann)
Y_value_ann = np.char.array(Y_value_ann)
# glue and reshape
my_annotation = pd.DataFrame(X_value_ann+br+Y_value_ann+cl)
# the glued values are bytes; decode them to str
my_annotation = my_annotation.applymap(lambda x: x.decode('utf-8'))
# presumably column 0 is the one that came from reset_index() - confirm
my_annotation = my_annotation.drop(columns=[0])
# bare expression: displays the result when run in a notebook cell
my_annotation
You should be able to set fmt="" and format your labels with appropriate "\n" characters to get multiple lines of annotations.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
np.random.seed(0)
sns.set_theme()
uniform_data = np.random.rand(4, 4)
fig, ax = plt.subplots(figsize=(50, 20))

# One label per cell, with the same shape as the data; the "\n" splits each
# annotation across two lines
uniform_data_labels = [['Example\nExample' for _ in row] for row in uniform_data]

# fmt="" so the label strings are used as-is instead of being number-formatted
sns.heatmap(uniform_data, vmin=0, vmax=1, annot=uniform_data_labels, ax=ax,
            fmt="", annot_kws={"fontsize": 30})
plt.show()
1
I have a pandas dataframe containing 20 columns containing a mixture of numeric and categorical data. I want to plot a 5x4 matrix of plots of the data. Using matplotlib and subplots I now have plots for all the numeric data but for the life of me I can't work out how to include the categorical data.
I want something like
df['RBC'].value_counts().plot(kind='bar')
But in a subplot.
Here is some of the code (I've omitted some of the repetitions for brevity).
from rdkit.Chem import AllChem as Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import PandasTools
from rdkit.Chem import Draw
import pybel
import pandas as pd
import matplotlib
import matplotlib.pyplot as p
import matplotlib.ticker as plticker
%matplotlib inline
#commandline application to calculate properties
output = !/Applications/ChemAxon/MarvinBeans/bin/evaluate /Users/username/Desktop/SampleFiles/Fragments.sdf -g -e "field('IDNUMBER'); molString('smiles'); logp(); logd('7.4'); apka('1'); bpka('1'); atomCount(); mass(); acceptorcount(); donorcount(); topologicalPolarSurfaceArea(); rotatablebondcount(); refractivity(); ASAHydrophobic('7.4'); ASAPolar('7.4'); atomCount()-atomCount('1');aromaticAtomCount()/(atomCount()-atomCount('1'))"
[line.split(';') for line in output]
cols = ['ID', 'smiles', 'logP', 'logD', 'apKa', 'bpKa', 'atomCount', 'mass', 'HBA', 'HBD', 'TPSA', 'RBC', 'MR', 'ASAh', 'ASAp', 'HAC', 'FractionAromatic']
df = pd.DataFrame([line.split(';') for line in output], columns=cols)
df = df.convert_objects(convert_numeric=True)
# Series of calculations using the calculated data to add several categorical
# numeric and text fields to the dataframe.
myLogP = df['logP']
myLogD = df['logD']
myMass = df['mass']
myTPSA = df['TPSA']
myRBC = df['RBC']
myRBCmax = max(myRBC) + 1
myHBA = df['HBA']
myHBAmax = max(myHBA) + 1
myHBD = df['HBD']
myHBDmax = max(myHBD) + 1
myHAC = df['HAC']
# Despite the name this is the full range of heavy-atom counts, not a maximum
myHACmax = range(min(myHAC), max(myHAC) + 1)
myFraromatic = df['FractionAromatic']

fig, axes = plt.subplots(nrows=5, ncols=4)
# A 5x4 grid yields 20 axes, so 20 names are needed: unpacking into only 19
# (ax16 was missing) raises "ValueError: too many values to unpack".
(ax0, ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8, ax9,
 ax10, ax11, ax12, ax13, ax14, ax15, ax16, ax17, ax18, ax19) = axes.flat

axis_font = {'fontname': 'Arial', 'size': '14'}
title_font = {'fontname': 'Arial', 'size': '14', 'color': 'blue'}

loc = plticker.MultipleLocator(base=1.0)

ax0.hist(myLogP, histtype='bar')
ax0.set_title('LogP', title_font)
ax0.set_xlabel('Range of LogP', axis_font)
# axis_font added so this y label matches the other subplots
ax0.set_ylabel('Count', axis_font)

ax1.hist(myLogD, histtype='bar')
ax1.set_title('LogD', title_font)
ax1.set_xlabel('Range of LogD', axis_font)
ax1.set_ylabel('Count', axis_font)

ax2.hist(myMass, histtype='bar', color='red')
ax2.set_title('Mass', title_font)
ax2.set_xlabel('Range of MWt', axis_font)
ax2.set_ylabel('Count', axis_font)

ax3.hist(myTPSA, histtype='bar', color='yellow')
ax3.set_title('TPSA', title_font)
ax3.set_xlabel('Range of TPSA', axis_font)
ax3.set_ylabel('Count', axis_font)

#etc.
#ax8 'AZBN' is a categorical text field

ax9.hist(myFraromatic, bins=10, histtype='bar')
ax9.set_title('Aromatic', title_font)
ax9.set_xlabel('Fraction of Aromatic atoms', axis_font)
ax9.set_ylabel('Count', axis_font)

#further categorical plots

fig.set_size_inches(20, 15)
plt.tight_layout()
plt.show()
You should really post the code you've tried and some sample data. It's impossible to know the best approach otherwise. However, I think you might want to try the following approach, which uses the matplotlib API rather than pandas and gives you more control over what goes into each plot:
from matplotlib import pyplot as plt
fig, axes = plt.subplots(5, 4)  # axes is a numpy array of pyplot Axes
axes = iter(axes.ravel())  # one shared iterator hands out the next free subplot

categoricals = df.columns[df.dtypes == 'category']
numeric = df.columns[df.dtypes != 'category']

for col in categoricals:
    # next(axes) instead of axes.next(): the method form exists only in
    # Python 2, the builtin works in both Python 2 and 3
    ax = df[col].value_counts().plot(kind='bar', ax=next(axes))
    # do other stuff with ax, formatting etc.
    # the plot method returns the axis used for the plot for further manipulation

for col in numeric:
    ax = df[col].plot(ax=next(axes))
    # etc.
This is just to give you ideas as I don't know the specifics of your data and how you want to plot each column, what kind of data types you have etc.
I'm trying to duplicate my y axis so that it appears on both the left and the right side of my graph (same scale on each side). I believe the correct way to do this is through the twiny method, but cannot get my head round it. Here is my current code:
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Question snippet: draw the first column of data_df as a log-scaled bar chart,
# then either save it as 'bar<method>.png' or return the figure.
#   data_df   - frame whose index supplies the x tick labels and whose first
#               column supplies the bar heights
#   colour_df - not used in the visible body
#   method    - only used to build the output file name
#   ret_obj   - False: save the figure and close it; True: return the figure
def bar(data_df,
colour_df=None,
method='default',
ret_obj=False):
# Figure size scales with the data: 4 units of height per column and a
# quarter unit of width per row
height = len(data_df.columns)*4
width = len(data_df.index)/4
ind = np.arange(len(data_df.index))
dat = data_df[data_df.columns[0]]
bar_width = 0.85
fig, ax = plt.subplots(figsize=(width,height))
ax1 = ax.bar(ind,dat,bar_width,color='y',log=True)
# NOTE: per the AttributeError quoted below, ax.bar returns a BarContainer
# rather than an Axes, so the next line is the one that fails
ax2 = ax1.twiny()
ax.tick_params(bottom='off', top='off', left='on', right='on')
plt.xticks(np.arange(len(data_df.index)) + bar_width,
data_df.index, rotation=67,ha='right')
# ylab is assigned but not used in the visible body
ylab = 'Region Length (base pairs, log10)'
figname = 'bar' + method + '.png'
if ret_obj==False:
fig.savefig(figname,bbox_inches='tight',dpi=250)
# print statement form, i.e. Python 2 syntax
print "Output figure:", figname
plt.close()
if ret_obj==True:
return fig
Which returns the following error when passed a dataframe:
AttributeError: 'BarContainer' object has no attribute 'twiny'
Having looked into it a bit further I believe that using the host/parasite methods would also work, but I'm a bit lost how I could fit it into my current code. Advice would be gratefully appreciated!
You don't have to use twiny in this case. It suffices to draw the labels on all sides:
# Draw the bars on the existing axes, then switch on the tick labels on all
# four sides instead of creating a twin axis
bars = ax.bar(ind, dat, bar_width, color='y', log=True)
ax.tick_params(
    axis='both',
    which='both',
    labelbottom=True,
    labeltop=True,
    labelleft=True,
    labelright=True,
)
I get following result with dummy data:
df = pd.DataFrame({"a": np.logspace(1,10,20)})
bar(df)