Pandas style.background_gradient ignore NaN

Pandas style.background_gradient ignore NaN - python

I have the following code to dump the dataframe results into a table in HTML, such that the columns in TIME_FRAMES are colored according to a colormap from seaborn.
import seaborn as sns
TIME_FRAMES = ["24h", "7d", "30d", "1y"]
# Set CSS properties for th elements in dataframe
th_props = [
('font-size', '11px'),
('text-align', 'center'),
('font-weight', 'bold'),
('color', '#6d6d6d'),
('background-color', '#f7f7f9')
]
# Set CSS properties for td elements in dataframe
td_props = [
('font-size', '11px')
]
cm = sns.light_palette("green", as_cmap=True)
s = (results.style.background_gradient(cmap=cm, subset=TIME_FRAMES)
.set_table_styles(styles))
a = s.render()
with open("test.html", "w") as f:
f.write(a)
From this, I get the warning:
/python3.7/site-packages/matplotlib/colors.py:512: RuntimeWarning:
invalid value encountered in less xa[xa < 0] = -1
And, as you can see in the picture below, the columns 30d and 1y don't get rendered correctly, as they have NaN's. How can I just make it so that the NaN's are ignored and the colors are rendered only using the valid values? Setting the NaN's to 0 is not a valid option, as NaN's here have a meaning by themselves.

A bit late, but for future reference.
I had the same problem, and here is how I solved it:
import pandas as pd
import numpy as np
dt = pd.DataFrame({'col1': [1,2,3,4,5], 'col2': [4,5,6,7,np.nan], 'col3': [8,2,6,np.nan,np.nan]})
First, fill the NaNs with a placeholder value larger than anything in the data:
dt.fillna(dt.max().max()+1, inplace=True)
Function to color the font of this max value white
def color_max_white(val, max_val):
    """Return a CSS font-colour rule that hides the placeholder value.

    Args:
        val: The cell value being styled.
        max_val: The value to hide (here, the placeholder that replaced NaN).

    Returns:
        ``'color: white'`` when ``val == max_val``, otherwise
        ``'color: black'``.
    """
    color = 'white' if val == max_val else 'black'
    return f'color: {color}'
Function to color the background of the maximum value white
def highlight_max(data, color='white'):
    """Build background-colour styles that mark the maximum value(s).

    Args:
        data: A Series (from ``Styler.apply`` with ``axis=0`` or ``axis=1``)
            or a DataFrame (from ``axis=None``).
        color: CSS colour used for the background of the maximum cell(s).

    Returns:
        For a Series, a list of CSS strings, one per element. For a
        DataFrame, a DataFrame of CSS strings with the same index and
        columns. Cells that are not the maximum get ``''``.
    """
    attr = 'background-color: {}'.format(color)

    # One-dimensional input: compare against the Series' own maximum.
    if data.ndim == 1:
        is_max = data == data.max()
        return [attr if flag else '' for flag in is_max]

    # Two-dimensional input: compare against the maximum of the whole frame.
    is_max = data == data.max().max()
    return pd.DataFrame(
        np.where(is_max, attr, ''),
        index=data.index,
        columns=data.columns,
    )
Putting everything together
max_val = dt.max().max()
dt.style.format("{:.2f}").background_gradient(cmap='Blues', axis=None).applymap(lambda x: color_max_white(x, max_val)).apply(highlight_max, axis=None)
This link helped me for the answer

this works fine for me
df.style.applymap(lambda x: 'color: transparent' if pd.isnull(x) else '')

@quant's answer almost worked for me, but my background gradient would still use the max value to calculate the color gradient. I implemented @night-train's suggestion to set the color map, then used two functions:
import copy
cmap = copy.copy(plt.cm.get_cmap("Blues"))
cmap.set_under("white")
def color_nan_white(val):
    """Color the nan text white.

    Args:
        val: A numeric cell value (``np.isnan`` is applied to it).

    Returns:
        ``'color: white'`` for NaN, otherwise ``''`` (no style). An explicit
        empty string is returned instead of an implicit ``None`` so the
        result is always a CSS string.
    """
    return 'color: white' if np.isnan(val) else ''
def color_nan_white_background(val):
    """Color the nan cell background white.

    Args:
        val: A numeric cell value (``np.isnan`` is applied to it).

    Returns:
        ``'background-color: white'`` for NaN, otherwise ``''`` (no style).
        An explicit empty string is returned instead of an implicit ``None``
        so the result is always a CSS string.
    """
    return 'background-color: white' if np.isnan(val) else ''
I then applied them to my dataframe, again borrowing from @quant with a slight modification for ease:
(df.style
.background_gradient(axis='index')
.applymap(lambda x: color_nan_white(x))
.applymap(lambda x: color_nan_white_background(x))
)
Then it worked perfectly.

Related

using applymap method to color cells based on condition

I would like to color some cells (in a data frame) based on their content using pandas, I did some tries but with no required results
this is my last failed try :
import pandas as pd
import dataframe_image as dfi
df = pd.read_excel('splice traitment.xlsx', sheet_name='Sheet4', usecols="B,C,D,E,F,G,H,I,J,K")
def color_cells(val):
    """Return the CSS style for one cell: red background when it equals 7.

    Args:
        val: The cell value.

    Returns:
        ``'background-color: red'`` when ``val == 7``, otherwise ``''``.
        Non-matching cells get no declaration at all rather than a
        ``background-color`` with an empty value, which is not valid CSS.
    """
    return 'background-color: red' if val == 7 else ''
df.style.applymap(color_cells)
dfi.export(df,"table.png")
Thank you very much

You can pass styled DataFrame to export method:
dfi.export(df.style.applymap(color_cells),"table.png")
Or assign it to a variable named styled:
styled = df.style.applymap(color_cells)
dfi.export(styled,"table.png")

comparing two columns and highlighting differences in dataframe

What would be the best way to compare two columns and highlight if there is a difference between two columns in dataframe?
df = pd.DataFrame({'ID':['one2', 'one3', 'one3', 'one4' ],
'Volume':[5.0, 6.0, 7.0, 2.2],
'BOX':['one','two','three','four'],
'BOX2':['one','two','five','one hundred']})
I am trying to compare the BOX column and BOX2 column and I'd like to highlight the differences between them.

Maybe you can do something like this:
df.style.apply(lambda x: (x != df['BOX']).map({True: 'background-color: red; color: white', False: ''}), subset=['BOX2'])
Output (in Jupyter):

You might try something like:
def hl(d):
    """Highlight the BOX and BOX2 cells of every row where they differ.

    Args:
        d: The whole DataFrame (as passed by ``Styler.apply`` with
            ``axis=None``); it must contain 'BOX' and 'BOX2' columns.

    Returns:
        A DataFrame of CSS strings with the same index and columns as ``d``.
        The 'BOX' and 'BOX2' cells of differing rows hold
        ``'background: yellow'``; every other cell holds ``''``.
    """
    # Start from empty strings (not NaN) so every cell is a valid CSS string,
    # and use a local name that does not shadow the outer ``df``.
    styles = pd.DataFrame('', index=d.index, columns=d.columns)
    differs = d['BOX'].ne(d['BOX2'])
    styles.loc[differs, ['BOX', 'BOX2']] = 'background: yellow'
    return styles
df.style.apply(hl, axis=None)
output:
for the whole row:
def hl(d):
    """Highlight every cell of each row where BOX and BOX2 differ.

    Args:
        d: The whole DataFrame (as passed by ``Styler.apply`` with
            ``axis=None``); it must contain 'BOX' and 'BOX2' columns.

    Returns:
        A DataFrame of CSS strings with the same index and columns as ``d``.
        All cells of differing rows hold ``'background: yellow'``; every
        other cell holds ``''``.
    """
    # Start from empty strings (not NaN) so every cell is a valid CSS string,
    # and use a local name that does not shadow the outer ``df``.
    styles = pd.DataFrame('', index=d.index, columns=d.columns)
    differs = d['BOX'].ne(d['BOX2'])
    styles.loc[differs, :] = 'background: yellow'
    return styles
df.style.apply(hl, axis=None)
output:

Apply multiple styles to cell in pandas

So, basically I want to apply styles to cell and render to html.
This is what I have done so far:
import pandas as pd
import numpy as np
data = [dict(order='J23-X23', num=5, name='foo', value='YES'),
dict(order='Y23-X24', num=-4, name='bar', value='NO'),
dict(order='J24-X24', num=6, name='baz', value='NO'),
dict(order='X25-X24', num=-2, name='foobar', value='YES')]
df = pd.DataFrame(data)
def condformat(row):
    """Return one background-colour style per cell of ``row``.

    The colour is green when the row's 'value' entry is 'YES' and blue
    otherwise.

    Args:
        row: A row of the DataFrame (as passed by ``Styler.apply`` with
            ``axis=1``); it must have a 'value' entry.

    Returns:
        A tuple with ``len(row)`` identical CSS strings, so the function
        works for any number of columns rather than exactly four.
    """
    cell_bg_color = 'green' if row['value'] == 'YES' else 'blue'
    style = 'background-color: {}'.format(cell_bg_color)
    return (style,) * len(row)
s = df.style.apply(condformat, axis=1)
with open('c:/temp/test.html', 'w') as f:
f.write(s.render())
My problem is two fold:
(1) Am I using the right rendering? Or is there a better way to do this? As in what does to_html give me?
(2) What if I have to add another style, say make -ve numbers in the format (35) in red color?

So, after battling with trial-error:
s = df.style.apply(condformat, axis=1).applymap(dataframe_negative_coloring)
with a new method:
def dataframe_negative_coloring(value):
    """Return a CSS font-colour rule: red for negative numbers, else black.

    Args:
        value: A single cell value of any type.

    Returns:
        ``'color: red'`` when ``value < 0``, otherwise ``'color: black'``.
        Values that cannot be compared with ``0`` (for example the string
        cells of the example frame) are treated as non-negative instead of
        raising ``TypeError``.
    """
    try:
        is_negative = value < 0
    except TypeError:
        # Non-numeric cell (e.g. str): there is no sign to colour.
        is_negative = False
    color = 'red' if is_negative else 'black'
    return 'color: %s' % color
Any purists out there, feel free to advise if I can make this better.

Highlight the maximum value in the dataframe and save as csv

I have code that generates the maximum value, but I need to highlight that maximum value (or at least get the cell address of that value).
I tried to apply the style, but it is throwing some error:
AttributeError: module 'numpy' has no attribute 'style'
My code would be like this :
import pandas as pd
import numpy as np
dataset = pd.read_csv("ALL_CURVES.csv")
dataset['groups'] = dataset.index//192
Results=dataset.groupby('groups').max()
Results['groups'] = Results.index//20
outgrid=Results.iloc[:,1].values
#####
X=outgrid
x=np.array(X)
output=np.reshape(x, (9,-1 ))
max_output=output.max()
np.amax(output, axis=None, out=None)
np.style.apply(highlight_max, color='darkorange', axis=1)
#####
#print(output)
np.savetxt('output.csv', output,fmt="%2.2f", delimiter=',')

You can do something like this:
df = pd.DataFrame({'A':['15.45','78.7456','79.24', '75.45'],
'B':['78.6','45.23','45.4', '34.45'],
'C':['8.6','5.23','5.4', '4.45']})
print (df)
def highlight_max(data, color='red'):
    """Build background-colour styles that mark the maximum value(s).

    The input is cast to float first, so numeric strings such as '78.6'
    are compared as numbers.

    Args:
        data: A Series or a DataFrame whose values can be cast to float.
        color: CSS colour used for the background of the maximum cell(s).

    Returns:
        For a Series, a list of CSS strings, one per element. For a
        DataFrame, a DataFrame of CSS strings with the same index and
        columns. Cells that are not the maximum get ``''``.
    """
    attr = 'background-color: {}'.format(color)
    numeric = data.astype(float)

    # One-dimensional input: compare against the Series' own maximum.
    if numeric.ndim == 1:
        is_max = numeric == numeric.max()
        return [attr if flag else '' for flag in is_max]

    # Two-dimensional input: compare against the maximum of the whole frame.
    is_max = numeric == numeric.max().max()
    return pd.DataFrame(
        np.where(is_max, attr, ''),
        index=numeric.index,
        columns=numeric.columns,
    )
This highlights the maximum values in the dataframe
And with this you can save it to excel.
# Rebind df to the styled object, then write that same object to Excel.
# The original wrote `dfPercent`, a name that is never defined in this snippet.
df = df.style.apply(highlight_max)
print(df)
df.to_excel('file.xlsx')
A similar answer is here Python Pandas - Highlighting maximum value in column

Pandas style: How to highlight diagonal elements

I was wondering how to highlight diagonal elements of pandas dataframe using df.style method.
I found this official link where they discuss how to highlight maximum value, but I am having difficulty creating function to highlight the diagonal elements.
Here is an example:
import numpy as np
import pandas as pd
df = pd.DataFrame({'a':[1,2,3,4],'b':[1,3,5,7],'c':[1,4,7,10],'d':[1,5,9,11]})
def highlight_max(s):
    """Highlight the maximum in a Series yellow.

    Args:
        s: The Series to inspect.

    Returns:
        A list of CSS strings, one per element: ``'background-color: yellow'``
        for elements equal to the maximum, ``''`` for the rest.
    """
    is_max = s == s.max()
    return ['background-color: yellow' if flag else '' for flag in is_max]
df.style.apply(highlight_max)
This gives following output:
I am wanting a yellow highlight across the diagonal elements 1,3,7,11 only.
How to do that?

Using axis=None, we can use numpy to easily set the diagonal styles (credit for this goes to @CJR):
import numpy as np
import pandas as pd
def highlight_diag(df):
    """Build a style frame that colours the main diagonal yellow.

    Args:
        df: The whole DataFrame (as passed by ``Styler.apply`` with
            ``axis=None``).

    Returns:
        A DataFrame of CSS strings with the same index and columns as
        ``df``: ``'background-color: yellow'`` on the main diagonal and
        ``''`` elsewhere.
    """
    # Object dtype instead of a fixed-width '<U24': the style string is
    # exactly 24 characters, so any longer value would be silently truncated.
    styles = np.full(df.shape, '', dtype=object)
    np.fill_diagonal(styles, 'background-color: yellow')
    return pd.DataFrame(styles, index=df.index, columns=df.columns)
df.style.apply(highlight_diag, axis=None)
Original, really hacky solution
a = np.full(df.shape, '', dtype='<U24')
np.fill_diagonal(a, 'background-color: yellow')
df_diag = pd.DataFrame(a,
index=df.index,
columns=df.columns)
def highlight_diag(s, df_diag):
    """Look up the precomputed styles for the column being styled.

    Args:
        s: The Series currently being styled; only its ``name`` is used.
        df_diag: A DataFrame of CSS strings keyed by the same column names.

    Returns:
        The column of ``df_diag`` whose label equals ``s.name``.
    """
    return df_diag[s.name]
df.style.apply(highlight_diag, df_diag=df_diag)

The trick is to use the axis=None parameter of the df.style.apply function in order to access the entire dataset:
import numpy as np
import pandas as pd
df = pd.DataFrame({'a':[1,2,3,4],'b':[1,3,5,7],'c':[1,4,7,10],'d':[1,5,9,11]})
def highlight_diag(data, color='yellow'):
    """Highlight the diagonal values in a DataFrame.

    Args:
        data: The whole DataFrame (as passed by ``Styler.apply`` with
            ``axis=None``).
        color: CSS colour used for the background of the diagonal cells.

    Returns:
        A DataFrame of CSS strings with the same index and columns as
        ``data``: ``'background-color: <color>'`` on the main diagonal and
        ``''`` elsewhere.
    """
    attr = 'background-color: {}'.format(color)
    # Build the style grid as a fresh array instead of writing through
    # ``DataFrame.values``, which is not guaranteed to be a writable view
    # of the frame's data.
    styles = np.full(data.shape, '', dtype=object)
    np.fill_diagonal(styles, attr)
    return pd.DataFrame(styles, index=data.index, columns=data.columns)
df.style.apply(highlight_diag, axis=None)

The other answer is pretty good but I already wrote this so....
def style_diag(data):
    """Build a style frame that colours the main diagonal yellow.

    Args:
        data: The whole DataFrame (as passed by ``Styler.apply`` with
            ``axis=None``).

    Returns:
        A DataFrame of CSS strings with the same index and columns as
        ``data``: ``'background-color: yellow'`` on the main diagonal and
        ``''`` elsewhere.
    """
    diag_mask = pd.DataFrame("", index=data.index, columns=data.columns)
    # Set the cells one (i, i) position at a time. Indexing with
    # ``iloc[range(n), range(n)]`` selects the whole n-by-n block, not just
    # the diagonal, so it would colour every cell of a square frame.
    for pos in range(min(diag_mask.shape)):
        diag_mask.iat[pos, pos] = 'background-color: yellow'
    return diag_mask
df = pd.DataFrame({'a':[1,2,3,4],'b':[1,3,5,7],'c':[1,4,7,10],'d':[1,5,9,11]})
df.style.apply(style_diag, axis=None)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Pandas style.background_gradient ignore NaN - python

this works fine for me df.style.applymap(lambda x: 'color: transparent' if pd.isnull(x) else '')

Related

using applymap method to color cells based on condition

comparing two columns and highlighting differences in dataframe

Apply multiple styles to cell in pandas

Highlight the maximum value in the dataframe and save as csv

Pandas style: How to highlight diagonal elements

Categories

Resources