So, basically I want to apply styles to cell and render to html.
This is what I have done so far:
import pandas as pd
import numpy as np
data = [dict(order='J23-X23', num=5, name='foo', value='YES'),
dict(order='Y23-X24', num=-4, name='bar', value='NO'),
dict(order='J24-X24', num=6, name='baz', value='NO'),
dict(order='X25-X24', num=-2, name='foobar', value='YES')]
df = pd.DataFrame(data)
def condformat(row):
    """Build the per-cell CSS for one DataFrame row.

    Intended for ``df.style.apply(condformat, axis=1)``: every cell of the
    row gets a green background when the row's ``value`` field is ``'YES'``
    and a blue background otherwise.

    Args:
        row: The row being styled; must expose a ``value`` attribute and
            support ``len()``.

    Returns:
        A tuple holding one ``'background-color: ...'`` string per cell.
    """
    cell_bg_color = 'green' if row.value == 'YES' else 'blue'
    style = 'background-color: {}'.format(cell_bg_color)
    # One entry per cell, so the function keeps working when columns are
    # added or removed instead of assuming exactly four of them.
    return (style,) * len(row)
s = df.style.apply(condformat, axis=1)
with open('c:/temp/test.html', 'w') as f:
f.write(s.render())
My problem is two fold:
(1) Am I using the right rendering? Or is there a better way to do this? As in what does to_html give me?
(2) What if I have to add another style, say make -ve numbers in the format (35) in red color?
So, after battling with trial-error:
s = df.style.apply(condformat, axis=1).applymap(dataframe_negative_coloring)
with a new method:
def dataframe_negative_coloring(value):
    """Return the CSS text colour for a single cell.

    Negative values are rendered in red; everything else in black.

    Args:
        value: The cell value. Cells that cannot be compared with ``0``
            (for example strings) are treated as non-negative.

    Returns:
        ``'color: red'`` for negative values, ``'color: black'`` otherwise.
    """
    try:
        is_negative = value < 0
    except TypeError:
        # Applying this to a whole frame also visits text cells, which
        # cannot be ordered against an int; leave those in the default colour.
        is_negative = False
    color = 'red' if is_negative else 'black'
    return 'color: %s' % color
Any purists out there, feel free to advise if I can make this better.
Related
I would like to color some cells (in a data frame) based on their content using pandas. I made several attempts, but none of them gave the required result.
this is my last failed try :
import pandas as pd
import dataframe_image as dfi
df = pd.read_excel('splice traitment.xlsx', sheet_name='Sheet4', usecols="B,C,D,E,F,G,H,I,J,K")
def color_cells(val):
    """Return the background style for one cell.

    A cell equal to 7 gets a red background; any other cell gets an
    empty colour value.
    """
    if val == 7:
        fill = 'red'
    else:
        fill = ''
    return 'background-color: {}'.format(fill)
df.style.applymap(color_cells)
dfi.export(df,"table.png")
Thank you very much
You can pass styled DataFrame to export method:
dfi.export(df.style.applymap(color_cells),"table.png")
Or assign it to a variable, styled:
styled = df.style.applymap(color_cells)
dfi.export(styled,"table.png")
I am using a jupyter notebook for this project. I am trying to add conditional formatting to my data frame. I would like to give the negative numbers a red background and the positive numbers a green background and if possible get rid of the row numbers. The code I am trying to use down at the bottom does not give back any errors.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as data
tickers = ['SPY', 'TLT', 'XLY', 'XLF', 'XLV', 'XLK', 'XLP', 'XLI', 'XLB', 'XLE', 'XLU', 'XLRE', 'XLC']
df_list = []
for ticker in tickers:
prices = data.DataReader(ticker, 'yahoo', '2021')['Close']
# get all timestamps for specific lookups
today = prices.index[-1]
yest= prices.index[-2]
start = prices.index[0]
week = today - pd.tseries.offsets.Week(weekday=0)
month = today - pd.tseries.offsets.BMonthBegin()
quarter = today - pd.tseries.offsets.BQuarterBegin(startingMonth=1)
# calculate percentage changes
close = prices[today]
daily = (close - prices[yest]) / prices[yest]*100
wtd = (close - prices[week]) / prices[week]*100
mtd = (close - prices[month]) / prices[month]*100
qtd = (close - prices[quarter]) / prices[quarter]*100
ytd = (close - prices[start]) / prices[start]*100
# create temporary frame for current ticker
df = pd.DataFrame(data=[[ticker, close, daily, wtd, mtd, qtd, ytd]],
columns=['Stock', 'Close', 'Daily%', 'WTD%', 'MTD%', 'QTD%', 'YTD%'])
df_list.append(df)
# stack all frames
df = pd.concat(df_list, ignore_index=True)
#conditional formatting highlight negative numbers red background and positive numbers green background in return data
def color_negative_red(value):
    """Pick a colour name from the sign of a single cell value.

    Negative values map to ``'red'``, positive values to ``'green'`` and
    anything else (zero, NaN) to an empty colour.

    Returns:
        A string of the form ``'background_color: <colour>'``.
    """
    background_color = ''
    if value < 0:
        background_color = 'red'
    elif value > 0:
        background_color = 'green'
    return 'background_color: %s' % background_color
df.style.applymap(color_negative_red, subset=['Daily%', 'WTD%', 'MTD%', 'QTD%', 'YTD%']).format({
'Close': '{:,.2f}'.format,
'Daily%': '{:,.2f}%'.format,
'WTD%': '{:,.2f}%'.format,
'MTD%': '{:,.2f}%'.format,
'QTD%': '{:,.2f}%'.format,
'YTD%': '{:,.2f}%'.format,
})
output:
desired output something like this with red and green instead of yellow:
In some regards this specific issue with the styling is simple. The CSS property is background-color not background_color.
The only change strictly necessary to get the styling working is:
return 'background-color: %s' % background_color
However, in regards to the general approach, as well as the question of how to remove the "row numbers", there are a few changes to be made.
Columns can be programmatically filtered with something like str.endswith instead of manually typing out the columns. Also a variable can be helpful when re-using the same column subset.
It is more common to create a 2d structure representing all the styles with something like np.select which accepts a list of conditions and a list of outputs.
The Styler.format can be subset. So instead of lots of copies of the same format string in a dictionary take advantage of this.
The Styler.hide_index method can be used to remove the "row numbers", also known as the DataFrame index.
import numpy as np
def color_negative_red(subset_df):
    """Return an array of background styles matching ``subset_df``.

    Cells below zero get a red background, cells above zero a green one,
    and every remaining cell gets an empty style string.
    """
    is_negative = subset_df < 0
    is_positive = subset_df > 0
    red_style = 'background-color: red'
    green_style = 'background-color: green'
    # np.select takes the first matching condition per cell and falls back
    # to the default where none match.
    return np.select([is_negative, is_positive], [red_style, green_style], default='')
# Select Subset of columns
cols = df.columns[df.columns.str.endswith('%')]
# This can also be hard-coded
# cols = ['Daily%', 'WTD%', 'MTD%', 'QTD%', 'YTD%']
(
df.style.apply(
color_negative_red, subset=cols, axis=None
).format(
# This can be used to set general number styles without a fmt string
precision=2, thousands=",", subset='Close'
).format(
# Apply percent format to subset of cols
'{:,.2f}%', subset=cols
).hide_index() # To remove the "row numbers" hide the index
)
Sample DataFrame used:
df = pd.DataFrame({'Stock': ['SPY', 'TLT', 'XLY', 'XLF', 'XLV', 'XLK', 'XLP', 'XLI', 'XLB', 'XLE', 'XLU', 'XLRE', 'XLC'], 'Close': [453.0799865722656, 148.17999267578125, 183.52000427246094, 38.06999969482422, 136.85000610351562, 159.36000061035156, 72.83000183105469, 104.58999633789062, 85.37000274658203, 48.619998931884766, 69.48999786376953, 48.72999954223633, 85.47000122070312], 'Daily%': [-0.024275881759957655, -0.9094561130207558, -0.05990339673614984, -0.6264726438619272, 0.10973603410557096, 0.39690348375861295, -0.12341791024475374, -0.6081982311376585, -0.66325305868224, -0.5726024238382843, -0.8277463016708558, 0.0, 0.0], 'WTD%': [0.18795205211694732, -1.1144566965053964, 0.1692033989594774, -0.9882992472694656, 1.078371165114885, -0.2128968969240593, 1.0124873403369272, -0.20038807991586702, -0.5706939905500718, -0.22573488894909668, 1.2678447717357961, 2.8059028977694966, 0.19929444649862676], 'MTD%': [0.28331093711392863, -0.47686662420427856, -0.06534258200082742, -0.28811055388244855, 1.1829989674792052, 0.2894944817725163, 0.26156723981653324, 0.4610467051436526, -0.023418015235201397, 1.9500950554754906, 0.07199801354481171, 0.4742258602810889, -0.6740288326379547], 'QTD%': [5.262178352321592, 2.7814348045817643, 2.2053947247214483, 2.9475396594933923, 7.6965502732190245, 7.741191714387993, 4.4008021800644235, 1.4156880862558818, 3.1536980944675, -11.293563740764009, 8.714017294090489, 9.481018602199223, 4.781171436152855], 'YTD%': [22.855819321112786, -5.941348869649508, 15.117304215482205, 30.914717313288243, 21.15981390085853, 24.597340663914796, 9.157674608158786, 21.039222202640808, 19.065559804985433, 28.082192056143985, 13.69437021949676, 37.73317713704715, 28.56497729371572]})
I have a dataframe with several columns and a list with colors associated with each column. I want to highlight the non-blank cells in each column with the associated color.
I've tried iterating over the columns in various ways. The closest thing to success was to put a for loop in the styling function and apply it within a for loop. This correctly highlights the last column, but not the rest.
df=pd.DataFrame({'a':[1,2,3,4],'b':['','',1,''],'c':['a','b','c','']})
df_column_colors=['red','blue','green']
def highlight_cells(value):
    """Return a background style string for one cell.

    An empty-string cell yields ``'background-color: None'``. For any other
    cell the function walks the whole ``df_column_colors`` list, so the
    colour used is always the last entry of that list.
    """
    if value != '':
        # Every entry overwrites the previous one; only the final colour
        # survives the loop.
        for palette_entry in df_column_colors:
            background_color = str(palette_entry)
    else:
        background_color = None
    return 'background-color: %s' % background_color
for i in range(len(df.columns)):
df2=df.style.applymap(highlight_cells,subset=df.columns[i])
you can do this as below:
d= dict(zip(df.columns,['background-color:'+i for i in df_column_colors]))
#{'a': 'background-color:red', 'b': 'background-color:blue', 'c': 'background-color:green'}
def mycolor(x):
    """Return a frame of styles for ``df.style.apply(mycolor, axis=None)``.

    Non-blank cells of ``x`` are replaced by their column's entry from the
    module-level mapping ``d``; blank cells keep their original value.
    """
    # Broadcast the per-column style strings to the shape of ``x``.
    column_styles = pd.DataFrame(d, index=x.index, columns=x.columns)
    # Treat '' as missing so notna() marks exactly the non-blank cells.
    non_blank = x.replace('', np.nan).notna()
    return x.mask(non_blank, column_styles)
df.style.apply(mycolor,axis=None)
Try this:
df = pd.DataFrame({'a':[1,2,3,4],'b':['','',1,''],'c':['a','b','c','']})
df_column_colors=['red','blue','green']
def apply_color(cells):
    """Return one style string per cell of a single column.

    The column's colour is looked up in ``df_column_colors`` by the
    column's position in ``df``. Empty-string cells get no style; every
    other cell gets that background colour.
    """
    color = df_column_colors[df.columns.get_loc(cells.name)]
    return [
        '' if cell == '' else 'background-color: %s' % color
        for cell in cells
    ]
df.style.apply(apply_color)
I was wondering how to highlight diagonal elements of pandas dataframe using df.style method.
I found this official link where they discuss how to highlight maximum value, but I am having difficulty creating function to highlight the diagonal elements.
Here is an example:
import numpy as np
import pandas as pd
df = pd.DataFrame({'a':[1,2,3,4],'b':[1,3,5,7],'c':[1,4,7,10],'d':[1,5,9,11]})
def highlight_max(s):
    """Return a yellow-background style for each maximum of the Series.

    Entries equal to the Series maximum get ``'background-color: yellow'``;
    all other entries get an empty style string.
    """
    at_peak = s.eq(s.max())
    return ['background-color: yellow' if hit else '' for hit in at_peak]
df.style.apply(highlight_max)
This gives following output:
I am wanting a yellow highlight across the diagonal elements 1,3,7,11 only.
How to do that?
Using axis=None we can use numpy to easily set the diagonal styles (credit for this goes to @CJR):
import numpy as np
import pandas as pd
def highlight_diag(df):
    """Return a style frame that highlights the main diagonal in yellow.

    The result has the same index and columns as ``df``; diagonal cells
    hold ``'background-color: yellow'`` and all other cells hold ``''``.
    """
    n_rows, n_cols = df.shape
    on_diagonal = np.eye(n_rows, n_cols, dtype=bool)
    styles = np.where(on_diagonal, 'background-color: yellow', '')
    return pd.DataFrame(styles, index=df.index, columns=df.columns)
df.style.apply(highlight_diag, axis=None)
Original, really hacky solution
a = np.full(df.shape, '', dtype='<U24')
np.fill_diagonal(a, 'background-color: yellow')
df_diag = pd.DataFrame(a,
index=df.index,
columns=df.columns)
def highlight_diag(s, df_diag):
    """Return the precomputed styles for the column being styled.

    Args:
        s: The column currently being styled; only its ``name`` is used.
        df_diag: Frame of style strings with the same column labels.
    """
    column_label = s.name
    return df_diag[column_label]
df.style.apply(highlight_diag, df_diag=df_diag)
The trick is to use the axis=None parameter of the df.style.apply function in order to access the entire dataset:
import numpy as np
import pandas as pd
df = pd.DataFrame({'a':[1,2,3,4],'b':[1,3,5,7],'c':[1,4,7,10],'d':[1,5,9,11]})
def highlight_diag(data, color='yellow'):
    """Highlight the main-diagonal cells of a DataFrame.

    Meant to be used as ``df.style.apply(highlight_diag, axis=None)``.

    Args:
        data: The DataFrame being styled; it is not modified.
        color: CSS colour used for the diagonal background.

    Returns:
        A DataFrame with the same index and columns as ``data`` whose
        diagonal cells hold ``'background-color: <color>'`` and whose other
        cells hold ``''``.
    """
    attr = 'background-color: {}'.format(color)
    # Build the style grid as a fresh array rather than blanking a copy of
    # ``data`` and writing through ``.values``, which is not guaranteed to
    # be a writable view of the frame.
    styles = np.full(data.shape, '', dtype=object)
    np.fill_diagonal(styles, attr)
    return pd.DataFrame(styles, index=data.index, columns=data.columns)
df.style.apply(highlight_diag, axis=None)
The other answer is pretty good but I already wrote this so....
def style_diag(data):
    """Return a style frame that highlights the main diagonal in yellow.

    Meant to be used as ``df.style.apply(style_diag, axis=None)``.

    Args:
        data: The DataFrame being styled.

    Returns:
        A DataFrame shaped like ``data`` holding
        ``'background-color: yellow'`` on the diagonal and ``''`` elsewhere.
    """
    diag_mask = pd.DataFrame("", index=data.index, columns=data.columns)
    # Set the cells one (i, i) pair at a time: indexing ``iloc`` with two
    # ranges selects the whole square block, not just its diagonal.
    for pos in range(min(diag_mask.shape)):
        diag_mask.iat[pos, pos] = 'background-color: yellow'
    return diag_mask
df = pd.DataFrame({'a':[1,2,3,4],'b':[1,3,5,7],'c':[1,4,7,10],'d':[1,5,9,11]})
df.style.apply(style_diag, axis=None)
I have the following code to dump the dataframe results into a table in HTML, such that the columns in TIME_FRAMES are colored according to a colormap from seaborn.
import seaborn as sns
TIME_FRAMES = ["24h", "7d", "30d", "1y"]
# Set CSS properties for th elements in dataframe
th_props = [
('font-size', '11px'),
('text-align', 'center'),
('font-weight', 'bold'),
('color', '#6d6d6d'),
('background-color', '#f7f7f9')
]
# Set CSS properties for td elements in dataframe
td_props = [
('font-size', '11px')
]
cm = sns.light_palette("green", as_cmap=True)
s = (results.style.background_gradient(cmap=cm, subset=TIME_FRAMES)
.set_table_styles(styles))
a = s.render()
with open("test.html", "w") as f:
f.write(a)
From this, I get the warning:
/python3.7/site-packages/matplotlib/colors.py:512: RuntimeWarning:
invalid value encountered in less xa[xa < 0] = -1
And, as you can see in the picture below, the columns 30d and 1y don't get rendered correctly, as they have NaN's. How can I just make it so that the NaN's are ignored and the colors are rendered only using the valid values? Setting the NaN's to 0 is not a valid option, as NaN's here have a meaning by themselves.
A bit late, but for future reference.
I had the same problem, and here is how I solved it:
import pandas as pd
import numpy as np
dt = pd.DataFrame({'col1': [1,2,3,4,5], 'col2': [4,5,6,7,np.nan], 'col3': [8,2,6,np.nan,np.nan]})
First fill in the nas with a big value
dt.fillna(dt.max().max()+1, inplace=True)
Function to color the font of this max value white
def color_max_white(val, max_val):
    """Return the text colour for a cell: white when it equals ``max_val``.

    Every other cell is given black text.
    """
    if val == max_val:
        return 'color: %s' % 'white'
    return 'color: %s' % 'black'
Function to color the background of the maximum value white
def highlight_max(data, color='white'):
    """Give the maximum value(s) a coloured background.

    Args:
        data: A Series (from ``.apply(axis=0)`` or ``axis=1``) or a whole
            DataFrame (from ``.apply(axis=None)``).
        color: CSS colour used for the background of the maximum.

    Returns:
        A list of style strings for a Series, or a DataFrame of style
        strings with the same index and columns for a DataFrame.
    """
    attr = 'background-color: {}'.format(color)
    if data.ndim != 1:
        # Whole frame: compare every cell against the overall maximum.
        overall_max = data.max().max()
        styles = np.where(data == overall_max, attr, '')
        return pd.DataFrame(styles, index=data.index, columns=data.columns)
    # Single row or column: compare against that Series' own maximum.
    series_max = data.max()
    return [attr if hit else '' for hit in data == series_max]
Putting everything together
max_val = dt.max().max()
dt.style.format("{:.2f}").background_gradient(cmap='Blues', axis=None).applymap(lambda x: color_max_white(x, max_val)).apply(highlight_max, axis=None)
This link helped me for the answer
this works fine for me
df.style.applymap(lambda x: 'color: transparent' if pd.isnull(x) else '')
@quant's answer almost worked for me, but my background gradient would still use the max value to calculate the color gradient. I implemented @night-train's suggestion to set the color map, then used two functions:
import copy
cmap = copy.copy(plt.cm.get_cmap("Blues"))
cmap.set_under("white")
def color_nan_white(val):
    """Return CSS that renders the text of a missing cell in white.

    Args:
        val: A single cell value of any type.

    Returns:
        ``'color: white'`` when ``val`` is missing (NaN/None), otherwise
        ``''`` so the cell keeps its existing text colour.
    """
    # pd.isna accepts strings and None, where np.isnan raises TypeError.
    if pd.isna(val):
        return 'color: white'
    # Always return a string so the caller never receives an implicit None.
    return ''
def color_nan_white_background(val):
    """Return CSS that gives a missing cell a white background.

    Args:
        val: A single cell value of any type.

    Returns:
        ``'background-color: white'`` when ``val`` is missing (NaN/None),
        otherwise ``''`` so the cell keeps its existing background.
    """
    # pd.isna accepts strings and None, where np.isnan raises TypeError.
    if pd.isna(val):
        return 'background-color: white'
    # Always return a string so the caller never receives an implicit None.
    return ''
And then applied them to my dataframe, again borrowing from @quant with a slight modification for ease:
(df.style
.background_gradient(axis='index')
.applymap(lambda x: color_nan_white(x))
.applymap(lambda x: color_nan_white_background(x))
)
Then it worked perfectly.