Formatting Headers of dataframes with openpyxl

Formatting Headers of dataframes with openpyxl - python

I am currently using the code below to write the first two dataframes into two different sheets of an Excel workbook, and then a loop to take dataframes from a list and write them to a separate sheet. I want to format the headers of the dataframes in the list, so I used:
# Open the existing workbook with load_workbook and attach it to the pandas
# writer, registering the sheets already in the file by title.
# NOTE(review): the two paths are built differently (desktop+'test.xlsx' vs
# desktop+'/test.xlsx'), so they may not name the same file — verify.
book=load_workbook(desktop+'test.xlsx')
writer =pd.ExcelWriter(desktop+'/test.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = {ws.title: ws for ws in book.worksheets}
# The first two frames go to their own sheets, starting at row 1 and written
# without a header row or index.
frameg.to_excel(writer,sheet_name='Sheet 1',startrow=1,index = False,header= False)
framed.to_excel(writer,sheet_name='Sheet 2',startrow=1,index=False,header=False )
# Each frame in ls2 is written, with its header, below the last used row of
# the sheet named 'test'.
for df in ls2:
df.to_excel(writer,sheet_name='test',startrow=writer.sheets['test'].max_row+1,header=True,index=False)
# NOTE(review): `book` is the object returned by load_workbook and the writer
# uses engine='openpyxl'; the option keys below ('fg_color', 'text_wrap', ...)
# look like XlsxWriter format properties, which presumably cannot be used with
# an openpyxl workbook — confirm.
header_format = book.add_format({
'bold': True,
'text_wrap': True,
'valign': 'top',
'fg_color': '#5C5C5C',
'font_color':'#ffffff',
'border': 1})
# NOTE(review): `df2` and `ws` are not assigned anywhere in this snippet (`ws`
# only appears as the comprehension variable above) — verify where they come from.
for col_num, value in enumerate(df2.columns.values):
ws.write(0, col_num , value, header_format)
writer.save()
I am getting an AttributeError: 'Workbook' object has no attribute 'add_format'
Any help would be appreciated

Related

pandas change column color of different sheets

I have a script that reads multiple Excel files and puts them into a final Excel file as sheets.
I also have a function that fills the background of the column names with blue for all sheets inside the workbook, but I want green for some columns and blue for others on specific sheets. Is it possible to do that?
This is my script:
def gitanalysis():
    """Combine the Git usage reports into one formatted workbook.

    Reads the five ``Git*.xlsx`` reports from the directory returned by
    ``createdir()`` and ``CM_UsersDetails.xlsx`` from the directory returned
    by ``path_dir()``, then writes each frame to its own sheet of
    ``<YYYY-MM-DD>-Usage-GitAnalysis.xlsx`` inside the ``createdir()``
    directory. On every sheet the columns are sized to that sheet's own
    content, the header row is frozen and the non-blank header cells are
    filled blue.

    Returns:
        None. The workbook is written to disk and a message is printed.
    """
    dest = createdir()
    dfGitUsers = pd.read_excel(os.path.join(dest, "GitUsers.xlsx"))
    dfGitUsers.fillna("N/A", inplace=True)
    dfGitGroupMembership = pd.read_excel(os.path.join(dest, "GitGroupMembership.xlsx"))
    dfGitRepoGroupAccess = pd.read_excel(os.path.join(dest, "GitRepoGroupAccess.xlsx"))
    dfGitReposSize = pd.read_excel(os.path.join(dest, "GitReposSize.xlsx"))
    dfGitRepoLastChangeDate = pd.read_excel(os.path.join(dest, "GitRepoLastChangeDate.xlsx"))
    pathdest = path_dir()
    # CM_UsersDetails.xlsx is read from a different directory than the Git reports.
    dfUserDetails = pd.read_excel(rf"{pathdest}\CM_UsersDetails.xlsx")
    dfUserDetails.fillna("N/A", inplace=True)

    # Sheet name -> frame, in the order the sheets appear in the workbook.
    frames = {
        'UserDetails': dfUserDetails,
        'GitUsers': dfGitUsers,
        'GitGroupMembership': dfGitGroupMembership,
        'GitRepoGroupAccess': dfGitRepoGroupAccess,
        'GitReposSize': dfGitReposSize,
        'GitRepoLastChangeDate': dfGitRepoLastChangeDate,
    }

    timestr = time.strftime("%Y-%m-%d-")
    out_path = os.path.join(dest, f'{timestr}Usage-GitAnalysis.xlsx')

    # set_column / add_format / conditional_format are XlsxWriter APIs, so the
    # engine is pinned instead of relying on pandas' default choice. The
    # context manager saves the file even if formatting raises part-way.
    with pd.ExcelWriter(out_path, engine='xlsxwriter') as xlwriter:
        header_format = xlwriter.book.add_format({'bg_color': 'blue'})
        header_format.set_bold()
        header_format.set_font_color('black')
        header_format.set_border(1)

        for sheet_name, frame in frames.items():
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]

            # Size every column from the frame shown on *this* sheet; widths
            # measured on one frame do not fit the columns of another.
            for col_idx, column in enumerate(frame.columns):
                cell_lengths = frame.iloc[:, col_idx].astype(str).map(len)
                longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
                ws.set_column(col_idx, col_idx, max(longest_cell, len(str(column))))

            ws.freeze_panes(1, 0)

            # Row/column numbers are used instead of an 'A1:<letter>1' string
            # so the range stays valid beyond column Z.
            last_col = len(frame.columns) - 1
            if last_col >= 0:
                ws.conditional_format(
                    0, 0, 0, last_col,
                    {'type': 'no_blanks', 'format': header_format},
                )

    print("GitSvnAnalysis.xlsx was exported with succes!")
This is how it looks:
And this is the expected output for this sheet:

IIUC, for all your sheets you want the first two columns of the header row to be green and the others to be blue.
You can try the following code to color the header row; in the code, #92D050 corresponds to green and #00B0F0 corresponds to light blue.
def gitanalysis():
    """Combine the Git usage reports into one workbook with two-tone headers.

    Reads the five ``Git*.xlsx`` reports from the directory returned by
    ``createdir()`` and ``CM_UsersDetails.xlsx`` from the directory returned
    by ``path_dir()``, then writes each frame to its own sheet of
    ``<YYYY-MM-DD>-Usage-GitAnalysis.xlsx`` inside the ``createdir()``
    directory. On every sheet the columns are sized to that sheet's own
    content, the header row is frozen, the first two header cells are filled
    green (#92D050) and the remaining header cells light blue (#00B0F0).

    Returns:
        None. The workbook is written to disk and a message is printed.
    """
    dest = createdir()
    dfGitUsers = pd.read_excel(os.path.join(dest, "GitUsers.xlsx"))
    dfGitUsers.fillna("N/A", inplace=True)
    dfGitGroupMembership = pd.read_excel(os.path.join(dest, "GitGroupMembership.xlsx"))
    dfGitRepoGroupAccess = pd.read_excel(os.path.join(dest, "GitRepoGroupAccess.xlsx"))
    dfGitReposSize = pd.read_excel(os.path.join(dest, "GitReposSize.xlsx"))
    dfGitRepoLastChangeDate = pd.read_excel(os.path.join(dest, "GitRepoLastChangeDate.xlsx"))
    pathdest = path_dir()
    # CM_UsersDetails.xlsx is read from a different directory than the Git reports.
    dfUserDetails = pd.read_excel(rf"{pathdest}\CM_UsersDetails.xlsx")
    dfUserDetails.fillna("N/A", inplace=True)

    # Sheet name -> frame, in the order the sheets appear in the workbook.
    frames = {
        'UserDetails': dfUserDetails,
        'GitUsers': dfGitUsers,
        'GitGroupMembership': dfGitGroupMembership,
        'GitRepoGroupAccess': dfGitRepoGroupAccess,
        'GitReposSize': dfGitReposSize,
        'GitRepoLastChangeDate': dfGitRepoLastChangeDate,
    }

    timestr = time.strftime("%Y-%m-%d-")
    out_path = os.path.join(dest, f'{timestr}Usage-GitAnalysis.xlsx')

    # set_column / add_format / conditional_format are XlsxWriter APIs, so the
    # engine is pinned instead of relying on pandas' default choice. The
    # context manager saves the file even if formatting raises part-way.
    with pd.ExcelWriter(out_path, engine='xlsxwriter') as xlwriter:
        workbook = xlwriter.book

        # Green for the first two header cells.
        cell_format_green = workbook.add_format({'bg_color': '#92D050'})
        cell_format_green.set_bold()
        cell_format_green.set_font_color('black')
        cell_format_green.set_border(1)

        # Light blue for every header cell after them.
        cell_format_blue = workbook.add_format({'bg_color': '#00B0F0'})
        cell_format_blue.set_bold()
        cell_format_blue.set_font_color('black')
        cell_format_blue.set_border(1)

        for sheet_name, frame in frames.items():
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]

            # Size every column from the frame shown on *this* sheet; widths
            # measured on one frame do not fit the columns of another.
            for col_idx, column in enumerate(frame.columns):
                cell_lengths = frame.iloc[:, col_idx].astype(str).map(len)
                longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
                ws.set_column(col_idx, col_idx, max(longest_cell, len(str(column))))

            ws.freeze_panes(1, 0)

            # Row/column numbers are used instead of 'A1:<letter>1' strings so
            # the ranges stay valid beyond column Z, and the blue range is
            # skipped on sheets that have no third column.
            last_col = len(frame.columns) - 1
            if last_col >= 0:
                ws.conditional_format(
                    0, 0, 0, min(last_col, 1),
                    {'type': 'no_blanks', 'format': cell_format_green},
                )
            if last_col >= 2:
                ws.conditional_format(
                    0, 2, 0, last_col,
                    {'type': 'no_blanks', 'format': cell_format_blue},
                )

    print("GitSvnAnalysis.xlsx was exported with success!")
Output :
which gives us the expected output
EDIT :
As we discussed, you want only some columns on specific sheets to be colored green, and all the others blue.
Here is the code for that:
def gitanalysis():
    """Combine the Git usage reports into one workbook with per-sheet header colours.

    Reads the five ``Git*.xlsx`` reports from the directory returned by
    ``createdir()`` and ``CM_UsersDetails.xlsx`` from the directory returned
    by ``path_dir()``, then writes each frame to its own sheet of
    ``<YYYY-MM-DD>-Usage-GitAnalysis.xlsx`` inside the ``createdir()``
    directory. On every sheet the columns are sized to that sheet's own
    content and the header row is frozen. Header cells are filled green
    (#92D050) or light blue (#00B0F0) according to ``header_layout`` below.

    Returns:
        None. The workbook is written to disk and a message is printed.
    """
    dest = createdir()
    dfGitUsers = pd.read_excel(os.path.join(dest, "GitUsers.xlsx"))
    dfGitUsers.fillna("N/A", inplace=True)
    dfGitGroupMembership = pd.read_excel(os.path.join(dest, "GitGroupMembership.xlsx"))
    dfGitRepoGroupAccess = pd.read_excel(os.path.join(dest, "GitRepoGroupAccess.xlsx"))
    dfGitReposSize = pd.read_excel(os.path.join(dest, "GitReposSize.xlsx"))
    dfGitRepoLastChangeDate = pd.read_excel(os.path.join(dest, "GitRepoLastChangeDate.xlsx"))
    pathdest = path_dir()
    # CM_UsersDetails.xlsx is read from a different directory than the Git reports.
    dfUserDetails = pd.read_excel(rf"{pathdest}\CM_UsersDetails.xlsx")
    dfUserDetails.fillna("N/A", inplace=True)

    # Sheet name -> frame, in the order the sheets appear in the workbook.
    frames = {
        'UserDetails': dfUserDetails,
        'GitUsers': dfGitUsers,
        'GitGroupMembership': dfGitGroupMembership,
        'GitRepoGroupAccess': dfGitRepoGroupAccess,
        'GitReposSize': dfGitReposSize,
        'GitRepoLastChangeDate': dfGitRepoLastChangeDate,
    }

    # Header colouring per sheet as (first_col, last_col, colour) spans using
    # zero-based column indexes; last_col=None means "through the last column".
    header_layout = {
        # UserDetails: all columns green
        'UserDetails': [(0, None, 'green')],
        # GitUsers: first two columns green, the rest blue
        'GitUsers': [(0, 1, 'green'), (2, None, 'blue')],
        # GitGroupMembership: first two columns green, the rest blue
        'GitGroupMembership': [(0, 1, 'green'), (2, None, 'blue')],
        # GitRepoGroupAccess: all columns green
        'GitRepoGroupAccess': [(0, None, 'green')],
        # GitReposSize: columns B and C green, A and D onwards blue
        'GitReposSize': [(0, 0, 'blue'), (1, 2, 'green'), (3, None, 'blue')],
        # GitRepoLastChangeDate: column B blue, A and C onwards green
        'GitRepoLastChangeDate': [(0, 0, 'green'), (1, 1, 'blue'), (2, None, 'green')],
    }

    timestr = time.strftime("%Y-%m-%d-")
    out_path = os.path.join(dest, f'{timestr}Usage-GitAnalysis.xlsx')

    # set_column / add_format / conditional_format are XlsxWriter APIs, so the
    # engine is pinned instead of relying on pandas' default choice. The
    # context manager saves the file even if formatting raises part-way.
    with pd.ExcelWriter(out_path, engine='xlsxwriter') as xlwriter:
        workbook = xlwriter.book

        header_formats = {}
        for colour, bg_color in (('green', '#92D050'), ('blue', '#00B0F0')):
            cell_format = workbook.add_format({'bg_color': bg_color})
            cell_format.set_bold()
            cell_format.set_font_color('black')
            cell_format.set_border(1)
            header_formats[colour] = cell_format

        for sheet_name, frame in frames.items():
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]

            # Size every column from the frame shown on *this* sheet; widths
            # measured on one frame do not fit the columns of another.
            for col_idx, column in enumerate(frame.columns):
                cell_lengths = frame.iloc[:, col_idx].astype(str).map(len)
                longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
                ws.set_column(col_idx, col_idx, max(longest_cell, len(str(column))))

            ws.freeze_panes(1, 0)

            # Row/column numbers are used instead of 'A1:<letter>1' strings so
            # the ranges stay valid beyond column Z; spans that start past the
            # sheet's last column are skipped rather than written reversed.
            last_col = len(frame.columns) - 1
            for first, last, colour in header_layout[sheet_name]:
                span_end = last_col if last is None else min(last, last_col)
                if first <= span_end:
                    ws.conditional_format(
                        0, first, 0, span_end,
                        {'type': 'no_blanks', 'format': header_formats[colour]},
                    )

    print("GitSvnAnalysis.xlsx was exported with success!")
which will give you the expected output :

Pandas Freeze column names

I want to freeze the column names so that, when I'm scrolling down through large files, the names of the columns are always visible.
This is my script, where I'm also creating the Excel file with multiple Excel files as sheets.
import numpy as np
import pandas as pd
from timestampdirectory import createdir
import openpyxl
import xlsxwriter
from openpyxl import workbook
from openpyxl import worksheet
import os
import time
# Combine the SVN usage reports into a single workbook, one sheet per report,
# then print a confirmation message.
def svnanalysis():
# The five SVN reports are read from the directory returned by createdir().
dest = createdir()
dfSvnUsers = pd.read_excel(os.path.join(dest, "SvnUsers.xlsx"))
dfSvnUsers.fillna("N/A", inplace=True)
dfSvnGroupMembership = pd.read_excel(os.path.join(dest, "SvnGroupMembership.xlsx"))
dfSvnRepoGroupAccess = pd.read_excel(os.path.join(dest, "SvnRepoGroupAccess.xlsx"))
dfsvnReposSize = pd.read_excel(os.path.join(dest, "svnReposSize.xlsx"))
dfsvnRepoLastChangeDate = pd.read_excel(os.path.join(dest, "svnRepoLastChangeDate.xlsx"))
# NOTE(review): the user-details workbook is read from a hard-coded absolute path.
dfUserDetails = pd.read_excel(r"D:\GIT-files\Automate-Stats\SVN_sample_files\CM_UsersDetails.xlsx")
dfUserDetails.fillna("N/A", inplace=True)
# The output file name is prefixed with today's date (YYYY-MM-DD-).
timestr = time.strftime("%Y-%m-%d-")
# NOTE(review): no engine is passed, while set_column() is called on the sheets
# below — presumably this relies on XlsxWriter being the engine pandas picks; confirm.
xlwriter = pd.ExcelWriter(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'))
# One sheet per frame, written without the index column.
dfUserDetails.to_excel(xlwriter, sheet_name='UserDetails',index = False)
dfSvnUsers.to_excel(xlwriter, sheet_name='SvnUsers', index = False )
dfSvnGroupMembership.to_excel(xlwriter, sheet_name='SvnGroupMembership', index = False )
dfSvnRepoGroupAccess.to_excel(xlwriter, sheet_name='SvnRepoGroupAccess', index = False)
dfsvnReposSize.to_excel(xlwriter, sheet_name='svnReposSize', index = False)
dfsvnRepoLastChangeDate.to_excel(xlwriter, sheet_name='svnRepoLastChangeDate',index= False)
# Column widths are measured on dfSvnUsers only (longest cell text or header,
# whichever is larger) and the same width is then applied to the column with
# the same index on every sheet.
for column in dfSvnUsers:
column_width = max(dfSvnUsers[column].astype(str).map(len).max(), len(column))
col_idx = dfSvnUsers.columns.get_loc(column)
xlwriter.sheets['SvnUsers'].set_column(col_idx, col_idx, column_width)
xlwriter.sheets['UserDetails'].set_column(col_idx, col_idx, column_width)
xlwriter.sheets['SvnGroupMembership'].set_column(col_idx, col_idx, column_width)
xlwriter.sheets['SvnRepoGroupAccess'].set_column(col_idx, col_idx, column_width)
xlwriter.sheets['svnReposSize'].set_column(col_idx, col_idx, column_width)
xlwriter.sheets['svnRepoLastChangeDate'].set_column(col_idx, col_idx, column_width)
# xlwriter.freeze_columns(1, 0) # # Freeze the first row.
xlwriter.close()
# The commented-out lines below re-read the exported file and build an HTML
# rendering with CSS rules; nothing in them writes back to the .xlsx file.
#usage = pd.read_excel(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'))
#usage.style.set_table_styles([
# {'selector': 'thead th', 'props': 'position: sticky; top:0; background-color:red;'},
# {'selector': 'tbody th', 'props': 'position: sticky; left:0; background-color:green;'}
#]).to_html()
print("UsageSvnAnalysis.xlsx a fost exportat cu succes continand ca sheet toate xlsx anterioare")
svnanalysis()
The commented-out lines (starting with "#") at the end of the script are what I tried. With the table_styles part everything runs, but it does not freeze or change the color of the first row (the column names) of each sheet.
This is the exported Excel:
Basically, when I'm scrolling down, the column names should always be visible and have a "blue" background, but as I said, the commented-out code runs without being applied, and I don't know why.
# NOTE(review): the loop variable is never used in the body — every iteration
# fetches the 'SvnUsers' sheet, so only that sheet gets its panes frozen.
for dfSvnUsers in xlwriter.sheets:
ws = xlwriter.sheets['SvnUsers']
# Freeze everything above row 1, i.e. keep the header row visible.
ws.freeze_panes(1, 0)
# NOTE(review): the selectors/props below are CSS and the chain ends in
# to_html(); this looks like the pandas Styler API rather than a worksheet
# method, which would presumably explain why no colour reaches the Excel
# file — confirm.
ws.style.set_table_styles([
{'selector': 'thead th', 'props': 'position: sticky; top:0; background-color:red;'},
{'selector': 'tbody th', 'props': 'position: sticky; left:0; background-color:green;'}
]).to_html()
I updated the script; it now works and freezes the first row with the column names for the specific sheet, but I also tried to change the background color of that row to "red" and that didn't work.

try:
# Write the frames through a context manager so the workbook is saved when the
# block exits, then freeze the header row of every sheet that was written.
with pd.ExcelWriter(Your_FilePath, engine='xlsxwriter') as writer:
    # Pass the `writer` bound by the with-statement to every to_excel call.
    dfUserDetails.to_excel(writer, sheet_name='UserDetails', index=False)
    # etc etc ...
    for ws in writer.sheets.values():
        # Keep row 0 (the column names) visible while scrolling.
        ws.freeze_panes(1, 0)
print("UsageSvnAnalysis.xlsx a fost exportat cu succes continand ca sheet toate xlsx anterioare")
svnanalysis()

Coloring Columns with Pandas

I am trying to color columns in group_1, but I am getting this issue, any solution?
group_1 = [award, 'mean', 'max']
def change_color(workbook_param, color_hex_code):
    """Build a header cell format filled with the given colour.

    ``workbook_param`` is the object whose ``add_format`` is called and
    ``color_hex_code`` is passed through as the ``fg_color`` property. The
    other properties are fixed: bold, wrapped text, top alignment and a
    border of 1. Returns whatever ``add_format`` returns.
    """
    format_properties = dict(
        bold=True,
        text_wrap=True,
        valign='top',
        fg_color=color_hex_code,
        border=1,
    )
    return workbook_param.add_format(format_properties)
group_1_data = describe_df[columns= group_1].copy()
group_1_ind = [describe_df.columns.to_list().index(patient) for patient in group_1]
group_1_ind # [0, 3, 4]
group_1_data = describe_df[columns= group_1].copy()
^
SyntaxError: invalid syntax
the group_1_data gets inputted into here below
# Combining dataFrames in excel
# The output file name is the text typed by the user plus the '.xlsx' suffix.
excelName = input("Label your excel file: ")
xlsxAUTO = '.xlsx'
excelAutoNamed = excelName + xlsxAUTO
# Create a Pandas Excel writer.
writer = pd.ExcelWriter(excelAutoNamed,engine='xlsxwriter')
# Convert the dataframe to an XlsxWriter Excel object.
describe_df.to_excel(writer,sheet_name='Validation',startrow=0 , startcol=0)
# NOTE(review): the second frame goes on the same sheet, and its start row is
# taken from the number of columns of df, not from the number of rows already
# written for describe_df — confirm the two tables cannot overlap.
df.to_excel(writer,sheet_name='Validation',startrow=len(df.columns), startcol=0)
# Get the xlsxwriter workbook and worksheet objects. You can change sheet name as well.
workbook = writer.book
worksheet = writer.sheets['Validation']
# Do the modifications for desired columns.
# Each selected header cell in row 0 is rewritten with a coloured format. The
# column is shifted by one, presumably to skip the index column that to_excel
# writes first — verify. change_color() is called once per cell, so a new
# format is requested from the workbook on every iteration.
for col_num, value in zip(group_1_ind, group_1_data.columns.values):
worksheet.write(0, col_num + 1, value, change_color(workbook, '#e3fc03'))
writer.save()
I have also tried group_1_data = describe_df[group_1].copy()

Why if_sheet_exists='replace' creating new excel sheet. while df exporting to a existing excel sheet

I'm trying to export a df to an Excel file without affecting the other sheets, but the code below creates a new sheet instead of replacing the existing one. Can anyone please help with this?
Here is my code:
import pandas as pd
import openpyxl
# Read the 'test' sheet of the daily-level workbook.
df1 = pd.read_excel(r'D:\FY 22 - 23\Apr - 22\Daily Sales\Daily_level_data.xlsx',sheet_name = 'test')
# Open the existing master workbook in append mode with the openpyxl engine.
# NOTE(review): the keyword below is spelled `on_sheet_exists`; the pandas
# ExcelWriter documentation names this option `if_sheet_exists` — confirm.
with pd.ExcelWriter(r'D:\FY 22 - 23\Apr - 22\Daily Sales\Daily Sales Master_test.xlsx', mode="a",
engine="openpyxl", on_sheet_exists ="replace") as writer:
df1.to_excel(writer, sheet_name="Day_level")
print('data imported in daily sales master - sucessfully')

Well looking at the documentation, it should be if_sheet_exists not on_sheet_exists. However, it still doesn't work correctly.
You could just pass the sheets to the writer with writer.sheets. Sort of annoying, but it works:
import pandas as pd
import openpyxl
# Load the target workbook up front so that its existing sheets can be handed
# to the pandas writer.
book = openpyxl.load_workbook(r'D:\FY 22 - 23\Apr - 22\Daily Sales\Daily Sales Master_test1.xlsx')
df1 = pd.read_excel(r'D:\FY 22 - 23\Apr - 22\Daily Sales\Daily_level_data.xlsx',sheet_name = 'test')
with pd.ExcelWriter(r'D:\FY 22 - 23\Apr - 22\Daily Sales\Daily Sales Master_test1.xlsx', mode="a",
engine="openpyxl", ) as writer:
# Attach the loaded workbook and register its sheets by title *before*
# writing; the order of these three statements matters.
# NOTE(review): this relies on writer.book and writer.sheets being assignable;
# newer pandas releases may expose them as read-only — verify against the
# installed version.
writer.book = book
writer.sheets = {ws.title:ws for ws in book.worksheets}
df1.to_excel(writer, sheet_name="Day_level")
print('data imported in daily sales master - sucessfully')

Looping through a spreadsheet and plotting all columns with xlsxwriter, Pandas

I bet I'm really close here. I'm trying to look at spreadsheets with potentially hundreds of columns and create a plot in a new spreadsheet for each column on the fly. I've got a few of these working where I simply make multiple calls to chart1, chart2, chart3, ...
What I have below loops fine, inserts the data in the first sheet, creates the second sheet and inserts only the first chart. How do I write the loop to create "n" charts?
I bet it's a silly trivial thing.
Thanks in advance.
Input Data:
https://drive.google.com/open?id=1sts5axnT7aQ04zHv8nPwhnrQPDb7oZlV
import pandas as pd
import codecs
import csv
import os
import xlsxwriter
import datetime
# Load the CSV with its first column as the index and write it to the first
# sheet of TEST.xlsx, starting at row 4 / column 0 (zero-based).
# NOTE(review): the indentation of this snippet is not preserved, so which of
# the statements after the `for` line belong to the loop body cannot be
# determined from here.
df3 = pd.read_csv('TEST.csv', index_col=0, header=[0], low_memory=False, na_filter=True, encoding='utf-8')
writer2 = pd.ExcelWriter('TEST.xlsx', engine='xlsxwriter')
#define the sheetname
stage = 10
sheetname1 = "Stage_" +str(stage)
print(sheetname1)
df3.to_excel(writer2, sheet_name = sheetname1 , startrow=4, startcol=0, encoding='utf8')
# maxcol / maxlen are the last zero-based column / row positions of df3.
maxcol = df3.shape[1]-1
maxlen = df3.shape[0]-1
print(maxcol, maxlen)
#set the workbook value
c = df3[['TWO']]
workbook = writer2.book
#set the worksheet value
# The second sheet holds only column 'TWO'; `worksheet` below refers to this
# second sheet, which is where the charts are inserted.
sheetname2 = "Stage_" +str(stage) +str("_P")
c.to_excel(writer2, sheet_name = sheetname2 , startrow=4, startcol=0, encoding='utf8')
print(sheetname2)
worksheet = writer2.sheets[ sheetname2 ]
# range(2, maxcol) runs from index 2 up to shape[1]-2, so the first two
# columns and the last column of df3 are never visited.
for i in range(2, maxcol):
TITLE = df3.columns[i]
# Anchor cells are C1, C35, C69, ... (34 rows apart).
LOC_NUM = (i - 1) * 34 - 33
LOCATION = "C" +str(LOC_NUM)
print("TITLE:", TITLE, "GRAPH_LOCATION:", LOC_NUM,LOCATION, "LENGTH:", maxlen, "INDICE:", i)
chart = workbook.add_chart({'type': 'line'})
chart.set_size({'width': 1200, 'height': 640})
# NOTE(review): the list is presumably [sheet, first_row, first_col, last_row,
# last_col]. No index=False was passed to to_excel, so the index presumably
# occupies sheet column 0 and df3.columns[i] sits in sheet column i+1, one to
# the right of the column charted here. The data also starts at startrow=4, so
# rows 7..maxlen may not span all data rows — confirm both offsets.
chart.add_series({"values" : [ sheetname1 , 7, i ,maxlen, i ],"name" : TITLE })
chart.set_x_axis({'name': 'Time (s)', 'position_axis': 'on_tick'})
chart.set_y_axis({'name': 'Test', 'major_gridlines': {'visible': False}})
# Turn off chart legend. It is on by default in Excel.
chart.set_legend({'position': 'none'})
worksheet.insert_chart( LOCATION , chart )
writer2.save()

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Formatting Headers of dataframes with openpyxl - python

Related

pandas change column color of different sheets

Pandas Freeze column names

Coloring Columns with Pandas

Why if_sheet_exists='replace' creating new excel sheet. while df exporting to a existing excel sheet

Looping through a spreadsheet and plotting all columns with xlsxwriter, Pandas

Categories

Resources