I have a script that reads multiple Excel files and puts them into a final Excel workbook as sheets.
I also have a function that fills the background of the column names with blue for all sheets in the workbook, but I want green for some columns and blue for others on specific sheets. Is it possible to do that?
This is my script:
def _fit_column_widths(worksheet, frame):
    """Widen each column of *worksheet* to fit the longest value in *frame*.

    The width is the larger of the longest stringified cell and the header
    text, so every sheet is sized from its own data.
    """
    for col_idx, header in enumerate(frame.columns):
        # Positional access stays correct even when header names repeat.
        cell_lengths = frame.iloc[:, col_idx].astype(str).map(len)
        # An empty frame has no cells; fall back to the header length alone.
        longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
        worksheet.set_column(col_idx, col_idx, max(longest_cell, len(str(header))))


def gitanalysis():
    """Merge the Git report workbooks into one multi-sheet Excel file.

    Reads the individual ``*.xlsx`` reports from the directory returned by
    ``createdir()`` and ``CM_UsersDetails.xlsx`` from the directory returned
    by ``path_dir()``, writes each one as a sheet of
    ``<YYYY-MM-DD>-Usage-GitAnalysis.xlsx``, sizes the columns, freezes the
    header row and gives every non-blank header cell a blue background.
    """
    dest = createdir()
    dfGitUsers = pd.read_excel(os.path.join(dest, "GitUsers.xlsx"))
    dfGitUsers.fillna("N/A", inplace=True)
    dfGitGroupMembership = pd.read_excel(os.path.join(dest, "GitGroupMembership.xlsx"))
    dfGitRepoGroupAccess = pd.read_excel(os.path.join(dest, "GitRepoGroupAccess.xlsx"))
    dfGitReposSize = pd.read_excel(os.path.join(dest, "GitReposSize.xlsx"))
    dfGitRepoLastChangeDate = pd.read_excel(os.path.join(dest, "GitRepoLastChangeDate.xlsx"))
    pathdest = path_dir()
    # CM_UsersDetails.xlsx is read from its own directory and added as a sheet.
    dfUserDetails = pd.read_excel(rf"{pathdest}\CM_UsersDetails.xlsx")
    dfUserDetails.fillna("N/A", inplace=True)

    # Sheet order in the final workbook.
    sheets = [
        ('UserDetails', dfUserDetails),
        ('GitUsers', dfGitUsers),
        ('GitGroupMembership', dfGitGroupMembership),
        ('GitRepoGroupAccess', dfGitRepoGroupAccess),
        ('GitReposSize', dfGitReposSize),
        ('GitRepoLastChangeDate', dfGitRepoLastChangeDate),
    ]

    timestr = time.strftime("%Y-%m-%d-")
    output_path = os.path.join(dest, f'{timestr}Usage-GitAnalysis.xlsx')
    # set_column / conditional_format are XlsxWriter calls, so pin the engine;
    # the context manager closes the file even if a sheet fails to write.
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as xlwriter:
        header_format = xlwriter.book.add_format({
            'bg_color': 'blue',
            'bold': True,
            'font_color': 'black',
            'border': 1,
        })
        for sheet_name, frame in sheets:
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]
            _fit_column_widths(ws, frame)
            ws.freeze_panes(1, 0)
            last_col = len(frame.columns) - 1
            if last_col >= 0:
                # Numeric (row, col) bounds work past column Z, unlike
                # building the letter with chr(65 + n).
                ws.conditional_format(
                    0, 0, 0, last_col,
                    {'type': 'no_blanks', 'format': header_format},
                )
    print(f"{os.path.basename(output_path)} was exported with success!")
This is how its look:
And this is the expected output for this sheet:
IIUC, for all your sheets you want the first two columns of the header row to be green and the remaining ones to be blue.
You can try the following code to color the header row, where #92D050 corresponds to green and #00B0F0 corresponds to light blue.
def _fit_column_widths(worksheet, frame):
    """Widen each column of *worksheet* to fit the longest value in *frame*.

    The width is the larger of the longest stringified cell and the header
    text, so every sheet is sized from its own data.
    """
    for col_idx, header in enumerate(frame.columns):
        # Positional access stays correct even when header names repeat.
        cell_lengths = frame.iloc[:, col_idx].astype(str).map(len)
        # An empty frame has no cells; fall back to the header length alone.
        longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
        worksheet.set_column(col_idx, col_idx, max(longest_cell, len(str(header))))


def gitanalysis():
    """Merge the Git report workbooks into one multi-sheet Excel file.

    Each report becomes a sheet of ``<YYYY-MM-DD>-Usage-GitAnalysis.xlsx``.
    On every sheet the header row is frozen, its first two cells are filled
    green (#92D050) and the remaining header cells light blue (#00B0F0).
    """
    dest = createdir()
    dfGitUsers = pd.read_excel(os.path.join(dest, "GitUsers.xlsx"))
    dfGitUsers.fillna("N/A", inplace=True)
    dfGitGroupMembership = pd.read_excel(os.path.join(dest, "GitGroupMembership.xlsx"))
    dfGitRepoGroupAccess = pd.read_excel(os.path.join(dest, "GitRepoGroupAccess.xlsx"))
    dfGitReposSize = pd.read_excel(os.path.join(dest, "GitReposSize.xlsx"))
    dfGitRepoLastChangeDate = pd.read_excel(os.path.join(dest, "GitRepoLastChangeDate.xlsx"))
    pathdest = path_dir()
    # CM_UsersDetails.xlsx is read from its own directory and added as a sheet.
    dfUserDetails = pd.read_excel(rf"{pathdest}\CM_UsersDetails.xlsx")
    dfUserDetails.fillna("N/A", inplace=True)

    # Sheet order in the final workbook.
    sheets = [
        ('UserDetails', dfUserDetails),
        ('GitUsers', dfGitUsers),
        ('GitGroupMembership', dfGitGroupMembership),
        ('GitRepoGroupAccess', dfGitRepoGroupAccess),
        ('GitReposSize', dfGitReposSize),
        ('GitRepoLastChangeDate', dfGitRepoLastChangeDate),
    ]

    timestr = time.strftime("%Y-%m-%d-")
    output_path = os.path.join(dest, f'{timestr}Usage-GitAnalysis.xlsx')
    # set_column / conditional_format are XlsxWriter calls, so pin the engine;
    # the context manager closes the file even if a sheet fails to write.
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as xlwriter:
        workbook = xlwriter.book
        shared_props = {'bold': True, 'font_color': 'black', 'border': 1}
        # Green for the first two header cells.
        cell_format_green = workbook.add_format({'bg_color': '#92D050', **shared_props})
        # Blue for the remaining header cells.
        cell_format_blue = workbook.add_format({'bg_color': '#00B0F0', **shared_props})

        for sheet_name, frame in sheets:
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]
            _fit_column_widths(ws, frame)
            ws.freeze_panes(1, 0)

            last_col = len(frame.columns) - 1
            if last_col < 0:
                continue
            # Numeric (row, col) bounds work past column Z, unlike building
            # the letter with chr(65 + n).
            ws.conditional_format(
                0, 0, 0, min(1, last_col),
                {'type': 'no_blanks', 'format': cell_format_green},
            )
            # Only add the blue range when a third column exists; otherwise
            # the range would run backwards (e.g. C1:B1).
            if last_col >= 2:
                ws.conditional_format(
                    0, 2, 0, last_col,
                    {'type': 'no_blanks', 'format': cell_format_blue},
                )
    print(f"{os.path.basename(output_path)} was exported with success!")
Output :
which gives us the expected output
EDIT :
As we discussed, you want only some columns on specific sheets to be colored green, and all the other columns blue.
Here is the code for that:
def _fit_column_widths(worksheet, frame):
    """Widen each column of *worksheet* to fit the longest value in *frame*.

    The width is the larger of the longest stringified cell and the header
    text, so every sheet is sized from its own data.
    """
    for col_idx, header in enumerate(frame.columns):
        # Positional access stays correct even when header names repeat.
        cell_lengths = frame.iloc[:, col_idx].astype(str).map(len)
        # An empty frame has no cells; fall back to the header length alone.
        longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
        worksheet.set_column(col_idx, col_idx, max(longest_cell, len(str(header))))


def gitanalysis():
    """Merge the Git report workbooks into one multi-sheet Excel file.

    Each report becomes a sheet of ``<YYYY-MM-DD>-Usage-GitAnalysis.xlsx``
    with a frozen header row.  Header cells are filled green (#92D050) or
    light blue (#00B0F0) according to a per-sheet rule:

    * UserDetails, GitRepoGroupAccess: every column green
    * GitUsers, GitGroupMembership: first two columns green, rest blue
    * GitReposSize: second and third columns green, rest blue
    * GitRepoLastChangeDate: second column blue, every other column green
    """
    dest = createdir()
    dfGitUsers = pd.read_excel(os.path.join(dest, "GitUsers.xlsx"))
    dfGitUsers.fillna("N/A", inplace=True)
    dfGitGroupMembership = pd.read_excel(os.path.join(dest, "GitGroupMembership.xlsx"))
    dfGitRepoGroupAccess = pd.read_excel(os.path.join(dest, "GitRepoGroupAccess.xlsx"))
    dfGitReposSize = pd.read_excel(os.path.join(dest, "GitReposSize.xlsx"))
    dfGitRepoLastChangeDate = pd.read_excel(os.path.join(dest, "GitRepoLastChangeDate.xlsx"))
    pathdest = path_dir()
    # CM_UsersDetails.xlsx is read from its own directory and added as a sheet.
    dfUserDetails = pd.read_excel(rf"{pathdest}\CM_UsersDetails.xlsx")
    dfUserDetails.fillna("N/A", inplace=True)

    # (sheet name, data, rule) in workbook order.  The rule receives a
    # zero-based column index and returns True when that header cell is green.
    sheets = [
        ('UserDetails', dfUserDetails, lambda col: True),
        ('GitUsers', dfGitUsers, lambda col: col < 2),
        ('GitGroupMembership', dfGitGroupMembership, lambda col: col < 2),
        ('GitRepoGroupAccess', dfGitRepoGroupAccess, lambda col: True),
        ('GitReposSize', dfGitReposSize, lambda col: col in (1, 2)),
        ('GitRepoLastChangeDate', dfGitRepoLastChangeDate, lambda col: col != 1),
    ]

    timestr = time.strftime("%Y-%m-%d-")
    output_path = os.path.join(dest, f'{timestr}Usage-GitAnalysis.xlsx')
    # set_column / conditional_format are XlsxWriter calls, so pin the engine;
    # the context manager closes the file even if a sheet fails to write.
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as xlwriter:
        workbook = xlwriter.book
        shared_props = {'bold': True, 'font_color': 'black', 'border': 1}
        cell_format_green = workbook.add_format({'bg_color': '#92D050', **shared_props})
        cell_format_blue = workbook.add_format({'bg_color': '#00B0F0', **shared_props})

        for sheet_name, frame, is_green in sheets:
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]
            _fit_column_widths(ws, frame)
            ws.freeze_panes(1, 0)
            # One rule per header cell, addressed numerically so sheets wider
            # than 26 columns (past 'Z') are handled correctly.
            for col_idx in range(len(frame.columns)):
                header_format = cell_format_green if is_green(col_idx) else cell_format_blue
                ws.conditional_format(
                    0, col_idx, 0, col_idx,
                    {'type': 'no_blanks', 'format': header_format},
                )
    print(f"{os.path.basename(output_path)} was exported with success!")
which will give you the expected output :
Related
I need to set a table header in Excel with the next date format: 'mmm-yy'.
Formatting I've set:
# Cell format meant for the date column headers: wrapped text, 11 pt font and
# the 'mmm-yy' number format.
title_date_format = workbook.add_format({
'text_wrap': True,
'font_size': 11,
'num_format': 'mmm-yy'
})
Column settings:
# Build one settings dict per DataFrame column for worksheet.add_table().
column_settings = []
for index, column in enumerate(df.columns):
    # Every column carries its header text.
    dct = {'header': column}
    if index >= 3:
        # From the fourth column on, the column is calculated and its header
        # uses the date format.
        formula = '=[#[Value]]*[#Qty]'
        dct['formula'] = formula
        dct['header_format'] = title_date_format
    column_settings.append(dct)
Table creation:
# Create a table from cell (0, 0) to (max_row + 2, max_col - 1), with each
# column configured by the matching entry of column_settings.
worksheet.add_table(0, 0, max_row + 2, max_col - 1, {
'columns': column_settings
})
The problem is that only the 'text_wrap' and 'font_size' sub-properties work fine. The column header, which is a date, stays in '1/24/2022' format instead of 'Jan-22', so 'num_format': 'mmm-yy' is not applied.
Full example:
import datetime as dt
import pandas as pd
import numpy as np
import xlsxwriter
# Two descriptive columns; the month columns are appended below.
initial_data = {
    'Category': ['catA', 'catB', 'catC', 'catC'],
    'Item': ['item1', 'item2', 'item3', 'item4']
}
df = pd.DataFrame(initial_data)

# Add columns with month-year: 11/1/2022-12/1/2022, then 1/1/2023-3/1/2023.
for year in range(2, 4):
    months = range(11, 13) if year == 2 else range(1, 4)
    for month in months:
        date_str = str(month) + '/1/202' + str(year)
        df[date_str] = ''

# The context manager saves and closes the workbook on exit; it replaces the
# explicit writer.save() call, which current pandas releases no longer provide.
with pd.ExcelWriter('test.xlsx', engine='xlsxwriter') as writer:
    # Leave row 0 free: the table created below writes its own header row.
    df.to_excel(writer, sheet_name='Sheet1', header=False, startrow=1, index=False)

    workbook = writer.book
    worksheet = writer.sheets['Sheet1']

    title_date_format = workbook.add_format({
        'text_wrap': True,
        'font_name': 'Calibri',
        'font_size': 10,
        'num_format': 'mmm-yy'
    })

    # Every table column gets its header text and the shared header format.
    column_settings = [
        {'header': column, 'header_format': title_date_format}
        for column in df.columns
    ]

    (max_row, max_col) = df.shape
    worksheet.add_table(0, 0, max_row, max_col - 1, {
        'columns': column_settings,
        'style': 'Table Style Light 9'
    })
Any ideas on how to make it work?
Thank you
The issue is that the column headers are strings and the date number format only applies to numbers. So the solution would be to turn the column headers into datetime numbers so that the format can be applied. However, as far as I can see Table column headers in Excel need to be strings, so that isn't an option.
So as a workaround you could format the header strings that you are currently using into the format that you want:
# ...
from datetime import datetime

# ...
# Build each header directly as 'Mon-yy' text (e.g. 'Nov-22'), because a table
# header is a string and a number format cannot reformat it.
# NOTE: '%b' produces the month abbreviation of the current locale.
for year in range(2, 4):
    # Months 11-12 belong to 2022 and months 1-3 to 2023, matching the
    # '202' + str(year) dates built in the question.
    months = range(11, 13) if year == 2 else range(1, 4)
    for month in months:
        date_str = datetime(2020 + year, month, 1).strftime("%b-%y")
        df[date_str] = ''
Output:
I want to freeze the column names so that, when I scroll down in large files, the names of the columns are always visible.
This is my script, in which I also create the Excel file that contains multiple Excel files as sheets.
import numpy as np
import pandas as pd
from timestampdirectory import createdir
import openpyxl
import xlsxwriter
from openpyxl import workbook
from openpyxl import worksheet
import os
import time
def _fit_column_widths(worksheet, frame):
    """Widen each column of *worksheet* to fit the longest value in *frame*.

    The width is the larger of the longest stringified cell and the header
    text, so every sheet is sized from its own data.
    """
    for col_idx, header in enumerate(frame.columns):
        # Positional access stays correct even when header names repeat.
        cell_lengths = frame.iloc[:, col_idx].astype(str).map(len)
        # An empty frame has no cells; fall back to the header length alone.
        longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
        worksheet.set_column(col_idx, col_idx, max(longest_cell, len(str(header))))


def svnanalysis():
    """Merge the SVN report workbooks into one multi-sheet Excel file.

    Reads the individual ``*.xlsx`` reports from the directory returned by
    ``createdir()`` plus the fixed ``CM_UsersDetails.xlsx`` file, writes each
    one as a sheet of ``<YYYY-MM-DD>-Usage-SvnAnalysis.xlsx`` and sizes every
    sheet's columns from that sheet's own data.
    """
    dest = createdir()
    dfSvnUsers = pd.read_excel(os.path.join(dest, "SvnUsers.xlsx"))
    dfSvnUsers.fillna("N/A", inplace=True)
    dfSvnGroupMembership = pd.read_excel(os.path.join(dest, "SvnGroupMembership.xlsx"))
    dfSvnRepoGroupAccess = pd.read_excel(os.path.join(dest, "SvnRepoGroupAccess.xlsx"))
    dfsvnReposSize = pd.read_excel(os.path.join(dest, "svnReposSize.xlsx"))
    dfsvnRepoLastChangeDate = pd.read_excel(os.path.join(dest, "svnRepoLastChangeDate.xlsx"))
    dfUserDetails = pd.read_excel(r"D:\GIT-files\Automate-Stats\SVN_sample_files\CM_UsersDetails.xlsx")
    dfUserDetails.fillna("N/A", inplace=True)

    # Sheet order in the final workbook.
    sheets = [
        ('UserDetails', dfUserDetails),
        ('SvnUsers', dfSvnUsers),
        ('SvnGroupMembership', dfSvnGroupMembership),
        ('SvnRepoGroupAccess', dfSvnRepoGroupAccess),
        ('svnReposSize', dfsvnReposSize),
        ('svnRepoLastChangeDate', dfsvnRepoLastChangeDate),
    ]

    timestr = time.strftime("%Y-%m-%d-")
    output_path = os.path.join(dest, f'{timestr}Usage-SvnAnalysis.xlsx')
    # set_column is an XlsxWriter call, so pin the engine; the context manager
    # closes the file even if a sheet fails to write.
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as xlwriter:
        for sheet_name, frame in sheets:
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            _fit_column_widths(xlwriter.sheets[sheet_name], frame)
        # Earlier attempt at freezing the header row, kept for reference:
        # xlwriter.freeze_columns(1, 0) # # Freeze the first row.

    # Earlier attempt at styling through the pandas Styler, kept for reference:
    #usage = pd.read_excel(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'))
    #usage.style.set_table_styles([
    # {'selector': 'thead th', 'props': 'position: sticky; top:0; background-color:red;'},
    # {'selector': 'tbody th', 'props': 'position: sticky; left:0; background-color:green;'}
    #]).to_html()
    print("UsageSvnAnalysis.xlsx a fost exportat cu succes continand ca sheet toate xlsx anterioare")


svnanalysis()
At the end of the script, the lines commented out with "#" are what I tried. With the table_styles part everything runs, but it neither freezes the first row nor changes the color of the first row of each column (the column names).
This is the exported Excel:
Basically, when I scroll down, the column names should always be visible and have a "blue" background, but, as I said, with the commented-out lines everything runs without being applied, and I don't know why.
# Attempt to freeze the header row and recolor it.
# NOTE(review): the loop variable is never used — every iteration looks up the
# 'SvnUsers' sheet, so only that one sheet is touched.
for dfSvnUsers in xlwriter.sheets:
ws = xlwriter.sheets['SvnUsers']
# Freeze panes at row 1, column 0.
ws.freeze_panes(1, 0)
# NOTE(review): this chain ends in to_html() and uses CSS selectors; presumably
# it does not colour cells in the written .xlsx, and `ws.style` may not exist
# on a worksheet object — confirm.
ws.style.set_table_styles([
{'selector': 'thead th', 'props': 'position: sticky; top:0; background-color:red;'},
{'selector': 'tbody th', 'props': 'position: sticky; left:0; background-color:green;'}
]).to_html()
I updated the script; it now works and freezes the first row with the column names for the specific sheet, but I also tried to change the background color of that row to "red" and that didn't work.
try:
# The context manager saves and closes the workbook when the block ends, so
# all sheet writes and formatting must happen inside it.
with pd.ExcelWriter(Your_FilePath, engine='xlsxwriter') as writer:
    # Write through the `writer` opened by this `with` statement.
    dfUserDetails.to_excel(writer, sheet_name='UserDetails', index=False)
    # etc etc ...
    # Freeze the header row on every sheet written above.
    for ws in writer.sheets.values():
        ws.freeze_panes(1, 0)
print("UsageSvnAnalysis.xlsx a fost exportat cu succes continand ca sheet toate xlsx anterioare")
svnanalysis()
I am trying to color columns in group_1, but I am getting this issue, any solution?
group_1 = [award, 'mean', 'max']
def change_color(workbook_param, color_hex_code):
    """Return a header cell format filled with ``color_hex_code``.

    The format is created through ``workbook_param.add_format`` and is bold,
    text-wrapped, top-aligned and bordered.
    """
    format_properties = dict(
        bold=True,
        text_wrap=True,
        valign='top',
        fg_color=color_hex_code,
        border=1,
    )
    return workbook_param.add_format(format_properties)
# NOTE: `columns=` keyword syntax is not valid inside a subscript; this is the
# line the SyntaxError points at.
group_1_data = describe_df[columns= group_1].copy()
# Position of each group_1 entry within describe_df's column list.
group_1_ind = [describe_df.columns.to_list().index(patient) for patient in group_1]
group_1_ind # [0, 3, 4]
group_1_data = describe_df[columns= group_1].copy()
^
SyntaxError: invalid syntax
the group_1_data gets inputted into here below
# Combining dataFrames in excel
excelName = input("Label your excel file: ")
xlsxAUTO = '.xlsx'
excelAutoNamed = excelName + xlsxAUTO
# Create a Pandas Excel writer. The context manager saves and closes the file
# on exit; it replaces writer.save(), which current pandas no longer provides.
with pd.ExcelWriter(excelAutoNamed, engine='xlsxwriter') as writer:
    # Convert the dataframes to an XlsxWriter Excel object.
    describe_df.to_excel(writer, sheet_name='Validation', startrow=0, startcol=0)
    df.to_excel(writer, sheet_name='Validation', startrow=len(df.columns), startcol=0)
    # Get the xlsxwriter workbook and worksheet objects. You can change sheet name as well.
    workbook = writer.book
    worksheet = writer.sheets['Validation']
    # One shared format is enough; it does not change between columns.
    header_format = change_color(workbook, '#e3fc03')
    # Do the modifications for desired columns (+1 skips the index column).
    for col_num, value in zip(group_1_ind, group_1_data.columns.values):
        worksheet.write(0, col_num + 1, value, header_format)
I have also tried group_1_data = describe_df[group_1].copy()
I am currently using the code below to write the first two dataframes into two different sheets of an Excel workbook, and then a loop to select dataframes from a list and write those to a separate sheet. I want to format the headers of the dataframes in the list, so I used:
# Load the existing workbook and attach it to an openpyxl-backed writer.
# NOTE(review): this path has no '/' between `desktop` and the file name,
# unlike the ExcelWriter path on the next line — confirm which is intended.
book=load_workbook(desktop+'test.xlsx')
writer =pd.ExcelWriter(desktop+'/test.xlsx', engine='openpyxl')
writer.book = book
# Register the workbook's existing sheets with the writer, keyed by title.
writer.sheets = {ws.title: ws for ws in book.worksheets}
# Write the first two frames without headers, starting at row 1.
frameg.to_excel(writer,sheet_name='Sheet 1',startrow=1,index = False,header= False)
framed.to_excel(writer,sheet_name='Sheet 2',startrow=1,index=False,header=False )
# Append every frame in ls2 to the 'test' sheet, each one starting at the
# sheet's current max_row + 1 and including its header row.
for df in ls2:
df.to_excel(writer,sheet_name='test',startrow=writer.sheets['test'].max_row+1,header=True,index=False)
# NOTE: this is the call that raises the reported AttributeError — `book` was
# returned by load_workbook() and has no add_format attribute.
header_format = book.add_format({
'bold': True,
'text_wrap': True,
'valign': 'top',
'fg_color': '#5C5C5C',
'font_color':'#ffffff',
'border': 1})
# NOTE(review): `ws` and `df2` are not assigned anywhere in this snippet (the
# `ws` on the writer.sheets line is local to that comprehension) — confirm
# where they come from.
for col_num, value in enumerate(df2.columns.values):
ws.write(0, col_num , value, header_format)
writer.save()
I am getting an AttributeError: 'Workbook' object has no attribute 'add_format'
Any help would be appreciated
I bet I'm really close here. I'm trying to look at spreadsheets with potentially 100's of columns and create a plot in a new spreadsheet for each column on the fly. I've got a few of these working where I simply call multiple calls to chart1,chart2,chart3......
What I have below loops fine, inserts the data in the first sheet, creates the second sheet and inserts only the first chart. How do I write the loop to create "n" charts?
I bet it's a silly trivial thing.
Thanks in advance.
Input Data:
https://drive.google.com/open?id=1sts5axnT7aQ04zHv8nPwhnrQPDb7oZlV
import pandas as pd
import codecs
import csv
import os
import xlsxwriter
import datetime
# Read the input data, using the first CSV column as the index.
df3 = pd.read_csv('TEST.csv', index_col=0, header=[0], low_memory=False, na_filter=True, encoding='utf-8')
writer2 = pd.ExcelWriter('TEST.xlsx', engine='xlsxwriter')
#define the sheetname
stage = 10
sheetname1 = "Stage_" +str(stage)
print(sheetname1)
# Write the full data set to the first sheet, starting at row 4.
df3.to_excel(writer2, sheet_name = sheetname1 , startrow=4, startcol=0, encoding='utf8')
# Column and row counts of the data, each reduced by one.
maxcol = df3.shape[1]-1
maxlen = df3.shape[0]-1
print(maxcol, maxlen)
#set the workbook value
# `c` holds only the 'TWO' column; it is what gets written to the second sheet.
c = df3[['TWO']]
workbook = writer2.book
#set the worksheet value
sheetname2 = "Stage_" +str(stage) +str("_P")
c.to_excel(writer2, sheet_name = sheetname2 , startrow=4, startcol=0, encoding='utf8')
print(sheetname2)
# The charts are inserted into the second sheet.
worksheet = writer2.sheets[ sheetname2 ]
# NOTE(review): indentation was lost in this listing, so which of the
# following lines belong to the loop body cannot be determined here. If the
# chart creation and insert_chart lines sit outside the loop, only one chart
# is produced — confirm against the real script.
# NOTE(review): range(2, maxcol) skips columns 0 and 1 and stops before the
# last column — confirm that is intended.
for i in range(2, maxcol):
TITLE = df3.columns[i]
# Anchor cells are 34 rows apart in column C: C1 for i = 2, C35 for i = 3, ...
LOC_NUM = (i - 1) * 34 - 33
LOCATION = "C" +str(LOC_NUM)
print("TITLE:", TITLE, "GRAPH_LOCATION:", LOC_NUM,LOCATION, "LENGTH:", maxlen, "INDICE:", i)
chart = workbook.add_chart({'type': 'line'})
chart.set_size({'width': 1200, 'height': 640})
# Series values are [sheet, first_row, first_col, last_row, last_col] on the
# first sheet.
# NOTE(review): the data was written with its index in sheet column 0 and
# startrow=4, so sheet column i and rows 7..maxlen may not line up with
# df3.columns[i] — confirm the offsets.
chart.add_series({"values" : [ sheetname1 , 7, i ,maxlen, i ],"name" : TITLE })
chart.set_x_axis({'name': 'Time (s)', 'position_axis': 'on_tick'})
chart.set_y_axis({'name': 'Test', 'major_gridlines': {'visible': False}})
# Turn off chart legend. It is on by default in Excel.
chart.set_legend({'position': 'none'})
worksheet.insert_chart( LOCATION , chart )
# NOTE(review): ExcelWriter.save() and the `encoding` argument of to_excel are
# presumably unavailable in current pandas releases — confirm the target
# pandas version.
writer2.save()