I have a script that reads multiple Excel files and puts them into a final Excel workbook as sheets.
I also have a function that fills the background of the column names with blue for all sheets in the workbook, but I want green for some columns and blue for others on specific sheets. Is it possible to do that?
This is my script:
def gitanalysis():
    """Merge the Git report workbooks into one dated workbook with blue headers.

    Reads the five Git report workbooks from the directory returned by
    ``createdir()`` and ``CM_UsersDetails.xlsx`` from the directory returned
    by ``path_dir()``, then writes each of them as one sheet of
    ``<YYYY-MM-DD>-Usage-GitAnalysis.xlsx`` inside the ``createdir()``
    directory.  On every sheet the header row is frozen, each column is
    sized to its own longest value, and the header cells get a blue
    background.
    """
    dest = createdir()
    dfGitUsers = pd.read_excel(os.path.join(dest, "GitUsers.xlsx"))
    dfGitUsers.fillna("N/A", inplace=True)
    dfGitGroupMembership = pd.read_excel(os.path.join(dest, "GitGroupMembership.xlsx"))
    dfGitRepoGroupAccess = pd.read_excel(os.path.join(dest, "GitRepoGroupAccess.xlsx"))
    dfGitReposSize = pd.read_excel(os.path.join(dest, "GitReposSize.xlsx"))
    dfGitRepoLastChangeDate = pd.read_excel(os.path.join(dest, "GitRepoLastChangeDate.xlsx"))

    pathdest = path_dir()
    # "CM_UsersDetails.xlsx" lives in a different directory than the Git reports.
    dfUserDetails = pd.read_excel(os.path.join(pathdest, "CM_UsersDetails.xlsx"))
    dfUserDetails.fillna("N/A", inplace=True)

    # Sheet name -> frame, in the order the sheets appear in the workbook.
    frames = {
        'UserDetails': dfUserDetails,
        'GitUsers': dfGitUsers,
        'GitGroupMembership': dfGitGroupMembership,
        'GitRepoGroupAccess': dfGitRepoGroupAccess,
        'GitReposSize': dfGitReposSize,
        'GitRepoLastChangeDate': dfGitRepoLastChangeDate,
    }

    timestr = time.strftime("%Y-%m-%d-")
    output_path = os.path.join(dest, f'{timestr}Usage-GitAnalysis.xlsx')

    # set_column/add_format/conditional_format are XlsxWriter APIs, so the
    # engine is requested explicitly; the context manager closes the file
    # even if one of the steps below raises.
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as xlwriter:
        workbook = xlwriter.book
        # THIS IS WHERE THE HEADER BACKGROUND IS SET TO BLUE
        cell_format = workbook.add_format({'bg_color': 'blue'})
        cell_format.set_bold()
        cell_format.set_font_color('black')
        cell_format.set_border(1)

        for sheet_name, frame in frames.items():
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]
            ws.freeze_panes(1, 0)

            # Size every column from the data of *this* sheet.
            for col_idx, column in enumerate(frame.columns):
                longest = frame.iloc[:, col_idx].astype(str).str.len().max()
                header_len = len(str(column))
                # max() of an empty column is NaN; fall back to the header.
                width = header_len if pd.isna(longest) else max(int(longest), header_len)
                ws.set_column(col_idx, col_idx, width)

            # Numeric (row, col) bounds work for any number of columns,
            # unlike a letter built with chr(), which stops at column Z.
            last_col = len(frame.columns) - 1
            if last_col >= 0:
                ws.conditional_format(0, 0, 0, last_col,
                                      {'type': 'no_blanks', 'format': cell_format})

    print(f"{os.path.basename(output_path)} was exported with success!")
This is how its look:
And this is the expected output for this sheet:
IIUC, for all your sheets you want the first two columns of the header row to be green and the remaining ones to be blue.
You can try the following code to color the header row; in the code, #92D050 corresponds to green and #00B0F0 corresponds to light blue.
def gitanalysis():
    """Merge the Git report workbooks into one workbook with green/blue headers.

    Reads the five Git report workbooks from the directory returned by
    ``createdir()`` and ``CM_UsersDetails.xlsx`` from the directory returned
    by ``path_dir()``, then writes each of them as one sheet of
    ``<YYYY-MM-DD>-Usage-GitAnalysis.xlsx`` inside the ``createdir()``
    directory.  On every sheet the header row is frozen, each column is
    sized to its own longest value, the first two header cells are green
    (#92D050) and the remaining header cells are light blue (#00B0F0).
    """
    dest = createdir()
    dfGitUsers = pd.read_excel(os.path.join(dest, "GitUsers.xlsx"))
    dfGitUsers.fillna("N/A", inplace=True)
    dfGitGroupMembership = pd.read_excel(os.path.join(dest, "GitGroupMembership.xlsx"))
    dfGitRepoGroupAccess = pd.read_excel(os.path.join(dest, "GitRepoGroupAccess.xlsx"))
    dfGitReposSize = pd.read_excel(os.path.join(dest, "GitReposSize.xlsx"))
    dfGitRepoLastChangeDate = pd.read_excel(os.path.join(dest, "GitRepoLastChangeDate.xlsx"))

    pathdest = path_dir()
    # "CM_UsersDetails.xlsx" lives in a different directory than the Git reports.
    dfUserDetails = pd.read_excel(os.path.join(pathdest, "CM_UsersDetails.xlsx"))
    dfUserDetails.fillna("N/A", inplace=True)

    # Sheet name -> frame, in the order the sheets appear in the workbook.
    frames = {
        'UserDetails': dfUserDetails,
        'GitUsers': dfGitUsers,
        'GitGroupMembership': dfGitGroupMembership,
        'GitRepoGroupAccess': dfGitRepoGroupAccess,
        'GitReposSize': dfGitReposSize,
        'GitRepoLastChangeDate': dfGitRepoLastChangeDate,
    }

    timestr = time.strftime("%Y-%m-%d-")
    output_path = os.path.join(dest, f'{timestr}Usage-GitAnalysis.xlsx')

    # The formatting calls below are XlsxWriter APIs, so the engine is
    # requested explicitly; the context manager closes the file even if
    # one of the steps raises.
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as xlwriter:
        workbook = xlwriter.book

        # Green color for the first two header cells
        cell_format_green = workbook.add_format({'bg_color': '#92D050'})
        cell_format_green.set_bold()
        cell_format_green.set_font_color('black')
        cell_format_green.set_border(1)

        # Blue color for the remaining header cells
        cell_format_blue = workbook.add_format({'bg_color': '#00B0F0'})
        cell_format_blue.set_bold()
        cell_format_blue.set_font_color('black')
        cell_format_blue.set_border(1)

        for sheet_name, frame in frames.items():
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]
            ws.freeze_panes(1, 0)

            # Size every column from the data of *this* sheet.
            for col_idx, column in enumerate(frame.columns):
                longest = frame.iloc[:, col_idx].astype(str).str.len().max()
                header_len = len(str(column))
                # max() of an empty column is NaN; fall back to the header.
                width = header_len if pd.isna(longest) else max(int(longest), header_len)
                ws.set_column(col_idx, col_idx, width)

            # Numeric (row, col) bounds work for any number of columns,
            # unlike a letter built with chr(), which stops at column Z.
            last_col = len(frame.columns) - 1
            if last_col >= 0:
                ws.conditional_format(0, 0, 0, min(1, last_col),
                                      {'type': 'no_blanks', 'format': cell_format_green})
            # Only add the blue range when a third column actually exists;
            # otherwise the range would be reversed and overlap the green one.
            if last_col >= 2:
                ws.conditional_format(0, 2, 0, last_col,
                                      {'type': 'no_blanks', 'format': cell_format_blue})

    print(f"{os.path.basename(output_path)} was exported with success!")
Output :
which gives us the expected output
EDIT :
As we discussed, you want only some columns on specific sheets to be colored green and all the others blue.
Here is the code for that:
def gitanalysis():
    """Merge the Git report workbooks into one workbook with per-sheet header colors.

    Reads the five Git report workbooks from the directory returned by
    ``createdir()`` and ``CM_UsersDetails.xlsx`` from the directory returned
    by ``path_dir()``, then writes each of them as one sheet of
    ``<YYYY-MM-DD>-Usage-GitAnalysis.xlsx`` inside the ``createdir()``
    directory.  On every sheet the header row is frozen and each column is
    sized to its own longest value.  Header cells are colored green
    (#92D050) or light blue (#00B0F0) according to the per-sheet layout
    table defined in the body.
    """
    dest = createdir()
    dfGitUsers = pd.read_excel(os.path.join(dest, "GitUsers.xlsx"))
    dfGitUsers.fillna("N/A", inplace=True)
    dfGitGroupMembership = pd.read_excel(os.path.join(dest, "GitGroupMembership.xlsx"))
    dfGitRepoGroupAccess = pd.read_excel(os.path.join(dest, "GitRepoGroupAccess.xlsx"))
    dfGitReposSize = pd.read_excel(os.path.join(dest, "GitReposSize.xlsx"))
    dfGitRepoLastChangeDate = pd.read_excel(os.path.join(dest, "GitRepoLastChangeDate.xlsx"))

    pathdest = path_dir()
    # "CM_UsersDetails.xlsx" lives in a different directory than the Git reports.
    dfUserDetails = pd.read_excel(os.path.join(pathdest, "CM_UsersDetails.xlsx"))
    dfUserDetails.fillna("N/A", inplace=True)

    # Sheet name -> frame, in the order the sheets appear in the workbook.
    frames = {
        'UserDetails': dfUserDetails,
        'GitUsers': dfGitUsers,
        'GitGroupMembership': dfGitGroupMembership,
        'GitRepoGroupAccess': dfGitRepoGroupAccess,
        'GitReposSize': dfGitReposSize,
        'GitRepoLastChangeDate': dfGitRepoLastChangeDate,
    }

    timestr = time.strftime("%Y-%m-%d-")
    output_path = os.path.join(dest, f'{timestr}Usage-GitAnalysis.xlsx')

    # The formatting calls below are XlsxWriter APIs, so the engine is
    # requested explicitly; the context manager closes the file even if
    # one of the steps raises.
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as xlwriter:
        workbook = xlwriter.book

        cell_format_green = workbook.add_format({'bg_color': '#92D050'})
        cell_format_green.set_bold()
        cell_format_green.set_font_color('black')
        cell_format_green.set_border(1)

        cell_format_blue = workbook.add_format({'bg_color': '#00B0F0'})
        cell_format_blue.set_bold()
        cell_format_blue.set_font_color('black')
        cell_format_blue.set_border(1)

        # Header layout per sheet as (first_col, last_col, format) segments,
        # 0-based and inclusive; last_col=None means "through the last column".
        header_layout = {
            # all columns green
            'UserDetails': [(0, None, cell_format_green)],
            # first two columns green, the rest blue
            'GitUsers': [(0, 1, cell_format_green), (2, None, cell_format_blue)],
            # first two columns green, the rest blue
            'GitGroupMembership': [(0, 1, cell_format_green), (2, None, cell_format_blue)],
            # all columns green
            'GitRepoGroupAccess': [(0, None, cell_format_green)],
            # second and third columns green, the rest blue
            'GitReposSize': [(0, 0, cell_format_blue),
                             (1, 2, cell_format_green),
                             (3, None, cell_format_blue)],
            # first column green, second blue, third column onward green
            'GitRepoLastChangeDate': [(0, 0, cell_format_green),
                                      (1, 1, cell_format_blue),
                                      (2, None, cell_format_green)],
        }

        for sheet_name, frame in frames.items():
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]
            ws.freeze_panes(1, 0)

            # Size every column from the data of *this* sheet.
            for col_idx, column in enumerate(frame.columns):
                longest = frame.iloc[:, col_idx].astype(str).str.len().max()
                header_len = len(str(column))
                # max() of an empty column is NaN; fall back to the header.
                width = header_len if pd.isna(longest) else max(int(longest), header_len)
                ws.set_column(col_idx, col_idx, width)

            sheet_last_col = len(frame.columns) - 1
            for first_col, last_col, header_format in header_layout[sheet_name]:
                # Clamp each segment to the columns the sheet really has.
                if last_col is None or last_col > sheet_last_col:
                    last_col = sheet_last_col
                if first_col > last_col:
                    continue  # segment lies entirely past the last column
                # Numeric (row, col) bounds work for any number of columns,
                # unlike a letter built with chr(), which stops at column Z.
                ws.conditional_format(0, first_col, 0, last_col,
                                      {'type': 'no_blanks', 'format': header_format})

    print(f"{os.path.basename(output_path)} was exported with success!")
which will give you the expected output :
I am trying to color columns in group_1, but I am getting this issue, any solution?
group_1 = [award, 'mean', 'max']
def change_color(workbook_param, color_hex_code):
    """Create a header cell format filled with ``color_hex_code``.

    The format is bold, wraps text, aligns to the top and has a thin
    border; it is registered on ``workbook_param`` via ``add_format`` and
    the resulting format object is returned.
    """
    format_properties = dict(
        bold=True,
        text_wrap=True,
        valign='top',
        fg_color=color_hex_code,
        border=1,
    )
    return workbook_param.add_format(format_properties)
group_1_data = describe_df[columns= group_1].copy()
group_1_ind = [describe_df.columns.to_list().index(patient) for patient in group_1]
group_1_ind # [0, 3, 4]
group_1_data = describe_df[columns= group_1].copy()
^
SyntaxError: invalid syntax
group_1_data is then used in the code below:
# Combining dataFrames in excel
excelName = input("Label your excel file: ")
xlsxAUTO = '.xlsx'
excelAutoNamed = excelName + xlsxAUTO

# Create a Pandas Excel writer. The context manager saves and closes the
# file on exit; ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter(excelAutoNamed, engine='xlsxwriter') as writer:
    # Convert the dataframes to XlsxWriter Excel objects on the same sheet.
    describe_df.to_excel(writer, sheet_name='Validation', startrow=0, startcol=0)
    # NOTE(review): startrow is the number of *columns* of df, not the number
    # of rows describe_df occupies -- confirm the two tables cannot overlap.
    df.to_excel(writer, sheet_name='Validation', startrow=len(df.columns), startcol=0)

    # Get the xlsxwriter workbook and worksheet objects. You can change sheet name as well.
    workbook = writer.book
    worksheet = writer.sheets['Validation']

    # Do the modifications for desired columns.
    # col_num + 1 because the dataframe index occupies worksheet column 0.
    for col_num, value in zip(group_1_ind, group_1_data.columns.values):
        worksheet.write(0, col_num + 1, value, change_color(workbook, '#e3fc03'))
I have also tried group_1_data = describe_df[group_1].copy()
How can I add borders to whole rows and columns with openpyxl?
I tried:
import openpyxl
from openpyxl.styles import borders
from openpyxl.styles.borders import Border
from openpyxl.styles.colors import Color  # Color is used for border1 below

wb = openpyxl.Workbook()
ws = wb.active

# Thin line on one side, no line on the others.
border1 = borders.Side(style = None, color = Color(indexed = 0), border_style = 'thin')
border0 = borders.Side(style = None, color = None, border_style = None)
thin = Border(left = border1, right = border0, bottom = border0, top = border0)

# This is the line that fails: a Worksheet has no `column` attribute.
ws.column['C'].border = thin
I then got the Error:
Worksheet object has no attribute column
Is there a possibility to assign the border to whole row/column or do I need to apply it to the cells one by one?
Here is an example of how to iterate through the cells and apply the border to each one. min_col = 3 and max_col = 3 select column 'C', and max_row sets the last row that receives the border.
import openpyxl
from openpyxl.styles import borders
from openpyxl.styles.borders import Border

BORDER_COLUMN = 3   # column 'C'
FIRST_ROW = 1
LAST_ROW = 20

wb = openpyxl.load_workbook('border.xlsx')
ws = wb.active

# One thin black side and one "no line" side to build the border from.
border1 = borders.Side(style = None, color = 'FF000000', border_style = 'thin')
border0 = borders.Side(style = None, color = None, border_style = None)
thin = Border(left = border1, right = border0, bottom = border0, top = border0)

# openpyxl styles individual cells, so visit every cell of the column range.
for row_number in range(FIRST_ROW, LAST_ROW + 1):
    ws.cell(row=row_number, column=BORDER_COLUMN).border = thin

wb.save('border_new.xlsx')
I bet I'm really close here. I'm trying to look at spreadsheets with potentially hundreds of columns and create a plot in a new worksheet for each column on the fly. I've got a few of these working where I simply make multiple calls to chart1, chart2, chart3, ...
What I have below loops fine, inserts the data in the first sheet, creates the second sheet and inserts only the first chart. How do I write the loop to create "n" charts?
I bet it's a silly trivial thing.
Thanks in advance.
Input Data:
https://drive.google.com/open?id=1sts5axnT7aQ04zHv8nPwhnrQPDb7oZlV
import pandas as pd
import codecs
import csv
import os
import xlsxwriter
import datetime
df3 = pd.read_csv('TEST.csv', index_col=0, header=[0], low_memory=False, na_filter=True, encoding='utf-8')
writer2 = pd.ExcelWriter('TEST.xlsx', engine='xlsxwriter')

#define the sheetname
stage = 10
sheetname1 = "Stage_" +str(stage)
print(sheetname1)

# The `encoding` keyword of to_excel() was removed in pandas 2.0, so it is
# no longer passed.
df3.to_excel(writer2, sheet_name = sheetname1 , startrow=4, startcol=0)
maxcol = df3.shape[1]-1
maxlen = df3.shape[0]-1
print(maxcol, maxlen)

#set the workbook value
c = df3[['TWO']]
workbook = writer2.book

#set the worksheet value
sheetname2 = "Stage_" +str(stage) +str("_P")
c.to_excel(writer2, sheet_name = sheetname2 , startrow=4, startcol=0)
print(sheetname2)
worksheet = writer2.sheets[ sheetname2 ]

for i in range(2, maxcol):
    TITLE = df3.columns[i]
    # Charts are stacked down column C, 34 worksheet rows apart (C1, C35, ...).
    LOC_NUM = (i - 1) * 34 - 33
    LOCATION = "C" +str(LOC_NUM)
    print("TITLE:", TITLE, "GRAPH_LOCATION:", LOC_NUM,LOCATION, "LENGTH:", maxlen, "INDICE:", i)

    # Every chart must be a new object created inside the loop: a chart
    # object can only be inserted into a worksheet once.
    chart = workbook.add_chart({'type': 'line'})
    chart.set_size({'width': 1200, 'height': 640})
    # NOTE(review): the dataframe index is written to worksheet column 0, so
    # worksheet column i holds df3.columns[i - 1] while TITLE is
    # df3.columns[i] -- confirm which of the two is intended.
    chart.add_series({"values" : [ sheetname1 , 7, i ,maxlen, i ],"name" : TITLE })
    chart.set_x_axis({'name': 'Time (s)', 'position_axis': 'on_tick'})
    chart.set_y_axis({'name': 'Test', 'major_gridlines': {'visible': False}})
    # Turn off chart legend. It is on by default in Excel.
    chart.set_legend({'position': 'none'})
    worksheet.insert_chart( LOCATION , chart )

# ExcelWriter.save() was removed in pandas 2.0; close() writes the file.
writer2.close()
import pandas as pd
import numpy as np

df = pd.read_excel(r"C:\Users\venkagop\Subbu\promo validation testing\P 02. Promotions-UK C1.xls")
df = df[['Promotions', 'Promotions: AE', 'Promotions: Anaplan ID', 'Promotions: Is Optima Scenario?', 'Promotions: SIDs', 'Set Inactive?', 'Start Date', 'End Date', 'Promo Period', 'Promo Optima Status', 'Change Promo Status']]

# Keep FY1819 promotions that are active Optima scenarios and have SIDs.
keep = (
    (df['Promo Period'] == 'FY1819')
    & (df['Set Inactive?'] == 0)
    & (df['Promotions: Is Optima Scenario?'] == 1)
)
# .copy() makes the filtered frame independent, so adding a column below
# does not trigger SettingWithCopyWarning.
df = df[keep].dropna(subset=['Promotions: SIDs']).copy()

# 'True' where the Optima status matches the Anaplan status, else 'False'.
df['Optima vs Anaplan Promo Status Validation'] = np.where(df['Promo Optima Status'] == df['Change Promo Status'], 'True', 'False')

# Written as .xlsx: the xlwt engine needed to write legacy .xls files was
# removed in pandas 2.0.
df.to_excel(r"C:\Users\venkagop\Subbu\mytest.xlsx", index = False)
# After this I want to change the name of Sheet1 to some other name.
There are 2 ways you can approach this problem.
Approach 1
Save the excel file to the correct worksheet name from the beginning, by using the sheet_name argument.
import pandas as pd

# Name the worksheet when it is written by passing sheet_name.
# The target is .xlsx because the xlwt engine needed for legacy .xls output
# was removed in pandas 2.0; the context manager saves and closes the file
# (ExcelWriter.save() was removed in pandas 2.0 as well).
with pd.ExcelWriter(r'C:\Users\venkagop\Subbu\mytest.xlsx') as writer:
    df.to_excel(writer, sheet_name='MySheetName', index=False)
Approach 2
If Approach 1 is not possible, change the worksheet name at a later stage using openpyxl. The advantage of this method is you remove the cost of converting pandas dataframe to Excel format again.
import openpyxl

# openpyxl only opens the .xlsx family of formats, so the workbook must have
# been saved as .xlsx; a legacy .xls file is rejected by load_workbook().
file_loc = r'C:\Users\venkagop\Subbu\mytest.xlsx'
ss = openpyxl.load_workbook(file_loc)
# Index the workbook by sheet name; get_sheet_by_name() is deprecated.
ss_sheet = ss['Sheet1']
ss_sheet.title = 'MySheetName'
ss.save(file_loc)