Pandas Freeze column names - python

I want to freeze the header row so that, when I scroll down through a large file, the column names always stay visible.
This is my script, which also creates the Excel file by adding several existing workbooks to it as sheets.
import numpy as np
import pandas as pd
from timestampdirectory import createdir
import openpyxl
import xlsxwriter
from openpyxl import workbook
from openpyxl import worksheet
import os
import time
def svnanalysis():
    """Combine the SVN statistics workbooks into one dated workbook.

    Reads the five SVN exports from the directory returned by ``createdir()``
    plus the ``CM_UsersDetails.xlsx`` workbook, writes each of them to its own
    sheet of ``<YYYY-MM-DD->Usage-SvnAnalysis.xlsx`` in that same directory,
    and sizes every column of every sheet to fit that sheet's own contents.
    """
    dest = createdir()
    dfSvnUsers = pd.read_excel(os.path.join(dest, "SvnUsers.xlsx"))
    dfSvnUsers.fillna("N/A", inplace=True)
    dfSvnGroupMembership = pd.read_excel(os.path.join(dest, "SvnGroupMembership.xlsx"))
    dfSvnRepoGroupAccess = pd.read_excel(os.path.join(dest, "SvnRepoGroupAccess.xlsx"))
    dfsvnReposSize = pd.read_excel(os.path.join(dest, "svnReposSize.xlsx"))
    dfsvnRepoLastChangeDate = pd.read_excel(os.path.join(dest, "svnRepoLastChangeDate.xlsx"))
    dfUserDetails = pd.read_excel(r"D:\GIT-files\Automate-Stats\SVN_sample_files\CM_UsersDetails.xlsx")
    dfUserDetails.fillna("N/A", inplace=True)
    timestr = time.strftime("%Y-%m-%d-")

    # Sheet name -> frame, in the order the sheets are written to the workbook.
    sheets = {
        'UserDetails': dfUserDetails,
        'SvnUsers': dfSvnUsers,
        'SvnGroupMembership': dfSvnGroupMembership,
        'SvnRepoGroupAccess': dfSvnRepoGroupAccess,
        'svnReposSize': dfsvnReposSize,
        'svnRepoLastChangeDate': dfsvnRepoLastChangeDate,
    }

    out_path = os.path.join(dest, f'{timestr}Usage-SvnAnalysis.xlsx')
    # set_column() is an XlsxWriter worksheet method, so the engine is pinned
    # explicitly; the `with` block closes the file even if a write fails.
    with pd.ExcelWriter(out_path, engine='xlsxwriter') as xlwriter:
        for sheet_name, frame in sheets.items():
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]
            # Measure each sheet against its own data; widths taken from
            # SvnUsers do not fit the columns of the other sheets.
            for col_idx, column in enumerate(frame.columns):
                cell_lengths = frame.iloc[:, col_idx].astype(str).map(len)
                # An empty frame has no cells to measure: fall back to the header.
                longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
                ws.set_column(col_idx, col_idx, max(longest_cell, len(str(column))))
        # xlwriter.freeze_columns(1, 0) # # Freeze the first row.
    #usage = pd.read_excel(os.path.join(dest,f'{timestr}Usage-SvnAnalysis.xlsx'))
    #usage.style.set_table_styles([
    # {'selector': 'thead th', 'props': 'position: sticky; top:0; background-color:red;'},
    # {'selector': 'tbody th', 'props': 'position: sticky; left:0; background-color:green;'}
    #]).to_html()
    print("UsageSvnAnalysis.xlsx a fost exportat cu succes continand ca sheet toate xlsx anterioare")


svnanalysis()
The commented-out lines (starting with "#") at the end of the script are what I tried. The part with table_styles runs without errors, but it neither freezes the first row (the column names) nor changes its color.
This is the exported Excel:
Basically, when I scroll down, the column names should always stay visible and have a "blue" background. As I said, the commented-out code runs, but it is not applied, and I don't know why.
# Asker's updated attempt: freeze the header row, then try to color it.
# NOTE(review): the loop variable is never used below; every pass looks up the
# 'SvnUsers' sheet, so only that one sheet is affected.
for dfSvnUsers in xlwriter.sheets:
ws = xlwriter.sheets['SvnUsers']
# Freeze the first row so it stays visible while scrolling.
ws.freeze_panes(1, 0)
# NOTE(review): set_table_styles()/to_html() with CSS selectors is the pandas
# Styler (HTML) API; presumably a worksheet object has no `.style` attribute,
# which would explain why the header color is not applied. Confirm against the
# XlsxWriter worksheet documentation.
ws.style.set_table_styles([
{'selector': 'thead th', 'props': 'position: sticky; top:0; background-color:red;'},
{'selector': 'tbody th', 'props': 'position: sticky; left:0; background-color:green;'}
]).to_html()
I updated the script; it now works and freezes the first row (the column names) for that specific sheet. I also tried to change the background color of that row to "red", but that didn't work.

try:
# Write every sheet through one writer, then freeze the header row of each.
# The `with` block saves and closes the workbook on exit.
with pd.ExcelWriter(Your_FilePath, engine='xlsxwriter') as writer:
    # Pass the writer bound by the `with` statement; `xlwriter` is not
    # defined in this snippet.
    dfUserDetails.to_excel(writer, sheet_name='UserDetails', index=False)
    # etc etc ...
    for sht_name in writer.sheets:
        ws = writer.sheets[sht_name]
        # Freeze the first row (the column names) so it stays visible.
        ws.freeze_panes(1, 0)
print("UsageSvnAnalysis.xlsx a fost exportat cu succes continand ca sheet toate xlsx anterioare")
svnanalysis()

Related

pandas change column color of different sheets

I have a script that reads multiple Excel files and puts them into one final Excel file as sheets.
I also have code that fills the background of the column names with blue for all sheets in the workbook, but I want green for some columns and blue for the others on specific sheets. Is it possible to do that?
This is my script:
def gitanalysis():
    """Merge the Git statistics workbooks into one dated, formatted workbook.

    Reads the five Git exports from the directory returned by ``createdir()``
    and ``CM_UsersDetails.xlsx`` from the directory returned by ``path_dir()``,
    then writes each one to its own sheet of
    ``<YYYY-MM-DD->Usage-GitAnalysis.xlsx``.  Every sheet gets column widths
    fitted to its own data, a frozen header row and a blue header background.
    """
    dest = createdir()
    dfGitUsers = pd.read_excel(os.path.join(dest, "GitUsers.xlsx"))
    dfGitUsers.fillna("N/A", inplace=True)
    dfGitGroupMembership = pd.read_excel(os.path.join(dest, "GitGroupMembership.xlsx"))
    dfGitRepoGroupAccess = pd.read_excel(os.path.join(dest, "GitRepoGroupAccess.xlsx"))
    dfGitReposSize = pd.read_excel(os.path.join(dest, "GitReposSize.xlsx"))
    dfGitRepoLastChangeDate = pd.read_excel(os.path.join(dest, "GitRepoLastChangeDate.xlsx"))
    pathdest = path_dir()
    # "CM_UsersDetails.xlsx" is read from this second directory and added as a sheet.
    dfUserDetails = pd.read_excel(rf"{pathdest}\CM_UsersDetails.xlsx")
    dfUserDetails.fillna("N/A", inplace=True)
    timestr = time.strftime("%Y-%m-%d-")

    # Sheet name -> frame, in the order the sheets are written to the workbook.
    sheets = {
        'UserDetails': dfUserDetails,
        'GitUsers': dfGitUsers,
        'GitGroupMembership': dfGitGroupMembership,
        'GitRepoGroupAccess': dfGitRepoGroupAccess,
        'GitReposSize': dfGitReposSize,
        'GitRepoLastChangeDate': dfGitRepoLastChangeDate,
    }

    out_path = os.path.join(dest, f'{timestr}Usage-GitAnalysis.xlsx')
    # The formatting calls below are XlsxWriter methods, so the engine is
    # pinned; the `with` block closes the file even if a write fails.
    with pd.ExcelWriter(out_path, engine='xlsxwriter') as xlwriter:
        # Header style: bold black text on blue with a thin border.
        cell_format = xlwriter.book.add_format({
            'bg_color': 'blue',
            'bold': True,
            'font_color': 'black',
            'border': 1,
        })
        for sheet_name, frame in sheets.items():
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]
            ws.freeze_panes(1, 0)  # keep the header row visible while scrolling

            # Fit each column to this sheet's own data rather than to GitUsers.
            for col_idx, column in enumerate(frame.columns):
                cell_lengths = frame.iloc[:, col_idx].astype(str).map(len)
                longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
                ws.set_column(col_idx, col_idx, max(longest_cell, len(str(column))))

            # Numeric (row, col) bounds stay valid past column Z, where
            # chr(65 + n) no longer yields a column letter.
            last_col = len(frame.columns) - 1
            if last_col >= 0:
                ws.conditional_format(0, 0, 0, last_col,
                                      {'type': 'no_blanks', 'format': cell_format})
    print("GitSvnAnalysis.xlsx was exported with succes!")
This is how its look:
And this is the expected output for this sheet:
IIUC, for all your sheets you want the first two columns of the header row to be green and the others to be blue.
You can try the following code to color the header row; in the code, #92D050 is the green and #00B0F0 is the light blue.
def gitanalysis():
    """Merge the Git statistics workbooks and color the header rows.

    Reads the five Git exports from the directory returned by ``createdir()``
    and ``CM_UsersDetails.xlsx`` from the directory returned by ``path_dir()``,
    then writes each one to its own sheet of
    ``<YYYY-MM-DD->Usage-GitAnalysis.xlsx``.  On every sheet the header row is
    frozen, its first two cells are green (#92D050) and the remaining header
    cells are light blue (#00B0F0); column widths are fitted per sheet.
    """
    dest = createdir()
    dfGitUsers = pd.read_excel(os.path.join(dest, "GitUsers.xlsx"))
    dfGitUsers.fillna("N/A", inplace=True)
    dfGitGroupMembership = pd.read_excel(os.path.join(dest, "GitGroupMembership.xlsx"))
    dfGitRepoGroupAccess = pd.read_excel(os.path.join(dest, "GitRepoGroupAccess.xlsx"))
    dfGitReposSize = pd.read_excel(os.path.join(dest, "GitReposSize.xlsx"))
    dfGitRepoLastChangeDate = pd.read_excel(os.path.join(dest, "GitRepoLastChangeDate.xlsx"))
    pathdest = path_dir()
    # "CM_UsersDetails.xlsx" is read from this second directory and added as a sheet.
    dfUserDetails = pd.read_excel(rf"{pathdest}\CM_UsersDetails.xlsx")
    dfUserDetails.fillna("N/A", inplace=True)
    timestr = time.strftime("%Y-%m-%d-")

    # Sheet name -> frame, in the order the sheets are written to the workbook.
    sheets = {
        'UserDetails': dfUserDetails,
        'GitUsers': dfGitUsers,
        'GitGroupMembership': dfGitGroupMembership,
        'GitRepoGroupAccess': dfGitRepoGroupAccess,
        'GitReposSize': dfGitReposSize,
        'GitRepoLastChangeDate': dfGitRepoLastChangeDate,
    }

    out_path = os.path.join(dest, f'{timestr}Usage-GitAnalysis.xlsx')
    # The formatting calls below are XlsxWriter methods, so the engine is
    # pinned; the `with` block closes the file even if a write fails.
    with pd.ExcelWriter(out_path, engine='xlsxwriter') as xlwriter:
        workbook = xlwriter.book
        shared_style = {'bold': True, 'font_color': 'black', 'border': 1}
        # Green for the first two header cells, blue for the rest.
        cell_format_green = workbook.add_format({'bg_color': '#92D050', **shared_style})
        cell_format_blue = workbook.add_format({'bg_color': '#00B0F0', **shared_style})

        for sheet_name, frame in sheets.items():
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]
            ws.freeze_panes(1, 0)  # keep the header row visible while scrolling

            # Fit each column to this sheet's own data rather than to GitUsers.
            for col_idx, column in enumerate(frame.columns):
                cell_lengths = frame.iloc[:, col_idx].astype(str).map(len)
                longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
                ws.set_column(col_idx, col_idx, max(longest_cell, len(str(column))))

            last_col = len(frame.columns) - 1
            if last_col < 0:
                continue  # no header cells to color
            # Numeric (row, col) bounds stay valid past column Z.
            ws.conditional_format(0, 0, 0, min(1, last_col),
                                  {'type': 'no_blanks', 'format': cell_format_green})
            # Only add the blue range when a third column exists; otherwise
            # the range would be reversed and overlap the green cells.
            if last_col >= 2:
                ws.conditional_format(0, 2, 0, last_col,
                                      {'type': 'no_blanks', 'format': cell_format_blue})
    print("GitSvnAnalysis.xlsx was exported with success!")
Output :
which gives us the expected output
EDIT :
As we discussed, you want only some columns on specific sheets to be colored green, and all the other columns blue.
Here is the code for that
def gitanalysis():
    """Merge the Git statistics workbooks and color headers per sheet.

    Reads the five Git exports from the directory returned by ``createdir()``
    and ``CM_UsersDetails.xlsx`` from the directory returned by ``path_dir()``,
    then writes each one to its own sheet of
    ``<YYYY-MM-DD->Usage-GitAnalysis.xlsx``.  Every sheet gets a frozen header
    row and per-sheet column widths.  Header cells are green (#92D050) or
    light blue (#00B0F0) according to the per-sheet rules below.
    """
    dest = createdir()
    dfGitUsers = pd.read_excel(os.path.join(dest, "GitUsers.xlsx"))
    dfGitUsers.fillna("N/A", inplace=True)
    dfGitGroupMembership = pd.read_excel(os.path.join(dest, "GitGroupMembership.xlsx"))
    dfGitRepoGroupAccess = pd.read_excel(os.path.join(dest, "GitRepoGroupAccess.xlsx"))
    dfGitReposSize = pd.read_excel(os.path.join(dest, "GitReposSize.xlsx"))
    dfGitRepoLastChangeDate = pd.read_excel(os.path.join(dest, "GitRepoLastChangeDate.xlsx"))
    pathdest = path_dir()
    # "CM_UsersDetails.xlsx" is read from this second directory and added as a sheet.
    dfUserDetails = pd.read_excel(rf"{pathdest}\CM_UsersDetails.xlsx")
    dfUserDetails.fillna("N/A", inplace=True)
    timestr = time.strftime("%Y-%m-%d-")

    # Sheet name -> frame, in the order the sheets are written to the workbook.
    sheets = {
        'UserDetails': dfUserDetails,
        'GitUsers': dfGitUsers,
        'GitGroupMembership': dfGitGroupMembership,
        'GitRepoGroupAccess': dfGitRepoGroupAccess,
        'GitReposSize': dfGitReposSize,
        'GitRepoLastChangeDate': dfGitRepoLastChangeDate,
    }

    # Sheet name -> predicate on the zero-based column index: True means the
    # header cell is green, False means it is blue.
    is_green = {
        'UserDetails': lambda idx: True,                # all columns green
        'GitUsers': lambda idx: idx < 2,                # first two columns green
        'GitGroupMembership': lambda idx: idx < 2,      # first two columns green
        'GitRepoGroupAccess': lambda idx: True,         # all columns green
        'GitReposSize': lambda idx: idx in (1, 2),      # columns B and C green
        'GitRepoLastChangeDate': lambda idx: idx != 1,  # all but column B green
    }

    out_path = os.path.join(dest, f'{timestr}Usage-GitAnalysis.xlsx')
    # The formatting calls below are XlsxWriter methods, so the engine is
    # pinned; the `with` block closes the file even if a write fails.
    with pd.ExcelWriter(out_path, engine='xlsxwriter') as xlwriter:
        workbook = xlwriter.book
        shared_style = {'bold': True, 'font_color': 'black', 'border': 1}
        cell_format_green = workbook.add_format({'bg_color': '#92D050', **shared_style})
        cell_format_blue = workbook.add_format({'bg_color': '#00B0F0', **shared_style})

        for sheet_name, frame in sheets.items():
            frame.to_excel(xlwriter, sheet_name=sheet_name, index=False)
            ws = xlwriter.sheets[sheet_name]
            ws.freeze_panes(1, 0)  # keep the header row visible while scrolling

            green_here = is_green[sheet_name]
            for col_idx, column in enumerate(frame.columns):
                # Fit the column to this sheet's own data rather than to GitUsers.
                cell_lengths = frame.iloc[:, col_idx].astype(str).map(len)
                longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
                ws.set_column(col_idx, col_idx, max(longest_cell, len(str(column))))

                # One single-cell rule per header cell; numeric (row, col)
                # bounds stay valid past column Z.
                header_format = cell_format_green if green_here(col_idx) else cell_format_blue
                ws.conditional_format(0, col_idx, 0, col_idx,
                                      {'type': 'no_blanks', 'format': header_format})
    print("GitSvnAnalysis.xlsx was exported with success!")
which will give you the expected output :

Coloring Columns with Pandas

I am trying to color the columns listed in group_1, but I am getting the error shown below. Is there a solution?
# Column labels to highlight.
# NOTE(review): `award` is a bare name here, unlike the quoted 'mean' and 'max';
# confirm it is a variable holding a column label and not a missing-quotes typo.
group_1 = [award, 'mean', 'max']
def change_color(workbook_param, color_hex_code):
    """Build a header cell format filled with ``color_hex_code``.

    The properties (bold, wrapped text, top-aligned, border 1, the given fill
    color) are passed to ``workbook_param.add_format`` and whatever that call
    returns is handed back to the caller.
    """
    header_properties = dict(
        bold=True,
        text_wrap=True,
        valign='top',
        fg_color=color_hex_code,
        border=1,
    )
    return workbook_param.add_format(header_properties)
# NOTE(review): `columns=` inside the square brackets is not valid subscript
# syntax and is what raises the SyntaxError quoted below. Selecting a list of
# columns is normally written describe_df[group_1]; confirm that form against
# the real data.
group_1_data = describe_df[columns= group_1].copy()
# Position of each group_1 label within describe_df's columns.
group_1_ind = [describe_df.columns.to_list().index(patient) for patient in group_1]
group_1_ind # [0, 3, 4]
group_1_data = describe_df[columns= group_1].copy()
^
SyntaxError: invalid syntax
group_1_data is then used in the code below:
# Combining dataFrames in excel
excelName = input("Label your excel file: ")
xlsxAUTO = '.xlsx'
excelAutoNamed = excelName + xlsxAUTO
# Create a Pandas Excel writer. The `with` block saves and closes the file on
# exit, replacing writer.save(), which pandas 2.x no longer provides.
with pd.ExcelWriter(excelAutoNamed, engine='xlsxwriter') as writer:
    # Write both dataframes to the same 'Validation' sheet.
    describe_df.to_excel(writer, sheet_name='Validation', startrow=0, startcol=0)
    # NOTE(review): startrow is the number of columns of `df`, not the number
    # of rows written for describe_df; confirm the two tables cannot overlap.
    df.to_excel(writer, sheet_name='Validation', startrow=len(df.columns), startcol=0)
    # Get the xlsxwriter workbook and worksheet objects. You can change sheet name as well.
    workbook = writer.book
    worksheet = writer.sheets['Validation']
    # Build the highlight format once and reuse it for every header cell.
    highlight_format = change_color(workbook, '#e3fc03')
    # Rewrite the chosen header cells with the format; the +1 skips the index
    # column that to_excel writes in column 0.
    for col_num, value in zip(group_1_ind, group_1_data.columns.values):
        worksheet.write(0, col_num + 1, value, highlight_format)
I have also tried group_1_data = describe_df[group_1].copy()

Adding borders to rows and columns with openpyxl

How can I add borders to whole rows and columns with openpyxl?
I tried:
import openpyxl
from openpyxl.styles import borders
from openpyxl.styles.borders import Border
from openpyxl.styles.colors import Color  # Color is used below but was never imported
wb = openpyxl.Workbook()
ws = wb.active
# border1: thin line in indexed color 0; border0: no line at all.
border1 = borders.Side(style = None, color = Color(indexed = 0), border_style = 'thin')
border0 = borders.Side(style = None, color = None, border_style = None)
# Only the left edge gets a visible line ("border 0" was a typo for border0).
thin = Border(left = border1, right = border0, bottom = border0, top = border0)
# This is the line the question is about: a worksheet has no `column`
# attribute, so it raises the AttributeError quoted below.
ws.column['C'].border = thin
I then got the Error:
Worksheet object has no attribute column
Is there a possibility to assign the border to whole row/column or do I need to apply it to the cells one by one?
Here is an example of how to iterate through the cells and apply the border to each cell. min_col = 3 and max_col = 3 select column 'C', and max_row sets the last row that gets the border.
import openpyxl
from openpyxl.styles import borders
from openpyxl.styles.borders import Border

# Open the existing workbook and work on its active sheet.
wb = openpyxl.load_workbook('border.xlsx')
ws = wb.active

# A thin black line for the left edge; the other three edges stay empty.
thin_side = borders.Side(border_style='thin', color='FF000000')
no_side = borders.Side()
thin = Border(left=thin_side, right=no_side, bottom=no_side, top=no_side)

# Rows 1-20 of column 3 ('C'): every row tuple holds exactly one cell.
column_c_rows = ws.iter_rows(min_row=1, max_row=20, min_col=3, max_col=3)
for (cell,) in column_c_rows:
    cell.border = thin

wb.save('border_new.xlsx')

Looping through a spreadsheet and plotting all columns with xlsxwriter, Pandas

I bet I'm really close here. I'm trying to read spreadsheets with potentially hundreds of columns and create a plot in a new worksheet for each column on the fly. I've got a few of these working where I simply make separate calls for chart1, chart2, chart3, ...
What I have below loops fine, inserts the data in the first sheet, creates the second sheet, and inserts only the first chart. How do I write the loop to create "n" charts?
I bet it's a silly trivial thing.
Thanks in advance.
Input Data:
https://drive.google.com/open?id=1sts5axnT7aQ04zHv8nPwhnrQPDb7oZlV
import pandas as pd
import codecs
import csv
import os
import xlsxwriter
import datetime
# Load the CSV; its first column becomes the index and its first row the header.
df3 = pd.read_csv('TEST.csv', index_col=0, header=[0], low_memory=False, na_filter=True, encoding='utf-8')

# The `with` block saves and closes the workbook on exit, replacing
# writer2.save(), which pandas 2.x no longer provides.
with pd.ExcelWriter('TEST.xlsx', engine='xlsxwriter') as writer2:
    #define the sheetname
    stage = 10
    sheetname1 = "Stage_" + str(stage)
    print(sheetname1)
    # to_excel() no longer accepts an `encoding` argument in pandas 2.x, so it
    # is dropped here.
    df3.to_excel(writer2, sheet_name=sheetname1, startrow=4, startcol=0)
    maxcol = df3.shape[1] - 1
    maxlen = df3.shape[0] - 1
    print(maxcol, maxlen)

    #set the workbook value
    c = df3[['TWO']]
    workbook = writer2.book

    #set the worksheet value
    sheetname2 = "Stage_" + str(stage) + str("_P")
    c.to_excel(writer2, sheet_name=sheetname2, startrow=4, startcol=0)
    print(sheetname2)
    worksheet = writer2.sheets[sheetname2]

    # One line chart per selected column, stacked down column C of the second sheet.
    # NOTE(review): range(2, maxcol) with maxcol = column count - 1 skips the
    # first two columns and stops before the last two; with four or fewer data
    # columns it yields at most one chart. Confirm the intended bounds.
    for i in range(2, maxcol):
        TITLE = df3.columns[i]
        # Charts are spaced 34 rows apart: i=2 -> C1, i=3 -> C35, ...
        LOC_NUM = (i - 1) * 34 - 33
        LOCATION = "C" + str(LOC_NUM)
        print("TITLE:", TITLE, "GRAPH_LOCATION:", LOC_NUM, LOCATION, "LENGTH:", maxlen, "INDICE:", i)
        chart = workbook.add_chart({'type': 'line'})
        chart.set_size({'width': 1200, 'height': 640})
        # NOTE(review): with startrow=4 the header is on worksheet row 4 and the
        # data on rows 5..4+len(df3); the index occupies worksheet column 0, so
        # df3.columns[i] is in worksheet column i+1. Confirm that the range
        # [7, i, maxlen, i] targets the cells that belong to TITLE.
        chart.add_series({"values": [sheetname1, 7, i, maxlen, i], "name": TITLE})
        chart.set_x_axis({'name': 'Time (s)', 'position_axis': 'on_tick'})
        chart.set_y_axis({'name': 'Test', 'major_gridlines': {'visible': False}})
        # Turn off chart legend. It is on by default in Excel.
        chart.set_legend({'position': 'none'})
        worksheet.insert_chart(LOCATION, chart)

Change the name of excel worksheet with pandas

import pandas as pd
import numpy as np
# Load the promotions export and keep only the columns used below.
kept_columns = [
    'Promotions',
    'Promotions: AE',
    'Promotions: Anaplan ID',
    'Promotions: Is Optima Scenario?',
    'Promotions: SIDs',
    'Set Inactive?',
    'Start Date',
    'End Date',
    'Promo Period',
    'Promo Optima Status',
    'Change Promo Status',
]
df = pd.read_excel(r"C:\Users\venkagop\Subbu\promo validation testing\P 02. Promotions-UK C1.xls")[kept_columns]

# Keep rows for period FY1819 where 'Set Inactive?' is 0 and
# 'Promotions: Is Optima Scenario?' is 1, then drop rows without SIDs.
row_filter = (
    (df['Promo Period'] == 'FY1819')
    & (df['Set Inactive?'] == 0)
    & (df['Promotions: Is Optima Scenario?'] == 1)
)
df = df[row_filter].dropna(subset=['Promotions: SIDs'])

# Flag, as the strings 'True'/'False', whether the two status columns agree.
statuses_match = df['Promo Optima Status'] == df['Change Promo Status']
df['Optima vs Anaplan Promo Status Validation'] = np.where(statuses_match, 'True', 'False')

df.to_excel(r"C:\Users\venkagop\Subbu\mytest.xls", index = False)
# After this I want to change the name of Sheet1 to some other name.
There are 2 ways you can approach this problem.
Approach 1
Save the excel file to the correct worksheet name from the beginning, by using the sheet_name argument.
import pandas as pd
# The sheet name is fixed at write time through sheet_name. Leaving the `with`
# block saves and closes the file, replacing writer.save(), which pandas 2.x
# no longer provides.
with pd.ExcelWriter(r'C:\Users\venkagop\Subbu\mytest.xls') as writer:
    df.to_excel(writer, sheet_name='MySheetName', index=False)
Approach 2
If Approach 1 is not possible, change the worksheet name at a later stage using openpyxl. The advantage of this method is you remove the cost of converting pandas dataframe to Excel format again.
import openpyxl
file_loc = r'C:\Users\venkagop\Subbu\mytest.xls'
# NOTE(review): openpyxl documents support for .xlsx/.xlsm workbooks; confirm
# that this .xls path really holds a file openpyxl can open.
ss = openpyxl.load_workbook(file_loc)
# Look the sheet up by title; get_sheet_by_name() is deprecated in favor of
# indexing the workbook.
ss_sheet = ss['Sheet1']
ss_sheet.title = 'MySheetName'
# Save back over the same file so only the sheet title changes.
ss.save(file_loc)

Categories