Coloring Columns with Pandas - python

I am trying to color columns in group_1, but I am getting this issue, any solution?
group_1 = [award, 'mean', 'max']
def change_color(workbook_param, color_hex_code):
    """Return a header cell format filled with the given colour.

    Args:
        workbook_param: Object exposing ``add_format`` (in this snippet the
            xlsxwriter workbook obtained from ``writer.book``).
        color_hex_code: Fill colour, passed through unchanged as the
            ``fg_color`` property, e.g. ``'#e3fc03'``.

    Returns:
        Whatever ``workbook_param.add_format`` returns for a bold, wrapped,
        top-aligned, bordered cell with the requested fill colour.
    """
    return workbook_param.add_format({
        'bold': True,
        'text_wrap': True,
        'valign': 'top',
        'fg_color': color_hex_code,
        'border': 1,
    })
group_1_data = describe_df[columns= group_1].copy()
group_1_ind = [describe_df.columns.to_list().index(patient) for patient in group_1]
group_1_ind # [0, 3, 4]
group_1_data = describe_df[columns= group_1].copy()
^
SyntaxError: invalid syntax
group_1_data is then used in the code below:
# Combining dataFrames in excel
excelName = input("Label your excel file: ")
xlsxAUTO = '.xlsx'
excelAutoNamed = excelName + xlsxAUTO
# Create a Pandas Excel writer.
writer = pd.ExcelWriter(excelAutoNamed,engine='xlsxwriter')
# Convert the dataframe to an XlsxWriter Excel object.
describe_df.to_excel(writer,sheet_name='Validation',startrow=0 , startcol=0)
df.to_excel(writer,sheet_name='Validation',startrow=len(df.columns), startcol=0)
# Get the xlsxwriter workbook and worksheet objects. You can change sheet name as well.
workbook = writer.book
worksheet = writer.sheets['Validation']
# Do the modifications for desired columns.
for col_num, value in zip(group_1_ind, group_1_data.columns.values):
worksheet.write(0, col_num + 1, value, change_color(workbook, '#e3fc03'))
writer.save()
I have also tried group_1_data = describe_df[group_1].copy()

Related

Python xlsxwriter. Header_format sub-property of columns parameter not working as expected

I need to set a table header in Excel with the next date format: 'mmm-yy'.
Formatting I've set:
title_date_format = workbook.add_format({
'text_wrap': True,
'font_size': 11,
'num_format': 'mmm-yy'
})
Column settings:
column_settings = []
index = 0
for column in df.columns:
if index < 3:
dct = {}
dct['header'] = column
column_settings.append(dct)
else:
dct = {}
formula = '=[#[Value]]*[#Qty]'
dct['header'] = column
dct['formula'] = formula
dct['header_format'] = title_date_format
column_settings.append(dct)
index += 1
Table creation:
# Create a table
worksheet.add_table(0, 0, max_row + 2, max_col - 1, {
'columns': column_settings
})
The problem is that only the 'text_wrap' and 'font_size' sub-properties work. The column header, which is a date, stays in '1/24/2022' format instead of 'Jan-22', so 'num_format': 'mmm-yy' is not applied.
Full example:
import datetime as dt
import pandas as pd
import numpy as np
import xlsxwriter
initial_data = {
'Category': ['catA', 'catB', 'catC', 'catC'],
'Item': ['item1', 'item2', 'item3', 'item4']
}
df = pd.DataFrame(initial_data)
# Add columns with month-year
for year in range(2,4):
if year == 2:
for month in range(11,13):
date_str = str(month) + '/1/202' + str(year)
df[date_str] = ''
else:
for month in range(1,4):
date_str = str(month) + '/1/202' + str(year)
df[date_str] = ''
writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1', header=False, startrow=1, index=False)
workbook = writer.book
worksheet = writer.sheets['Sheet1']
title_date_format = workbook.add_format({
'text_wrap': True,
'font_name': 'Calibri',
'font_size': 10,
'num_format': 'mmm-yy'
})
column_settings = []
for column in df.columns:
dct = {}
dct['header'] = column
dct['header_format'] = title_date_format
column_settings.append(dct)
(max_row, max_col) = df.shape
worksheet.add_table(0, 0, max_row, max_col - 1, {
'columns': column_settings,
'style': 'Table Style Light 9'
})
writer.save()
Any ideas on how to make it work?
Thank you
The issue is that the column headers are strings and the date number format only applies to numbers. So the solution would be to turn the column headers into datetime numbers so that the format can be applied. However, as far as I can see Table column headers in Excel need to be strings, so that isn't an option.
So as a workaround you could format the header strings that you are currently using into the format that you want:
# ...
from datetime import datetime
# ...
# Add one empty column per month, labelled directly in the desired
# 'Mon-yy' form. Table headers are strings, so a num_format cannot
# reformat them; the label itself has to carry the final text.
for year in range(2, 4):
    # The question builds the year as '202' + str(year): Nov-Dec of 2022,
    # then Jan-Mar of 2023.
    months = range(11, 13) if year == 2 else range(1, 4)
    for month in months:
        date_str = datetime(2020 + year, month, 1).strftime("%b-%y")
        df[date_str] = ''
Output:

Formatting Headers of dataframes with openpyxl

I am currently using the code below to write the first two dataframes into two different sheets of an Excel workbook, and then a loop to take dataframes from a list and write them to a separate sheet. I want to format the headers of the dataframes in the list, so I used:
book=load_workbook(desktop+'test.xlsx')
writer =pd.ExcelWriter(desktop+'/test.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = {ws.title: ws for ws in book.worksheets}
frameg.to_excel(writer,sheet_name='Sheet 1',startrow=1,index = False,header= False)
framed.to_excel(writer,sheet_name='Sheet 2',startrow=1,index=False,header=False )
for df in ls2:
df.to_excel(writer,sheet_name='test',startrow=writer.sheets['test'].max_row+1,header=True,index=False)
header_format = book.add_format({
'bold': True,
'text_wrap': True,
'valign': 'top',
'fg_color': '#5C5C5C',
'font_color':'#ffffff',
'border': 1})
for col_num, value in enumerate(df2.columns.values):
ws.write(0, col_num , value, header_format)
writer.save()
I am getting an AttributeError: 'Workbook' object has no attribute 'add_format'
Any help would be appreciated

how to make a list from result of a function?

When I run my code it runs without errors and creates an Excel file named Exel.xlsx, but there is no data in Exel.xlsx.
I think I made a mistake when building the list, but I can't find a solution.
def randStr(length=7):
characters = list('bcdghijkmnpqrtuvwxyz23456789')
shuffle(characters)
exel = ''.join(characters[:length])
listb = [exel]
listb.append(exel)
workbook = xlsxwriter.Workbook('Exel.xlsx')
worksheet = workbook.add_worksheet('randomise')
chart = workbook.add_chart({'type': 'line'})
expenses = (listb)
row = 0
col = 0
workbook.close()
return exel
import xlsxwriter
# Minimal line-chart example: write a column of numbers, then plot it.
# The context manager closes (and thereby writes) the workbook on exit,
# even if one of the steps below raises.
with xlsxwriter.Workbook('chart_line.xlsx') as workbook:
    worksheet = workbook.add_worksheet()

    # Add the worksheet data to be plotted.
    data = [10, 40, 50, 20, 10, 50]
    worksheet.write_column('A1', data)

    # Create a new chart object.
    chart = workbook.add_chart({'type': 'line'})

    # Add a series to the chart; the range must cover the cells written above.
    chart.add_series({'values': '=Sheet1!$A$1:$A$6'})

    # Insert the chart into the worksheet.
    worksheet.insert_chart('C1', chart)
This is the basic way to use add_chart: create the chart, add a series of values to it, and then insert the chart into the worksheet.
But you are not adding any values to your chart. Moreover, you are not writing any values to the rows or columns of the sheet.
I ran your code: a sheet named "randomise" is created, but there is no data in it, because you haven't written anything to it.
solved!
def randStr(length=7):
    """Return a random string of distinct characters.

    The 28-character alphabet below is shuffled and its first ``length``
    characters are joined, so no character repeats within one result and
    a ``length`` above 28 yields all 28 characters.

    Args:
        length: Number of characters to take from the shuffled alphabet.

    Returns:
        The joined characters as a ``str``.
    """
    characters = list('bcdghijkmnpqrtuvwxyz23456789')
    shuffle(characters)
    return ''.join(characters[:length])
listb=[]
if __name__ == '__main__':
for i in range(20):
value1=(randStr())
listb.append(value1)
workbook = xlsxwriter.Workbook('Exel.xlsx')
worksheet = workbook.add_worksheet('randomise')
worksheet.write_column(0,0,listb)
chart = workbook.add_chart({'type': 'line'})
expenses =(listb)
row = 0
col = 0
workbook.close()
img = makeImage(value1, width=512)
with open('%d.png' % i, 'wb') as f:
f.write(img)
print (i)

Looping through a spreadsheet and plotting all columns with xlsxwriter, Pandas

I bet I'm really close here. I'm trying to look at spreadsheets with potentially 100's of columns and create a plot in a new spreadsheet for each column on the fly. I've got a few of these working where I simply call multiple calls to chart1,chart2,chart3......
What I have below loops fine, inserts the data in the first sheet, creates the second sheet and inserts only the first chart. How do I write the loop to create "n" charts?
I bet it's a silly trivial thing.
Thanks in advance.
Input Data:
https://drive.google.com/open?id=1sts5axnT7aQ04zHv8nPwhnrQPDb7oZlV
import pandas as pd
import codecs
import csv
import os
import xlsxwriter
import datetime
df3 = pd.read_csv('TEST.csv', index_col=0, header=[0], low_memory=False, na_filter=True, encoding='utf-8')
writer2 = pd.ExcelWriter('TEST.xlsx', engine='xlsxwriter')
#define the sheetname
stage = 10
sheetname1 = "Stage_" +str(stage)
print(sheetname1)
df3.to_excel(writer2, sheet_name = sheetname1 , startrow=4, startcol=0, encoding='utf8')
maxcol = df3.shape[1]-1
maxlen = df3.shape[0]-1
print(maxcol, maxlen)
#set the workbook value
c = df3[['TWO']]
workbook = writer2.book
#set the worksheet value
sheetname2 = "Stage_" +str(stage) +str("_P")
c.to_excel(writer2, sheet_name = sheetname2 , startrow=4, startcol=0, encoding='utf8')
print(sheetname2)
worksheet = writer2.sheets[ sheetname2 ]
for i in range(2, maxcol):
TITLE = df3.columns[i]
LOC_NUM = (i - 1) * 34 - 33
LOCATION = "C" +str(LOC_NUM)
print("TITLE:", TITLE, "GRAPH_LOCATION:", LOC_NUM,LOCATION, "LENGTH:", maxlen, "INDICE:", i)
chart = workbook.add_chart({'type': 'line'})
chart.set_size({'width': 1200, 'height': 640})
chart.add_series({"values" : [ sheetname1 , 7, i ,maxlen, i ],"name" : TITLE })
chart.set_x_axis({'name': 'Time (s)', 'position_axis': 'on_tick'})
chart.set_y_axis({'name': 'Test', 'major_gridlines': {'visible': False}})
# Turn off chart legend. It is on by default in Excel.
chart.set_legend({'position': 'none'})
worksheet.insert_chart( LOCATION , chart )
writer2.save()

number instead of date string when writing xlsx file

In a LibreOffice xlsx cell, the value is like this: 01/13/2016.
When I create a new xlsx file using python2, that 01/13/2016 is converted to 42461.
python code :
sheet1.write(row,col,sheet.cell(row,col).value)
tab_matching = 0
for sheet_name in book.sheet_names():
temp_sheet_name = sheet_name.lower()
if temp_sheet_name == tab_name:
tab_matching = 1
sheet = book.sheet_by_name(sheet_name)
temp_sheet_name = file_prefix+part_file_name+"_"+file_type+".xlsx"
if os.path.exists(detail_path):
xlsx_file_name = detail_path+"/"+temp_sheet_name
else:
xlsx_file_name = dirname+"/"+temp_sheet_name
new_book = xlsxwriter.Workbook(xlsx_file_name)
sheet1 = new_book.add_worksheet()
for row in range(sheet.nrows):
for col in range(sheet.ncols):
sheet1.write(row,col,sheet.cell(row,col).value)
new_book.close()
Could you tell me why this is happening?
42461 is the underlying date value for 04/01/2016. To show the date instead of the number, specify a date format:
format1 = new_book.add_format({'num_format': 'mm/dd/yyyy'})
sheet1.write('B1', 42461, format1) # 04/01/2016
sheet1.write('B2', 42382, format1) # 01/13/2016
Documentation is at http://xlsxwriter.readthedocs.io/working_with_dates_and_time.html.
You could do this.
>>> import datetime
>>> today=datetime.datetime.now()
>>> today
datetime.datetime(2016, 8, 27, 1, 7, 1, 909049)
>>> value=today.strftime("%d/%m/%Y")
'27/08/2016'
>>> sheet1.write(row,col,sheet.cell(row,col).value)

Categories