number instead of date string when writing xlsx file - python

In a LibreOffice xlsx cell, the value is like this: 01/13/2016.
When I create a new xlsx file using Python 2, that 01/13/2016 is converted to 42461.
python code :
sheet1.write(row,col,sheet.cell(row,col).value)
tab_matching = 0
for sheet_name in book.sheet_names():
temp_sheet_name = sheet_name.lower()
if temp_sheet_name == tab_name:
tab_matching = 1
sheet = book.sheet_by_name(sheet_name)
temp_sheet_name = file_prefix+part_file_name+"_"+file_type+".xlsx"
if os.path.exists(detail_path):
xlsx_file_name = detail_path+"/"+temp_sheet_name
else:
xlsx_file_name = dirname+"/"+temp_sheet_name
new_book = xlsxwriter.Workbook(xlsx_file_name)
sheet1 = new_book.add_worksheet()
for row in range(sheet.nrows):
for col in range(sheet.ncols):
sheet1.write(row,col,sheet.cell(row,col).value)
new_book.close()
Could you tell me why this is happening?

42461 is the underlying date value for 04/01/2016. To show the date instead of the number, specify a date format:
format1 = new_book.add_format({'num_format': 'mm/dd/yyyy'})
sheet1.write('B1', 42461, format1) # 04/01/2016
sheet1.write('B2', 42382, format1) # 01/13/2016
Documentation is at http://xlsxwriter.readthedocs.io/working_with_dates_and_time.html.

You could do this.
>>> import datetime
>>> today=datetime.datetime.now()
>>> today
datetime.datetime(2016, 8, 27, 1, 7, 1, 909049)
>>> value=today.strftime("%d/%m/%Y")
'27/08/2016'
>>> sheet1.write(row,col,sheet.cell(row,col).value)

Related

Python xlsxwriter. Header_format sub-property of columns parameter not working as expected

I need to set a table header in Excel with the next date format: 'mmm-yy'.
Formatting I've set:
title_date_format = workbook.add_format({
'text_wrap': True,
'font_size': 11,
'num_format': 'mmm-yy'
})
Column settings:
column_settings = []
index = 0
for column in df.columns:
if index < 3:
dct = {}
dct['header'] = column
column_settings.append(dct)
else:
dct = {}
formula = '=[#[Value]]*[#Qty]'
dct['header'] = column
dct['formula'] = formula
dct['header_format'] = title_date_format
column_settings.append(dct)
index += 1
Table creation:
# Create a table
worksheet.add_table(0, 0, max_row + 2, max_col - 1, {
'columns': column_settings
})
The problem is that only the 'text_wrap' and 'font_size' sub-properties work fine. The column header, which is a date, stays in '1/24/2022' format instead of 'Jan-22', so 'num_format': 'mmm-yy' doesn't apply.
Full example:
import datetime as dt
import pandas as pd
import numpy as np
import xlsxwriter
initial_data = {
'Category': ['catA', 'catB', 'catC', 'catC'],
'Item': ['item1', 'item2', 'item3', 'item4']
}
df = pd.DataFrame(initial_data)
# Add columns with month-year
for year in range(2,4):
if year == 2:
for month in range(11,13):
date_str = str(month) + '/1/202' + str(year)
df[date_str] = ''
else:
for month in range(1,4):
date_str = str(month) + '/1/202' + str(year)
df[date_str] = ''
writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1', header=False, startrow=1, index=False)
workbook = writer.book
worksheet = writer.sheets['Sheet1']
title_date_format = workbook.add_format({
'text_wrap': True,
'font_name': 'Calibri',
'font_size': 10,
'num_format': 'mmm-yy'
})
column_settings = []
for column in df.columns:
dct = {}
dct['header'] = column
dct['header_format'] = title_date_format
column_settings.append(dct)
(max_row, max_col) = df.shape
worksheet.add_table(0, 0, max_row, max_col - 1, {
'columns': column_settings,
'style': 'Table Style Light 9'
})
writer.save()
Any ideas on how to make it work?
Thank you
The issue is that the column headers are strings and the date number format only applies to numbers. So the solution would be to turn the column headers into datetime numbers so that the format can be applied. However, as far as I can see Table column headers in Excel need to be strings, so that isn't an option.
So as a workaround you could format the header strings that you are currently using into the format that you want:
# ...
from datetime import datetime
# ...
# Build the header labels directly in the 'mmm-yy' style (e.g. 'Nov-22'):
# table headers are strings, so 'num_format' is never applied to them.
for year in range(2, 4):
    if year == 2:
        # Nov and Dec of 2022
        for month in range(11, 13):
            date_str = datetime(2022, month, 1).strftime("%b-%y")
            df[date_str] = ''
    else:
        # year == 3 maps to 2023 in the question's '202' + str(year) scheme
        for month in range(1, 4):
            date_str = datetime(2023, month, 1).strftime("%b-%y")
            df[date_str] = ''
Output:

Coloring Columns with Pandas

I am trying to color columns in group_1, but I am getting this issue, any solution?
group_1 = [award, 'mean', 'max']
def change_color(workbook_param, color_hex_code):
    """Return a header cell format filled with the given color.

    Args:
        workbook_param: Workbook-like object exposing ``add_format``; the
            format is created through it.
        color_hex_code: Fill color, passed through unchanged as the
            ``fg_color`` property (e.g. ``'#e3fc03'``).

    Returns:
        Whatever ``workbook_param.add_format`` returns for a bold,
        text-wrapped, top-aligned format with a border of 1 and the
        requested fill color.
    """
    return workbook_param.add_format({
        'bold': True,
        'text_wrap': True,
        'valign': 'top',
        'fg_color': color_hex_code,
        'border': 1,
    })
group_1_data = describe_df[columns= group_1].copy()
group_1_ind = [describe_df.columns.to_list().index(patient) for patient in group_1]
group_1_ind # [0, 3, 4]
group_1_data = describe_df[columns= group_1].copy()
^
SyntaxError: invalid syntax
the group_1_data gets inputted into here below
# Combining dataFrames in excel
excelName = input("Label your excel file: ")
xlsxAUTO = '.xlsx'
excelAutoNamed = excelName + xlsxAUTO
# Create a Pandas Excel writer.
writer = pd.ExcelWriter(excelAutoNamed,engine='xlsxwriter')
# Convert the dataframe to an XlsxWriter Excel object.
describe_df.to_excel(writer,sheet_name='Validation',startrow=0 , startcol=0)
df.to_excel(writer,sheet_name='Validation',startrow=len(df.columns), startcol=0)
# Get the xlsxwriter workbook and worksheet objects. You can change sheet name as well.
workbook = writer.book
worksheet = writer.sheets['Validation']
# Do the modifications for desired columns.
for col_num, value in zip(group_1_ind, group_1_data.columns.values):
worksheet.write(0, col_num + 1, value, change_color(workbook, '#e3fc03'))
writer.save()
I have also tried group_1_data = describe_df[group_1].copy()

Pivot Table with python win32com

I am trying to build an Excel file with Python. I use win32com because it seems to be the best way to create a pivot table (pandas is not a good solution for this), but I have run into some trouble.
First, when I try to create a pivot table on a sheet that already contains a pivot table, I get this error message:
Traceback (most recent call last):
File "<ipython-input-55-62ca0c0e21ec>", line 1, in <module>
PivotTable = PivotCache.CreatePivotTable(TableDestination=PivotTargetRange, TableName=PivotTableName, DefaultVersion=win32c.xlPivotTableVersion14)
File "C:\Users\LPLOUV~1\AppData\Local\Temp\gen_py\3.7\00020813-0000-0000-C000-000000000046x0x1x9\PivotCache.py", line 45, in CreatePivotTable
, TableName, ReadData, DefaultVersion)
com_error: (-2147352567, 'Une exception s’est produite.', (0, None, None, None, 0, -2146827284), None)
Second, I need to rebuild some pivot tables that already exist, so I need to delete some pivot tables from the sheet, but I don't know how to do that.
So far I am using this code:
import os
import win32com.client
import time
time_first = time.time()
os.chdir(r'C:\Users\msmith\Desktop')
Excel = win32com.client.gencache.EnsureDispatch('Excel.Application') # Excel = win32com.client.Dispatch('Excel.Application')
win32c = win32com.client.constants
wb = Excel.Workbooks.Open('excel_file.xlsx')
Sheet1 = wb.Worksheets("Sheet1")
Sheet2 = wb.Worksheets("Sheet2")
xldata = Sheet1.UsedRange.Value
data_pivot = xldata[:24291]
cl1 = Sheet1.Cells(1,1)
cl2 = Sheet1.Cells(1+len(data_pivot)-1,1+len(data_pivot[0])-1)
PivotSourceRange = Sheet1.Range(cl1,cl2)
cl3=Sheet2.Cells(200,200)
PivotTargetRange= Sheet2.Range(cl3,cl3)
PivotTableName = 'ReportPivotTable'
PivotCache = wb.PivotCaches().Create(SourceType=win32c.xlDatabase, SourceData=PivotSourceRange, Version=win32c.xlPivotTableVersion14)
PivotTable = PivotCache.CreatePivotTable(TableDestination=PivotTargetRange, TableName=PivotTableName, DefaultVersion=win32c.xlPivotTableVersion14)
PivotTable.PivotFields('A').Orientation = win32c.xlRowField
PivotTable.PivotFields('A').Position = 1
PivotTable.PivotFields('B').Orientation = win32c.xlPageField
PivotTable.PivotFields('B').Position = 1
PivotTable.PivotFields('B').CurrentPage = 'b'
PivotTable.PivotFields('C').Orientation = win32c.xlPageField
PivotTable.PivotFields('C').Position = 2
PivotTable.PivotFields('C').CurrentPage = 5
PivotTable.PivotFields('D').Orientation = win32c.xlPageField
PivotTable.PivotFields('D').Position = 1
PivotTable.PivotFields('D').CurrentPage = 'd'
PivotTable.PivotFields('E').Orientation = win32c.xlPageField
PivotTable.PivotFields('E').Position = 3
PivotTable.PivotFields('E').CurrentPage = "(All)"
PivotTable.PivotFields('D').Orientation = win32c.xlColumnField
PivotTable.PivotFields('D').Position = 1
DataField = PivotTable.AddDataField(PivotTable.PivotFields('F'), Function = win32c.xlSum)
DataField = PivotTable.AddDataField(PivotTable.PivotFields('G'), Function = win32c.xlSum)
wb.SaveAs('excel_file.xlsx')
Excel.Application.Quit()
I think you have a blank field in your data. For example:
This will give you the error.
This will not.

Error while trying to get dictionary to pandas dataframe, python

I got "Pandas ValueError Arrays Must be All Same Length"
Before I start, I checked answers to similar problems and folks suggest to use something like:
DataFrame(dict([ (k,Series(v)) for k,v in d.iteritems() ]))
if you have only two values in dictionary or,
a = {'Links' : lines ,'Titles' : titles , 'Singers': finalsingers , 'Albums':finalalbums , 'Years' : years}
df = pd.DataFrame.from_dict(a, orient='index')
df.transpose()
But neither of them worked for me. My code goes through the files in a directory, captures each file's name and last-modified time, opens the file, passes it to a function called phash, and gets a value back. I think there could be a problem with the phash function; maybe it sometimes returns a null value.
So in my case data is something like this:
raw_data = {}
hash_11 = []
time_1 = []
file_name_1 = []
for file in date_file_list:
try:
#print(file[1])
y = file[1]
file_name = os.path.basename(y) # extract just the filename or #file_name = os.path.split(file[1])
file_name = file_name.split('_-_')[0]
file_name_1.append(file_name)
#print(file_name)
# convert date tuple to MM/DD/YYYY HH:MM:SS format
#file_date = time.strftime("%m/%d/%y %H:%M:%S", file[0])
time = file[0]
time_1.append(time)
img = Image.open(str(file[1]))
hash_1 = imagehash.dhash(img)
hash_11.append(hash_1)
#hash_1 = str(hash_1)
#data = [hash_1, time, file_name]
#hamming_list.append(data)
#print(file_name, hash_1, file_date)
data ={'hash_1': hash_11,'time': time_1, 'file_name': file_name_1}
raw_data.update(data)
except:
pass
df = pd.DataFrame(raw_data, columns = ['hash_1', 'time','file_name'])

xlrd original value of the cell

I'm reading an xls file using xlrd. The problem is that when xlrd reads a value like "12/09/2012", I get a result like "xldate:41252.0". When I use xlrd.xldate_as_tuple, I get this result:
(2016, 12, 10, 0, 0, 0)
My code:
curr_row = -1
while curr_row < num_rows:
curr_row += 1
row = worksheet.row(curr_row)
for x in xrange(num_cols):
field_type = worksheet.cell_type(curr_row, x)
if field_type == 3: # this is date
field_value = worksheet.cell_value(curr_row, x)
print worksheet.cell(curr_row, x).value
print xlrd.xldate_as_tuple(field_value, 1)
Result:
41252.0
(2016, 12, 10, 0, 0, 0)
Both results are wrong for me. How can I get the original cell value "12/09/2012" using xlrd?
According to the docstring, you should pass your workbook's datemode to xldate_as_tuple as a second parameter:
from datetime import datetime
import xlrd
book = xlrd.open_workbook("test.xls")
sheet = book.sheet_by_index(0)
a1 = sheet.cell_value(rowx=0, colx=0)
print a1 # prints 41252.0
print xlrd.xldate_as_tuple(a1, 1) # prints (2016, 12, 10, 0, 0, 0)
a1_tuple = xlrd.xldate_as_tuple(a1, book.datemode)
print a1_tuple # prints (2012, 12, 9, 0, 0, 0)
a1_datetime = datetime(*a1_tuple)
print a1_datetime.strftime("%m/%d/%Y") # prints 12/09/2012

Categories