Merge two excel files with multiple sheet without losing formatting - python

I want to merge multiple excel files with multiple sheets respectively using python. I do not want to lose any formatting from the sheets. It should copy all sheets and just create a single excel file.
I'm able to merge only the first sheet and also all formatting is lost.
This is my code:
import os
import os.path
import xlrd
import xlsxwriter
# Merge the FIRST sheet of every workbook found under a folder into one
# worksheet, values only (xlrd/xlsxwriter do not carry formatting across).
file_name = input("merge")
# NOTE(review): merged_file_name is computed but never used; the output file
# name is hard-coded to 'm.xls' below.
merged_file_name = file_name + ".xls"
dest_book = xlsxwriter.Workbook('m.xls')
dest_sheet_1 = dest_book.add_worksheet()
dest_row = 1             # next free data row; row 0 is reserved for the header
header_written = False   # the header row is copied from the first workbook only

# The prompt shows the default folder; pressing Enter keeps that default.
path = input("C:\\test") or "C:\\test"
out = os.path.isdir(path)
print(out)
print("File path: " + path)

for root, _dirs, files in os.walk(path):
    for xlsfile in files:
        print("File in mentioned folder is: " + xlsfile)
        temp_book = xlrd.open_workbook(os.path.join(root, xlsfile))
        temp_sheet = temp_book.sheet_by_index(0)

        if not header_written:
            for col_index in range(temp_sheet.ncols):
                value = temp_sheet.cell_value(0, col_index)
                dest_sheet_1.write(0, col_index, value)
            header_written = True

        # Row 0 of every source sheet is treated as a header and skipped.
        for row_index in range(1, temp_sheet.nrows):
            for col_index in range(temp_sheet.ncols):
                value = temp_sheet.cell_value(row_index, col_index)
                dest_sheet_1.write(dest_row, col_index, value)
            dest_row += 1

dest_book.close()

# Re-open the result to report its size.
book = xlrd.open_workbook("m.xls")
sheet = book.sheet_by_index(0)
print(f"number of rows in destination file are: {sheet.nrows}")
print(f"number of columns in destination file are: {sheet.ncols}")

Since you require Excel specific needs like formatting, consider directly interfacing to the Excel object library with a COM interface. Of course this assumes you have Excel installed on machine. For Windows, Python can run COM with the win32com library and this connects beyond Excel but to most Windows apps and objects including Notepad, Paint, even ADODB.
Essentially, this mirrors VBA (which uses a similar interface to the Excel object library) using Workbooks.Add, Sheets.Add, Range.Copy, and other methods. Other APIs such as xlrd and xlsxwriter do not use Excel's own methods, which is why you lose formatting and even graphics, but not data.
import os
import win32com.client as win32
# Copy every worksheet of every .xls/.xlsx file in a folder into one new
# workbook by driving Excel itself over COM, so formatting is preserved.
path = input("C:\\test")
file_name = input("merge")
# NOTE(review): merged_file_name is never used below; the merged workbook is
# always saved as 'MasterMerge.xlsx'.
merged_file_name = file_name + ".xlsx"

# Pre-declare the COM handles so the cleanup code can test them even when
# start-up fails part-way through.
xlapp = None
new_wb = None
xlwb = None
merged_ok = False

try:
    # INITIALIZE EXCEL COM APP
    xlapp = win32.gencache.EnsureDispatch('Excel.Application')

    # ASSIGN CONSTANTS
    xlWorkbookDefault = 51

    # CREATE NEW WORKBOOK (PROMPTS IF EXISTS)
    new_wb = xlapp.Workbooks.Add()
    new_wb.SaveAs(Filename='MasterMerge.xlsx', FileFormat=xlWorkbookDefault)

    # Park the sheets Excel created by default under placeholder names.
    # Otherwise a source sheet called "Sheet1" cannot keep its name
    # ("That name is already taken"), and deleting 'Sheet1' by name at the
    # end would depend on the Excel UI language.
    used_names = set()
    placeholders = []
    for idx in range(1, new_wb.Worksheets.Count + 1):
        default_sh = new_wb.Worksheets(idx)
        placeholders.append((default_sh, default_sh.Name))
        default_sh.Name = f"__merge_tmp_{idx}__"
        used_names.add(default_sh.Name.lower())

    # LOOP THROUGH WORKBOOKS ('~$...' files are Excel's lock files)
    xl_files = [f for f in os.listdir(path)
                if f.endswith(('.xls', '.xlsx')) and not f.startswith('~$')]

    for wb in xl_files:
        # Excel resolves relative paths against its own working directory,
        # so always hand it an absolute path.
        xlwb = xlapp.Workbooks.Open(os.path.abspath(os.path.join(path, wb)))

        # LOOP THROUGH EVERY WORKSHEET, COPYING TO NEW WORKSHEET
        for xlsh in xlwb.Worksheets:
            # Sheet names are case-insensitive and limited to 31 characters;
            # add " (2)", " (3)", ... when a name was already used.
            base_name = xlsh.Name
            sheet_name = base_name
            counter = 1
            while sheet_name.lower() in used_names:
                counter += 1
                suffix = f" ({counter})"
                sheet_name = base_name[:31 - len(suffix)] + suffix
            used_names.add(sheet_name.lower())

            new_sh = new_wb.Worksheets.Add()
            new_sh.Name = sheet_name
            new_wb.Save()
            new_sh.Move(After=new_wb.Worksheets(new_wb.Worksheets.Count))
            xlsh.Cells.Copy(new_sh.Cells)
            new_sh = None

        xlwb.Close(False)
        xlwb = None

    # REMOVING DEFAULT SHEET(S) AND LAUNCHING TO SCREEN
    if new_wb.Worksheets.Count > len(placeholders):
        xlapp.DisplayAlerts = False   # no "delete sheet?" confirmation
        try:
            for default_sh, _ in placeholders:
                default_sh.Delete()
        finally:
            xlapp.DisplayAlerts = True
    else:
        # Nothing was copied: a workbook must keep at least one sheet, so
        # give the default sheets their original names back instead.
        for default_sh, original_name in placeholders:
            default_sh.Name = original_name

    new_wb.Save()
    xlapp.Visible = True
    merged_ok = True

except Exception as e:
    print(e)

finally:
    # On failure Excel is still hidden: close what this script opened and
    # quit, unless other workbooks are open in the same Excel instance.
    if not merged_ok and xlapp is not None:
        for book in (xlwb, new_wb):
            if book is None:
                continue
            try:
                book.Close(False)
            except Exception as close_error:
                print(close_error)
        if xlapp.Workbooks.Count == 0:
            xlapp.Quit()

    # RELEASE RESOURCES
    xlsh = None
    new_sh = None
    xlwb = None
    new_wb = None
    xlapp = None

Regarding the error faced by Sachin Ingle:
(-2147352567, 'Exception occurred.', (0, 'Microsoft Excel', 'That name is already taken. Try a different one.', 'xlmain11.chm', 0, -2146827284), None) facing this error
It's probably because you have already created a file with the same name. Try creating it with a different name.
The answer by Parfait on
new_wb.SaveAs(Filename='MasterMerge.xlsx', FileFormat=xlWorkbookDefault)
will create a file named "MasterMerge.xlsx", and you have probably created that file already.
By the way, you can add xlapp.Quit() to the finally: block to solve the file-in-use problem.
I made some changes to Parfait's answer (thanks, mate):
def _unique_sheet_name(name, taken):
    """Return *name*, or a ``name (n)`` variant, that is not yet in *taken*.

    Excel compares sheet names case-insensitively and limits them to 31
    characters, so *taken* holds lower-cased names and the base name is
    truncated to make room for the suffix. The chosen name is added to
    *taken* before returning.
    """
    candidate = name
    counter = 1
    while candidate.lower() in taken:
        counter += 1
        suffix = f" ({counter})"
        candidate = name[:31 - len(suffix)] + suffix
    taken.add(candidate.lower())
    return candidate


def merge_excel_files(filepath_list, filename, delete_original_files=False):
    """Copy every worksheet of the given workbooks into one new workbook.

    Excel is driven over COM (pywin32), so cell formatting is preserved.

    Args:
        filepath_list: paths of the workbooks to merge, in order.
        filename: name/path of the merged workbook passed to ``SaveAs``
            (Excel prompts if it already exists).
        delete_original_files: when true, remove the source files after a
            *successful* merge. Nothing is deleted if the merge failed.

    Errors raised while talking to Excel are printed, not propagated.
    An ``OSError`` other than "file not found" raised while deleting an
    original file is re-raised.
    """
    import errno
    import os
    import win32com.client as win32

    xlWorkbookDefault = 51

    # Pre-declared so the cleanup below works even if Excel never started.
    xlapp = None
    new_wb = None
    xlwb = None
    merged_ok = False

    try:
        # INITIALIZE EXCEL COM APP
        xlapp = win32.gencache.EnsureDispatch('Excel.Application')

        # CREATE NEW WORKBOOK (PROMPTS IF EXISTS)
        new_wb = xlapp.Workbooks.Add()
        new_wb.SaveAs(Filename=filename, FileFormat=xlWorkbookDefault)

        # Rename the default sheet(s) to placeholders so a source sheet
        # called "Sheet1" can keep its name; they are deleted by reference
        # later instead of by the locale-dependent name 'Sheet1'.
        taken = set()
        placeholders = []
        for idx in range(1, new_wb.Worksheets.Count + 1):
            default_sh = new_wb.Worksheets(idx)
            placeholders.append((default_sh, default_sh.Name))
            default_sh.Name = f"__merge_tmp_{idx}__"
            taken.add(default_sh.Name.lower())

        for wb in filepath_list:
            # Excel resolves relative paths against its own working
            # directory, so always hand it an absolute path.
            xlwb = xlapp.Workbooks.Open(os.path.abspath(wb))

            # LOOP THROUGH EVERY WORKSHEET, COPYING TO NEW WORKSHEET
            for xlsh in xlwb.Worksheets:
                new_sh = new_wb.Worksheets.Add()
                new_sh.Name = _unique_sheet_name(xlsh.Name, taken)
                new_wb.Save()
                new_sh.Move(After=new_wb.Worksheets(new_wb.Worksheets.Count))
                xlsh.Cells.Copy(new_sh.Cells)

            xlwb.Close(False)
            xlwb = None

        # REMOVING DEFAULT SHEET(S)
        if new_wb.Worksheets.Count > len(placeholders):
            xlapp.DisplayAlerts = False   # no "delete sheet?" confirmation
            try:
                for default_sh, _ in placeholders:
                    default_sh.Delete()
            finally:
                xlapp.DisplayAlerts = True
        else:
            # Nothing was copied; a workbook must keep at least one sheet.
            for default_sh, original_name in placeholders:
                default_sh.Name = original_name

        new_wb.Save()
        # xlapp.Visible = True
        merged_ok = True

    except Exception as e:
        print(e)

    finally:
        if xlapp is not None:
            if not merged_ok:
                # Discard half-finished work so Quit() cannot block on a
                # "save changes?" prompt in the hidden Excel window.
                for book in (xlwb, new_wb):
                    if book is None:
                        continue
                    try:
                        book.Close(False)
                    except Exception as close_error:
                        print(close_error)
            # Close Excel since we are done writing
            xlapp.Quit()
        # RELEASE RESOURCES
        xlsh = None
        new_sh = None
        xlwb = None
        new_wb = None
        xlapp = None

    # Delete the initial files, but never after a failed merge, otherwise
    # the data would be lost.
    if delete_original_files:
        if not merged_ok:
            print("Merge failed; original files were not deleted.")
            return
        for count, x in enumerate(filepath_list, start=1):
            print(f"Deleting the {count}/{len(filepath_list)} original file(s)...")
            try:
                os.remove(x)
            except OSError as e:
                # No such file or directory
                if e.errno != errno.ENOENT:
                    raise
            else:
                # If there's no exception
                print(f"Deleted {x}")
## Merge Excel files into one workbook with keeping the sheets and styling/formatting
# => https://stackoverflow.com/questions/51986517/merge-two-excel-files-with-multiple-sheet-without-losing-formatting
# => https://stackoverflow.com/questions/44593705/how-to-copy-over-an-excel-sheet-to-another-workbook-in-python/44596301#44596301 [openpyxl (Can't keep formatting), pywin32, xlwings]
# => https://stackoverflow.com/questions/56687602/copy-excel-sheet-from-one-worksheet-to-another-in-python/56688138#56688138 [xlwings]
## Solve file in use problem with pywin32 solution from questions/51986517
# => https://stackoverflow.com/questions/6337595/python-win32-com-closing-excel-workbook/6338030
## Basic Python: Pythonic way to delete a files, running code if try statements were successful
# => https://stackoverflow.com/questions/10840533/most-pythonic-way-to-delete-a-file-which-may-not-exist
# => https://stackoverflow.com/questions/2792568/running-code-if-try-statements-were-successful-in-python
## Research on openpyxl copy_worksheet(); Conclusion: it can only copy and paste sheet within same workbook. =(
# => https://stackoverflow.com/questions/44593705/how-to-copy-over-an-excel-sheet-to-another-workbook-in-python/44596301
# => https://openpyxl.readthedocs.io/en/latest/tutorial.html?highlight=copy_worksheet#manipulating-a-workbook-in-memory

Related

Why am I getting 'subscript out of range' error for my macro?

Previously, I was getting a win32 error 'open method of excel workbooks failed'. But now, I'm getting an error in the excel macro I'm trying to run via python, 'subscript out of range'.
Screenshot of error
When I debug it, it outlines this line of code:
Sheets("2 RawData").Select
I know that the error is supposed to mean that there isn't a sheet with that name, but when I checked, it was there:
All sheets in excel file
And this is a screenshot of the errors from python:
Python errors
This is the code I'm trying to run to parse an excel document via macro:
if os.path.exists(self.excel_parser_location):
# print "Opening Telematics_Messages_Parser.xlsm in Excel"
xl = client.Dispatch("Excel.Application")
xl.Application.visible = False
wb = xl.Workbooks.Open(os.path.abspath(self.excel_parser_location), ReadOnly=True)
xl.Application.Run('DoThisFirst')
xl.DisplayAlerts = False
wb.DoNotPromptForConvert = True
wb.CheckCompatibility = False
# xl.Application.Run('SheetKiller')
xl.Application.Run('CleanUp')
xl.DisplayAlerts = False
wb.DoNotPromptForConvert = True
wb.CheckCompatibility = False
# If the file already exists
if os.path.exists(save_path):
# Remove the older save
os.remove(save_path)
wb.SaveAs(save_path)
# print "Saving Parsed_Messages.xlsm"
wb.Close(True)
del xl
Edit: I called this method before the method to run the macros:
if os.path.exists(csv_path):
#data = pd.read_csv(csv_path, error_bad_lines=False)
data = pd.read_csv(csv_path, on_bad_lines='skip')
book = openpyxl.load_workbook(self.excel_parser_location,keep_vba=True)
writer = pd.ExcelWriter(self.excel_parser_location)
writer.book = book
data.to_excel(writer, sheet_name='2 RawData', index=False)
# print 'Writing new data'
book.remove(book['2 RawData'])
# print 'Removing blank sheet'
book_sheet = book['2 RawData1']
book_sheet.title = '2 RawData'
# print 'Renaming sheet'
writer.save()
writer.close()
# print 'Saving Telematics_Messages_Parser.xlsm'
self.run_parsing_macro(sn)
os.remove(csv_path)
return True
It's supposed to copy data to the excel sheet from a downloaded csv file.

Python: Is there way how to write in to Excel Cell and export PDF in 1 loop

I have a sticky situation with my project. I am trying to update an Excel sheet and export it to PDF in one loop.
At the moment I believe the best option for this is the openpyxl library.
The issue is that the two functions, writing and printing, open Excel in different ways, using:
book = openpyxl.load_workbook(excel_file) and
wb = excel.Workbooks.Open(excel_file).
The two functions interfere with each other and cause permission issues (at least that is what it looks like), and they also crash Jupyter :).
Is there an elegant way to do this, or do I really need two loops?
Error call example:
PermissionError: [Errno 13] Permission denied: 'C:/Users/admin/test_files/dir#$$.xlsx'
Code is looking like this:
def update_directory():
    """Write each sub-folder name into the template workbook and export a PDF.

    For every directory below the folder chosen in the dialog, cell F4 of
    'Sheet1' in the template is set to the directory name with openpyxl,
    then the workbook is opened in Excel and saved as
    ``<directory>/ Dic_<name>.pdf``.
    """
    excel_file = r'C:/Users/admin/test_files/doo.xlsx'
    excel = client.DispatchEx("Excel.Application")
    excel.Visible = 0
    try:
        folder_selected = filedialog.askdirectory()
        os.chdir(folder_selected)
        a_pth = os.getcwd()   # fixed after chdir, so computed once
        for root, dirs, files in os.walk(".", topdown=False):
            for name in dirs:
                # Include `root` so nested directories resolve to their
                # real location, not to <cwd>/<name>.
                target_dir = os.path.normpath(os.path.join(a_pth, root, name))
                pdf_file = os.path.join(target_dir, " ") + "Dic_" + "%s.pdf" % name

                book = openpyxl.load_workbook(excel_file)
                sheet = book['Sheet1']
                sheet.cell(row=4, column=6).value = name
                book.save(excel_file)

                wb = excel.Workbooks.Open(excel_file)
                try:
                    ws = wb.Worksheets[1]
                    ws.SaveAs(pdf_file, FileFormat=57)
                finally:
                    # Close inside the loop and without saving. An open
                    # workbook keeps the file locked, so the next
                    # book.save() fails with PermissionError, and a plain
                    # Close() shows a "save changes?" prompt.
                    wb.Close(False)
    finally:
        # The Application object has no Exit(); Quit() ends the instance.
        excel.Quit()
Having an entry
wb.application.displayalerts = False
Inserted just before the
wb.Close()
line seems to have worked for me, so the code snippet would resemble
book = openpyxl.load_workbook(excel_file)
sheet= book['Sheet1']
sheet.cell(row=4, column=6).value = name
book.save(excel_file)
wb = excel.Workbooks.Open(excel_file)
ws = wb.Worksheets[1]
ws.SaveAs(pdf_file, FileFormat=57)
wb.application.displayalerts = False #This stops the popup asking for a save
wb.Close() # <- need to be part of loop (comment from Amiga500). File save
# prompt from Excell present.
Note wb.Close() is at same indentation as the rest of inner for loop.

How to create a pivot table in Excel with python win32com

Given an existing Excel file, with data in a long format
Automate creating the following pivot table in Excel with the Python win32com module
Following is code to setup test.xlsx with data and connect to create a Excel com object
Imports
import win32com.client as win32
from pathlib import Path
import sys
import pandas as pd
import numpy as np
import random
from datetime import datetime
win32c = win32.constants
Function to create test.xlsx
This function is only to provide test data and a file
def create_test_excel_file(f_path: Path, f_name: str, sheet_name: str):
    """Write a reproducible 1000-row sample workbook to ``f_path / f_name``.

    Both random generators are seeded with 365, so every run produces the
    same rows. Columns: date, expense, products, price.
    """
    target = f_path / f_name

    random.seed(365)
    np.random.seed(365)

    row_count = 1000

    def pick(options):
        """Return `row_count` random draws from *options*."""
        return [random.choice(options) for _ in range(row_count)]

    # 31 consecutive dates starting 2020-07-01
    calendar = pd.bdate_range(datetime(2020, 7, 1), freq='1d', periods=31).tolist()

    # Column order matters: the draws consume the seeded `random` stream in
    # this sequence.
    frame = pd.DataFrame({
        'date': pick(calendar),
        'expense': pick(['business', 'personal']),
        'products': pick(['book', 'ribeye', 'coffee', 'salmon', 'alcohol', 'pie']),
        'price': np.random.normal(15, 5, size=(1, row_count))[0],
    })

    frame.to_excel(target, index=False, sheet_name=sheet_name, float_format='%.2f')
Function to create Excel com object
def run_excel(f_path: Path, f_name: str, sheet_name: str):
    """Open ``f_path / f_name`` in Excel and select the data worksheet.

    Exits the process with status 1 when Excel reports the "invalid
    filename or location" error; any other COM error is re-raised.
    """
    # com_error is what pywin32 raises for failed COM calls; imported here
    # so the `except` clause below does not hit a NameError.
    from pywintypes import com_error

    filename = f_path / f_name

    # create excel object
    excel = win32.gencache.EnsureDispatch('Excel.Application')

    # excel can be visible or not
    excel.Visible = True  # False

    # try except for file / path
    try:
        # COM expects a plain string path, not a Path object
        wb = excel.Workbooks.Open(str(filename))
    except com_error as e:
        # excepinfo can be None for errors raised outside Excel itself
        if e.excepinfo and e.excepinfo[5] == -2146827284:
            print(f'Failed to open spreadsheet. Invalid filename or location: {filename}')
        else:
            raise
        sys.exit(1)

    # set worksheet (use the caller's sheet name instead of a literal)
    ws1 = wb.Sheets(sheet_name)

    # wb.Close(True)
    # excel.Quit()
Main
def main():
    """Create the sample workbook in the working directory and open it in Excel."""
    # where the workbook lives; swap in e.g. Path(r'c:\...\Documents') for
    # a file located somewhere else
    f_path = Path.cwd()

    # workbook file name
    f_name = 'test.xlsx'

    # worksheet holding the data -- update with the sheet name from your file
    sheet_name = 'data'

    # function calls
    location = (f_path, f_name, sheet_name)
    create_test_excel_file(*location)  # remove when running your own file
    run_excel(*location)
A helpful way to figure out the proper Excel methods to use is to record a step-by-step macro in Excel while creating a pivot table in the form you want.
This is useful for creating a pivot table that has to be run on a routine basis in a file with existing data.
Uses the imports and methods from the question
To modify this code for a new data file
Update def main
sheet_name
f_path
f_name
Update def run_excel
ws1
ws2_name
pt_name
pt_rows
pt_cols
pt_filters
pt_fields
Call main() to run code
pivot_table function
def pivot_table(wb: object, ws1: object, pt_ws: object, ws_name: str, pt_name: str, pt_rows: list, pt_cols: list, pt_filters: list, pt_fields: list):
    """
    Build a pivot table named pt_name on worksheet pt_ws from ws1's used range.

    wb = workbook1 reference
    ws1 = worksheet1 (source data)
    pt_ws = pivot table worksheet object
    ws_name = pivot table worksheet name
    pt_name = name given to pivot table
    pt_rows, pt_cols, pt_filters: field names used as rows, columns and filters
    pt_fields: one [field name, caption, function, number format] list per value
    """
    # first row of the table: leave one row per filter plus a gap above it
    pt_loc = len(pt_filters) + 2
    destination = f'{ws_name}!R{pt_loc}C1'

    # cache the source data, then create the pivot table from that cache
    cache = wb.PivotCaches().Create(SourceType=win32c.xlDatabase, SourceData=ws1.UsedRange)
    cache.CreatePivotTable(TableDestination=destination, TableName=pt_name)

    # activate the pivot table sheet and its anchor cell
    pt_ws.Select()
    pt_ws.Cells(pt_loc, 1).Select()

    table = pt_ws.PivotTables(pt_name)

    # place filters, rows and columns, each in the order given
    layout = (
        (pt_filters, win32c.xlPageField),
        (pt_rows, win32c.xlRowField),
        (pt_cols, win32c.xlColumnField),
    )
    for names, orientation in layout:
        for position, field_name in enumerate(names, start=1):
            table.PivotFields(field_name).Orientation = orientation
            table.PivotFields(field_name).Position = position

    # add the value fields
    for field in pt_fields:
        data_field = table.AddDataField(table.PivotFields(field[0]), field[1], field[2])
        data_field.NumberFormat = field[3]

    # visibility switches
    table.ShowValuesRow = True
    table.ColumnGrand = True
Update run_excel to call pivot_table
def run_excel(f_path: Path, f_name: str, sheet_name: str):
    """Open ``f_path / f_name`` in Excel and add a pivot table sheet to it.

    Exits the process with status 1 when Excel reports the "invalid
    filename or location" error; any other COM error is re-raised.
    """
    # com_error is what pywin32 raises for failed COM calls; imported here
    # so the `except` clause below does not hit a NameError.
    from pywintypes import com_error

    filename = f_path / f_name

    # create excel object
    excel = win32.gencache.EnsureDispatch('Excel.Application')

    # excel can be visible or not
    excel.Visible = True  # False

    # try except for file / path
    try:
        # COM expects a plain string path, not a Path object
        wb = excel.Workbooks.Open(str(filename))
    except com_error as e:
        # excepinfo can be None for errors raised outside Excel itself
        if e.excepinfo and e.excepinfo[5] == -2146827284:
            print(f'Failed to open spreadsheet. Invalid filename or location: {filename}')
        else:
            raise
        sys.exit(1)

    # set worksheet (use the caller's sheet name instead of a literal)
    ws1 = wb.Sheets(sheet_name)

    # Setup and call pivot_table
    ws2_name = 'pivot_table'
    wb.Sheets.Add().Name = ws2_name
    ws2 = wb.Sheets(ws2_name)

    pt_name = 'example'
    pt_rows = ['expense']
    pt_cols = ['products']
    pt_filters = ['date']
    # [0]: field name  [1]: pivot table column name  [2]: calculation method  [3]: number format
    pt_fields = [['price', 'price: mean', win32c.xlAverage, '$#,##0.00'],
                 ['price', 'price: sum', win32c.xlSum, '$#,##0.00'],
                 ['price', 'price: count', win32c.xlCount, '0']]

    pivot_table(wb, ws1, ws2, ws2_name, pt_name, pt_rows, pt_cols, pt_filters, pt_fields)

    # wb.Close(True)
    # excel.Quit()
Resources
Jupyter Notebook: How to Create a Pivot Table in Excel with the Python win32com Module
Automate Excel with Python
Examples with Pivot Table
Using Python win32com to get list of Excel worksheets
Excel VBA reference
Workbook object (Excel)
Worksheet object (Excel)

Copy excel sheet from one worksheet to another in Python

All I want to do is copy a worksheet from an excel workbook to another excel workbook in Python.
I want to maintain all formatting (coloured cells, tables etc.)
I have a number of excel files and I want to copy the first sheet from all of them into one workbook. I also want to be able to update the main workbook if changes are made to any of the individual workbooks.
It's a code block that will run every few hours and update the master spreadsheet.
I've tried pandas, but it doesn't maintain formatting and tables.
I've tried openpyxl to no avail
I thought xlwings code below would work:
import xlwings as xw
wb = xw.Book('individual_files\\file1.xlsx')
sht = wb.sheets[0]
new_wb = xw.Book('Master Spreadsheet.xlsx')
new_wb.sheets["Sheet1"] = sht
But I just get the error:
----> 4 new_wb.sheets["Sheet1"] = sht
AttributeError: __setitem__
"file1.xlsx" above is an example first excel file.
"Master Spreadsheet.xlsx" is my master spreadsheet with all individual files.
In the end I did this:
def copyExcelSheet(sheetName):
    """Copy the active sheet of workbook `item` into sheet `sheetName` of the master file.

    Values, column widths and cell styles are copied cell by cell, then the
    fixed merged ranges and the two tables are re-created and the master
    file is saved. `item` is read from the enclosing scope, and `sheetName`
    must already exist in "Master file.xlsx".
    """
    read_from = load_workbook(item)
    try:
        read_sheet = read_from.active
        write_to = load_workbook("Master file.xlsx")
        write_sheet = write_to[sheetName]

        sized_columns = set()   # columns whose width was already copied
        for row in read_sheet.rows:
            for cell in row:
                new_cell = write_sheet.cell(row=cell.row, column=cell.column,
                                            value=cell.value)

                # A width belongs to the column, so copy it once per column
                # instead of once per cell.
                if cell.column not in sized_columns:
                    letter = get_column_letter(cell.column)
                    write_sheet.column_dimensions[letter].width = read_sheet.column_dimensions[letter].width
                    sized_columns.add(cell.column)

                if cell.has_style:
                    new_cell.font = copy(cell.font)
                    new_cell.border = copy(cell.border)
                    new_cell.fill = copy(cell.fill)
                    new_cell.number_format = copy(cell.number_format)
                    new_cell.protection = copy(cell.protection)
                    new_cell.alignment = copy(cell.alignment)

        # Layout specific to these report sheets: merged header cells and
        # the two data tables.
        write_sheet.merge_cells('C8:G8')
        write_sheet.merge_cells('K8:P8')
        write_sheet.merge_cells('R8:S8')

        write_sheet.add_table(newTable("table1", "C10:G76", "TableStyleLight8"))
        write_sheet.add_table(newTable("table2", "K10:P59", "TableStyleLight9"))

        write_to.save('Master file.xlsx')
    finally:
        # The original `read_from.close` lacked parentheses and never ran.
        read_from.close()
With this to check if the sheet already exists:
#checks if sheet already exists and updates sheet if it does.
def checkExists(sheetName):
    """Ensure the master file holds a fresh, empty sheet called `sheetName`.

    An existing sheet of that name is removed first; a new one is then
    created and the master workbook is saved.
    """
    master = load_workbook("Master file.xlsx")  # open the master workbook

    if sheetName not in master.sheetnames:
        print("No sheet ", sheetName, " found, will create sheet")
    else:
        print("Removing sheet", sheetName)
        del master[sheetName]

    master.create_sheet(sheetName)
    master.save('Master file.xlsx')
with this to create new tables:
def newTable(tableName,ref,styleName):
    """Return a Table over `ref` styled with `styleName`.

    Fifteen random alphanumeric characters are appended to `tableName` to
    form the table's display name.
    """
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase
    suffix = ''.join(random.choices(alphabet, k=15))

    table = Table(displayName=tableName + suffix, ref=ref)
    # striped rows and banded columns, no first/last column emphasis
    table.tableStyleInfo = TableStyleInfo(
        name=styleName,
        showFirstColumn=False,
        showLastColumn=False,
        showRowStripes=True,
        showColumnStripes=True,
    )
    return table
Adapted from this solution, but note that in my (limited) testing (and as observed in the other Q&A), this does not support the After parameter of the Copy method, only Before. If you try to use After, it creates a new workbook instead.
import xlwings as xw
wb = xw.Book('individual_files\\file1.xlsx')
sht = wb.sheets[0]
new_wb = xw.Book('Master Spreadsheet.xlsx')
# copy this sheet into the new_wb *before* Sheet1:
sht.api.Copy(Before=new_wb.sheets['Sheet1'].api)
# now, remove Sheet1 from new_wb
new_wb.sheets['Sheet1'].delete()
This can be done using pywin32 directly. The Before or After parameter needs to be provided (see the api docs), and the parameter needs to be a worksheet <object>, not simply a worksheet Name or index value. So, for example, to add it to the end of an existing workbook:
def copy_sheet_within_excel_file(excel_filename, sheet_name_or_number_to_copy):
    """Duplicate one sheet of a workbook, placing the copy after the last sheet.

    Returns the workbook's active sheet after the copy.
    NOTE(review): presumably Excel activates the new copy, so the active
    sheet is the copy -- confirm.
    """
    app = win32com_client.gencache.EnsureDispatch('Excel.Application')
    workbook = app.Workbooks.Open(excel_filename)

    source_sheet = workbook.Worksheets[sheet_name_or_number_to_copy]
    # After must be a worksheet object, not a name or an index
    last_sheet = workbook.Worksheets[workbook.Worksheets.Count]
    source_sheet.Copy(After=last_sheet)

    return workbook.ActiveSheet
As most of my code runs on end-user machines, I don't like to make assumptions whether Excel is open or not so my code determines if Excel is already open (see GetActiveObject), as in:
try:
excel_app = win32com_client.GetActiveObject('Excel.Application')
except com_error:
excel_app = win32com_client.gencache.EnsureDispatch('Excel.Application')
And then I also check to see if the workbook is already loaded (see Workbook.FullName). Iterate through the Application.Workbooks testing the FullName to see if the file is already open. If so, grab that wb as your wb handle.
You might find this helpful for digging around the available Excel APIs directly from pywin32:
def show_python_interface_modules():
    """Open the folder that holds pywin32's generated module for Excel."""
    excel_module = win32com_client.gencache.GetModuleForProgID('Excel.Application')
    module_dir = os.path.dirname(excel_module.__file__)
    os.startfile(module_dir)

From password-protected Excel file to pandas DataFrame

I can open a password-protected Excel file with this:
import sys
import win32com.client
# Open a password-protected workbook through Excel's COM interface.
# Usage: script.py <filename> <password>
xlApp = win32com.client.Dispatch("Excel.Application")
print("Excel library version:", xlApp.Version)
filename, password = sys.argv[1:3]
xlwb = xlApp.Workbooks.Open(filename, Password=password)
# xlwb = xlApp.Workbooks.Open(filename)
xlws = xlwb.Sheets(1)  # counts from 1, not from 0
print(xlws.Name)
print(xlws.Cells(1, 1))  # that's A1
I'm not sure though how to transfer the information to a pandas dataframe. Do I need to read cells one by one and all, or is there a convenient method for this to happen?
Simple solution
import io
import pandas as pd
import msoffcrypto
passwd = 'xyz'
decrypted_workbook = io.BytesIO()
with open(i, 'rb') as file:
office_file = msoffcrypto.OfficeFile(file)
office_file.load_key(password=passwd)
office_file.decrypt(decrypted_workbook)
df = pd.read_excel(decrypted_workbook, sheet_name='abc')
pip install --user msoffcrypto-tool
Exporting all sheets of each excel from directories and sub-directories to seperate csv files
from glob import glob
PATH = "Active Cons data"

# Scan all the Excel files in the directory and its sub-directories
excel_files = [y for x in os.walk(PATH) for y in glob(os.path.join(x[0], '*.xlsx'))]

for excel_path in excel_files:
    print(str(excel_path))

    # Decrypt the workbook into memory
    decrypted_workbook = io.BytesIO()
    with open(excel_path, 'rb') as file:
        office_file = msoffcrypto.OfficeFile(file)
        office_file.load_key(password=passwd)
        office_file.decrypt(decrypted_workbook)

    # sheet_name=None returns every sheet as {sheet name: DataFrame}, so
    # the workbook does not have to be parsed again for each sheet.
    sheets = pd.read_excel(decrypted_workbook, sheet_name=None)
    print(list(sheets))  # list of sheet names

    for sheet, frame in sheets.items():
        # NOTE(review): the CSV name depends only on the sheet name, so
        # equally named sheets from different workbooks overwrite each other.
        new_file = f"D:\\all_csv\\{sheet}.csv"
        frame.to_csv(new_file, index=False)
Assuming the starting cell is given as (StartRow, StartCol) and the ending cell is given as (EndRow, EndCol), I found the following worked for me:
# Get the content in the rectangular selection region
# content is a tuple of tuples
content = xlws.Range(xlws.Cells(StartRow, StartCol), xlws.Cells(EndRow, EndCol)).Value
# Transfer content to pandas dataframe
dataframe = pandas.DataFrame(list(content))
Note: Excel Cell B5 is given as row 5, col 2 in win32com. Also, we need list(...) to convert from tuple of tuples to list of tuples, since there is no pandas.DataFrame constructor for a tuple of tuples.
from David Hamann's site (all credits go to him)
https://davidhamann.de/2018/02/21/read-password-protected-excel-files-into-pandas-dataframe/
Use xlwings, opening the file will first launch the Excel application so you can enter the password.
import pandas as pd
import xlwings as xw
PATH = '/Users/me/Desktop/xlwings_sample.xlsx'
wb = xw.Book(PATH)
sheet = wb.sheets['sample']
df = sheet['A1:C4'].options(pd.DataFrame, index=False, header=True).value
df
Assuming that you can save the encrypted file back to disk using the win32com API (which I realize might defeat the purpose) you could then immediately call the top-level pandas function read_excel. You'll need to install some combination of xlrd (for Excel 2003), xlwt (also for 2003), and openpyxl (for Excel 2007) first though. Here is the documentation for reading in Excel files. Currently pandas does not provide support for using the win32com API to read Excel files. You're welcome to open up a GitHub issue if you'd like.
Based on the suggestion provided by @ikeoddy, this should put the pieces together:
How to open a password protected excel file using python?
# Import modules
import pandas as pd
import win32com.client
import os
import getpass
# Name file variables
file_path = r'your_file_path'
file_name = r'your_file_name.extension'
full_name = os.path.join(file_path, file_name)
# print(full_name)
Getting command-line password input in Python
# You are prompted to provide the password to open the file
xl_app = win32com.client.Dispatch('Excel.Application')
pwd = getpass.getpass('Enter file password: ')
Workbooks.Open Method (Excel)
xl_wb = xl_app.Workbooks.Open(full_name, False, True, None, pwd)
xl_app.Visible = False
xl_sh = xl_wb.Worksheets('your_sheet_name')
# Get last_row: walk down column 1 until the first empty cell
row_num = 0
cell_val = ''
while cell_val is not None:
    row_num += 1
    cell_val = xl_sh.Cells(row_num, 1).Value
    # print(row_num, '|', cell_val, type(cell_val))
last_row = row_num - 1
# print(last_row)

# Get last_column: walk along row 1 until the first empty cell
col_num = 0
cell_val = ''
while cell_val is not None:
    col_num += 1
    cell_val = xl_sh.Cells(1, col_num).Value
    # print(col_num, '|', cell_val, type(cell_val))
last_col = col_num - 1
# print(last_col)
ikeoddy's answer:
content = xl_sh.Range(xl_sh.Cells(1, 1), xl_sh.Cells(last_row, last_col)).Value
# list(content)
df = pd.DataFrame(list(content[1:]), columns=content[0])
df.head()
python win32 COM closing excel workbook
xl_wb.Close(False)
Adding to @Maurice's answer, to get all the cells in the sheet without having to specify the range:
wb = xw.Book(PATH, password='somestring')
sheet = wb.sheets[0] #get first sheet
#sheet.used_range.address returns string of used range
df = sheet[sheet.used_range.address].options(pd.DataFrame, index=False, header=True).value

Categories