How to create a pivot table in Excel with python win32com

How to create a pivot table in Excel with python win32com - python

Given an existing Excel file, with data in a long format
Automate creating the following pivot table in Excel with the Python win32com module
The following code sets up test.xlsx with data and creates an Excel COM object
Imports
import win32com.client as win32
from pathlib import Path
import sys
import pandas as pd
import numpy as np
import random
from datetime import datetime
win32c = win32.constants
Function to create test.xlsx
This function is only to provide test data and a file
def create_test_excel_file(f_path: Path, f_name: str, sheet_name: str):
    """
    Write a reproducible 1000-row sample workbook to f_path / f_name.

    f_path = folder the workbook is written to
    f_name = workbook file name
    sheet_name = worksheet that receives the data
    Columns written: date, expense, products, price (prices formatted '%.2f').
    """
    n_rows = 1000
    expense_kinds = ['business', 'personal']
    product_kinds = ['book', 'ribeye', 'coffee', 'salmon', 'alcohol', 'pie']

    # fixed seeds so every run produces the same workbook
    random.seed(365)
    np.random.seed(365)

    # pool of 31 dates starting at 2020-07-01
    day_pool = pd.bdate_range(datetime(2020, 7, 1), freq='1d', periods=31).tolist()

    # columns are drawn one after another, in this order, so the seeded
    # random streams are consumed in a fixed sequence
    date_col = [random.choice(day_pool) for _ in range(n_rows)]
    expense_col = [random.choice(expense_kinds) for _ in range(n_rows)]
    product_col = [random.choice(product_kinds) for _ in range(n_rows)]
    price_col = np.random.normal(15, 5, size=(1, n_rows))[0]

    frame = pd.DataFrame({
        'date': date_col,
        'expense': expense_col,
        'products': product_col,
        'price': price_col,
    })
    frame.to_excel(f_path / f_name, index=False, sheet_name=sheet_name, float_format='%.2f')
Function to create Excel com object
def run_excel(f_path: Path, f_name: str, sheet_name: str):
    """
    Open the workbook in Excel through COM and look up the data worksheet.

    f_path = folder that holds the workbook
    f_name = workbook file name
    sheet_name = worksheet that holds the source data
    Prints a message and exits with status 1 when opening fails with the
    error code checked below; any other COM error is re-raised.
    """
    # com_error lives in pywintypes (part of pywin32); without this import
    # the except clause below would itself fail with a NameError
    from pywintypes import com_error

    filename = f_path / f_name

    # create excel object
    excel = win32.gencache.EnsureDispatch('Excel.Application')

    # excel can be visible or not
    excel.Visible = True  # False

    # try except for file / path
    try:
        # hand COM a plain string rather than a pathlib.Path object
        wb = excel.Workbooks.Open(str(filename))
    except com_error as e:
        # excepinfo can be None, so check before indexing it
        if e.excepinfo is not None and e.excepinfo[5] == -2146827284:
            print(f'Failed to open spreadsheet. Invalid filename or location: {filename}')
            sys.exit(1)
        raise

    # set worksheet (uses the caller's sheet name instead of a hard-coded 'data')
    ws1 = wb.Sheets(sheet_name)

    # uncomment to save and close the workbook and quit Excel when finished
    # wb.Close(True)
    # excel.Quit()
Main
def main():
    """Create the sample workbook, then open it in Excel."""
    # folder that holds the workbook: the current working directory
    # f_path = Path(r'c:\...\Documents') # file located somewhere else
    f_path = Path.cwd()

    # workbook file name
    f_name = 'test.xlsx'

    # worksheet that holds the data; update with the sheet name from your file
    sheet_name = 'data'

    # remove the next call when running against your own file
    create_test_excel_file(f_path, f_name, sheet_name)

    run_excel(f_path, f_name, sheet_name)

A helpful way to figure out the proper Excel methods to use is to record a step-by-step macro in Excel while creating a pivot table in the form you want.
This is useful for creating a pivot table that has to be run on a routine basis in a file with existing data.
Uses the imports and methods from the question
To modify this code for a new data file
Update def main
sheet_name
f_path
f_name
Update def run_excel
ws1
ws2_name
pt_name
pt_rows
pt_cols
pt_filters
pt_fields
Call main() to run code
pivot_table function
def pivot_table(wb: object, ws1: object, pt_ws: object, ws_name: str, pt_name: str, pt_rows: list, pt_cols: list, pt_filters: list, pt_fields: list):
    """
    Create a pivot table named pt_name on pt_ws from the used range of ws1.

    wb = workbook1 reference
    ws1 = worksheet1 (its UsedRange is the pivot source data)
    pt_ws = pivot table worksheet object
    ws_name = pivot table worksheet name
    pt_name = name given to pivot table
    pt_rows, pt_cols, pt_filters: field names placed on rows, columns and filters
    pt_fields: one [field name, caption, calculation method, number format]
               entry per value column
    """
    # pivot table location (row); presumably offset by the number of filters
    # so the filter rows fit above the table
    pt_loc = len(pt_filters) + 2

    # grab the pivot table source data
    pc = wb.PivotCaches().Create(SourceType=win32c.xlDatabase, SourceData=ws1.UsedRange)

    # create the pivot table object; the sheet name is wrapped in single quotes
    # (embedded quotes doubled) so names with spaces or punctuation still parse
    quoted_ws_name = ws_name.replace("'", "''")
    pc.CreatePivotTable(TableDestination=f"'{quoted_ws_name}'!R{pt_loc}C1", TableName=pt_name)

    # select the pivot table worksheet and the cell where the table starts
    pt_ws.Select()
    pt_ws.Cells(pt_loc, 1).Select()

    # look the table up once instead of on every property access
    pt = pt_ws.PivotTables(pt_name)

    # set the filters, rows and columns of the pivot table
    placements = (
        (pt_filters, win32c.xlPageField),
        (pt_rows, win32c.xlRowField),
        (pt_cols, win32c.xlColumnField),
    )
    for field_list, orientation in placements:
        for position, value in enumerate(field_list, start=1):
            pivot_field = pt.PivotFields(value)
            pivot_field.Orientation = orientation
            pivot_field.Position = position

    # set the values of the pivot table
    for field in pt_fields:
        name, caption, function, number_format = field[:4]
        pt.AddDataField(pt.PivotFields(name), caption, function).NumberFormat = number_format

    # visibility: True or False
    pt.ShowValuesRow = True
    pt.ColumnGrand = True
Update run_excel to call pivot_table
def run_excel(f_path: Path, f_name: str, sheet_name: str):
    """
    Open the workbook in Excel and add a pivot table built from the data sheet.

    f_path = folder that holds the workbook
    f_name = workbook file name
    sheet_name = worksheet that holds the source data
    Prints a message and exits with status 1 when opening fails with the
    error code checked below; any other COM error is re-raised.
    """
    # com_error lives in pywintypes (part of pywin32); without this import
    # the except clause below would itself fail with a NameError
    from pywintypes import com_error

    filename = f_path / f_name

    # create excel object
    excel = win32.gencache.EnsureDispatch('Excel.Application')

    # excel can be visible or not
    excel.Visible = True  # False

    # try except for file / path
    try:
        # hand COM a plain string rather than a pathlib.Path object
        wb = excel.Workbooks.Open(str(filename))
    except com_error as e:
        # excepinfo can be None, so check before indexing it
        if e.excepinfo is not None and e.excepinfo[5] == -2146827284:
            print(f'Failed to open spreadsheet. Invalid filename or location: {filename}')
            sys.exit(1)
        raise

    # set worksheet (uses the caller's sheet name instead of a hard-coded 'data')
    ws1 = wb.Sheets(sheet_name)

    # Setup and call pivot_table
    ws2_name = 'pivot_table'
    wb.Sheets.Add().Name = ws2_name
    ws2 = wb.Sheets(ws2_name)

    pt_name = 'example'
    pt_rows = ['expense']
    pt_cols = ['products']
    pt_filters = ['date']
    # [0]: field name [1]: pivot table column name [2]: calculation method [3]: number format
    pt_fields = [['price', 'price: mean', win32c.xlAverage, '$#,##0.00'],
                 ['price', 'price: sum', win32c.xlSum, '$#,##0.00'],
                 ['price', 'price: count', win32c.xlCount, '0']]

    pivot_table(wb, ws1, ws2, ws2_name, pt_name, pt_rows, pt_cols, pt_filters, pt_fields)

    # uncomment to save and close the workbook and quit Excel when finished
    # wb.Close(True)
    # excel.Quit()
Resources
Jupyter Notebook: How to Create a Pivot Table in Excel with the Python win32com Module
Automate Excel with Python
Examples with Pivot Table
Using Python win32com to get list of Excel worksheets
Excel VBA reference
Workbook object (Excel)
Worksheet object (Excel)

Related

ValueError: Sheet 'Sheet1' already exists and if_sheet_exists is set to 'error'

I am trying to create an excel file of 3 columns: System Date, Time, Value on a webpage at that time.
Intention is to create a dataframe of the 3 values, every time the code runs, and append the dataframe to existing excel workbook (with one existing sheet).
I am able to create dataframe every time code runs, but when I try to append it to an excel file, it throws error:
ValueError: Sheet 'Sheet1' already exists and if_sheet_exists is set to 'error'
Can you please suggest, where am I going wrong.
# Importing Libraries
from datetime import datetime
import pandas as pd
import requests
from bs4 import BeautifulSoup
import openpyxl
# Get the current date and time and format them as strings
now = datetime.now()
Date = now.strftime ("%d/%m/%Y")
Time = now.strftime ("%H:%M")
# GET request to scrape; the response is kept in 'page'
page = requests.get("https://www.traderscockpit.com/?pageView=live-nse-advance-decline-ratio-chart")
# Create BeautifulSoup object to parse content
soup = BeautifulSoup(page.content, 'html.parser')
# The counts are read from the text node that follows each matching <a> element
adv = soup.select_one('a:-soup-contains("Advanced:")').next_sibling.strip()
dec = soup.select_one('a:-soup-contains("Declined:")').next_sibling.strip()
# Advance/decline ratio, rounded to two decimals
ADratio = round(int(adv)/int(dec), 2)
# The three values are wrapped in a one-element set holding a single tuple
df = pd.DataFrame({tuple([Date, Time, ADratio])})
# Open the existing workbook in append mode and read the last used row
path = r'C:\Users\kashk\OneDrive\Documents\ADratios.xlsx'
# NOTE(review): no if_sheet_exists is passed here; the error quoted in the
# question reports it as set to 'error'
writer = pd.ExcelWriter (path, engine='openpyxl', mode = 'a')
# NOTE(review): wb is loaded but never used below
wb = openpyxl.load_workbook(path)
startrow = writer.sheets['Sheet1'].max_row
# Write the data frame below the existing rows of 'Sheet1'
# (this is the write the question reports as failing with the ValueError)
df.to_excel (writer, sheet_name = 'Sheet1', index = False, header = False, startrow = startrow)
writer.save()
writer.close()

A fast and easy solution is to upgrade pandas to version 1.4.0 or later, which adds the if_sheet_exists='overlay' option (Source):
pd.ExcelWriter(path, engine='openpyxl', mode='a', if_sheet_exists='overlay')
If you don't want to upgrade pandas, you can work around the problem by removing the sheet and rewriting it to the Excel file. (Not recommended if you have a lot of records, since it will be slow.)
# Sheet name matches the 'Sheet1' used in the question
path, sheet_name = 'ADratios.xlsx', 'Sheet1'
df.columns = ['Date', 'Time', 'ADratio']

# Read the rows already in the sheet before the writer opens the file
df_bak = pd.read_excel(path, sheet_name=sheet_name)

# if_sheet_exists='replace' makes pandas drop and recreate the sheet itself,
# so the workbook does not have to be reloaded and edited by hand
with pd.ExcelWriter(path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    pd.concat([df_bak, df], axis=0).to_excel(writer, sheet_name=sheet_name, index=False)

TypeError: 'bool' object is not callable in python, while saving the workbook

I am trying to create a script where I first read a macro file, save it as a xlsx file and perform some tasks on it. Creating a subset dataframe from this excel file. After refreshing this excel again a subset dataframe is extracted and then these two distinct files with dataframes are compared.
The script runs fine for the first time, but next time it shows the below error:
Traceback (most recent call last):
File "C:\Users\1234\AppData\Local\Programs\Python\Python38-32\Scripts\sell_triggers1.py", line 47, in <module>
wb.Save()
TypeError: 'bool' object is not callable
If I close all the excel instances from the windows task manager, then script will work fine.
Please help where I am going wrong. I am very new to Python and can't figure out the issue. Thanks
import win32com.client
import pandas as pd
import os
import smtplib
import numpy as np
import time
excel = win32com.client.DispatchEx('Excel.Application')
# Load the .XLSM file into Excel
wb = excel.Workbooks.Open(r'D:\\MY DOCUMENTS\\WATCHLIST.xlsm')
# Save it in .XLSX format to a different filename
excel.DisplayAlerts = False
wb.DoNotPromptForConvert = True
wb.CheckCompatibility = False
wb.SaveAs(r"WATCHLIST1.xlsx", FileFormat=51, ConflictResolution=2)
wb.Close(True)
excel.Application.Quit()
# Load the .XLSX file into Pandas
df = pd.read_excel("C:\\Users\\1234\\Documents\\WATCHLIST1.xlsx", sheet_name='TRADES')
df1 = df[["Ticker1","Current_price","Condition(Current_price>STOP SMAW10)"]].copy()
df2 = df1[(df1["Condition(Current_price>STOP SMAW10)"] == 2)]
df2.to_excel("watchlist2.xlsx", index= False)
# refreshing the workbook and saving it
xlapp = win32com.client.DispatchEx('Excel.Application')
wb = xlapp.Workbooks.Open(r'C:\\Users\\1234\\Documents\\WATCHLIST1.xlsx')
wb.RefreshAll()
xlapp.CalculateUntilAsyncQueriesDone()
xlapp.DisplayAlerts = False
time.sleep(20)
wb.Save()
wb.Close(True)
xlapp.Application.Quit()
# again reading the watchlist and storing the filtered output to an excel file after refresh
df = pd.read_excel("C:\\Users\\1234\\Documents\\WATCHLIST1.xlsx", sheet_name='TRADES')
df1 = df[["Ticker1","Current_price","Condition(Current_price>STOP SMAW10)"]].copy()
df2 = df1[(df1["Condition(Current_price>STOP SMAW10)"] == 2)]
df2.to_excel("watchlist3.xlsx", index= False)
# reading two different output files
df1 = pd.read_excel("C:\\Users\\1234\\AppData\\Local\\Programs\\Python\\Python38-32\\Scripts\\watchlist2.xlsx")
df2 = pd.read_excel("C:\\Users\\1234\\AppData\\Local\\Programs\\Python\\Python38-32\\Scripts\\watchlist3.xlsx")
# preparing a comparison output
df1 = df1.set_index('Ticker1')
df2 = df2.set_index('Ticker1')
df3 = pd.concat([df1,df2],sort=False)
df3a = df3.stack().groupby(level=[0,1]).unique().unstack(1).copy()
df3a.loc[~df3a.index.isin(df2.index),'status'] = 'deleted' # if not in df2 index then deleted
df3a.loc[~df3a.index.isin(df1.index),'status'] = 'new' # if not in df1 index then new
idx = df3.stack().groupby(level=[0,1]).nunique() # get modified cells.
df3a.loc[idx.mask(idx <= 1).dropna().index.get_level_values(0),'status'] = 'modified'
df3a['status'] = df3a['status'].fillna('same') # assume that anything not fufilled by above rules is the same.
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('./watchlist4.xlsx', engine='xlsxwriter')
# Write each dataframe to a different worksheet.
df3a.to_excel(writer, sheet_name='Data')
# set column width for 'Data'.
worksheet = writer.sheets['Data']
worksheet.set_column(0, 2, 15)
# Close the Pandas Excel writer and output the Excel file.
writer.save()

Copy excel sheet from one worksheet to another in Python

All I want to do is copy a worksheet from an excel workbook to another excel workbook in Python.
I want to maintain all formatting (coloured cells, tables etc.)
I have a number of excel files and I want to copy the first sheet from all of them into one workbook. I also want to be able to update the main workbook if changes are made to any of the individual workbooks.
It's a code block that will run every few hours and update the master spreadsheet.
I've tried pandas, but it doesn't maintain formatting and tables.
I've tried openpyxl to no avail
I thought xlwings code below would work:
import xlwings as xw
wb = xw.Book('individual_files\\file1.xlsx')
sht = wb.sheets[0]
new_wb = xw.Book('Master Spreadsheet.xlsx')
new_wb.sheets["Sheet1"] = sht
But I just get the error:
----> 4 new_wb.sheets["Sheet1"] = sht
AttributeError: __setitem__
"file1.xlsx" above is an example first excel file.
"Master Spreadsheet.xlsx" is my master spreadsheet with all individual files.

In the end I did this:
def copyExcelSheet(sheetName):
    """
    Copy the active sheet of the workbook `item` into sheet `sheetName` of
    "Master file.xlsx".

    Cell values, cell styles and column widths are copied; the merged ranges
    and the two tables added afterwards use fixed cell references.
    The master workbook is saved in place.
    """
    # NOTE(review): `item` is not a parameter; it has to exist in the
    # surrounding scope when this function is called
    read_from = load_workbook(item)
    read_sheet = read_from.active
    write_to = load_workbook("Master file.xlsx")
    write_sheet = write_to[sheetName]

    # column letters whose width has already been copied
    widths_done = set()

    for row in read_sheet.rows:
        for cell in row:
            new_cell = write_sheet.cell(row=cell.row, column=cell.column,
                                        value=cell.value)

            # copy each column width once, not once per cell
            letter = get_column_letter(cell.column)
            if letter not in widths_done:
                write_sheet.column_dimensions[letter].width = read_sheet.column_dimensions[letter].width
                widths_done.add(letter)

            if cell.has_style:
                new_cell.font = copy(cell.font)
                new_cell.border = copy(cell.border)
                new_cell.fill = copy(cell.fill)
                new_cell.number_format = copy(cell.number_format)
                new_cell.protection = copy(cell.protection)
                new_cell.alignment = copy(cell.alignment)

    write_sheet.merge_cells('C8:G8')
    write_sheet.merge_cells('K8:P8')
    write_sheet.merge_cells('R8:S8')
    write_sheet.add_table(newTable("table1", "C10:G76", "TableStyleLight8"))
    write_sheet.add_table(newTable("table2", "K10:P59", "TableStyleLight9"))
    write_to.save('Master file.xlsx')

    # the original wrote `read_from.close` without parentheses, which never ran
    read_from.close()
With this to check if the sheet already exists:
#checks if sheet already exists and updates sheet if it does.
def checkExists(sheetName):
    """Reset `sheetName` in "Master file.xlsx": delete it if present, then add a sheet of that name and save."""
    # NOTE(review): the snippet's indentation was lost; create_sheet is assumed
    # to run on both branches, since the sheet is indexed by name later - confirm
    master = load_workbook("Master file.xlsx")  # open an Excel file and return a workbook
    already_there = sheetName in master.sheetnames
    if not already_there:
        print ("No sheet ",sheetName," found, will create sheet")
    else:
        print ("Removing sheet",sheetName)
        del master[sheetName]
    master.create_sheet(sheetName)
    master.save('Master file.xlsx')
with this to create new tables:
def newTable(tableName,ref,styleName):
    """Return a Table covering `ref`, named `tableName` plus a random 15-character suffix, styled with `styleName`."""
    alphabet = string.ascii_uppercase + string.digits + string.ascii_lowercase
    suffix = ''.join(random.choices(alphabet, k=15))
    table = Table(displayName=tableName + suffix, ref=ref)
    # striped rows and banded columns; first/last column highlighting off
    table.tableStyleInfo = TableStyleInfo(
        name=styleName,
        showFirstColumn=False,
        showLastColumn=False,
        showRowStripes=True,
        showColumnStripes=True,
    )
    return table

Adapted from this solution, but note that in my (limited) testing (and as observed in the other Q&A), this does not support the After parameter of the Copy method, only Before. If you try to use After, it creates a new workbook instead.
import xlwings as xw
wb = xw.Book('individual_files\\file1.xlsx')
sht = wb.sheets[0]
new_wb = xw.Book('Master Spreadsheet.xlsx')
# copy this sheet into the new_wb *before* Sheet1:
sht.api.Copy(Before=new_wb.sheets['Sheet1'].api)
# now, remove Sheet1 from new_wb
new_wb.sheets['Sheet1'].delete()

This can be done using pywin32 directly. The Before or After parameter needs to be provided (see the api docs), and the parameter needs to be a worksheet <object>, not simply a worksheet Name or index value. So, for example, to add it to the end of an existing workbook:
def copy_sheet_within_excel_file(excel_filename, sheet_name_or_number_to_copy):
    """Open `excel_filename` in Excel, copy the given sheet after the last worksheet, and return the workbook's active sheet."""
    excel_app = win32com_client.gencache.EnsureDispatch('Excel.Application')
    wb = excel_app.Workbooks.Open(excel_filename)
    sheets = wb.Worksheets
    source_sheet = sheets[sheet_name_or_number_to_copy]
    # After= must be a worksheet object, not a name or an index
    last_sheet = sheets[sheets.Count]
    source_sheet.Copy(After=last_sheet)
    # presumably the active sheet is the new copy at this point - confirm
    return wb.ActiveSheet
As most of my code runs on end-user machines, I don't like to make assumptions whether Excel is open or not so my code determines if Excel is already open (see GetActiveObject), as in:
try:
excel_app = win32com_client.GetActiveObject('Excel.Application')
except com_error:
excel_app = win32com_client.gencache.EnsureDispatch('Excel.Application')
And then I also check to see if the workbook is already loaded (see Workbook.FullName). Iterate through the Application.Workbooks testing the FullName to see if the file is already open. If so, grab that wb as your wb handle.
You might find this helpful for digging around the available Excel APIs directly from pywin32:
def show_python_interface_modules():
    """Open the folder containing the module that gencache reports for the 'Excel.Application' ProgID."""
    excel_module = win32com_client.gencache.GetModuleForProgID('Excel.Application')
    module_dir = os.path.dirname(excel_module.__file__)
    os.startfile(module_dir)

How to read specific sheets from My XLS file in Python

As of now I can read all the sheets of an Excel file.
e.msgbox("select Excel File")
updated_deleted_xls = e.fileopenbox()
book = xlrd.open_workbook(updated_deleted_xls, formatting_info=True)
openfile = e.fileopenbox()
for sheet in book.sheets():
for row in range(sheet.nrows):
for col in range(sheet.ncols):
thecell = sheet.cell(row, 0)
xfx = sheet.cell_xf_index(row, 0)
xf = book.xf_list[xfx]

If you open your editor from the desktop or command line, you would have to specify the file path while trying to read the file:
import pandas as pd
df = pd.read_excel(r'File path', sheet_name='Sheet name')
Alternatively, if you open your editor in the file's directory, then you could read directly using the panda library
import pandas as pd
df = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx', sheet_name='Title Sheet')
df1 = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx',sheet_name='Transactions')
df2 = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx', sheet_name='NewCustomerList')
df3 = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx', sheet_name='CustomerDemographic')
df4 = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx', sheet_name='CustomerAddress')

Maybe pandas would be helpful (the go-to package for data):
import pandas as pd
df = pd.read_excel('filname.xls', sheet = 0)
Edit: Since a lot of time has passed and pandas has matured, the arguments have changed. So for pandas > 1.0.0:
import pandas as pd
df = pd.read_excel('filname.xls', sheet_name = 0)

You can use book.sheet_by_name() to read specific sheets by their name from xls file.
for name, sheet_name in zip(filename, sheetnumber):
book = xlrd.open_workbook(name)
sheet = book.sheet_by_name(sheet_name)
for row in range(sheet.nrows):
for column in range(sheet.ncols):
thecell = sheet.cell(row, 0)
xfx = sheet.cell_xf_index(row, 0)
xf = book.xf_list[xfx]
filename is the path to your xls file. Specify the sheet number you need to read in sheetnumber.
Alternatively, you could use book.sheet_by_index() and pass argument to return a specific sheet.
From docs:
sheet_by_index(sheetx)
Parameters: sheetx – Sheet index in range(nsheets)
For example:
first_sheet = book.sheet_by_index(0) # returns the first sheet.

You can use either book.sheet_by_name() or book.get_sheet()
Example using get_sheet()
book = xlrd.open_workbook(updated_deleted_xls, formatting_info=True)
sheet = book.get_sheet(0) #Gets the first sheet.
Example using sheet_by_name()
book = xlrd.open_workbook(updated_deleted_xls, formatting_info=True)
sheet_names = book.sheet_names()
# use the workbook opened above; the original referenced an undefined `xl_workbook`
xl_sheet = book.sheet_by_name(sheet_names[0])
MoreInfo on getting sheet by sheet_by_name

From password-protected Excel file to pandas DataFrame

I can open a password-protected Excel file with this:
import sys
import win32com.client
xlApp = win32com.client.Dispatch("Excel.Application")
print "Excel library version:", xlApp.Version
filename, password = sys.argv[1:3]
xlwb = xlApp.Workbooks.Open(filename, Password=password)
# xlwb = xlApp.Workbooks.Open(filename)
xlws = xlwb.Sheets(1) # counts from 1, not from 0
print xlws.Name
print xlws.Cells(1, 1) # that's A1
I'm not sure though how to transfer the information to a pandas dataframe. Do I need to read cells one by one and all, or is there a convenient method for this to happen?

Simple solution
import io
import pandas as pd
import msoffcrypto
passwd = 'xyz'
decrypted_workbook = io.BytesIO()
with open(i, 'rb') as file:
office_file = msoffcrypto.OfficeFile(file)
office_file.load_key(password=passwd)
office_file.decrypt(decrypted_workbook)
df = pd.read_excel(decrypted_workbook, sheet_name='abc')
pip install --user msoffcrypto-tool
Exporting all sheets of each Excel file in a directory and its sub-directories to separate CSV files
import os  # os.walk / os.path.join are used below but os was never imported
from glob import glob

PATH = "Active Cons data"

# Scanning all the excel files from directories and sub-directories
excel_files = [y for x in os.walk(PATH) for y in glob(os.path.join(x[0], '*.xlsx'))]

for excel_file in excel_files:
    print(str(excel_file))

    # decrypt the workbook into memory
    decrypted_workbook = io.BytesIO()
    with open(excel_file, 'rb') as file:
        office_file = msoffcrypto.OfficeFile(file)
        office_file.load_key(password=passwd)
        office_file.decrypt(decrypted_workbook)

    # sheet_name=None loads every sheet at once as {sheet name: DataFrame},
    # so each sheet does not need to be parsed a second time
    sheets = pd.read_excel(decrypted_workbook, sheet_name=None)
    print(list(sheets))  # list of sheet names

    for sheet, sheet_df in sheets.items():
        # NOTE: the target name depends only on the sheet name, so equally
        # named sheets from different workbooks write to the same file
        new_file = f"D:\\all_csv\\{sheet}.csv"
        sheet_df.to_csv(new_file, index=False)

Assuming the starting cell is given as (StartRow, StartCol) and the ending cell is given as (EndRow, EndCol), I found the following worked for me:
# Get the content in the rectangular selection region
# content is a tuple of tuples
content = xlws.Range(xlws.Cells(StartRow, StartCol), xlws.Cells(EndRow, EndCol)).Value
# Transfer content to pandas dataframe
dataframe = pandas.DataFrame(list(content))
Note: Excel Cell B5 is given as row 5, col 2 in win32com. Also, we need list(...) to convert from tuple of tuples to list of tuples, since there is no pandas.DataFrame constructor for a tuple of tuples.

from David Hamann's site (all credits go to him)
https://davidhamann.de/2018/02/21/read-password-protected-excel-files-into-pandas-dataframe/
Use xlwings, opening the file will first launch the Excel application so you can enter the password.
import pandas as pd
import xlwings as xw
PATH = '/Users/me/Desktop/xlwings_sample.xlsx'
wb = xw.Book(PATH)
sheet = wb.sheets['sample']
df = sheet['A1:C4'].options(pd.DataFrame, index=False, header=True).value
df

Assuming that you can save the encrypted file back to disk using the win32com API (which I realize might defeat the purpose) you could then immediately call the top-level pandas function read_excel. You'll need to install some combination of xlrd (for Excel 2003), xlwt (also for 2003), and openpyxl (for Excel 2007) first though. Here is the documentation for reading in Excel files. Currently pandas does not provide support for using the win32com API to read Excel files. You're welcome to open up a GitHub issue if you'd like.

Based on the suggestion provided by @ikeoddy, this should put the pieces together:
How to open a password protected excel file using python?
# Import modules
import pandas as pd
import win32com.client
import os
import getpass
# Name file variables
file_path = r'your_file_path'
file_name = r'your_file_name.extension'
full_name = os.path.join(file_path, file_name)
# print(full_name)
Getting command-line password input in Python
# You are prompted to provide the password to open the file
xl_app = win32com.client.Dispatch('Excel.Application')
pwd = getpass.getpass('Enter file password: ')
Workbooks.Open Method (Excel)
xl_wb = xl_app.Workbooks.Open(full_name, False, True, None, pwd)
xl_app.Visible = False
xl_sh = xl_wb.Worksheets('your_sheet_name')
# Get last_row: walk down column 1 until the first empty cell (Value is None)
row_num = 0
cell_val = ''
while cell_val is not None:
    row_num += 1
    cell_val = xl_sh.Cells(row_num, 1).Value
    # print(row_num, '|', cell_val, type(cell_val))
last_row = row_num - 1
# print(last_row)
# Get last_column: walk along row 1 until the first empty cell (Value is None)
col_num = 0
cell_val = ''
while cell_val is not None:
    col_num += 1
    cell_val = xl_sh.Cells(1, col_num).Value
    # print(col_num, '|', cell_val, type(cell_val))
last_col = col_num - 1
# print(last_col)
ikeoddy's answer:
content = xl_sh.Range(xl_sh.Cells(1, 1), xl_sh.Cells(last_row, last_col)).Value
# list(content)
df = pd.DataFrame(list(content[1:]), columns=content[0])
df.head()
python win32 COM closing excel workbook
xl_wb.Close(False)

Adding to @Maurice's answer, to get all the cells in the sheet without having to specify the range:
wb = xw.Book(PATH, password='somestring')
sheet = wb.sheets[0] #get first sheet
#sheet.used_range.address returns string of used range
df = sheet[sheet.used_range.address].options(pd.DataFrame, index=False, header=True).value

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to create a pivot table in Excel with python win32com - python

Related

ValueError: Sheet 'Sheet1' already exists and if_sheet_exists is set to 'error'

TypeError: 'bool' object is not callable in python, while saving the workbook

Copy excel sheet from one worksheet to another in Python

How to read specific sheets from My XLS file in Python

From password-protected Excel file to pandas DataFrame

Categories

Resources