openpyxl is overwriting any work done by win32 macro in Excel - python

I am currently writing a script that requires me to update data in an Excel sheet and run a macro. After running the macro, I am trying to save the file using win32, in order to be able to run through a similar process again. However, I have linked everything back to a weird issue with win32 and openpyxl. It seems that if I run the macro using win32, save the file and open it, everything looks exactly how it should. But if I then use openpyxl to add something to that file after running the macro, the entire results from the macro are then deleted. For example, the macro fills in cells A4:H23 with the correct data, but then after saving with win32 and reopening and editing with openpyxl, if I try to add any value to cell A3 afterwards and open up the file, the only thing that shows is the value in A3 and nothing that the macro should have done. I can open the sheet manually and add the number and resave it perfectly fine, but when I use openpyxl to do so, it clears all the macro results. I have included my code below for reference. Any help would be appreciated.
from openpyxl import load_workbook
import os
import win32com.client
import re
import time
# Paths of the workbooks the script reads and writes.
Macro1 = r"C:\Users\...\Documents\Macro1Demo.xlsm"
Macro2 = r"C:\Users\...\Documents\Macro2Demo.xlsm"
MiddleManTest = r"C:\Users\...\Documents\MiddleManTest.xlsm"
MiddleMan2 = r"C:\Users\...\Documents\MiddleMan2.xlsm"
DistrictList = r"C:\Users\...\Documents\DistrictList.xlsx"
# Load the workbooks into memory with openpyxl (keep_vba=True is passed for each).
Macro1WB = load_workbook(filename = Macro1, keep_vba = True)
Macro2WB = load_workbook(filename = Macro2, keep_vba = True)
# NOTE(review): MM2WB is read from MiddleMan2 here, i.e. before Excel runs the
# macro and writes its results to that same path further down.
MM2WB = load_workbook(filename = MiddleMan2, keep_vba = True)
ListWB = load_workbook(filename = DistrictList, keep_vba = True)
ListSheet = ListWB['Sheet1']
M1Sheet = Macro1WB['Entry']
M2Sheet = Macro2WB['Raw']
# NOTE(review): indentation was lost in this listing; the statements below are
# presumably the body of the for loop, and the win32 calls the body of the `if`.
for i in range(2,3):
# Copy organisation / district from row i of the list into Entry!A2:B2.
Macro1WB['Entry']['A2'].value = ListWB['Sheet1']['A' + str(i)].value
Macro1WB['Entry']['B2'].value = ListWB['Sheet1']['B' + str(i)].value
OrgName = Macro1WB['Entry']['A2'].value
DistrictName = Macro1WB['Entry']['B2'].value
# Write the updated Entry sheet to disk before Excel opens the file.
Macro1WB.save(Macro1)
# Remove the characters / : * ? < > | " from both names.
SimpleOrgName = re.sub('[/\:*?<>|"]','',OrgName)
SimpleDistrictName = re.sub('[/\:*?<>|"]','',DistrictName)
NewFileName = SimpleOrgName + "(" + SimpleDistrictName + ")"
print(Macro1WB['Entry']['A2'].value,Macro1WB['Entry']['B2'].value)
print("Org/District Copied.")
if os.path.exists(r"C:\Users\...\Documents\Macro1Demo.xlsm"):
# Drive Excel through COM: open the macro workbook, run the macro, save a copy.
xl = win32com.client.Dispatch("Excel.application")
xl.visible = True
workbook = xl.Workbooks.Open(os.path.abspath(r"C:\Users\...\Documents\Macro1Demo.xlsm"))
xl.Application.Run("Macro1Demo.xlsm!Module1.AdvancedFilter")
print("Data filtered.")
# Excel writes the macro output to MiddleMan2.
workbook.SaveAs(MiddleMan2)
xl.Application.Quit()
# NOTE(review): MM2WB still holds what was read from MiddleMan2 before the macro
# ran, so this save replaces the file Excel just wrote with that older copy
# plus the new A3 value. Calling load_workbook(MiddleMan2, keep_vba=True) again
# after Quit() and editing that fresh object would presumably keep the macro
# output -- confirm.
MM2WB['Entry']['A3'] = i
MM2WB.save(MiddleMan2)

Related

Is there a way to read the calculated value in Excel using openpyxl without data_only = True?

When I use data_only = True, it gets rid of all of the formulas and I don't want that. I just want it to be able to reference the actual calculated value and then do stuff with it. Then I want to be able to go back into the spreadsheet and have it work as it did before.
The only solution I can think of is to basically have python execute the command that excel does, but I don't even know if that's possible. For example if on Sheet1, cell A5 = "Sheet1!A1^2", would it be possible or practical to ask python to go into Sheet1A1 and grab the value from it?
I tried using data_only = True and it ruined my spreadsheet as it got rid of all the relationships between the cells.
So what I did is I opened the same workbook twice. Once as data_only = True the other as data_only = False. I only then saved the workbook that was data_only = False. That seemed to work.
Here's the relevant code:
import os, os.path
import openpyxl
import glob
# Open the same workbook twice: once keeping formulas, once with data_only=True,
# and save only the formula-preserving copy.
# NOTE(review): Files is not used anywhere in the visible snippet.
Files = glob.glob('./*.csv')
directory_path = os.getcwd()
file_location = directory_path
file_name1 = "Navigator.xlsx"
path1 = file_location + "\\" + file_name1
# wb is loaded with data_only=False, wbd with data_only=True.
wb = openpyxl.load_workbook(path1, data_only = False)
wbd = openpyxl.load_workbook(path1, data_only = True)
Worksheet1_Name = "NavLog"
# NOTE(review): Worksheet2_Name is not used in the visible snippet.
Worksheet2_Name = "Departure-Arrival"
ws = wb[Worksheet1_Name]
wsd = wbd[Worksheet1_Name]
#Do whatever it is you want to do here
# Only wb is saved; wbd is never written back.
wb.save(file_name1)

with statement python __enter__ attribute error

This :
def add_to_excel(list_to_save, file_to_save_in):  # appends each item of list_to_save to 'Sheet1', then saves
my_file = dir_path + '\\' + file_to_save_in  # dir_path is defined outside this snippet
with openpyxl.load_workbook(filename=my_file) as links_excel:  # NOTE(review): this is the line reported with AttributeError: __enter__
sheet = links_excel['Sheet1']
for i in list_to_save:
sheet.append(i)
links_excel.save(filename)  # NOTE(review): `filename` is not defined in this snippet; `my_file` is presumably intended
return
returns this:
3 my_file = dir_path + '\\' + file_to_save_in
----> 4 with openpyxl.load_workbook(filename=my_file) as links_excel:
5 sheet = links_excel['Sheet1']
6 for i in list_to_save:
AttributeError: __enter__
Tried this:
You're not using with statement and there's no close() statement so if this is not the first time you're running the code, it's likely that you haven't closed the file properly and it is still sitting in the memory and prevents access.
Edit:
Apparently closing the excel fixes it, and the with statement is not needed.
links_excel.close()
def add_to_excel(list_to_save, file_to_save_in):
    """Append every row in *list_to_save* to 'Sheet1' of an existing workbook.

    Args:
        list_to_save: iterable of rows; each row is passed to ``sheet.append``.
        file_to_save_in: workbook file name, resolved against ``dir_path``
            (a name defined outside this snippet).
    """
    my_file = os.path.join(dir_path, file_to_save_in)
    # load_workbook returns a plain Workbook, so it is assigned directly
    # instead of being used in a ``with`` statement.
    links_excel = openpyxl.load_workbook(filename=my_file)
    try:
        sheet = links_excel['Sheet1']
        for row in list_to_save:
            sheet.append(row)
        links_excel.save(my_file)
    finally:
        # Close the workbook even when appending or saving fails.
        links_excel.close()
from openpyxl documentation
Read an existing workbook:
from openpyxl import load_workbook
wb = load_workbook(filename = 'empty_book.xlsx')
sheet_ranges = wb['range names']
print(sheet_ranges['D18'].value)
This is an example on how to use the load_workbook method, so you don't need to use that with statement. Just use assignment.
def add_to_excel(list_to_save, file_to_save_in):
    """Append every row in *list_to_save* to 'Sheet1' of an existing workbook.

    Args:
        list_to_save: iterable of rows; each row is passed to ``sheet.append``.
        file_to_save_in: workbook file name, appended to ``dir_path``
            (a name defined outside this snippet).
    """
    my_file = dir_path + '\\' + file_to_save_in
    # Plain assignment: the object returned by load_workbook is not used as
    # a context manager.
    links_excel = openpyxl.load_workbook(filename=my_file)
    try:
        sheet = links_excel['Sheet1']
        for row in list_to_save:
            sheet.append(row)
        # Save back to the path that was opened (the previous ``filename``
        # was never defined).
        links_excel.save(my_file)
    finally:
        links_excel.close()

Last filled cell in excel sheet by Python win32

I am trying to find the last cell in an excel worksheet by Python win32 module, but somehow it doesn't seem to recognize End(xlDown).
Program line-
LastRow = WBName.Sheets("Sheet1").Cells(2,2).End(xlDown).Row
The error-
NameError: name 'xlDown' is not defined
Thanks in advance for any help.
# NOTE(review): `win32`, `XlApp` and `Input` are not defined in this snippet;
# presumably `import win32com.client as win32`, a dispatched Excel application
# and the workbook path -- confirm against the full script.
win32c = win32.constants
XlApp.Visible = 1
wb = XlApp.Workbooks.Open(Input)
ws = wb.ActiveSheet
# Suppress Excel prompts on the same application object used everywhere else.
XlApp.DisplayAlerts = False
# Start at the bottom row of column A and jump up to the last filled cell.
# xlUp is the direction constant meant for End(); the question's NameError came
# from using the bare name xlDown instead of going through win32c.
lr = ws.Cells(ws.Rows.Count, "A").End(win32c.xlUp).Row
print(lr)
or
def LastRow(WS, COL):
    """Return the row index Excel reaches by End(xlUp) from the bottom of COL.

    Args:
        WS: worksheet COM object.
        COL: column given the way ``Cells`` accepts it (letter or number).
    """
    # xlUp is the direction constant intended for Range.End.
    return WS.Cells(WS.Rows.Count, COL).End(win32c.xlUp).Row


print(LastRow(ws, "a"))

Python: Is there way how to write in to Excel Cell and export PDF in 1 loop

I have a sticky situation with my project. I am trying to update an Excel sheet and export it to PDF in one loop.
At the moment I believe the best tool for this is the openpyxl library.
The issue is that the two functions (writing and printing) open Excel in different ways, using:
book = openpyxl.load_workbook(excel_file) and
wb = excel.Workbooks.Open(excel_file).
Both functions are crossing each other and creating permission issues (at least it is looking like it) plus crashing Jupyter :).
PLEASE is there any elegant way how to do this or I really need 2 loops?
Error call example:
PermissionError: [Errno 13] Permission denied: 'C:/Users/admin/test_files/dir#$$.xlsx'
Code is looking like this:
def update_directory():
    """Write each sub-directory name into the template and export it as PDF.

    For every directory found under the folder chosen in the dialog, the name
    is written to cell F4 (row 4, column 6) of 'Sheet1' with openpyxl, the
    workbook is saved, reopened through Excel and saved as a PDF.

    NOTE(review): `client`, `filedialog` and `openpyxl` come from imports that
    are not shown in this snippet.
    """
    excel_file = r'C:/Users/admin/test_files/doo.xlsx'
    excel = client.DispatchEx("Excel.Application")
    excel.Visible = 0
    try:
        folder_selected = filedialog.askdirectory()
        os.chdir(folder_selected)
        for root, dirs, files in os.walk(".", topdown=False):
            for name in dirs:
                a_pth = os.getcwd()
                pdf_file = os.path.join(a_pth,name," ")+"Dic_"+"%s.pdf" % name
                # openpyxl writes and saves first, so the file is closed on the
                # Python side before Excel opens it.
                book = openpyxl.load_workbook(excel_file)
                sheet = book['Sheet1']
                sheet.cell(row=4, column=6).value = name
                book.save(excel_file)
                wb = excel.Workbooks.Open(excel_file)
                ws = wb.Worksheets[1]
                ws.SaveAs(pdf_file, FileFormat=57)
                # Close inside the loop so Excel releases the file before the
                # next openpyxl save (comment from Amiga500). Excel may still
                # show a save prompt here.
                wb.Close()
    finally:
        # Application.Quit is the call that shuts Excel down; run it even if
        # an iteration fails so no hidden Excel instance is left behind.
        excel.Quit()
Having an entry
wb.application.displayalerts = False
Inserted just before the
wb.Close()
line seems to have worked for me, so the code snippet would resemble
# Body of the inner `for name in dirs:` loop from the question, with the
# DisplayAlerts line added just before wb.Close().
book = openpyxl.load_workbook(excel_file)
sheet= book['Sheet1']
sheet.cell(row=4, column=6).value = name
book.save(excel_file)
wb = excel.Workbooks.Open(excel_file)
ws = wb.Worksheets[1]
ws.SaveAs(pdf_file, FileFormat=57)
wb.application.displayalerts = False #This stops the popup asking for a save
wb.Close() # <- needs to be part of the loop (comment from Amiga500). File save
# prompt from Excel present.
Note wb.Close() is at same indentation as the rest of inner for loop.

From password-protected Excel file to pandas DataFrame

I can open a password-protected Excel file with this:
import sys
import win32com.client
# Open a password-protected workbook through Excel's COM interface.
# Usage: script.py <filename> <password>
xlApp = win32com.client.Dispatch("Excel.Application")
print("Excel library version:", xlApp.Version)
filename, password = sys.argv[1:3]
xlwb = xlApp.Workbooks.Open(filename, Password=password)
# xlwb = xlApp.Workbooks.Open(filename)
xlws = xlwb.Sheets(1) # counts from 1, not from 0
print(xlws.Name)
print(xlws.Cells(1, 1)) # that's A1
I'm not sure though how to transfer the information to a pandas dataframe. Do I need to read cells one by one and all, or is there a convenient method for this to happen?
Simple solution
import io
import pandas as pd
import msoffcrypto
passwd = 'xyz'
# The decrypted bytes are kept in memory instead of being written to disk.
decrypted_workbook = io.BytesIO()
# NOTE(review): `i` must hold the path of the encrypted workbook; it is not
# defined in this snippet (the loop version below binds it per file).
with open(i, 'rb') as file:
    office_file = msoffcrypto.OfficeFile(file)
    office_file.load_key(password=passwd)
    office_file.decrypt(decrypted_workbook)
# Read the sheet named 'abc' from the decrypted in-memory copy.
df = pd.read_excel(decrypted_workbook, sheet_name='abc')
pip install --user msoffcrypto-tool
Exporting all sheets of each Excel file from directories and sub-directories to separate CSV files
from glob import glob
PATH = "Active Cons data"
# Collect every .xlsx file under PATH, including sub-directories.
excel_files = [y for x in os.walk(PATH) for y in glob(os.path.join(x[0], '*.xlsx'))]
# NOTE(review): `io`, `os`, `pd`, `msoffcrypto` and `passwd` come from the
# previous snippet.
for workbook_path in excel_files:
    print(str(workbook_path))
    decrypted_workbook = io.BytesIO()
    with open(workbook_path, 'rb') as file:
        office_file = msoffcrypto.OfficeFile(file)
        office_file.load_key(password=passwd)
        office_file.decrypt(decrypted_workbook)
    # sheet_name=None loads every sheet at once as {sheet name: DataFrame},
    # so the workbook does not need to be parsed again per sheet.
    sheets = pd.read_excel(decrypted_workbook, sheet_name=None)
    print(list(sheets))  # list of sheet names
    for sheet, df in sheets.items():
        # NOTE(review): the output name depends only on the sheet name, so
        # sheets with the same name in different workbooks overwrite each other.
        new_file = f"D:\\all_csv\\{sheet}.csv"
        df.to_csv(new_file, index=False)
Assuming the starting cell is given as (StartRow, StartCol) and the ending cell is given as (EndRow, EndCol), I found the following worked for me:
# Get the content in the rectangular selection region bounded by
# (StartRow, StartCol) and (EndRow, EndCol).
# content is a tuple of tuples
content = xlws.Range(xlws.Cells(StartRow, StartCol), xlws.Cells(EndRow, EndCol)).Value
# Transfer content to pandas dataframe (list(...) turns the outer tuple into a list)
# NOTE(review): this line needs `import pandas`; other snippets on this page use the alias `pd`.
dataframe = pandas.DataFrame(list(content))
Note: Excel Cell B5 is given as row 5, col 2 in win32com. Also, we need list(...) to convert from tuple of tuples to list of tuples, since there is no pandas.DataFrame constructor for a tuple of tuples.
from David Hamann's site (all credits go to him)
https://davidhamann.de/2018/02/21/read-password-protected-excel-files-into-pandas-dataframe/
Use xlwings, opening the file will first launch the Excel application so you can enter the password.
import pandas as pd
import xlwings as xw
# Open the workbook through xlwings and read a fixed range into a DataFrame.
PATH = '/Users/me/Desktop/xlwings_sample.xlsx'
wb = xw.Book(PATH)
sheet = wb.sheets['sample']
# Convert A1:C4 to a pandas DataFrame using the options index=False, header=True.
df = sheet['A1:C4'].options(pd.DataFrame, index=False, header=True).value
# Bare expression: shows the DataFrame only in an interactive session / notebook.
df
Assuming that you can save the encrypted file back to disk using the win32com API (which I realize might defeat the purpose) you could then immediately call the top-level pandas function read_excel. You'll need to install some combination of xlrd (for Excel 2003), xlwt (also for 2003), and openpyxl (for Excel 2007) first though. Here is the documentation for reading in Excel files. Currently pandas does not provide support for using the win32com API to read Excel files. You're welcome to open up a GitHub issue if you'd like.
Based on the suggestion provided by @ikeoddy, this should put the pieces together:
How to open a password protected excel file using python?
# Import modules
import pandas as pd
import win32com.client
import os
import getpass
# Name file variables
file_path = r'your_file_path'
file_name = r'your_file_name.extension'
full_name = os.path.join(file_path, file_name)
# print(full_name)
Getting command-line password input in Python
# You are prompted to provide the password to open the file
xl_app = win32com.client.Dispatch('Excel.Application')
pwd = getpass.getpass('Enter file password: ')
Workbooks.Open Method (Excel)
# Positional arguments after the file name follow the Workbooks.Open signature:
# UpdateLinks=False, ReadOnly=True, Format=None, Password=pwd.
xl_wb = xl_app.Workbooks.Open(full_name, False, True, None, pwd)
# Keep the Excel window hidden while reading.
xl_app.Visible = False
xl_sh = xl_wb.Worksheets('your_sheet_name')
# Get last_row: walk down column 1 until the first empty cell (Value is None).
row_num = 0
cell_val = ''
while cell_val is not None:
    row_num += 1
    cell_val = xl_sh.Cells(row_num, 1).Value
# The loop stops on the first empty cell, so the last filled row is one above.
last_row = row_num - 1

# Get last_column: walk right along row 1 until the first empty cell.
col_num = 0
cell_val = ''
while cell_val is not None:
    col_num += 1
    cell_val = xl_sh.Cells(1, col_num).Value
last_col = col_num - 1
ikeoddy's answer:
# Read the whole block from A1 to (last_row, last_col) in one COM call.
content = xl_sh.Range(xl_sh.Cells(1, 1), xl_sh.Cells(last_row, last_col)).Value
# list(content)
# The first row of content supplies the column names; the remaining rows are the data.
df = pd.DataFrame(list(content[1:]), columns=content[0])
df.head()
python win32 COM closing excel workbook
xl_wb.Close(False)
Adding to @Maurice's answer to get all the cells in the sheet without having to specify the range
# Open the workbook with its password passed directly to xw.Book.
wb = xw.Book(PATH, password='somestring')
sheet = wb.sheets[0] #get first sheet
#sheet.used_range.address returns string of used range
# Index the sheet by that address so the range does not have to be hard-coded.
df = sheet[sheet.used_range.address].options(pd.DataFrame, index=False, header=True).value

Categories