Delete file with python after downloading - python

I have this script which downloads an excel file from google drive, updates the external link then runs a desired macro. However, I would like it so that the file is deleted from the path after the macro has been run so that I don't need to update the script to change the file name every time.
I've included the script only from the run macro part.
def run_macro(workbook_name, com_instance):
wb = com_instance.workbooks.open(workbook_name)
com_instance.AskToUpdateLinks = False
try:
wb.UpdateLink(Name=wb.LinkSources())
except Exception as e:
print(e)
finally:
wb.Close(True)
wb = None
return True
def main():
dir_root = ("C:\\users\\ciara\\desktop\\test4.xlsm")
xl_app = Dispatch("Excel.Application")
xl_app.Visible = False
xl_app.DisplayAlerts = False
for root, dirs, files in os.walk(dir_root):
for fn in files:
if fn.endswith(".xlsx") and fn[0] is not "~":
run_macro(os.path.join(root, fn), xl_app)
xl_app.Quit()
xl = None
import unittest
import os.path
import win32com.client
class ExcelMacro(unittest.TestCase):
def test_excel_macro(self):
try:
xlApp = win32com.client.DispatchEx('Excel.Application')
xlsPath = os.path.expanduser('C:\\users\\ciara\\desktop\\test4.xlsm')
wb = xlApp.Workbooks.Open(Filename=xlsPath)
xlApp.Run('BridgeHit')
wb.Save()
xlApp.Quit()
print("Macro ran successfully!")
except:
print("Error found while running the excel macro!")
xlApp.Quit()
if __name__ == "__main__":
unittest.main()
if os.path.exists('C:\\users\\ciara\\desktop\\test4.xlsm'):
os.remove('C:\\users\\ciara\\desktop\\test4.xlsm')
else:
print('File does not exists')
The console displays does not display even 'File does not exists'.
If there is any other feedback on the code above I would appreciate it! I'm just starting to learn

Try adding an indent to your if-else block.

You should add the file removal if-else block to ExcelMacro class. Then only, before running (or after) a macro, it could remove the file if it exists.

Related

Using Python Click to read a JSON file

I am new to python and I am trying to read in a JSON file, that for now I can just write out to a new file without any changes. I have been attempting to use the python package Click to do this but keep running into errors.
I'm sure this is relatively basic but any help would be appreciated. The latest version of the code I've tried is below.
import json
import os
import click
def dazprops():
"""Read Daz3D property File"""
path = click.prompt('Please specify a location for the Daz3D Properties File')
path = os.path.realpath(path)
#dir_name = os.path.dirname(path)
print(path)
dazpropsf = open('path')
print(dazpropsf)
if __name__ == '__main__':
dazprops()
Something like this could give you an idea how to achieve that using click:
import click
def read_file(fin):
content = None
with open(fin, "r") as f_in:
content = f_in.read()
return content
def write_file(fout, content):
try:
print("Writing file...")
with open(fout, "w") as f_out:
f_out.write(content)
print(f"File created: {fout}")
except IOError as e:
print(f"Couldn't write a file at {fout}. Error: {e}")
#click.command()
#click.argument('fin', type=click.Path(exists=True))
#click.argument('fout', type=click.Path())
def init(fin, fout):
"""
FINT is an input filepath
FOUT is an output filepath
"""
content = read_file(fin)
if content:
write_file(fout, content)
if __name__ == "__main__":
init()

python reading header from word docx

I am trying to read a header from a word document using python-docx and watchdog.
What I am doing is, whenever a new file is created or modified the script reads the file and get the contents in the header, but I am getting an
docx.opc.exceptions.PackageNotFoundError: Package not found at 'Test6.docx'
error and I tried everything including opening it as a stream but nothing has worked, and yes the document is populated.
For reference, this is my code.
**main.py**
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import watchdog.observers
import watchdog.events
import os
import re
import xml.dom.minidom
import zipfile
from docx import Document
class Watcher:
DIRECTORY_TO_WATCH = "/path/to/my/directory"
def __init__(self):
self.observer = Observer()
def run(self):
event_handler = Handler()
self.observer.schedule(event_handler,path='C:/Users/abdsak11/OneDrive - Lärande', recursive=True)
self.observer.start()
try:
while True:
time.sleep(5)
except:
self.observer.stop()
print ("Error")
self.observer.join()
class Handler(FileSystemEventHandler):
#staticmethod
def on_any_event(event):
if event.is_directory:
return None
elif event.event_type == 'created':
# Take any action here when a file is first created.
path = event.src_path
extenstion = '.docx'
base = os.path.basename(path)
if extenstion in path:
print ("Received created event - %s." % event.src_path)
time.sleep(10)
print(base)
doc = Document(base)
print(doc)
section = doc.sections[0]
header = section.header
print (header)
elif event.event_type == 'modified':
# Taken any action here when a file is modified.
path = event.src_path
extenstion = '.docx'
base = os.path.basename(path)
if extenstion in base:
print ("Received modified event - %s." % event.src_path)
time.sleep(10)
print(base)
doc = Document(base)
print(doc)
section = doc.sections[0]
header = section.header
print (header)
if __name__ == '__main__':
w = Watcher()
w.run()
Edit:
Tried to change the extension from doc to docx and that worked but is there anyway to open docx because thats what i am finding.
another thing. When opening the ".doc" file and trying to read the header all i am getting is
<docx.document.Document object at 0x03195488>
<docx.section._Header object at 0x0319C088>
and what i am trying to do is to extract the text from the header
You are trying to print the object itself, however you should access its property:
...
doc = Document(base)
section = doc.sections[0]
header = section.header
print(header.paragraphs[0].text)
according to https://python-docx.readthedocs.io/en/latest/user/hdrftr.html)
UPDATE
As I played with python-docx package, it turned out that PackageNotFoundError is very generic as it can occur simply because file is not accessible by some reason - not exist, not found or due to permissions, as well as if file is empty or corrupted. For example, in case of watchdog, it may very well happen that after triggering "created" event and before creating Document file can be renamed, deleted, etc. And for some reason you make this situation more probable by waiting 10 seconds before creating Document? So, try checking if file exists before:
if not os.path.exists(base):
raise OSError('{}: file does not exist!'.format(base))
doc = Document(base)
UPDATE2
Note also, that this may happen when opening program creates some lock file based on file name, e.g. running your code on linux and opening the file with libreoffice causes
PackageNotFoundError: Package not found at '.~lock.xxx.docx#'
because this file is not docx file! So you should update your filtering condition with
if path.endswith(extenstion):
...

Invalid file path or buffer object type: <class 'list'> when trying to loop through files

I have the below code updated; in effort to allow my script to loop through multiple files in a directory (as opposed to one):
#classmethod
def find_file(cls):
all_files = list()
""""Finds the excel file to process"""
archive = ZipFile(config.FILE_LOCATION)
for file in archive.filelist:
if file.filename.__contains__('Horrible Data Site '):
all_files.append(archive.extract(file.filename, config.UNZIP_LOCATION))
return all_files
Before declaring 'all files = list()' above in my find_files method, this was working on one file in the directory. I added the all_files in attempt to allow loop through all files in a directory.
Also, in the below main.py I just added the for right before PENDING_RECORDS for this objective.
"""Start Point"""
from data.find_pending_records import FindPendingRecords
from vital.vital_entry import VitalEntry
from time import sleep
if __name__ == "__main__":
try:
for PENDING_RECORDS in FindPendingRecords().get_excel_data():
# Do operations on PENDING_RECORDS
# Reads excel to map data from excel to vital
MAP_DATA = FindPendingRecords().get_mapping_data()
# Configures Driver
VITAL_ENTRY = VitalEntry()
# Start chrome and navigate to vital website
VITAL_ENTRY.instantiate_chrome()
# Begin processing Records
VITAL_ENTRY.process_records(PENDING_RECORDS, MAP_DATA)
print (PENDING_RECORDS)
print("All done")
except Exception as exc:
print(exc)
The addition of adding the all_files() and for now outputs the following error in the Anaconda Prompt:
(base) C:\Python>python main.py
Invalid file path or buffer object type: <class 'list'>
this is the config.py
FILE_LOCATION = r"C:\Zip\DATA Docs.zip"
UNZIP_LOCATION = r"C:\Zip\Pending"
VITAL_URL = 'http://horriblewebsite:8080/START'
HEADLESS = False
PROCESSORS = 4
MAPPING_DOC = ".//map/mappingDOC.xlsx"

Win32Com Save As Variable Name in Python 3.6

I'm trying to loop through .xlsb file types in a folder and convert them to .csv in Python 3+ and Windows 10 and have pieced together the code below with help from SO. I want to save the new .csv as the original .xlsb name but am having issues - I have this so far:
import os
import glob
import win32com.client
path = r'C:\Users\folder\Desktop\Test Binary'
all_files_test = glob.glob(os.path.join(path, "*.xlsb"))
for file in all_files_test:
excel = win32com.client.Dispatch("Excel.Application")
excel.Visible = False
doc = excel.Workbooks.Open(file)
doc.SaveAs(Filename="C:\\Users\\folder\\Desktop\\Test Binary\\file.csv",FileFormat = 6) #overwrites file each time, need to substitute 'file'
doc.Close(True)
excel.Quit()
excel.Quit()
Which of course just overwrites each new iteration each time as 'file.csv'. How can I substitute the .xlsb name for each .csv name to SaveAs separate files? Thanks in advance.
Simply use str.replace on file variable to change extension. And consider wrapping in try/except to cleanly release COM objects regardless of error or not.
path = r'C:\Users\folder\Desktop\Test Binary'
all_files_test = glob.glob(os.path.join(path, "*.xlsb"))
for file in all_files_test:
try:
excel = win32com.client.Dispatch("Excel.Application")
excel.Visible = False
doc = excel.Workbooks.Open(file)
csv_name = file.replace('.xlsb', '.csv')
doc.SaveAs(Filename = csv_name, FileFormat = 6)
doc.Close(True)
excel.Quit()
except Exception as e:
print(e)
finally:
doc = None
excel = None
And to go one level deeper use combination of os.path.basename and os.path.join:
path = r'C:\Users\folder\Desktop\Test Binary'
...
csv_name = os.path.basename(file).replace('.xlsb', '.csv')
doc.SaveAs(Filename = os.path.join(path, 'Conversion_Files', csv_name), FileFormat = 6)
Parfait's answer is good, but has a few flaws. I have remedied those (that I have noticed) in this answer, and refactored out some context managers to make the logic easier to understand (and hence easier to modify).
It now prints failed files to sys.stdout (to let you recover, Unix-style, by replacing the for loop with repeated input() / f.readline()[:-1] calls), and only opens the Excel COM object once; this should be a lot faster.
I have also added support for recursively performing this match, but this feature requires Python 3.5 or above in order to work.
import os
import glob
import traceback
from contextlib import contextmanager
import win32com.client
from pythoncom import com_error
PATH = r'C:\Users\folder\Desktop\Test Binary'
#contextmanager
def open_excel():
excel = win32com.client.Dispatch("Excel.Application")
excel.Visible = False
try:
yield excel
finally:
excel.Quit()
#contextmanager
def open_workbook(excel, filename):
doc = excel.Workbooks.Open(filename)
try:
yield doc
finally:
doc.Close(True)
all_files_test = glob.glob(os.path.join(PATH, "**.xlsb"), recursive=True)
with excel_cm() as excel:
for file in all_files_test:
try:
with open_workbook(file) as doc:
doc.SaveAs(Filename=file[:-4] + 'csv', FileFormat=6)
except com_error as e:
print(file)
traceback.print_exc()

check if a file is open in Python

In my app, I write to an excel file. After writing, the user is able to view the file by opening it. But if the user forgets to close the file before any further writing, a warning message should appear. So I need a way to check this file is open before the writing process. Could you supply me with some python code to do this task?
If all you care about is the current process, an easy way is to use the file object attribute "closed"
f = open('file.py')
if f.closed:
print 'file is closed'
This will not detect if the file is open by other processes!
source: http://docs.python.org/2.4/lib/bltin-file-objects.html
I assume that you're writing to the file, then closing it (so the user can open it in Excel), and then, before re-opening it for append/write operations, you want to check that the file isn't still open in Excel?
This is how you could do that:
while True: # repeat until the try statement succeeds
try:
myfile = open("myfile.csv", "r+") # or "a+", whatever you need
break # exit the loop
except IOError:
input("Could not open file! Please close Excel. Press Enter to retry.")
# restart the loop
with myfile:
do_stuff()
For windows only
None of the other provided examples would work for me when dealing with this specific issue with excel on windows 10. The only other option I could think of was to try and rename the file or directory containing the file temporarily, then rename it back.
import os
try:
os.rename('file.xls', 'tempfile.xls')
os.rename('tempfile.xls', 'file.xls')
except OSError:
print('File is still open.')
You could use with open("path") as file: so that it automatically closes, else if it's open in another process you can maybe try
as in Tims example you should use except IOError to not ignore any other problem with your code :)
try:
with open("path", "r") as file: # or just open
# Code here
except IOError:
# raise error or print
Using
try:
with open("path", "r") as file:#or just open
may cause some troubles when file is opened by some other processes (i.e. user opened it manually).
You can solve your poblem using win32com library.
Below code checks if any excel files are opened and if none of them matches the name of your particular one, openes a new one.
import win32com.client as win32
xl = win32.gencache.EnsureDispatch('Excel.Application')
my_workbook = "wb_name.xls"
xlPath="my_wb_path//" + my_workbook
if xl.Workbooks.Count > 0:
# if none of opened workbooks matches the name, openes my_workbook
if not any(i.Name == my_workbook for i in xl.Workbooks):
xl.Workbooks.Open(Filename=xlPath)
xl.Visible = True
#no workbooks found, opening
else:
xl.Workbooks.Open(Filename=xlPath)
xl.Visible = True
'xl.Visible = True is not necessary, used just for convenience'
Hope this will help
Try this method if the above methods corrupt your excel file.
This function attempts to rename the file with its own name. If the file has already been opened, the edit will be reject by the os and an OSError exception will be raised. It does not touch the inner code so it will not corrupt your excel files. LMK if it worked for you.
def check_file_status(self):
try:
os.rename("file1.xlsx", "file1.xlsx")
print("File is closed.")
except OSError:
print("File is opened.")
if myfile.closed == False:
print("File is still open ################")
Just use this function. It will close any already opened excel file
import os
def close():
try:
os.system('TASKKILL /F /IM excel.exe')
except Exception:
print("KU")
close()

Categories