python watchdog observe certain files but all directories - python

I need to observe files of type, say .tsv but also need to observe all events on directories.
Currently, with the patterns argument, this is ignoring directories.
I want it to observe every event on directories.
Here's my code:
import time, sys
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
from datetime import datetime
import os
class MyHandler(PatternMatchingEventHandler):
    """Append every "created" event that reaches this handler to ``log.txt``.

    The class attributes below configure the pattern matching performed by
    ``PatternMatchingEventHandler``.
    """

    patterns = ["*.tsv"]
    ignore_patterns = []
    ignore_directories = False
    case_sensitive = True

    def process(self, event):
        """Write one tab-separated line: path, event type, timestamp."""
        line = event.src_path + "\t" + event.event_type + "\t" + str(datetime.now()) + "\n"
        # The context manager closes the log even if the write fails.
        with open('log.txt', 'a') as log_file:
            log_file.write(line)

    def on_created(self, event):
        """Log the creation event."""
        self.process(event)
if __name__ == '__main__':
    args = sys.argv[1:]
    observer = Observer()
    # Watch the directory given on the command line, or the current one.
    observer.schedule(MyHandler(), path=args[0] if args else '.', recursive=True)
    observer.start()
    try:
        # Keep the main thread alive; the observer runs in its own thread.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
How can it be done?

Yes. Instead of using the patterns, just watch the directory and, when an event is triggered, check the src_path to see whether it is a .tsv file; then you can implement whatever logic you need at that point.

Related

Python Script won't run against all files, and won't end when I hit Ctrl-C

I'm new to Python, and I'm having a problem with executing some code with Watchdog. The code is supposed to copy files to their respective folders when they're modified or created. It will work against one file, but then it quits if there are more files matching. I also can't stop the program with Ctrl-C for some reason. Full code below:
import os
import os.path
import shutil
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from watchdog.events import PatternMatchingEventHandler
sourcepath='C:/Users/bhart/Downloads/'
sourcefiles = os.listdir(sourcepath)
destinationpath = 'C:/Users/bhart/Downloads/xls'
destinationpathcsv = 'C:/Users/bhart/Downloads/csv'
destinationpathtxt = 'C:/Users/bhart/Downloads/txt'
destinationpathpdf = 'C:/Users/bhart/Downloads/pdf'
path = sourcepath
event_handler = FileSystemEventHandler()
def on_created(event):
    """Move the files listed in ``sourcefiles`` into their per-type folders."""
    # NOTE: iterates the module-level ``sourcefiles`` snapshot, and
    # os.path.exists() receives the bare file name without its directory.
    for file in sourcefiles:
        if os.path.exists(file):
            if file.endswith('.xls') or file.endswith('.xlsx'):
                shutil.move(os.path.join(sourcepath,file), os.path.join(destinationpath,file))
            if file.endswith('.csv'):
                shutil.move(os.path.join(sourcepath,file), os.path.join(destinationpathcsv,file))
                print("CSV file moved.")
            if file.endswith('.txt'):
                print("TXT file moved")
                shutil.move(os.path.join(sourcepath,file), os.path.join(destinationpathtxt,file))
            if file.endswith('.pdf'):
                shutil.move(os.path.join(sourcepath,file), os.path.join(destinationpathpdf,file))
def on_modified(event):
    """Move the files listed in ``sourcefiles`` into their per-type folders."""
    # NOTE: iterates the module-level ``sourcefiles`` snapshot, and
    # os.path.exists() receives the bare file name without its directory.
    for file in sourcefiles:
        if os.path.exists(file):
            if file.endswith('.xls') or file.endswith('.xlsx'):
                shutil.move(os.path.join(sourcepath,file), os.path.join(destinationpath,file))
            if file.endswith('.csv'):
                shutil.move(os.path.join(sourcepath,file), os.path.join(destinationpathcsv,file))
            if file.endswith('.txt'):
                print("TXT file moved")
                shutil.move(os.path.join(sourcepath,file), os.path.join(destinationpathtxt,file))
            if file.endswith('.pdf'):
                shutil.move(os.path.join(sourcepath,file), os.path.join(destinationpathpdf,file))
if __name__ == "__main__":
    # First observer: reacts to modifications.
    event_handler.on_modified = on_modified
    observer = Observer()
    observer.start()
    observer.schedule(event_handler, path, recursive=True)
    observer.join()
    # Second observer: reacts to creations.
    event_handler.on_created = on_created
    observer = Observer()
    observer.start()
    observer.schedule(event_handler, path, recursive=True)
    observer.join()
    try:
        print("test")
    except KeyboardInterrupt:
        exit()
I don't know if I resolve all problems but:
listdir() gives filenames without directory and you have to use os.path.join() even when you check os.path.exists()
if os.path.exists( os.path.join(sourcepath, file) ):
listdir() gives filenames only once and you have to use it inside for-loop to get new filenames.
def on_created(event):
    # Re-list the directory on every event so that new files are seen.
    sourcefilenames = os.listdir(sourcepath)
    for filename in sourcefilenames:
        src = os.path.join(sourcepath, filename)
        if os.path.exists(src):
            pass  # ... code ...
def on_modified(event):
    # Re-list the directory on every event so that new files are seen.
    sourcefilenames = os.listdir(sourcepath)
    for filename in sourcefilenames:
        src = os.path.join(sourcepath, filename)
        if os.path.exists(src):
            pass  # ... code ...
.join() blocks the code and waits until you close the program, so it creates the first Observer and waits for it to end before it creates the second Observer - but you could do everything with one Observer.
It seems you have the same code in on_created and on_modified so you could use one function for both situations
def move_it(event):
    # One handler shared by the "created" and "modified" events.
    sourcefilenames = os.listdir(sourcepath)
    for filename in sourcefilenames:
        src = os.path.join(sourcepath, filename)
        if os.path.exists(src):
            pass  # ... code ...
if __name__ == "__main__":
    event_handler = FileSystemEventHandler()
    # The same function serves both event types.
    event_handler.on_modified = move_it
    event_handler.on_created = move_it
    observer = Observer()
    observer.start()
    observer.schedule(event_handler, sourcepath, recursive=True)
    observer.join()
If you want to catch Ctrl+C then you should put all code in try/except (or at least put join() inside try/except).
I don't know what problem you have with Ctrl+C but it works for me on Linux.
if __name__ == "__main__":
    event_handler = FileSystemEventHandler()
    event_handler.on_modified = move_it
    event_handler.on_created = move_it
    observer = Observer()
    observer.start()
    observer.schedule(event_handler, sourcepath, recursive=True)
    try:
        # Join in short slices so the main thread can react to Ctrl+C.
        while observer.is_alive():
            observer.join(1)
    except KeyboardInterrupt:
        print('Stopped by Ctrl+C')
    finally:
        # Always shut the observer thread down cleanly.
        observer.stop()
        observer.join()
One suggestion:
Code can be much simpler and more universal if you will use dictionary
{
".xls": "C:/.../xls",
".xlsx": "C:/.../xls",
# ...
}
This way you can use for-loop to check all extensions. And you can always add new extension to dictionary without changing code in functions.
import os
import shutil
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
sourcepath = 'C:/Users/bhart/Downloads/'
destinationpath = {
'.xls' : 'C:/Users/bhart/Downloads/xls',
'.xlsx': 'C:/Users/bhart/Downloads/xls',
'.csv' : 'C:/Users/bhart/Downloads/csv',
'.txt' : 'C:/Users/bhart/Downloads/txt',
'.pdf' : 'C:/Users/bhart/Downloads/pdf',
}
def move_it(event):
    """Move every entry of ``sourcepath`` whose name ends with a known extension.

    The triggering ``event`` is not inspected; the directory is listed again
    on each call. Extensions are matched case-insensitively against the keys
    of ``destinationpath``.
    """
    for filename in os.listdir(sourcepath):
        src = os.path.join(sourcepath, filename)
        if not os.path.exists(src):
            continue
        for ext, dst in destinationpath.items():
            if filename.lower().endswith(ext):
                print('move:', filename, '->', dst)
                shutil.move(src, os.path.join(dst, filename))
                # src no longer exists, so stop testing further extensions.
                break
if __name__ == "__main__":
    event_handler = FileSystemEventHandler()
    event_handler.on_modified = move_it
    event_handler.on_created = move_it
    observer = Observer()
    observer.start()
    observer.schedule(event_handler, sourcepath, recursive=True)
    try:
        # Join in short slices so the main thread can react to Ctrl+C.
        while observer.is_alive():
            observer.join(1)
    except KeyboardInterrupt:
        print('Stopped by Ctrl+C')
    finally:
        # Always shut the observer thread down cleanly.
        observer.stop()
        observer.join()
EDIT:
event gives event.src_path, event.event_type, etc., and you could use them instead of listdir() to get the path to the file.
import os
import shutil
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
sourcepath = 'C:/Users/bhart/Downloads/'
destinationpath = {
'.xls' : 'C:/Users/bhart/Downloads/xls',
'.xlsx': 'C:/Users/bhart/Downloads/xls',
'.csv' : 'C:/Users/bhart/Downloads/csv',
'.txt' : 'C:/Users/bhart/Downloads/txt',
'.pdf' : 'C:/Users/bhart/Downloads/pdf',
}
def move_it(event):
    """Move the file that triggered ``event`` into the folder for its extension.

    Directory events are ignored, as are files whose name matches no key of
    ``destinationpath``. Extensions are matched case-insensitively.
    """
    # Handy when debugging: event.event_type, event.is_directory,
    # event.src_path, event.key.
    if event.is_directory:
        return
    # The same file may be reported more than once (this function is meant
    # to serve several event types); skip paths that were already moved.
    if not os.path.exists(event.src_path):
        return
    filename = os.path.basename(event.src_path)
    for ext, dst in destinationpath.items():
        if filename.lower().endswith(ext):
            shutil.move(event.src_path, os.path.join(dst, filename))
            print('move:', filename, '->', dst)
            # The source is gone; do not test further extensions.
            break
if __name__ == "__main__":
    event_handler = FileSystemEventHandler()
    event_handler.on_modified = move_it
    event_handler.on_created = move_it
    #event_handler.on_moved = move_it  # ie. rename (but this need to check `dest_path`)
    observer = Observer()
    observer.start()
    observer.schedule(event_handler, sourcepath, recursive=True)
    try:
        # Join in short slices so the main thread can react to Ctrl+C.
        while observer.is_alive():
            observer.join(1)
    except KeyboardInterrupt:
        print('Stopped by Ctrl+C')
    finally:
        # Always shut the observer thread down cleanly.
        observer.stop()
        observer.join()

Python - Exit script from outside command

I have a proof of concept script based on the watchdog module, it registers when a new file is added to a set folder and sends off a command, this script runs constantly, but the final design is to be put on a server, meaning we will not have access to the command line to "CTRL + C" it. How do I kill it from an outside source (e.g. second script that activates a function within the primary script)?
Here is my current script, which contains a "stop_watchdog" function at the bottom.
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
import os, sys, time
import sqlite3
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
if __name__ == "__main__":
    patterns = ["*"]
    ignore_patterns = None
    ignore_directories = False
    case_sensitive = True
    # Keyword arguments keep the call unambiguous regardless of parameter order.
    my_event_handler = PatternMatchingEventHandler(
        patterns=patterns,
        ignore_patterns=ignore_patterns,
        ignore_directories=ignore_directories,
        case_sensitive=case_sensitive,
    )
def file_detected(textInput):
    """Dump the table rows of ``sqlite_master`` for an SQLite file to ``<path>.txt``.

    Paths containing ".txt" are skipped (presumably so the dumps written
    here are not processed again when their own creation event arrives).
    """
    db_path = str(textInput)
    if ".txt" in db_path:
        return
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute("SELECT * FROM sqlite_master where type = 'table'")
        tables = c.fetchall()
    finally:
        # Release the database handle even if the query fails.
        conn.close()
    with open("{}.txt".format(db_path), "w") as f:
        f.write(str(tables))
def on_created(event):
    """Announce the newly created path and hand it to ``file_detected``."""
    print(f"hey, {event.src_path} has been created!")
    file_detected(event.src_path)
my_event_handler.on_created = on_created
path = "./xyz"
go_recursively = True
file_observer = Observer()
file_observer.schedule(my_event_handler, path, recursive=go_recursively)
file_observer.start()
try:
    # Keep the main thread alive; the observer runs in its own thread.
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    file_observer.stop()
file_observer.join()
def stop_watchdog():
    """Stop the module-level ``file_observer`` and exit the interpreter."""
    print("Quitting!")
    file_observer.stop()
    sys.exit()

Python - add multiple files to folder but run event only once

I'm trying to make a watchdog to listen to a folder changes (adding/deleting) files.
My problem is, that every time I copy-create/delete several files from this folder (and its subfolders), the event chain starts one by one for each and every file.
How can I make the on_event() method to be invoked only once, after multiple files creation/deletion?
Let's say I'm copying to this folders two images.
I want the event handler to be invoked only once after file transfer finishes, and not twice - once for each image - as it currently works.
Thanks!
The code runs on a raspberry pi 3 with python 3.7.
Here's the code:
import os
import time
import psutil
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
i = 0
def show_stats():
    """Return a multi-line status report and advance the read counter.

    The report contains the module-level counter ``i``, memory usage, CPU
    load and the first line printed by ``vcgencmd measure_temp``.
    """
    global i
    read = "read #" + str(i) + ":"
    mem = "\nmemory in use: " + str(psutil.virtual_memory().percent) + "%"
    cpu = "\ncpu load: " + str(psutil.cpu_percent()) + "%"
    # Close the pipe explicitly instead of leaking it on every call.
    with os.popen("vcgencmd measure_temp") as pipe:
        reading = pipe.readline()
    temp = "\ncurrent " + reading.replace("=", ": ").replace("'C", " C°")
    end = "\n=================="
    i += 1
    return read + mem + cpu + temp + end
class Watcher:
    """Watch ``DIRECTORY_TO_WATCH`` recursively and print stats every 5 seconds."""

    DIRECTORY_TO_WATCH = r'/home/pi/Desktop/jsSlider/images'

    def __init__(self):
        self.observer = Observer()
        print("watching ", self.DIRECTORY_TO_WATCH, "...")

    def run(self):
        """Start the observer and loop until an exception or Ctrl+C."""
        event_handler = Handler()
        self.observer.schedule(
            event_handler, self.DIRECTORY_TO_WATCH, recursive=True)
        self.observer.start()
        try:
            while True:
                time.sleep(5)
                print(show_stats())
        except Exception as e:
            print(e)
        finally:
            # Also reached on KeyboardInterrupt, which ``except Exception``
            # does not catch.
            self.observer.stop()
            self.observer.join()
class Handler(FileSystemEventHandler):
    """Rebuild the slideshow array and restart the browser on create/delete."""

    # NOTE(review): confirm that watchdog actually invokes a method named
    # ``on_event``; the name is kept as posted.
    @staticmethod
    def on_event(event):
        wait = 1
        if event.event_type == 'created' or event.event_type == 'deleted':
            print("Received event - %s. " %event.src_path, str(event.event_type))
            time.sleep(wait) #i found that its best to give some timeout between commands because it overwhelmed the pi for some reason (one second seems to be enough)...
            os.system('python /home/pi/Desktop/Slider/scripts/arr_edit.py') #recreate the JS array
            time.sleep(wait)
            os.system('cp -r /home/pi/Desktop/jsSlider/scripts/imgArr.js /home/pi/Desktop/jsSlider/themes/1') #copy the newly created JS array to its place
            time.sleep(wait)
            os.system('sudo pkill chromium') #"refresh" the page -the kiosk mode reactivates the process...
            # os.system('cls')
            print('done!')
if __name__ == '__main__':
    w = Watcher()
    w.run()
Edit I
There is a poor rpi3 connected to a tv in some clinic, working in kiosk mode to display images from a local html file (with some js code - the slide show run with an existing JS script - i can upload everything if requested | the images are also on the pi itself).
What I'm trying to achieve is to automatically:
rebuild the JS array (with a working python script - code below (arr_edit.py)).
copy the new array to its desired location. (shell command)
and restart chromium with "pkill chromium". (shell command)
Now, I cannot allow that every time someone copies/deletes multiple images, the commands will run each time - which means:
whenever 2+ images are being added, i cannot "restart" the kiosk
(sudo pkill chromium) each and every time a file is created.
Every time you copy multiple files (images in that case), for each individual image that was created in the folder, an entirely individual event.created is invoked, therefore for 5 images, there will be 5 different event.created events that will fire the on_event() method each on its own turn, making the kiosk restart 5 times in a row. (now think of what will happen if a 50 files transfer occurs - the pi will just crash)
Therefore, I need a method to invoke the command only 1 time after file transfer finishes, regardless of how many files has changed/created/deleted in the folder.
arr_edit.py (not entirely my code):
import os
dir_path = r'/home/pi/Desktop/jsSlider/images'
file_path = r'/home/pi/Desktop/jsSlider/scripts/imgArr.js'
directory = os.fsencode(dir_path)
arr_name = 'images=[\n'
start_str = '{"img":"./images/'
end_str = '"},\n'
images = ''
def writer(array, imagesList):
    """Write ``array + imagesList + ']'`` to the module-level ``file_path``."""
    str_to_write = array + imagesList + ']'
    # The context manager flushes and closes the file when the block ends.
    with open(file_path, 'w') as f:
        f.write(str_to_write)
# Keep only the image files found in the watched directory.
image_names = [
    name
    for name in map(os.fsdecode, os.listdir(directory))
    if name.endswith((".jpg", ".jpeg", ".webp"))
]
# Separate the entries with commas so the last image never gets a trailing
# comma, regardless of which directory entry happens to be listed last.
images = ',\n'.join(start_str + name + '"}' for name in image_names)
if images:
    images += '\n'
writer(arr_name, images)
output JS array (sample from inside imgArr.js):
images=[
{"img":"./images/246.jpg"},
{"img":"./images/128.jpg"},
{"img":"./images/238.webp"},
{"img":"./images/198.jpg"},
{"img":"./images/247.webp"}
]
As Mark suggested in the comments,
i added a check to see if the js file has changed in the past 5 minutes.
if the file changed,
wait for another 5 minutes and re-initiate the change (if more files have been added to the folder) so the new, larger files will also be shown in this run.
Works like a charm!
many thanks!!
here's the final watchdog.py
import os
import time
import psutil
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
i = 0
def show_stats():
    """Return a multi-line status report and advance the read counter.

    The report contains the module-level counter ``i``, memory usage, CPU
    load and the first line printed by ``vcgencmd measure_temp``.
    """
    global i
    read = "read #" + str(i) + ":"
    mem = "\nmemory in use: " + str(psutil.virtual_memory().percent) + "%"
    cpu = "\ncpu load: " + str(psutil.cpu_percent()) + "%"
    # Close the pipe explicitly instead of leaking it on every call.
    with os.popen("vcgencmd measure_temp") as pipe:
        reading = pipe.readline()
    temp = "\ncurrent " + reading.replace("=", ": ").replace("'C", " C°")
    end = "\n=================="
    i += 1
    return read + mem + cpu + temp + end
def wait_for_file(file):
    """Sleep 300 seconds, then call ``modify()`` if ``file`` is at least 5 minutes old."""
    time.sleep(300)
    if age(file) >= 5:
        modify()
def modify():
    """Regenerate the JS image array, copy it into the theme and kill chromium."""
    os.system('python /home/pi/Desktop/jsSlider/scripts/arr_edit.py')
    os.system(
        'cp -r /home/pi/Desktop/jsSlider/scripts/imgArr.js /home/pi/Desktop/jsSlider/themes/1')
    time.sleep(1)
    os.system('sudo pkill chromium')
    # os.system('cls')
    print("done!\nwatching...")
def age(filename):
    """Return the whole minutes (as a float) since ``filename`` was last modified."""
    return (time.time() - os.path.getmtime(filename)) // 60
class Watcher:
    """Watch ``DIRECTORY_TO_WATCH`` recursively and print stats every 5 seconds."""

    DIRECTORY_TO_WATCH = r'/home/pi/Desktop/jsSlider/images'

    def __init__(self):
        self.observer = Observer()
        print("watching ", self.DIRECTORY_TO_WATCH, "...")

    def run(self):
        """Start the observer and loop until an exception or Ctrl+C."""
        event_handler = Handler()
        self.observer.schedule(
            event_handler, self.DIRECTORY_TO_WATCH, recursive=True)
        self.observer.start()
        try:
            while True:
                time.sleep(5)
                print(show_stats())
        except Exception as e:
            print(e)
        finally:
            # Also reached on KeyboardInterrupt, which ``except Exception``
            # does not catch.
            self.observer.stop()
            self.observer.join()
class Handler(FileSystemEventHandler):
    """Rebuild the slideshow after files are created or deleted.

    Rebuilds are delayed while ``imgArr.js`` was modified less than five
    minutes ago.
    """

    # Declared static because the method takes only ``event`` (no ``self``).
    @staticmethod
    def on_any_event(event):
        file = r'/home/pi/Desktop/jsSlider/scripts/imgArr.js'
        if event.event_type == 'created' or event.event_type == 'deleted':
            print("Received event - %s. " %
                  event.src_path, str(event.event_type))
            time.sleep(5)
            if age(file) < 5:
                # The array was rebuilt recently: wait before rebuilding again.
                wait_for_file(file)
            else:
                modify()
if __name__ == '__main__':
    w = Watcher()
    w.run()

FileNotFoundError: [WinError 3] The system cannot find the path specified. Windows OS

I am trying to deploy and run my automation program on my Windows laptop, but I ran into an error and I don't know how to figure out the problem. I tried searching on the internet, but I didn't find anything. I am 14 years old and I am a beginner in Python. It is a project about automatically moving files and folders so my laptop can be organized.
$ C:/Users/siddt/python.exe c:/Users/siddt/NodeJs/auto.py
Traceback (most recent call last):
File "c:/Users/siddt/NodeJs/auto.py", line 32, in <module>
observer.start()
File "C:\Users\siddt\lib\site-packages\watchdog\observers\api.py", line 260, in start
emitter.start()
File "C:\Users\siddt\lib\site-packages\watchdog\utils\__init__.py", line 110, in start
self.on_thread_start()
File "C:\Users\siddt\lib\site-packages\watchdog\observers\read_directory_changes.py", line 66, in on_thread_start
self._handle = get_directory_handle(self.watch.path)
File "C:\Users\siddt\lib\site-packages\watchdog\observers\winapi.py", line 307, in get_directory_handle
return CreateFileW(path, FILE_LIST_DIRECTORY, WATCHDOG_FILE_SHARE_FLAGS,
File "C:\Users\siddt\lib\site-packages\watchdog\observers\winapi.py", line 113, in _errcheck_handle
raise ctypes.WinError()
FileNotFoundError: [WinError 3] The system cannot find the path specified.
Here is the code to my program.
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import os
import json
import time
class myHandler(FileSystemEventHandler):
    """Move every entry of ``folder_to_track`` to ``folder_destination`` under a new name."""

    # Counter used to build "new_file_<i>.txt"; starts as a class attribute.
    i = 1

    def on_modified(self, event):
        """Rename each tracked entry to the next unused ``new_file_<i>.txt``."""
        new_name = "new_file_" + str(self.i) + ".txt"
        for filename in os.listdir(folder_to_track):
            # Advance the counter until the destination name is free.
            while os.path.isfile(folder_destination + "/" + new_name):
                self.i += 1
                new_name = "new_file_" + str(self.i) + ".txt"
            src = folder_to_track + "/" + filename
            new_destination = folder_destination + "/" + new_name
            os.rename(src, new_destination)
folder_to_track = '/Users/Desktop/myFolder'
folder_destination = '/Users/Desktop/newFolder'
event_handler = myHandler()
observer = Observer()
observer.schedule(event_handler, folder_to_track, recursive=True)
observer.start()
try:
    # Keep the main thread alive; the observer runs in its own thread.
    while True:
        time.sleep(10)
except KeyboardInterrupt:
    observer.stop()
observer.join()
I would really appreciate some help.
Just try this:
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler, PatternMatchingEventHandler
import os
import time
from pathlib import Path
def on_modified(event):
    """Move entries of ``folder_to_track`` that are not yet in ``folder_destination``.

    The triggering ``event`` is not inspected; the tracked folder is listed
    on every call. An entry is skipped when a file of the same name already
    exists in the destination.
    """
    for filename in os.listdir(folder_to_track):
        print('Checking file {}....'.format(filename))
        chk_name = folder_to_track + "/" + filename
        if os.path.isfile(folder_destination + "/" + filename):
            print('The file {} exists'.format(filename))
            continue
        # os.listdir() already yields bare names, so no basename() is needed.
        newfile = os.path.join(folder_destination, filename)
        print (chk_name)
        print (newfile)
        Path(chk_name).rename(newfile)
        print('File {} moved'.format(filename))
folder_to_track = '/YOUR START PATH'
folder_destination = '/YOUR DESTINATION PATH'
if __name__ == "__main__":
    # Patterns are passed as lists of glob strings.
    patterns = ["*"]
    ignore_patterns = None
    ignore_directories = False
    case_sensitive = True
    my_event_handler = PatternMatchingEventHandler(
        patterns=patterns,
        ignore_patterns=ignore_patterns,
        ignore_directories=ignore_directories,
        case_sensitive=case_sensitive,
    )
    my_event_handler.on_modified = on_modified
    go_recursively = True
    observer = Observer()
    observer.schedule(my_event_handler, folder_to_track, recursive=go_recursively)
    observer.start()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
For moving files I use Path from Pathlib (it works on python 3.4+)
You must replace the directory.
I hope it's useful!

Problems with multiprocess-watchdog script in Python 3

I have a watchdog-script to monitor a directory recursively. When an event happens I want to do something with the file.
This worked super fine, but some files are very big, so the treatment of this file blocked the watcher and some files were later missing because the watcher didn't recognize them due to the blocking.
So I thought, multiprocessing could help. My idea was, that the event (created, modified, ....) would start a new process and then execute the function.
I do now have a sample script that combines watchdog with multiprocessing, but I am having trouble getting it working.
import os
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
from multiprocessing import Process
def print_msg(text):
    """Print ``text`` followed by the id of the process executing this call."""
    pid = os.getpid()
    print("{0} über Prozess ID: {1}".format(text, pid))
def on_created(event):
    """Report a created path from a child process."""
    text = "hey, {0} has been created!".format(event.src_path)
    # ``args`` must be a tuple; ``(text)`` is just the string itself.
    proc = Process(target=print_msg, args=(text,))
    proc.start()
    # join() waits here until the child process has finished.
    proc.join()
def on_deleted(event):
    """Report a deleted path from a child process."""
    text = "what the f**k! Someone deleted {0}!".format(event.src_path)
    # ``args`` must be a tuple; ``(text)`` is just the string itself.
    proc = Process(target=print_msg, args=(text,))
    proc.start()
    # join() waits here until the child process has finished.
    proc.join()
def on_modified(event):
    """Report a modified path from a child process."""
    text = "hey buddy, {0} has been modified".format(event.src_path)
    # ``args`` must be a tuple; ``(text)`` is just the string itself.
    proc = Process(target=print_msg, args=(text,))
    proc.start()
    # join() waits here until the child process has finished.
    proc.join()
def on_moved(event):
    """Report a moved path (source and destination) from a child process."""
    text = "ok ok ok, someone moved {0} to {1}".format(event.src_path, event.dest_path)
    # ``args`` must be a tuple; ``(text)`` is just the string itself.
    proc = Process(target=print_msg, args=(text,))
    proc.start()
    # join() waits here until the child process has finished.
    proc.join()
if __name__ == "__main__":
    # Everything that builds or starts the observer stays under this guard,
    # so a child process that imports this module does not run it again.
    patterns = ["*"]
    ignore_patterns = None
    ignore_directories = False
    case_sensitive = True
    my_event_handler = PatternMatchingEventHandler(
        patterns=patterns,
        ignore_patterns=ignore_patterns,
        ignore_directories=ignore_directories,
        case_sensitive=case_sensitive,
    )
    my_event_handler.on_created = on_created
    my_event_handler.on_deleted = on_deleted
    my_event_handler.on_modified = on_modified
    my_event_handler.on_moved = on_moved
    # Raw string: same UNC path, without relying on the invalid "\s" escape.
    path = r"\\swibinacl01-cifs\BelVis\PROD\Importer\Messdaten"
    go_recursively = True
    my_observer = Observer()
    my_observer.schedule(my_event_handler, path, recursive=go_recursively)
    my_observer.start()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        my_observer.stop()
    my_observer.join()
When I test the script I get following message just when the first event comes up:
my_event_handler.on_created = on_created
NameError: name 'my_event_handler' is not defined
So I think that after the event (or starting the new process) the my_event_handler object has gone and needs to be re-initialized.
But why is that? My thinking was, that when the event starts the function within a new process, the original process (the watcher) would continue and the new process acts independently.
What is my mistake? Can anyone help me out?

Categories