Using watchdog to put newly created file names into variables - python

I would like to use watchdog to find new files that are created in one folder. The file name will then be used in a different function. I am using this code here:
import sys
import time
import logging
from watchdog.observers import Observer
from watchdog.events import LoggingEventHandler
if __name__ == "__main__":
    # Log only the bare message text of each event.
    logging.basicConfig(format=' %(message)s', level=logging.INFO)

    # Watch the directory named by the first CLI argument, else the current one.
    cli_args = sys.argv[1:]
    path = cli_args[0] if cli_args else '.'

    observer = Observer()
    event_handler = LoggingEventHandler()
    observer.schedule(event_handler, path, recursive=True)
    observer.start()

    # Keep the main thread idle until Ctrl+C, then shut the observer down.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
This gives the output for example in the console:
Created file: ./test_file.h5
Modified directory: .
Modified file: ./test_file.h5
Modified directory: .
What I would like is only when new files are created, that the name of them be returned and I don't need it returned in the console but just rather returned in a variable so I can use it as an input for a different function. Is there a way to do this?

You need to create a custom handler. This can be done by inheriting from FileSystemEventHandler and overriding the method for the event you want to handle.
class CustomHandler(FileSystemEventHandler):
    """Watchdog handler that hands every newly created file to a callback."""

    def __init__(self, callback: Callable):
        """Store ``callback``; it is called with a ``pathlib.Path`` per new file."""
        # Run the base-class initialiser so the handler stays correct even if
        # the parent class sets up state of its own.
        super().__init__()
        self.callback = callback

    def on_created(self, event: Union[DirCreatedEvent, FileCreatedEvent]):
        """Report the creation event and forward file creations to the callback."""
        print(f"Event type: {event.event_type}\nAt: {event.src_path}\n")
        # Directory creations are printed above but not forwarded.
        if isinstance(event, FileCreatedEvent):
            file = pathlib.Path(event.src_path)
            print(f"Processing file {file.name}\n")
            self.callback(file)
Available events are:
on_modified(self, event)
on_deleted(self, event)
on_closed(self, event)
on_moved(self, event)
on_any_event(self, event)
The event object passed in varies by method; for on_created it is safe to assume it is either a DirCreatedEvent or a FileCreatedEvent.
--
Example code
import time
import pathlib
from typing import Union
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler, DirCreatedEvent, FileCreatedEvent
class CustomHandler(FileSystemEventHandler):
    """Custom handler for Watchdog that remembers the paths of created files."""

    def __init__(self):
        # Run the base-class initialiser so the handler stays correct even if
        # the parent class sets up state of its own.
        super().__init__()
        # Paths (as reported by watchdog) of every file created so far.
        self.path_strings = []

    def on_created(self, event: Union[DirCreatedEvent, FileCreatedEvent]):
        """Callback for file/directory creation; called by the Observer."""
        print(f"Event type: {event.event_type}\nAt: {event.src_path}")
        # Only file creations are recorded; directory creations are just printed.
        if isinstance(event, FileCreatedEvent):
            # event.src_path is the path of the created file.
            self.path_strings.append(event.src_path)
            print(f"Path content: \n{self.path_strings}")
def main():
    """Watch the current directory tree with CustomHandler until Ctrl+C."""
    # Absolute path of the current directory, written with forward slashes.
    watch_root = pathlib.Path(".").absolute().as_posix()

    file_handler = CustomHandler()
    watcher = Observer()
    # recursive=True also reports events from sub-directories.
    watcher.schedule(file_handler, path=watch_root, recursive=True)
    watcher.start()

    # Idle on the main thread; the observer does its work on its own thread.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        watcher.stop()
    watcher.join()


if __name__ == '__main__':
    main()
Example output
Event type: created
At: E:/github/ProjectIncubator/single_run_scripts\test.a
Path content:
['E:/github/ProjectIncubator/single_run_scripts\\test.a']
Event type: created
At: E:/github/ProjectIncubator/single_run_scripts\nyan.txt
Path content:
['E:/github/ProjectIncubator/single_run_scripts\\test.a', 'E:/github/ProjectIncubator/single_run_scripts\\nyan.txt']
Old
Full Example Code
import time
import pathlib
import argparse
from typing import Union, Callable
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler, DirCreatedEvent, FileCreatedEvent
class CustomHandler(FileSystemEventHandler):
    """Watchdog handler that hands every newly created file to a callback."""

    def __init__(self, callback: Callable):
        """Store ``callback``; it is called with a ``pathlib.Path`` per new file."""
        # Run the base-class initialiser so the handler stays correct even if
        # the parent class sets up state of its own.
        super().__init__()
        # Callback invoked for every file-creation event seen by on_created.
        self.callback = callback

    def on_created(self, event: Union[DirCreatedEvent, FileCreatedEvent]):
        """Report the creation event and forward file creations to the callback."""
        print(f"Event type: {event.event_type}\nAt: {event.src_path}\n")
        # check if it's File creation, not Directory creation
        if isinstance(event, FileCreatedEvent):
            file = pathlib.Path(event.src_path)
            print(f"Processing file {file.name}\n")
            # call callback
            self.callback(file)
def main(directory: Union[pathlib.Path, None] = None):
    """Watch a directory tree and list the files created while watching.

    Args:
        directory: Directory to watch. When omitted, the module-level
            ``args.dir`` is used, exactly as before; passing it explicitly
            lets ``main`` be called without the script's argument parsing.
    """
    path: pathlib.Path = args.dir if directory is None else directory
    # list for new files
    created_files = []

    # Callback handed to the handler; collects each new file's path.
    def callback(path_: pathlib.Path):
        print(f"Adding {path_.name} to list!")
        created_files.append(path_)

    # create instance of observer and CustomHandler
    observer = Observer()
    handler = CustomHandler(callback)
    observer.schedule(handler, path=path.absolute().as_posix(), recursive=True)
    observer.start()
    print("Observer started")
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
    # Summary: count line followed by one file name per line.
    print(f"{len(created_files)} new files was created!", "\n".join(p.name for p in created_files), sep="\n")
    input("Press enter to exit!")
if __name__ == '__main__':
    # Directory containing this script; used when no DIR argument is given.
    ROOT = pathlib.Path(__file__).parent

    # Optional positional DIR argument, parsed straight into a pathlib.Path.
    parser = argparse.ArgumentParser(description="Listen for file change in directory.")
    parser.add_argument(
        "dir",
        metavar="DIR",
        type=pathlib.Path,
        default=ROOT,
        nargs="?",
        help="Directory to listen for. If omitted, script path is used.",
    )
    # main() reads the parsed namespace through the module-level name `args`.
    args = parser.parse_args()
    main()
Example output (Line breaks are mess, sorry!)
❯ py .\watchdog_test.py X:\test
Observer started
Event type: created
At: X:/test\새 폴더
Event type: created
At: X:/test\meow.txt
Processing file meow.txt
Adding meow.txt to list!
Event type: created
At: X:/test\meow.txt
Processing file meow.txt
Adding meow.txt to list!
Event type: created
At: X:/test\meow - 복사본.txt
Processing file meow - 복사본.txt
Adding meow - 복사본.txt to list!
Event type: created
At: X:/test\meow - 복사본 (2).txt
Processing file meow - 복사본 (2).txt
Adding meow - 복사본 (2).txt to list!
Event type: created
At: X:/test\meow - 복사본 (3).txt
Processing file meow - 복사본 (3).txt
Adding meow - 복사본 (3).txt to list!
5 new files was created!
meow.txt
meow.txt
meow - 복사본.txt
meow - 복사본 (2).txt
meow - 복사본 (3).txt
Press enter to exit!

Related

Python Watchdog with Slurm Output

I'm trying to use python-watchdog to monitor output of SLURM jobs on a supercomputer. For some reason, the watchdog program isn't detecting changes in the files, even if a tail -f shows that the file is indeed being changed. Here's my watchdog program:
import logging
import socket
import sys
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
filename="/work/ollie/pgierz/PISM/pindex_vostok_ds50/scripts/pindex_vostok_ds50.watchdog")
def on_created(event):
    """Log (at INFO level) that ``event.src_path`` was created."""
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("hey, %s has been created!", event.src_path)
def on_deleted(event):
    """Log (at INFO level) that ``event.src_path`` was deleted."""
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("what the f**k! Someone deleted %s!", event.src_path)
def on_modified(event):
    """Log (at INFO level) that ``event.src_path`` was modified."""
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("hey buddy, %s has been modified", event.src_path)
def on_moved(event):
    """Log (at INFO level) that ``event.src_path`` moved to ``event.dest_path``."""
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info(
        "ok ok ok, someone moved %s to %s", event.src_path, event.dest_path
    )
if __name__ == "__main__":
    if "ollie" in socket.gethostname():
        logging.info("Not watching on login node...")
        sys.exit()
    # Only do this on compute node:
    # watchdog expects *lists* of glob patterns. A bare string would be read
    # as a sequence of one-character patterns, so "*.watchdog" would also
    # contribute "*" to the ignore list and suppress every event.
    patterns = ["*"]
    ignore_patterns = ["*.watchdog"]
    ignore_directories = False
    case_sensitive = True
    # Keyword arguments keep the call unambiguous regardless of parameter order.
    my_event_handler = PatternMatchingEventHandler(
        patterns=patterns,
        ignore_patterns=ignore_patterns,
        ignore_directories=ignore_directories,
        case_sensitive=case_sensitive,
    )
    # Attach the module-level logging callbacks to this handler instance.
    my_event_handler.on_created = on_created
    my_event_handler.on_deleted = on_deleted
    my_event_handler.on_modified = on_modified
    my_event_handler.on_moved = on_moved
    path = "/work/ollie/pgierz/PISM/pindex_vostok_ds50/scripts"
    #path = "/work/ollie/pgierz/PISM/pindex_vostok_ds30/"
    go_recursively = True
    my_observer = Observer()
    my_observer.schedule(my_event_handler, path, recursive=go_recursively)
    my_observer.start()
    # Keep the main thread idle until Ctrl+C, then shut the observer down.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        my_observer.stop()
    my_observer.join()
This is just a suspicion, but could it be that the filesystem doesn't actually register the file as being "changed" since it is still open from the batch job? Doing an ls -l or stat on the output files shows it was "modified" when the job started. Do I need to tell slurm to "flush" the file?

Python Watchdog process existing files on startup

I have a simple Watchdog and Queue process to monitor files in a directory.
Code taken from https://camcairns.github.io/python/2017/09/06/python_watchdog_jobs_queue.html
import time
from watchdog.events import PatternMatchingEventHandler
from watchdog.observers import Observer
from queue import Queue
from threading import Thread
dir_path = "/data"
def process_queue(q):
    """Worker loop: print each event taken from ``q``, pausing 5 s after each.

    Never returns; intended to run on a daemon thread.

    Args:
        q: Queue object that the watchdog handler puts events on.
    """
    while True:
        # Blocking get() waits for the next event, replacing the separate
        # q.empty() check followed by q.get().
        event = q.get()
        print("New event %s" % event)
        time.sleep(5)
class FileWatchdog(PatternMatchingEventHandler):
    """Pattern-filtered handler that puts creation events on a queue."""

    def __init__(self, queue, patterns):
        """
        Args:
            queue: Object with a ``put`` method that receives each event.
            patterns: Glob pattern list; a single string is also accepted.
        """
        # watchdog expects a list of patterns; wrap a lone string so it is not
        # treated as a sequence of one-character patterns.
        if isinstance(patterns, str):
            patterns = [patterns]
        super().__init__(patterns=patterns)
        self.queue = queue

    def process(self, event):
        """Hand ``event`` over to the queue."""
        self.queue.put(event)

    def on_created(self, event):
        """Enqueue every creation event dispatched to this handler."""
        self.process(event)
if __name__ == '__main__':
    watchdog_queue = Queue()
    # daemon=True replaces the deprecated Thread.setDaemon(); the worker is
    # abandoned automatically when the main thread exits.
    worker = Thread(target=process_queue, args=(watchdog_queue,), daemon=True)
    worker.start()
    # Patterns are passed as a list, which is what watchdog expects.
    event_handler = FileWatchdog(watchdog_queue, patterns=["*.ini"])
    observer = Observer()
    observer.schedule(event_handler, path=dir_path)
    observer.start()
    # Keep the main thread idle until Ctrl+C, then shut the observer down.
    try:
        while True:
            time.sleep(2)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
Once the process is running new files are processed correctly.
However if I restart the process and a file already exists in the directory it is ignored.
I have tried to create a dict to add to the queue
# Queue a hand-built "created" record (a plain dict) for every .ini file that
# already exists in dir_path.
for file in (name for name in os.listdir(dir_path) if name.endswith(".ini")):
    file_path = os.path.join(dir_path, file)
    event = {
        'event_type': 'on_created',
        'is_directory': 'False',
        'src_path': file_path,
    }
    watchdog_queue.put(event)
but it's expecting an object of type (class 'watchdog.events.FileCreatedEvent') and I can't work out how to create this.
Alternatively I can see in the Watchdog documentation (class watchdog.utils.dirsnapshot.DirectorySnapshot) but I cannot work out how to run this and add it to the queue.
Any suggestions on how I can add existing files to the queue on startup ?
This code should do what you are trying to achieve.
import os

from watchdog.events import FileCreatedEvent

# Loop over the existing entries; dir_path is your lookup folder.
# Events queued here bypass the handler's "*.ini" pattern filter, so apply the
# same filter by hand and skip anything that is not a regular file.
for file in os.listdir(dir_path):
    filename = os.path.join(dir_path, file)
    if not (file.endswith(".ini") and os.path.isfile(filename)):
        continue
    event = FileCreatedEvent(filename)
    watchdog_queue.put(event)
I stumbled over the same problem and maybe this solution is for you too. At least on linux this works like a charm.
Add the "on_modified" method
class FileWatchdog(PatternMatchingEventHandler):
    """Pattern-filtered handler that also enqueues modification events."""

    def __init__(self, queue, patterns):
        # watchdog expects a list of patterns; wrap a lone string so it is not
        # treated as a sequence of one-character patterns.
        if isinstance(patterns, str):
            patterns = [patterns]
        super().__init__(patterns=patterns)
        self.queue = queue

    # Placeholder for the methods not repeated in this excerpt.
    ...

    def on_modified(self, event):
        """Enqueue modification events the same way as creation events."""
        self.process(event)
Now after starting the observer, loop through all files in directory and "touch" them, so they will be "modified".
# Loop to get all files; dir_path is your lookup folder.
for file in os.listdir(dir_path):
    filename = os.path.join(dir_path, file)
    # Update the timestamps in-process instead of shelling out to `touch`:
    # no shell quoting problems with spaces or special characters, no
    # dependency on a `touch` binary, and no pipe left open per file.
    os.utime(filename)
No need to add special filters as your FileHandler will handle that.

Monitor big text file for change and show what was added

I am trying to monitor a text log file for changes and want to see the lines that were added.
I try with watchdog it seems to work with some manually created/edited files for testing, but my log file (in the same directory) isn't detected as "changed", but it was changed. It's probably because this file is already open (maybe in a specific mode). If I close the app that uses this log file and change it manually then watchdog works ok.
How do I check if the file was changed and if the changes were written to
the console?
#!/usr/bin/python
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
class MyHandler(FileSystemEventHandler):
    """Print the event type and source path of every modification event."""

    def on_modified(self, event):
        kind, location = event.event_type, event.src_path
        print(f'event type: {kind} path : {location}')
if __name__ == "__main__":
    event_handler = MyHandler()
    observer = Observer()
    # Raw string: the backslashes in the Windows path are literal. Without
    # the r-prefix, "\g", "\p" and "\l" are invalid escape sequences.
    observer.schedule(event_handler, path=r'C:\gory\parcienaszklo\logs', recursive=False)
    observer.start()
    # Keep the main thread idle until Ctrl+C, then shut the observer down.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()

Detect the changes in a file and write them in a file

I produced a script that detects the changes on files that are located in a specific directory. I'm trying to write all these changes to a changes.txt file. For this purpose I'm using the sys.stdout = open('changes.txt','w') instruction.
The problem is that whenever I run the script and change a file in the directory and save it, an empty file called changes.txt is created. This file is never written!
#!/usr/bin/python
import time
import sys
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
# Redirect sys.stdout to changes.txt (opened for writing), so later print
# output goes to that file instead of the console.
# NOTE(review): nothing in this script flushes or closes the file explicitly.
sys.stdout = open('changes.txt','w')
class MyHandler(FileSystemEventHandler):
    """Print a fixed message for every modification event."""

    def on_modified(self, event):
        # Call form of print: valid on Python 3, and prints the same text on
        # Python 2, where the bare print statement used before only works.
        print("something happened!")
if __name__ == "__main__":
    event_handler = MyHandler()
    observer = Observer()
    observer.schedule(event_handler, path='.', recursive=False)
    observer.start()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    # Wait for the observer thread to finish before the interpreter exits.
    observer.join()
I'd recommend something like
#!/usr/bin/python
import time
import sys
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
class MyHandler(FileSystemEventHandler):
    """Write a line to a caller-supplied file object on every modification."""

    def __init__(self, f):
        """Store ``f``, a writable file-like object chosen by the caller."""
        # Run the base-class initialiser so the handler stays correct even if
        # the parent class sets up state of its own.
        super().__init__()
        self.f = f

    def on_modified(self, event):
        self.f.write("something happened!\n")
        # Flush at once so the line is visible in the file immediately.
        self.f.flush()
if __name__ == "__main__":
    with open('changes.txt','w') as f:
        event_handler = MyHandler(f)
        observer = Observer()
        observer.schedule(event_handler, path='.', recursive=False)
        observer.start()
        try:
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            observer.stop()
        # Join while the file is still open, so the observer thread cannot
        # call the handler after `f` has been closed.
        observer.join()
As you can see, control over where your output will be written has been handed to the caller (the one instantiating MyHandler) instead of the callee (on_modified).
This means you can also do
event_handler = MyHandler(sys.stdout)
and see the output instead of the output being put into the file.
An additional benefit: by using a context manager you can be sure the file is closed properly, even if errors occur.

Getting the filename using python Watchdog

I am trying to get the name of a file that changes periodically.
I am using watchdog to do this.
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
# Timestamp string taken once, when this line runs at import time; it is not
# refreshed afterwards.
timestr = time.strftime("%Y.%m.%d-%H.%M.%S")
class MyHandler(FileSystemEventHandler):
    """Append a line to this run's change log on every modification event."""

    def on_modified(self, event):
        log_name = 'change_log_' + timestr + '.txt'
        # If the log file sits inside the watched directory, writing to it
        # raises a modification event of its own; skip those so the handler
        # does not keep re-triggering itself.
        if event.src_path.endswith(log_name):
            return
        # 'aw' is not a valid mode on Python 3 (open() raises ValueError);
        # plain append is what is meant. `with` closes the file even if the
        # write fails.
        with open(log_name, 'a') as change_log:
            change_log.write('Time the file changed: ' + timestr + '\n')
if __name__ == "__main__":
    # Watch only the current directory itself (no sub-directories).
    observer = Observer()
    event_handler = MyHandler()
    observer.schedule(event_handler, path='.', recursive=False)
    observer.start()

    # Keep the main thread idle until Ctrl+C, then shut the observer down.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
For some reason this prints out about 62 lines in the "change_log" file. This is not very useful.
What I would like to do is to print the name of the file that changed, or store it in a variable to pass to my other module.
In your example, if you need a file name, it is necessary to replace 'change_log_' with event.src_path. See the official code for more details.
You can also see the use of event.src_path in this answer as I used it in the printout.
It looks like the event object that is sent to your handler includes the information that you seek:
http://pythonhosted.org/watchdog/api.html#watchdog.events.FileSystemEvent
Use the src_path property of the event object that's passed into your FileSystemEvent subclass handler method to get the filename.

Categories