Python watchdog for files from two different directories - python

I am trying to listen to filesystem changes using the watchdog module of Python. I want to monitor the files from two different directories. For a single file watch, I used PatternMatchingEventHandler from watchdog.events. I want to use the same for multiple directories.
code:
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
class EventHandler(PatternMatchingEventHandler):
def on_modified(self, event):
super(EventHandler, self).on_modified(event)
print event
if __name__ == "__main__":
dir_name = ["/home/user1/first", "/home/user1/second"]
observer = Observer()
patterns = ["/home/user1/first/first.log","/home/user1/second/second.log")]
for i in xrange(len(dir_name)):
event_handler = EventHandler(patterns = patterns[i])
observer.schedule(event_handler, dir_name[i], recursive=True)
observer.start()
try:
while True:
time.sleep(0.1)
except KeyboardInterrupt:
observer.stop()
In the code above, I tried to observe multiple directories by creating a separate event handler for each of the files. It's not working for me. Is there anything I am missing here? What is the right way to do this?
Thanks.

Something is wrong here:
EventHandler(patterns = patterns[i])
The patterns argument must be a list, so you can write it like this:
patterns = [["/home/user1/first/first.log"], ["/home/user1/second/second.log"]]
EventHandler(patterns = patterns[i])

Although it does not use the watchdog library, this is an easy way to check whether files of a specific type have been added or removed.
If you want to know which files they are, you can append their names to a list variable and keep them there.
import os
import fnmatch
import threading
import time
initial_count = 0
flag = 0
files = []
path = ["/home/kirti/workspace/pythonproject6/img", "/home/kirti/workspace/pythonproject6/copy"]
def taskcount(path, flag, initial_count):
    """Poll *path* every 3 seconds and print changes in its ``*.jpg`` count.

    Args:
        path: Directory whose entries are listed with ``os.listdir``.
        flag: ``0`` makes the first increase print only the bare count;
            any other value makes it print as "Added".
        initial_count: Count the first poll is compared against.

    The loop never returns; it is meant to run as a thread target.
    """
    while True:
        time.sleep(3)
        new_count = len(fnmatch.filter(os.listdir(path), "*.jpg"))
        if new_count > initial_count:
            if flag != 0:
                print("Added \nCount :", new_count, "=", path)
            else:
                # First pass: only show how many matching files already exist.
                print(new_count)
        if new_count < initial_count:
            print("Removed \nCount :", new_count, "=", path)
        # Remember this poll's result so the next poll reports only new changes.
        initial_count = new_count
        flag = 1
# Start one polling thread per watched directory.
for directory in path:
    t = threading.Thread(target=taskcount, args=(directory, flag, initial_count))
    t.start()

I am using python3, LINUX OS
With a minor modification as suggested above, plus some additions of my own, this is working now:
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
class EventHandler(PatternMatchingEventHandler):
    """Pattern-filtered handler that prints every modification event."""

    def on_modified(self, event):
        """Delegate to the base-class hook, then print *event*."""
        super(EventHandler, self).on_modified(event)
        print(event)
if __name__ == "__main__":
    # Each directory is paired, by position, with the patterns to watch in it.
    dir_name = ["/home/don/test1", "/home/don/test2"]
    patterns = [['*.log'], ['*.ok']]
    observer = Observer()
    for directory, directory_patterns in zip(dir_name, patterns):
        event_handler = EventHandler(patterns=directory_patterns, ignore_directories=True, case_sensitive=False)
        observer.schedule(event_handler, directory, recursive=True)
    # One observer serves every scheduled watch, so it is started once.
    observer.start()
    try:
        while True:
            time.sleep(0.1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()

Compared with my code, yours is missing observer.join() at the end of the script. Try adding that.
EDIT
Try this code below:
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
class EventHandler(PatternMatchingEventHandler):
    """Pattern-filtered handler that prints every modification event."""

    def on_modified(self, event):
        """Delegate to the base-class hook, then print *event*."""
        super(EventHandler, self).on_modified(event)
        # Parenthesised form works on both Python 2 and Python 3.
        print(event)
if __name__ == "__main__":
    observer = Observer()
    # Directories from the question, paired by position with the file to watch in each.
    dir_name = ["/home/user1/first", "/home/user1/second"]
    patterns = ["/home/user1/first/first.log", "/home/user1/second/second.log"]
    for directory, pattern in zip(dir_name, patterns):
        # `patterns` must be a list of patterns, not a bare string.
        event_handler = EventHandler(patterns=[pattern])
        observer.schedule(event_handler, directory, recursive=True)
    observer.start()
    try:
        while True:
            time.sleep(0.1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()

Related

Trying to get watchdog to trigger async code

I'm trying to get watchdog to trigger this test asyncio snippet as well as fire a pyaudio sound. The sound works fine (it fires when the directory is modified), but the asyncio sample runs when I start watchdog and not when the directory is modified. Any help appreciated.
Watchdog code:
import watchdog.events
import watchdog.observers
import time
from ib_insync import *
import pyaudio_01
import asynciotest
class Handler(watchdog.events.PatternMatchingEventHandler):
def __init__(self):
# Set the patterns for PatternMatchingEventHandler
watchdog.events.PatternMatchingEventHandler.__init__(self, patterns=['*.csv'],
ignore_directories=True, case_sensitive=False)
def on_created(self, event):
print("Watchdog received created event - % s." % event.src_path)
# Event is created, you can process it now
def on_modified(self, event,):
#print("Watchdog received modified event - % s." % event.src_path)
x = event.src_path
print(x)
if x == 'c:/WATCHDOGTEST/x.csv':
pyaudio_01.PLAY_SOUND()
asynciotest.main
if __name__ == "__main__":
src_path = r"c:/WATCHDOGTEST/"
src_path2 = r"c:/WATCHDOGTEST2/"
event_handler = Handler()
observer = watchdog.observers.Observer()
observer.schedule(event_handler, path=src_path, recursive=True)
observer.schedule(event_handler, path=src_path2, recursive=True)
observer.start()
try:
while True:
time.sleep(1)
except KeyboardInterrupt:
observer.stop()
observer.join()
Asyncio snippet:
import asyncio
async def factorial(name, number):
f = 1
for i in range(2, number + 1):
print(f"Task {name}: Compute factorial({i})...")
await asyncio.sleep(1)
f *= i
print(f"Task {name}: factorial({number}) = {f}")
async def main():
# Schedule three calls *concurrently*:
await asyncio.gather(
factorial("A", 2),
factorial("B", 3),
factorial("C", 4),
)
asyncio.run(main())

Python parallel thread that consume Watchdog queue events

I have this code that should put an event in a queue each time an external program (TCPdump) creates a *.pcap file in my directory.
My problem is that I always get an empty queue, although I got the print from process() function.
What am I doing wrong? Is the queue correctly defined and shared between the two classes?
EDIT-----------------
I may have understood why I got an empty queue: I think it is because I was printing the queue I had initialized before it was filled by the Handler class.
I modified my code and created two processes that should consume the same queue, but now execution gets stuck on queue.put() and the ReadPcapFiles() thread stops running.
Here the updated code:
import time
import pyshark
import concurrent.futures
import threading
import logging
from queue import Queue
from multiprocessing import Process
from watchdog.observers import Observer, api
from watchdog.events import PatternMatchingEventHandler
class Handler(PatternMatchingEventHandler):
patterns = ["*.pcap", "*.pcapng"]
def __init__(self, queue):
PatternMatchingEventHandler.__init__(self)
self.queue = queue
def process(self, event):
#print(f'event type: {event.event_type} path : {event.src_path}')
self.queue.put(event.src_path)
logging.info(f"Storing message: {self.queue.qsize()}")
print("Producer queue: ", list(self.queue.queue))
#self.queue.get()
def on_created(self, event):
self.process(event)
def StartWatcher(watchdogq, event):
path = 'C:\\...'
handler = Handler(watchdogq)
observer = Observer()
while not event.is_set():
observer.schedule(handler, path, recursive=False)
print("About to start observer")
observer.start()
try:
while True:
time.sleep(1)
except Exception as error:
observer.stop()
print("Error: " + str(error))
observer.join()
def ReadPcapFiles(consumerq, event):
while not event.is_set() or not consumerq.empty():
print("Consumer queue: ", consumerq.get())
#print("Consumer queue: ", list(consumerq.queue))
# pcapfile = pyshark.FileCapture(self.queue.get())
# for packet in pcapfile:
# countPacket +=1
if __name__ == '__main__':
format = "%(asctime)s: %(message)s"
logging.basicConfig(format=format, level=logging.INFO,datefmt="%H:%M:%S")
logging.getLogger().setLevel(logging.DEBUG)
queue = Queue()
event = threading.Event()
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
executor.submit(StartWatcher,queue, event)
executor.submit(ReadPcapFiles,queue, event)
time.sleep(0.1)
logging.info("Main: about to set event")
event.set()
OLD CODE:
import time
from queue import Queue
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
class Handler(PatternMatchingEventHandler):
patterns = ["*.pcap", "*.pcapng"]
def __init__(self, queue):
PatternMatchingEventHandler.__init__(self)
self.queue = queue
def process(self, event):
print(f'event type: {event.event_type} path : {event.src_path}')
self.queue.put(event.src_path)
def on_created(self, event):
self.process(event)
class Watcher():
def __init__(self, path):
self.queue = Queue()
self.observer = Observer()
self.handler = Handler(self.queue)
self.path = path
def start(self):
self.observer.schedule(self.handler, self.path, recursive=True)
self.observer.start()
try:
while True:
time.sleep(1)
self.queue.get()
print(list(self.queue.queue))
except Exception as error:
self.observer.stop()
print("Error: " + str(error))
self.observer.join()
if __name__ == '__main__':
watcher = Watcher('C:\\...')
watcher.start()
This is working for me (I got the main idea from this answer, thanks!) but notice that I consider this a workaround, so if someone has a better solution to this or can better explain the reason of such behavior in Python, please do not hesitate to answer!
My guess is that I had two main problems:
- I was starting the Watchdog process inside another thread (and that was somehow blocking my queue-consuming thread).
- Python threads do not really run in parallel, and therefore starting an independent process was necessary.
Here my code:
import time
import pyshark
import threading
import logging
import os
from queue import Queue
from multiprocessing import Process, Pool
from watchdog.observers import Observer, api
from watchdog.events import PatternMatchingEventHandler
from concurrent.futures import ThreadPoolExecutor
class Handler(PatternMatchingEventHandler):
    """Queue the path of every newly created capture file.

    Attributes:
        queue: Queue that receives ``event.src_path`` for each created file.
    """

    patterns = ["*.pcap", "*.pcapng"]

    def __init__(self, queue):
        PatternMatchingEventHandler.__init__(self)
        self.queue = queue

    def process(self, event):
        """Put the event's source path on the queue and report the queue state."""
        self.queue.put(event.src_path)
        logging.info("Storing message: %s", self.queue.qsize())
        print("Producer queue: ", list(self.queue.queue))

    def on_created(self, event):
        """Wait until the new file stops growing, then queue its path."""
        # wait that the transfer of the file is finished before processing it
        file_size = -1
        try:
            while file_size != os.path.getsize(event.src_path):
                file_size = os.path.getsize(event.src_path)
                time.sleep(1)
        except OSError as error:
            # The file disappeared or became unreadable while being written;
            # skip it instead of raising from the event callback.
            logging.warning("Skipping %s: %s", event.src_path, error)
            return
        self.process(event)
def ConsumeQueue(consumerq):
    """Hand every path taken from *consumerq* to a worker process.

    Args:
        consumerq: Queue of file paths; each one is passed to
            ``ReadPcapFiles`` via ``Pool.apply_async``.

    The loop never returns; it is meant to run as a (daemon) thread target.
    """
    # One pool for the lifetime of the consumer. Creating a new Pool for every
    # queued item spawns a fresh set of worker processes that is never closed.
    with Pool() as pool:
        while True:
            # Blocking get() waits for the next item without empty()/sleep polling.
            created_file = consumerq.get()
            pool.apply_async(ReadPcapFiles, (created_file, ))
def ReadPcapFiles(get_event):
    """Count the packets in a capture file and sum their lengths.

    Args:
        get_event: Path of the capture file to open with ``pyshark.FileCapture``.
    """
    createdFile = get_event
    print(f"This is my event in ReadPacapFile {createdFile}")
    countPacket = 0
    bandwidth = 0
    pcapfile = pyshark.FileCapture(createdFile)
    try:
        for packet in pcapfile:
            countPacket += 1
            bandwidth += int(packet.length)
    finally:
        # Release the capture once iteration is done, even on error.
        pcapfile.close()
    # NOTE(review): the listing lost its indentation; the two summary prints
    # are assumed to run once, after the loop - confirm.
    print(f"Packet nr {countPacket}")
    print(f"Byte per second {bandwidth}")
if __name__ == '__main__':
    log_format = "%(asctime)s: %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO, datefmt="%H:%M:%S")
    logging.getLogger().setLevel(logging.DEBUG)
    queue = Queue()
    path = 'C:\\...'
    # The consumer runs as a daemon thread so it does not keep the script alive.
    worker = threading.Thread(target=ConsumeQueue, args=(queue, ), daemon=True)
    print("About to start worker")
    worker.start()
    event_handler = Handler(queue)
    observer = Observer()
    observer.schedule(event_handler, path, recursive=False)
    print("About to start observer")
    observer.start()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl+C is not an Exception subclass; catch it so the observer is
        # still stopped cleanly below.
        pass
    except Exception as error:
        print("Error: " + str(error))
    finally:
        observer.stop()
        observer.join()
There is an excellent library which provides concurrent access to the items within that queue. The queue is also persistent[file based as well as database based], so if the program crashes, you can still consume events from the point where the program crashed.
persist-queue

File Monitoring using watchdog library

I have written a Python script to monitor a local folder that contains only .txt files, and I want the script to react automatically whenever something changes in the folder (a file is created, deleted or updated).
I ran the script and made changes in the directory, but I couldn't see any output or error messages. It always says "Process finished with exit code 0". Can anyone review my code and give me some tips on what to correct to get the expected output?
import os
import sys
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
#Step 1 Create the event handler
if __name__ == "__main__":
patterns = ".txt"
ignore_patterns = None
ignore_directories = False
case_sensitive = True
event_handler = PatternMatchingEventHandler(patterns, ignore_patterns, ignore_directories, case_sensitive)
#step 2 Handle all the events
def on_created(event):
print("new files has been created!")
def on_deleted(event):
print("Some files has been Deleted")
def on_modified(event):
print("Some files has been modified")
def on_moved(event):
print("Some files has been moved")
#step 3 specify to the handler that we want these functions to be called
event_handler.on_created = on_created
event_handler.on_deleted = on_deleted
event_handler.on_modified = on_modified
event_handler.on_moved = on_moved
#step 4 create an observer
path = "T:\Laboratory\Instruments\Worklists\TrackMateRacks\old"
go_recursively = True
my_observer = Observer()
my_observer.path(event_handler, path, recursive=go_recursively)
# start the observer
my_observer.start()
try:
while True:
time.sleep(1)
except KeyboardInterrupt:
my_observer.stop()
my_observer.join()
You have to move the lines after "# start the observer" all the way to the left.
Otherwise nothing will be executed: those lines are part of on_moved(), but you want them to run when you start the script.
or
for most programs it's useful to add this line:
if __name__ == '__main__':
before the line "# start the observer".
Then my_observer.start() will be executed when you run your script. If you import your script into another script, it will not be executed, but the other script can still use all the functions you created.
It seems that you're really new to Python. You have to watch the indentation; it is part of the syntax.
As an exception, I have reformatted the complete code for you:
import os
import sys
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
#Step 1 Create the event handler
# `patterns` must be a list of glob patterns; a bare ".txt" string does not
# select "all .txt files".
patterns = ["*.txt"]
ignore_patterns = None
ignore_directories = False
case_sensitive = True
# Keyword arguments keep the call independent of the parameter order.
event_handler = PatternMatchingEventHandler(
    patterns=patterns,
    ignore_patterns=ignore_patterns,
    ignore_directories=ignore_directories,
    case_sensitive=case_sensitive,
)
#step 2 Handle all the events
def on_created(event):
    """Report a creation event."""
    print("new files has been created!")


def on_deleted(event):
    """Report a deletion event."""
    print("Some files has been Deleted")


def on_modified(event):
    """Report a modification event."""
    print("Some files has been modified")


def on_moved(event):
    """Report a move event."""
    print("Some files has been moved")
#step 3 specify to the handler that we want these functions to be called
event_handler.on_created = on_created
event_handler.on_deleted = on_deleted
event_handler.on_modified = on_modified
event_handler.on_moved = on_moved
#step 4 create an observer
def main():
    """Watch the configured folder until interrupted with Ctrl+C."""
    # Raw string: the backslashes are path separators, not escape sequences.
    path = r"T:\Laboratory\Instruments\Worklists\TrackMateRacks\old"
    go_recursively = True
    my_observer = Observer()
    # schedule() is the Observer method that registers a handler for a path.
    my_observer.schedule(event_handler, path, recursive=go_recursively)
    # start the observer
    my_observer.start()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        my_observer.stop()
    my_observer.join()


if __name__ == "__main__":
    main()
#end of file
Good luck!

watchdog in directory monitoring is not working

I want to watch a folder for addition, modification and deletion of file and execute a command whenever any of this event occurs.
I found this tutorial that helped https://www.michaelcho.me/article/using-pythons-watchdog-to-monitor-changes-to-a-directory
so here is the code I now have
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
class Watcher:
DIRECTORY_TO_WATCH = "/Users/***/desktop/google drive/protpics"
def __init__(self):
self.observer = Observer()
def run(self):
event_handler = Handler()
self.observer.schedule(event_handler, self.DIRECTORY_TO_WATCH, recursive=True)
self.observer.start()
try:
while True:
time.sleep(5)
except:
self.observer.stop()
print("Error")
self.observer.join()
class Handler(FileSystemEventHandler):
#staticmethod
def on_my_event(event):
if event.is_directory:
return None
elif event.event_type == 'created':
#Take any action here when a file is first created.
print ("Recived created event - %s" % event.src_path)
elif event.event_type == 'modified':
# Take any action here when a file is modified.
print ("Recieved modified event - %s" % event.src_path)
if __name__ == '__main__':
W = Watcher()
W.run()
The problem now is that when I add a new file to the directory, no message gets printed out. What am I doing wrong and how can I fix it?
Compare your code with the example's: in your link, the author uses on_any_event, but you are using on_my_event. FileSystemEventHandler has no method named on_my_event, so it is never called.
Have a check at official document: http://pythonhosted.org/watchdog/api.html#watchdog.events.FileSystemEventHandler

How to pass an argument to my Python watchdog.events.PatternMatchingEventHandler

I'm quite new to Python and hope the answer to this is obvious to most of you.
I'm creating a class in Python that represents a ScanFolder.
In the __init__ of that class, I start a watchdog.observers
This observer will fire a watchdog.events.PatternMatchingEventHandler whenever a file is changed under the watched directory.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import time
from watchdog.observers import Observer
import watchdog.events
path = sys.argv[1] if len(sys.argv) > 1 else '.'
class MyEventHandler(watchdog.events.PatternMatchingEventHandler):
def on_any_event(self, event):
print(event.src_path, event.event_type)
class ScanFolder:
'Class defining a scan folder'
def __init__(self, path):
self.path = path
self.documents = dict() # key = document label value = Document reference
self.event_handler = MyEventHandler(patterns=["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.pdf"],
ignore_patterns=[],
ignore_directories=True)
self.observer = Observer()
self.observer.schedule(self.event_handler, self.path, recursive=False)
self.observer.start()
def stop(self):
self.observer.stop()
self.observer.join()
scan_folder = ScanFolder(path)
try:
while True:
time.sleep(1)
"""Here, I'll act on my scan_folder object that lists the discovered files"""
except KeyboardInterrupt:
log.warning("Ouch !!! Keyboard interrupt received.")
scan_folder.stop()
My problem is the following:
How can I have my scan_folder object modified by my scan_folder.event_handler() ?
Actually, I would like to populate the scan_folder.documents dictionary wherever a file is detected in the scan folder.
Thank you very much and sorry for my ignorance.
There are a lot of ways to do it, but the simplest is to set a bound method of ScanFolder as the on_any_event callback of watchdog.events.PatternMatchingEventHandler. So your code becomes:
class ScanFolder:
    """Class defining a scan folder.

    Creates a pattern-matching event handler, points its ``on_any_event``
    callback at this instance, and starts an observer on ``path``.
    """

    def __init__(self, path):
        self.path = path
        self.documents = dict()  # key = document label   value = Document reference
        self.event_handler = watchdog.events.PatternMatchingEventHandler(
            patterns=["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.pdf"],
            ignore_patterns=[],
            ignore_directories=True)
        # Bound method: the callback runs with full access to this ScanFolder.
        self.event_handler.on_any_event = self.on_any_event
        self.observer = Observer()
        self.observer.schedule(self.event_handler, self.path, recursive=False)
        self.observer.start()

    def on_any_event(self, event):
        """Print the event's source path and type."""
        print(event.src_path, event.event_type)
        print("Complete ScanFolder() access")

    def stop(self):
        """Stop the observer and wait for it to finish."""
        self.observer.stop()
        self.observer.join()
Another way could be to derive ScanFolder from watchdog.events.PatternMatchingEventHandler .... But injecting functions is one of the strengths of Python.

Categories