This might be a duplicate, but I couldn't find exactly what I was looking for. Feel free to link any previous answer.
I need to write a Python script (bash would also be OK) that continuously watches a directory. When the content of this directory changes (because another program creates a new directory inside it), I want to automatically run a command line that takes the name of the newly created directory as an argument.
Example:
I need to watch directory /home/tmp/:
the current content of the directory is:
$ ls /home/tmp
Patient Patient2 Patient3
Suddenly, Patient4 dir arrives in /home/tmp.
I want code that automatically runs
$ my_command --target_dir /home/tmp/Patient4/
I hope I'm clear in explaining what I need.
Thanks
The answer I found works on Linux only, and it makes use of the pyinotify wrapper. Below is the working code:
import os

import pyinotify

# MODEL, OUTPUT and HISTORY are configuration constants defined elsewhere in the original script
class EventProcessor(pyinotify.ProcessEvent):
    _methods = ["IN_CREATE",
                # "IN_OPEN",
                # "IN_ACCESS",
                ]

def process_generator(cls, method):
    def _method_name(self, event):
        if event.maskname == "IN_CREATE|IN_ISDIR":
            print(f"Starting pipeline for {event.pathname}")
            os.system(f"clearlung --single --automatic --base_dir {event.pathname} --target_dir CT " + \
                      f"--model {MODEL} --subroi --output_dir {OUTPUT} --tag 0 --history_path {HISTORY}")
        pass
    _method_name.__name__ = "process_{}".format(method)
    setattr(cls, _method_name.__name__, _method_name)

for method in EventProcessor._methods:
    process_generator(EventProcessor, method)

class PathWatcher():
    """Class to watch for changes"""

    def __init__(self, path_to_watch) -> None:
        """Base constructor"""
        self.path = path_to_watch
        if not os.path.isdir(self.path):
            raise FileNotFoundError()

    def watch(self,):
        """Main method of the PathWatcher class"""
        print(f"Waiting for changes in {self.path}...")
        watch_manager = pyinotify.WatchManager()
        event_notifier = pyinotify.Notifier(watch_manager, EventProcessor())
        watch_this = os.path.abspath(self.path)
        watch_manager.add_watch(watch_this, pyinotify.ALL_EVENTS)
        event_notifier.loop()
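As noted, pyinotify works on Linux only. If portability ever matters, a rough cross-platform sketch using the third-party watchdog package could look like the following (assumptions on my part: watchdog is installed, and my_command stands in for the real program from the question):

import subprocess
import time

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

class NewDirHandler(FileSystemEventHandler):
    def on_created(self, event):
        # only react to newly created directories, as in the question's example
        if event.is_directory:
            subprocess.run(["my_command", "--target_dir", event.src_path])

if __name__ == "__main__":
    observer = Observer()
    observer.schedule(NewDirHandler(), "/home/tmp", recursive=False)
    observer.start()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()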
I've written a small module for work and I'm adding some unit testing. The module writes files that need to be read back in for testing purposes, and I'm using tempfile.TemporaryDirectory(). I create the temporary directory in the setUp() method of my test class and tear it down in the tearDown() method because, clearly, I don't want to trash up the file structure when tests are run in prod. My understanding is that the setUp and tearDown methods are run before and after each test, and while this adds some overhead to the testing process (i.e., each test takes about 0.5 seconds to run), I'm not all that worried about the time.
My test class
import os
import tempfile
import unittest

import numpy as np
from pptx import Presentation

class MyUtilsTest(unittest.TestCase):
    def setUp(self) -> None:
        self.dir_name = tempfile.TemporaryDirectory()
        file_name = "file_used_for_testing.pptx"
        self.presentation_path = self.dir_name.name + "/" + file_name
        # Do some things, including writing some stuff to the temporary directory

    def tearDown(self) -> None:
        self.dir_name.cleanup()

    def test1(self) -> None:
        self.assertTrue(
            os.path.isfile(self.presentation_path), f"{self.presentation_path} exists."
        )

    def test2(self) -> None:
        prs = Presentation(self.presentation_path)
        self.assertEqual(len(prs.slides), 4)

    def test3(self) -> None:
        prs = Presentation(pptx=self.presentation_path)
        titles = [prs.slides[s].placeholders[0].text for s in np.arange(0, 4)]
        self.assertEqual(
            titles, ["Test title", "Image title", "Table title", "Bullet title"]
        )

    # More tests that rely on files in self.dir_name here
When I have my test class set up like this, all of my tests run and pass without issue, but I get a linter warning:
I'm new-ish to writing software-like code in Python, and while researching this linter warning I ran into several questions on SO (e.g., Python: How do I make temporary files in my test suite?). I've tried editing my test class to use with tempfile.TemporaryDirectory() as dir_name:, but when I do this, my tests fail. For example, when I edit the setUp() method (and remove tearDown()) to
def setUp(self) -> None:
    with tempfile.TemporaryDirectory() as dir_name:
        file_name = "file_used_for_testing.pptx"
        self.presentation_path = dir_name + "/" + file_name
        # Do some things, including writing some stuff to the temporary directory
the later tests can't find the temp directories, I suspect because the temp dir was destroyed after leaving the context/function and prior to the test running.
How can I keep the context manager open for subsequent tests that rely on the files that are produced in the setUp() method?
Assuming you don't need to recreate the directory for each test case, you can override unittest.TestCase.run() to customize the behavior of the test execution (see the docs).
For example:
import tempfile, unittest

class MyTest(unittest.TestCase):
    def run(self, result=None):
        with tempfile.TemporaryDirectory() as dir_name:
            self.dir_name = dir_name
            # pass `result` along so the test runner still records outcomes
            super().run(result)

    def test(self):
        print(self.dir_name)
        self.assertIsNotNone(self.dir_name)
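If you would rather keep a per-test setUp() as in the question, a small alternative sketch uses unittest's addCleanup() instead of an explicit tearDown(); the directory is still removed after each test (file name reused from the question):

def setUp(self) -> None:
    tmp_dir = tempfile.TemporaryDirectory()
    self.addCleanup(tmp_dir.cleanup)  # runs after each test, like tearDown()
    self.presentation_path = tmp_dir.name + "/" + "file_used_for_testing.pptx"
    # Do some things, including writing some stuff to the temporary directory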
My waf project has two dependencies that are built with CMake.
Following the dynamic_build3 example found in the waf git repo, I'm trying to create a tool which spawns CMake and, after a successful build, performs an install into waf's output subdirectory:
import os

from waflib import Context, Task
from waflib.Errors import WafError
from waflib.TaskGen import after_method, extension, feature

@extension('.txt')
def spawn_cmake(self, node):
    if node.name == 'CMakeLists.txt':
        self.cmake_task = self.create_task('CMake', node)
        self.cmake_task.name = self.target

@feature('cmake')
@after_method('process_source')
def update_outputs(self):
    self.cmake_task.add_target()
class CMake(Task.Task):
    color = 'PINK'

    def keyword(self):
        return 'CMake'

    def run(self):
        lists_file = self.generator.source[0]
        bld_dir = self.generator.bld.bldnode.make_node(self.name)
        bld_dir.mkdir()
        # process args and append install prefix
        try:
            cmake_args = self.generator.cmake_args
        except AttributeError:
            cmake_args = []
        cmake_args.append(
            '-DCMAKE_INSTALL_PREFIX={}'.format(bld_dir.abspath()))
        # execute CMake
        cmd = '{cmake} {args} {project_dir}'.format(
            cmake=self.env.get_flat('CMAKE'),
            args=' '.join(cmake_args),
            project_dir=lists_file.parent.abspath())
        try:
            self.generator.bld.cmd_and_log(
                cmd, cwd=bld_dir.abspath(), quiet=Context.BOTH)
        except WafError as err:
            return err.stderr
        # execute make install
        try:
            self.generator.bld.cmd_and_log(
                'make install', cwd=bld_dir.abspath(), quiet=Context.BOTH)
        except WafError as err:
            return err.stderr
        try:
            os.stat(self.outputs[0].abspath())
        except:
            return 'library {} does not exist'.format(self.outputs[0])
        # store the signature of the generated library to avoid re-running the
        # task without need
        self.generator.bld.raw_deps[self.uid()] = [self.signature()] + self.outputs

    def add_target(self):
        # override the outputs with the library file name
        name = self.name
        bld_dir = self.generator.bld.bldnode.make_node(name)
        lib_file = bld_dir.find_or_declare('lib/{}'.format(
            (
                self.env.cshlib_PATTERN
                if self.generator.lib_type == 'shared' else self.env.cstlib_PATTERN
            ) % name))
        self.set_outputs(lib_file)

    def runnable_status(self):
        ret = super(CMake, self).runnable_status()
        try:
            lst = self.generator.bld.raw_deps[self.uid()]
            if lst[0] != self.signature():
                raise Exception
            os.stat(lst[1].abspath())
            return Task.SKIP_ME
        except:
            return Task.RUN_ME
        return ret
I'd like to spawn the tool and then link the waf target to the installed libraries, which I perform using the "fake library" mechanism by calling bld.read_shlib():
def build(bld):
    bld.post_mode = Build.POST_LAZY

    # build 3rd-party CMake dependencies first
    for lists_file in bld.env.CMAKE_LISTS:
        if 'Chipmunk2D' in lists_file:
            bld(
                source=lists_file,
                features='cmake',
                target='chipmunk',
                lib_type='shared',
                cmake_args=[
                    '-DBUILD_DEMOS=OFF',
                    '-DINSTALL_DEMOS=OFF',
                    '-DBUILD_SHARED=ON',
                    '-DBUILD_STATIC=OFF',
                    '-DINSTALL_STATIC=OFF',
                    '-Wno-dev',
                ])
    bld.add_group()

    # after this, specifying `use=['chipmunk']` in the target does the job
    out_dir = bld.bldnode.make_node('chipmunk')
    bld.read_shlib(
        'chipmunk',
        paths=[out_dir.make_node('lib')],
        export_includes=[out_dir.make_node('include')])
I find this very ugly because:
The chipmunk library is needed only during the final target's link phase; there's no reason to block the whole build (by using Build.POST_LAZY mode and bld.add_group()), yet unblocking it makes read_shlib() fail. Imagine if there were also some kind of git clone task before that...
Calling read_shlib() in the build() command implies that the caller knows how and where the tool installs the files. I'd like the tool itself to perform the call to read_shlib() (if it's necessary at all). But I failed doing this in run() and in runnable_status(); as suggested in paragraph 11.4.2 of the Waf Book section about custom tasks, it seems that I have to encapsulate the call to read_shlib() in ANOTHER task and put it inside the undocumented more_tasks attribute.
And here are the questions:
How can I encapsulate the read_shlib() call in a task, to be spawned by the CMake task?
Is it possible to let these tasks run in parallel, in a non-blocking way for other tasks (suppose a project has 2 or 3 of these CMake dependencies, which are to be fetched by git from remote repos)?
Well, in fact you have already done most of the work :)
read_shlib only creates a fake task pretending to build an already existing lib. In your case you really build the lib, so you don't need read_shlib at all. You can just use your cmake task generator somewhere, given that you've set the right parameters.
The use keyword recognizes some parameters in the used task generators:
export_includes
export_defines
It also manages libs and task ordering if the used task generator has a link_task.
So you just have to set export_includes and export_defines correctly in your cmake task generator, plus set a link_task attribute that references your cmake_task attribute. You must also set your cmake_task outputs correctly for this to work, i.e. the first output of the list must be the lib node (what you do in add_target seems OK). Something like:
@feature('cmake')
@after_method('update_outputs')
def export_for_use(self):
    self.link_task = self.cmake_task
    out_dir = self.bld.bldnode.make_node(self.target)
    self.export_includes = out_dir.make_node('include')
This done, you will simply write in your main wscript:
def build(bld):
    for lists_file in bld.env.CMAKE_LISTS:
        if 'Chipmunk2D' in lists_file:
            bld(
                source=lists_file,
                features='cmake',
                target='chipmunk',
                lib_type='shared',
                cmake_args=[
                    '-DBUILD_DEMOS=OFF',
                    '-DINSTALL_DEMOS=OFF',
                    '-DBUILD_SHARED=ON',
                    '-DBUILD_STATIC=OFF',
                    '-DINSTALL_STATIC=OFF',
                    '-Wno-dev',
                ])

    bld.program(source="main.cpp", use="chipmunk")
You can of course simplify/factorize the code. I think add_target should not be in the task; it mainly manages task generator attributes.
I have a directory that I visit from time to time to check its contents, so I have created this code to retrieve the paths of all the files within this directory and its subdirectories:
import os

our_dir = 'c:\\mydocs'
walk = os.walk(our_dir)
for path, folders, files in walk:
    for f in files:
        file_path = os.path.join(path, f)
        print file_path
This directory has 200K+ files and frequent file changes and additions, so by the time the code finishes running, more files will have been added/changed. The question is how to do the following:
1. Conduct an initial run of the code, listing all the file paths in this directory that were created/changed before the initial run's start time.
2. Store somehow the files added/changed during the initial run (between the initial run's start time and its end time).
3. With every subsequent run, list only the paths created before the current run (and after the end time of the initial run), plus those created during the current run (between the current run's start time and end time).
Any idea on how to do this? I just want to make it clear that I am not "watching/monitoring" the directory, but I am visiting it from time to time.
Here's a really basic structure idea: each folder gets its own thread. You would have two classes, one that gathers the data (Helper) and one that stores it (Directory).
Two classes are required because a thread can only be started once, and you need to be able to generate a new thread for a directory that has already been listed without losing its data.
The root directory would be a Directory instance that lists its given path ('C:\mydocs'). It would store the file list in self.files and create a new Directory instance for every directory it contains (without forgetting to store them in self.dirs so they remain accessible).
Refreshing could be timed, and checks the directory's modification date as you suggested.
Here's some code to help you understand my idea:
import os
import threading
import time

class Helper(threading.Thread):
    def __init__(self, directory):
        super(Helper, self).__init__()
        self.directory = directory
        self.start()

    def run(self):
        for path, folders, files in os.walk(self.directory.path):
            for f in files:
                self.directory.files.append(os.path.join(path, f))
            for d in folders:
                self.directory.dirs.append(Directory(os.path.join(path, d), self.directory.interval, self.directory.do))
        self = None

class Directory(threading.Thread):
    def __init__(self, path, interval=5, do=None):
        super(Directory, self).__init__()
        self.path = path
        self.files, self.dirs = ([], [])
        self.interval = interval
        self.last_update = 0
        self.helper = None
        self.do = do  # One flag to stop refreshing all instances
        if do == None:
            self.do = True

    def run(self):
        while self.do:
            self.refresh()
            time.sleep(self.interval)

    def refresh(self):
        # Only start a refresh if self.helper is done and the directory was changed
        if not self.helper and self.has_changed():
            self.last_update = int(time.time())
            self.helper = Helper(self)

    def has_changed(self):
        return int(os.path.getmtime(self.path)) > self.last_update
I think this should be enough to get you started!
Edit: I changed the code a bit to actually be in a working state. Or at least I hope it is (I haven't tested it)!
Edit 2: I actually took the time to test this, and fix it. I ran:
if __name__ == '__main__':
    root = Directory('/home/plg')
    root.refresh()
    root.helper.join()
    for d in [root] + root.dirs:
        for f in d.files:
            print f
And:
$ time python bin/dirmon.py | wc -l # wc -l == len(sys.stdout.readlines())
7805
real 0m0.078s
user 0m0.048s
sys 0m0.028s
That's 7805 / 0.078 = 100,064 files per second. Not too bad! :)
Edit 3 (last one!):
I ran the test on '/', first run (without cache):
147551 / 4.103 = 35,961 files per second
Second and third:
$ time python bin/dirmon.py | wc -l
147159
real 0m1.213s
user 0m0.940s
sys 0m0.272s
$ time python bin/dirmon.py | wc -l
147159
real 0m1.209s
user 0m0.928s
sys 0m0.284s
147551 / 1.213 = 121,641 files per second
147551 / 1.209 = 122,044 files per second
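For the "list only what changed since the last run" part of the question, a minimal timestamp-based sketch could also work (the state file name here is made up for illustration; files are filtered by modification time):

import os
import time

STATE_FILE = 'last_run.txt'   # hypothetical file storing the previous run's start time
our_dir = 'c:\\mydocs'

try:
    last_run = float(open(STATE_FILE).read().strip())
except (IOError, ValueError):
    last_run = 0.0            # first run: list everything

run_start = time.time()
for path, folders, files in os.walk(our_dir):
    for f in files:
        file_path = os.path.join(path, f)
        if os.path.getmtime(file_path) >= last_run:
            print file_path   # only files added/changed since the last recorded run

open(STATE_FILE, 'w').write(str(run_start))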
I started using Python a few days back and I think I have a very basic question where I am stuck. Maybe I am not doing it correctly in Python, so I wanted some advice from the experts:
I have a config.cfg and a class test in one package lib, as follows:
myProj/lib/pkg1/config.cfg
[api_config]
url = https://someapi.com/v1/
username=sumitk
myProj/lib/pkg1/test.py
import ConfigParser

class test(object):
    def __init__(self, **kwargs):
        config = ConfigParser.ConfigParser()
        config.read('config.cfg')
        print config.get('api_config', 'username')
        # just printing here but will be using this as a class variable

    # ... some other methods ...
Now I want to create an object of test in some other module in a different package
myProj/example/useTest.py
from lib.pkg1.test import test

def temp(a, b, c):
    var = test()

def main():
    temp("", "", "")

if __name__ == '__main__':
    main()
Running useTest.py is giving me error:
...
print config.get('api_config', 'username')
File "C:\Python27\lib\ConfigParser.py", line 607, in get
raise NoSectionError(section)
ConfigParser.NoSectionError: No section: 'api_config'
Now if I place this useTest.py in the same package, it runs perfectly fine:
myProj/lib/pkg1/useTest.py
myProj/lib/pkg1/test.py
myProj/lib/pkg1/config.cfg
I guess there is some very basic package access concept in Python that I am not aware of or is there something I am doing wrong here?
The issue here is that you have a different working directory depending on which module is your main script. You can check the working directory by adding the following lines to the top of each script:
import os
print os.getcwd()
Because you just provide 'config.cfg' as your file name, it will attempt to find that file inside of the working directory.
To fix this, give an absolute path to your config file.
You should be able to figure out the absolute path with the following method since you know that config.cfg and test.py are in the same directory:
# inside of test.py
import os

config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           'config.cfg')
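Then pass that absolute path to ConfigParser instead of the bare file name; a small sketch adapted from the question's constructor:

config = ConfigParser.ConfigParser()
config.read(config_path)  # works regardless of the caller's working directory
print config.get('api_config', 'username')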
I'm using a crontab to run a maintenance script for my minecraft server. Most of the time it works fine, unless the crontab tries to use the restart script. If I run the restart script manually, there aren't any issues. Because I believe it has to do with path names, I'm trying to make sure any minecraft command is always run FROM the minecraft directory. So I'm wrapping the command in pushd/popd:
os.system("pushd /directory/path/here")
os.system("command to sent to minecraft")
os.system("popd")
Below is an interactive session taking minecraft out of the equation. A simple "ls" test. As you can see, it does not at all run the os.system command from the pushd directory, but instead from /etc/, which is the directory in which I was running python to illustrate my point. Clearly pushd isn't working via python, so I'm wondering how else I can achieve this. Thanks!
>>> def test():
...     import os
...     os.system("pushd /home/[path_goes_here]/minecraft")
...     os.system("ls")
...     os.system("popd")
...
>>> test()
~/minecraft /etc
DIR_COLORS cron.weekly gcrypt inputrc localtime mime.types ntp ppp rc3.d sasldb2 smrsh vsftpd.ftpusers
DIR_COLORS.xterm crontab gpm-root.conf iproute2 login.defs mke2fs.conf ntp.conf printcap rc4.d screenrc snmp vsftpd.tpsave
X11 csh.cshrc group issue logrotate.conf modprobe.d odbc.ini profile rc5.d scsi_id.config squirrelmail vz
adjtime csh.login group- issue.net logrotate.d motd odbcinst.ini profile.d rc6.d securetty ssh warnquota.conf
aliases cyrus.conf host.conf java lvm mtab openldap protocols redhat-release security stunnel webalizer.conf
alsa dbus-1 hosts jvm lynx-site.cfg multipath.conf opt quotagrpadmins resolv.conf selinux sudoers wgetrc
alternatives default hosts.allow jvm-commmon lynx.cfg my.cnf pam.d quotatab rndc.key sensors.conf sysconfig xinetd.conf
bashrc depmod.d hosts.deny jwhois.conf mail named.caching-nameserver.conf passwd rc rpc services sysctl.conf xinetd.d
blkid dev.d httpd krb5.conf mail.rc named.conf passwd- rc.d rpm sestatus.conf termcap yum
cron.d environment imapd.conf ld.so.cache mailcap named.rfc1912.zones pear.conf rc.local rsyslog.conf setuptool.d udev yum.conf
cron.daily exports imapd.conf.tpsave ld.so.conf mailman netplug php.d rc.sysinit rwtab shadow updatedb.conf yum.repos.d
cron.deny filesystems init.d ld.so.conf.d makedev.d netplug.d php.ini rc0.d rwtab.d shadow- vimrc
cron.hourly fonts initlog.conf libaudit.conf man.config nscd.conf pki rc1.d samba shells virc
cron.monthly fstab inittab libuser.conf maven nsswitch.conf postfix rc2.d sasl2 skel vsftpd
sh: line 0: popd: directory stack empty
===
(CentOS server with python 2.4)
In Python 2.5 and later, I think a better method would be using a context manager, like so:
import contextlib
import os

@contextlib.contextmanager
def pushd(new_dir):
    previous_dir = os.getcwd()
    os.chdir(new_dir)
    try:
        yield
    finally:
        os.chdir(previous_dir)
You can then use it like the following:
with pushd('somewhere'):
    print os.getcwd() # "somewhere"
print os.getcwd() # "wherever you started"
By using a context manager you will be exception and return value safe: your code will always cd back to where it started from, even if you throw an exception or return from inside the context block.
You can also nest pushd calls in nested blocks, without having to rely on a global directory stack:
with pushd('somewhere'):
    # do something
    with pushd('another/place'):
        # do something else
    # do something back in "somewhere"
Each shell command runs in a separate process. It spawns a shell, executes the pushd command, and then the shell exits.
Just write the commands in the same shell script:
os.system("cd /directory/path/here; run the commands")
A nicer (perhaps) way is with the subprocess module:
from subprocess import Popen
Popen("run the commands", shell=True, cwd="/directory/path/here")
pushd and popd have some added functionality: they store previous working directories in a stack. In other words, you can pushd five times, do some stuff, and popd five times to end up where you started. You're not using that here, but it might be useful for others searching for questions like this. This is how you can emulate it:
# initialise a directory stack
pushstack = list()

def pushdir(dirname):
    global pushstack
    pushstack.append(os.getcwd())
    os.chdir(dirname)

def popdir():
    global pushstack
    os.chdir(pushstack.pop())
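Used the same way as the shell builtins; a small usage sketch based on the question's commands:

pushdir('/directory/path/here')
os.system("command to sent to minecraft")
popdir()  # back to wherever we started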
I don't think you can call pushd from within an os.system() call:
>>> import os
>>> ret = os.system("pushd /tmp")
sh: pushd: not found
Maybe, just maybe, your system actually provides a pushd binary that triggers a shell internal function (FreeBSD has some tricks like this, but not for pushd), but the current working directory of a process cannot be influenced by other processes -- so your first system() starts a shell and runs a hypothetical pushd, the next starts a shell and runs ls, the next starts a shell and runs a hypothetical popd... none of which influence each other.
You can use os.chdir("/home/path/") to change path: http://docs.python.org/library/os.html#os-file-dir
No need to use pushd -- just use os.chdir:
>>> import os
>>> os.getcwd()
'/Users/me'
>>> os.chdir('..')
>>> os.getcwd()
'/Users'
>>> os.chdir('me')
>>> os.getcwd()
'/Users/me'
Or make a class to use with 'with'
import os

class pushd: # pylint: disable=invalid-name
    __slots__ = ('_pushstack',)

    def __init__(self, dirname):
        self._pushstack = list()
        self.pushd(dirname)

    def __enter__(self):
        return self

    def __exit__(self, exec_type, exec_val, exc_tb) -> bool:
        # skip all the intermediate directories, just go back to the original one.
        if self._pushstack:
            os.chdir(self._pushstack.pop(0))
        if exec_type:
            return False
        return True

    def popd(self) -> None:
        if len(self._pushstack):
            os.chdir(self._pushstack.pop())

    def pushd(self, dirname) -> None:
        self._pushstack.append(os.getcwd())
        os.chdir(dirname)

with pushd(dirname) as d:
    # ... do stuff in that dirname
    d.pushd("../..")
    d.popd()
If you really need a stack, i.e. if you want to do several pushd and popd,
see naught101 above.
If not, simply do:
olddir = os.getcwd()
os.chdir('/directory/path/here')
os.system("command to sent to minecraft")
os.chdir(olddir)
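To stay robust if the command fails, the chdir-back step can be wrapped in a try/finally (a small sketch, same idea as the context manager above):

olddir = os.getcwd()
os.chdir('/directory/path/here')
try:
    os.system("command to sent to minecraft")
finally:
    os.chdir(olddir)  # always return to the original directory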