Python: module for creating PID-based lockfile? - python

I'm writing a Python script that may or may not (depending on a bunch of things) run for a long time, and I'd like to make sure that multiple instances (started via cron) don't step on each other's toes. The logical way to do this seems to be a PID-based lockfile… But I don't want to reinvent the wheel if there is already code to do this.
So, is there a Python module out there which will manage the details of a PID-based lockfile?

This might be of help to you: lockfile

If you can use GPLv2, Mercurial has a module for that:
http://bitbucket.org/mirror/mercurial/src/tip/mercurial/lock.py
Example usage:
from mercurial import error, lock

try:
    # wait at most 10 minutes
    held = lock.lock("/path/to/lock", timeout=600)
except error.LockHeld:
    # couldn't take the lock
    pass
else:
    # Only the acquisition is guarded by LockHeld; the work itself runs under
    # try/finally so the lock is released even when the work raises.
    try:
        # do something
        pass
    finally:
        held.release()

i've been pretty unhappy with all of those, so i wrote this:
import errno
import os
import sys


class ProcessRunningException(Exception):
    """Raised by ``Pidfile.__enter__`` when the pidfile's owner is still alive."""


class Pidfile(object):
    """Context manager holding a PID-based lockfile for the ``with`` body.

    The pidfile is created exclusively on entry, filled with this process's
    PID, and removed again on exit.  A pidfile left behind by a process that
    no longer exists is treated as stale and replaced.

    Args:
        path: location of the pidfile.
        log: callable taking one message string (informational output).
        warn: callable taking one message string (warnings).

    Raises:
        ProcessRunningException: on entry, if the PID stored in an existing
            pidfile belongs to a live process.
        OSError: on entry, if the pidfile cannot be created for any reason
            other than already existing.
    """

    def __init__(self, path, log=sys.stdout.write, warn=sys.stderr.write):
        self.pidfile = path
        self.log = log
        self.warn = warn
        self.pidfd = None

    def _create(self):
        """Create the pidfile exclusively and return its file descriptor."""
        return os.open(self.pidfile, os.O_CREAT | os.O_WRONLY | os.O_EXCL)

    def __enter__(self):
        try:
            self.pidfd = self._create()
            self.log('locked pidfile %s' % self.pidfile)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            pid = self._check()
            if pid:
                self.pidfd = None
                raise ProcessRunningException(
                    'process already running in %s as pid %s' % (self.pidfile, pid))
            # The recorded owner is gone: replace the stale file.
            os.remove(self.pidfile)
            self.warn('removed staled lockfile %s' % (self.pidfile))
            self.pidfd = self._create()
        try:
            # os.write() needs bytes on Python 3; a PID is pure ASCII.
            os.write(self.pidfd, str(os.getpid()).encode('ascii'))
        finally:
            os.close(self.pidfd)
        return self

    def __exit__(self, t, e, tb):
        # return false to raise, true to pass
        if t is None:
            # normal condition, no exception
            self._remove()
            return True
        if t is ProcessRunningException:
            # do not remove the other process lockfile
            return False
        # other exception
        if self.pidfd is not None:
            # this was our lockfile, removing
            self._remove()
        return False

    def _remove(self):
        self.log('removed pidfile %s' % self.pidfile)
        os.remove(self.pidfile)

    def _check(self):
        """check if a process is still running

        the process id is expected to be in pidfile, which should exist.

        if it is still running, returns the pid, if not, return False."""
        with open(self.pidfile, 'r') as f:
            pidstr = f.read()
        try:
            pid = int(pidstr)
        except ValueError:
            # not an integer
            self.log("not an integer: %s" % pidstr)
            return False
        try:
            # Signal 0 performs only the existence/permission check.
            os.kill(pid, 0)
        except OSError as err:
            if err.errno == errno.EPERM:
                # The process exists but belongs to another user: still running.
                return pid
            self.log("can't deliver signal to %s" % pid)
            return False
        return pid
to be used something like this:
# Run the job under the pidfile; a live owner is reported instead of started twice.
try:
    with Pidfile(args.pidfile):
        process(args)
except ProcessRunningException:
    print("the pid file is in use, oops.")

I know this is an old thread, but I also created a simple lock which only relies on python native libraries:
import errno
import fcntl
import os
import time


class FileLock:
    """Advisory exclusive lock on a file, taken with ``fcntl.flock``.

    Args:
        filename: path of the lock file; defaults to ``LOCK_FILE`` in the
            user's home directory.  The file is opened (and truncated, because
            of mode ``'w+'``) when the object is constructed.
    """

    def __init__(self, filename=None):
        if filename is None:
            filename = os.path.join(os.path.expanduser('~'), 'LOCK_FILE')
        self.filename = filename
        self.lock_file = open(self.filename, 'w+')

    def unlock(self):
        """Release the lock held on the lock file."""
        fcntl.flock(self.lock_file, fcntl.LOCK_UN)

    def lock(self, maximum_wait=300):
        """Try to take the lock, polling once per second.

        Args:
            maximum_wait: maximum number of one-second waits between attempts.

        Returns:
            True once the lock is held, False if it could not be taken within
            ``maximum_wait`` seconds.

        Raises:
            IOError: if ``flock`` fails for a reason other than the lock being
                held elsewhere.
        """
        waited = 0
        while True:
            try:
                fcntl.flock(self.lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
            except IOError as e:
                if e.errno not in (errno.EAGAIN, errno.EWOULDBLOCK):
                    raise
            else:
                return True
            # Give up before sleeping, so no second is wasted after the final
            # attempt and maximum_wait=0 means "try once, do not wait".
            if waited >= maximum_wait:
                return False
            time.sleep(1)
            waited += 1

I believe you will find the necessary information here. The page in question refers to a package for building daemons in python: this process involves creating a PID lockfile.

There is a recipe on ActiveState on creating lockfiles.
To generate the filename you can use os.getpid() to get the PID.

You can try PID: https://pypi.org/project/pid/
As the documentation shows, you can lock a function simply by adding the decorator @pidfile() above the function/method definition.
from pid.decorator import pidfile


# The decorator must be applied with "@"; written as "#pidfile()" it is only
# a comment and main() would run without any pidfile protection.
@pidfile()
def main():
    pass


if __name__ == "__main__":
    main()
The default location for the pidfile self-check (the file that says whether you can execute the code or not) is '/var/run'. You can change it as follows:
@pidfile(piddir='/path/to/a/custom/location')
For other params, see: https://github.com/trbs/pid/blob/95499b30e8ec4a473c0e6b407c03ce644f61c643/pid/base.py#L41
Unfortunately, this library's documentation is a little bit poor.

Related

Python - find PID function using psutil - does not return anything

I have a function that should return the process ID of a given process using psutil, but for some reason it doesn't return anything.
Printing the required variable shows the correct value. I think I'm doing something wrong.
def pid_find(process_name):
pid = []
for proc in psutil.process_iter():
try:
if proc.name() == process_name:
pid.append(proc.pid)
except psutil.AccessDenied:
pass
try:
process = pid[0]
except IndexError:
raise Exception("Process %s not found" % process_name)
print process # prints the correct value
return process # does not return anything
With proper indentation it should look like this:
def pid_find(process_name):
    """Return the PID of the first process whose name equals *process_name*.

    Processes whose name cannot be read (``psutil.AccessDenied``) are skipped.
    Raises ``Exception`` when no process matches.
    """
    matches = []
    for candidate in psutil.process_iter():
        try:
            name_matches = candidate.name() == process_name
        except psutil.AccessDenied:
            continue
        if name_matches:
            matches.append(candidate.pid)
    if not matches:
        raise Exception("Process %s not found" % process_name)
    process = matches[0]
    print(process)  # prints the correct value
    return process  # return value
This can also be done with a WMI query, without iterating over the processes:
from win32com.client import Dispatch
import wmi


def pid_find(cmd_argument):
    """Return the ProcessId of the first Win32_Process whose Name contains *cmd_argument*.

    The lookup is a single WQL query against the local ``root\\cimv2``
    namespace.  Returns None if the first result exposes no ``ProcessId``
    property.
    """
    server = Dispatch("WbemScripting.SWbemLocator")
    c = server.ConnectServer("localhost", "root\\cimv2")
    # NOTE(review): the query is built by string formatting; only pass trusted
    # process names, as quotes in cmd_argument would alter the WQL statement.
    process_query = "Select * from Win32_Process Where Name like '%{0}%'".format(cmd_argument)
    process = c.ExecQuery(process_query)
    # NOTE(review): process[0] presumably fails when nothing matches - confirm
    # and handle the "not found" case as the caller requires.
    for i in process[0].Properties_:
        if i.Name == 'ProcessId':
            return i.Value
I just tried and tested your code. It works, except for this:
if proc.name() == process_name:
It must be replaced by:
if proc.name == process_name:
With some simplification, but in the spirit of your code:
import psutil


def pid_find(process_name):
    """Return the PID of the first process named *process_name*.

    Raises:
        Exception: if no accessible process has that name.
    """
    for proc in psutil.process_iter():
        try:
            # `name` was a plain attribute in old psutil releases and is a
            # method in current ones; support both so the comparison is never
            # made against a bound method (which would always be unequal).
            name = proc.name
            if callable(name):
                name = name()
            if name == process_name:
                return proc.pid
        except (psutil.AccessDenied, psutil.NoSuchProcess):
            # Not readable, or the process exited while being inspected.
            continue
    raise Exception("Process %s not found" % process_name)


print(pid_find("bash"))
I think I found the problem. The function is working fine, but I was not debugging correctly to check if the function works:
This is how I tried:
Function is located in file functions.py file
I was calling the function in the test.py file like this:
import functions


def debug():
    # The return value is discarded here, so nothing is shown even though
    # pid_find() does return the PID.
    functions.pid_find("chrome.exe")


if __name__ == "__main__":
    debug()
But if I put the function into a variable, I should be able to store the result and use it further:
import functions


def debug(proc):
    """Look up the PID for *proc* and print it."""
    found_pid = functions.pid_find(proc)
    print(found_pid)  # shows that pid has the correct value

logging seems to have memory leak for multi-thread usage

I have run into a memory-leak scenario in Python. I guess it is related to using the logging module from multiple threads, but I can't figure out why.
Version1 (With memory-leak and multi-thread call)
# Fill the work queue with every campaign ID, then fan the work out to
# THREAD_NUM daemon threads and wait until every queued ID has been processed.
campaign_id_queue = Queue.Queue()
campaign_worker = {}  # it has data inside, key is ID, value is Class object
for campaign_id, worker in campaign_worker.iteritems():
    # The IDs go into the queue object itself (campaign_id has no `.queue`).
    campaign_id_queue.put(campaign_id)
thread_list = []
for n in range(THREAD_NUM):  # defined already
    thread_list.append(Thread(target=parallel_run, args=(campaign_id_queue, now, n, logger)))
for thread in thread_list:
    thread.daemon = True
    thread.start()
campaign_id_queue.join()
# another file
def parallel_run(campaign_id_queue, now, n, logger):
    """Worker loop: process campaign IDs from the queue until it is drained.

    Assumes the queue is fully populated before the workers are started; each
    worker returns as soon as it finds the queue empty, so no thread is left
    blocked forever on ``get()``.

    Args:
        campaign_id_queue: queue of campaign IDs to process.
        now: value passed through to ``worker.run``.
        n: index of this worker thread (unused here).
        logger: logger used for warnings and errors.
    """
    while True:
        try:
            # A blocking get() never raises Queue.Empty, which would make the
            # handler below unreachable and keep the thread alive forever.
            campaign_id = campaign_id_queue.get_nowait()
        except Queue.Empty:
            logger.warning('Queue empty')
            return
        try:
            # NOTE(review): `worker` is not defined in this snippet; presumably
            # it should be looked up from campaign_id - confirm against caller.
            if worker.open_clients(logger) < 0:
                logger.error('error here')
                continue
            worker.run(now, logger)
        except Exception as e:
            logger.exception(e)
        finally:
            # Always balance the get() so campaign_id_queue.join() can return.
            campaign_id_queue.task_done()
Version2 (Without memory-leak and single-thread call)
# Single-threaded variant: run every worker in turn, skipping those whose
# clients could not be opened.
campaign_worker = {}  # it has data inside, key is ID, value is Class object
for campaign_id, worker in campaign_worker.iteritems():
    if worker.open_clients(logger) < 0:
        logger.error('error here')
    else:
        worker.run(now, logger)
It was related to the threads not being killed after use, not to the logging module. It's solved now; thanks for your attention.

Skipping execution of -with- block

I am defining a context manager class and I would like to be able to skip the block of code without raising an exception if certain conditions are met during instantiation. For example,
class My_Context(object):
def __init__(self,mode=0):
"""
if mode = 0, proceed as normal
if mode = 1, do not execute block
"""
self.mode=mode
def __enter__(self):
if self.mode==1:
print 'Exiting...'
CODE TO EXIT PREMATURELY
def __exit__(self, type, value, traceback):
print 'Exiting...'
with My_Context(mode=1):
print 'Executing block of codes...'
According to PEP-343, a with statement translates from:
with EXPR as VAR:
BLOCK
to:
mgr = (EXPR)
exit = type(mgr).__exit__ # Not calling it yet
value = type(mgr).__enter__(mgr)
exc = True
try:
try:
VAR = value # Only if "as VAR" is present
BLOCK
except:
# The exceptional case is handled here
exc = False
if not exit(mgr, *sys.exc_info()):
raise
# The exception is swallowed if exit() returns true
finally:
# The normal and non-local-goto cases are handled here
if exc:
exit(mgr, None, None, None)
As you can see, there is nothing obvious you can do from the call to the __enter__() method of the context manager that can skip the body ("BLOCK") of the with statement.
People have done Python-implementation-specific things, such as manipulating the call stack inside of the __enter__(), in projects such as withhacks. I recall Alex Martelli posting a very interesting with-hack on stackoverflow a year or two back (don't recall enough of the post off-hand to search and find it).
But the simple answer to your question / problem is that you cannot do what you're asking, skipping the body of the with statement, without resorting to so-called "deep magic" (which is not necessarily portable between python implementations). With deep magic, you might be able to do it, but I recommend only doing such things as an exercise in seeing how it might be done, never in "production code".
If you want an ad-hoc solution that uses the ideas from withhacks (specifically from AnonymousBlocksInPython), this will work:
import sys
import inspect
class My_Context(object):
    """Context manager that can skip its ``with`` body via a trace-function hack."""

    def __init__(self, mode=0):
        """
        if mode = 0, proceed as normal
        if mode = 1, do not execute block
        """
        self.mode = mode

    def __enter__(self):
        if self.mode == 1:
            print('Met block-skipping criterion ...')
            # Do some magic
            # Tracing must be switched on globally before the per-frame
            # f_trace hook installed on the caller's frame is consulted.
            sys.settrace(lambda *args, **keys: None)
            # sys._getframe(1) is the caller's frame on both Python 2 and 3
            # (inspect.currentframe() accepts no depth argument on Python 3).
            frame = sys._getframe(1)
            frame.f_trace = self.trace

    def trace(self, frame, event, arg):
        # A bare raise with no active exception raises an error of its own;
        # that error aborts the with-body and is swallowed by __exit__.
        raise

    def __exit__(self, type, value, traceback):
        print('Exiting context ...')
        # Returning True suppresses every exception raised in the body.
        return True
Compare the following:
with My_Context(mode=1):
print 'Executing block of code ...'
with
with My_Context(mode=0):
print 'Executing block of code ... '
A python 3 update to the hack mentioned by other answers from
withhacks (specifically from AnonymousBlocksInPython):
class SkipWithBlock(Exception):
    """Raised from the trace hook to abort the body of the ``with`` block."""


class SkipContextManager:
    """Context manager that skips its ``with`` body when *skip* is true."""

    def __init__(self, skip):
        self.skip = skip

    def __enter__(self):
        if self.skip:
            # Imported here so the snippet works without a module-level import.
            import sys

            # Tracing must be enabled globally before the per-frame f_trace
            # hook installed on the caller's frame is consulted.
            sys.settrace(lambda *args, **keys: None)
            frame = sys._getframe(1)
            frame.f_trace = self.trace

    def trace(self, frame, event, arg):
        raise SkipWithBlock()

    def __exit__(self, type, value, traceback):
        if type is None:
            return  # No exception
        if issubclass(type, SkipWithBlock):
            return True  # Suppress special SkipWithBlock exception
with SkipContextManager(skip=True):
print('In the with block') # Won't be called
print('Out of the with block')
As mentioned before by joe, this is a hack that should be avoided:
The method trace() is called when a new local scope is entered, i.e. right when the code in your with block begins. When an exception is raised here it gets caught by exit(). That's how this hack works. I should add that this is very much a hack and should not be relied upon. The magical sys.settrace() is not actually a part of the language definition, it just happens to be in CPython. Also, debuggers rely on sys.settrace() to do their job, so using it yourself interferes with that. There are many reasons why you shouldn't use this code. Just FYI.
Based on @Peter's answer, here's a version that uses no string manipulations but should work the same way otherwise:
from contextlib import contextmanager


@contextmanager
def skippable_context(skip):
    """Context manager whose body is skipped when *skip* is true.

    Yields a ``command`` callable that the body must call first.  When
    *skip* is true, ``command()`` raises a private ValueError that is caught
    here, so the rest of the body never runs.  Any other exception from the
    body propagates unchanged.
    """
    skip_error = ValueError("Skipping Context Exception")
    # Remember the outer state so nested uses restore it on exit.
    prev_entered = getattr(skippable_context, "entered", False)
    skippable_context.entered = False

    def command():
        skippable_context.entered = True
        if skip:
            raise skip_error

    try:
        yield command
    except ValueError as err:
        # Only the sentinel instance created above is swallowed.
        if err != skip_error:
            raise
    finally:
        assert skippable_context.entered, "Need to call returned command at least once."
        skippable_context.entered = prev_entered


print("=== Running with skip disabled ===")
with skippable_context(skip=False) as command:
    command()
    print("Entering this block")
print("... Done")
print("=== Running with skip enabled ===")
with skippable_context(skip=True) as command:
    command()
    raise NotImplementedError("... But this will never be printed")
print("... Done")
What you're trying to do isn't possible, unfortunately. If __enter__ raises an exception, that exception is raised at the with statement (__exit__ isn't called). If it doesn't raise an exception, then the return value is fed to the block and the block executes.
Closest thing I could think of is a flag checked explicitly by the block:
class Break(Exception):
    """Raised inside the with-block to leave it early; MyContext suppresses it."""


class MyContext(object):
    """Context manager that hands its mode to the block as a skip flag."""

    def __init__(self, mode=0):
        """
        if mode = 0, proceed as normal
        if mode = 1, do not execute block
        """
        self.mode = mode

    def __enter__(self):
        if self.mode == 1:
            print('Exiting...')
        # The block receives the mode and decides itself whether to bail out.
        return self.mode

    def __exit__(self, type, value, traceback):
        if type is None:
            print('Normal exit...')
            return  # no exception
        if issubclass(type, Break):
            return True  # suppress exception
        print('Exception exit...')


with MyContext(mode=1) as skip:
    if skip:
        raise Break()
    print('Executing block of codes...')
This also lets you raise Break() in the middle of a with block to simulate a normal break statement.
Context managers are not the right construct for this. You're asking for the body to be executed n times, in this case zero or one. If you look at the general case, n where n >= 0, you end up with a for loop:
def do_squares(n):
    """Yield the squares of 0 .. n-1; yields nothing when n is 0."""
    for i in range(n):
        yield i ** 2


for x in do_squares(3):
    print('square: ', x)
for x in do_squares(0):
    print('this does not print')
In your case, which is more special purpose, and doesn't require binding to the loop variable:
def should_execute(mode=0):
    """Yield exactly once when *mode* is 0, and not at all otherwise.

    Used as ``for _ in should_execute(mode):`` so the loop body runs zero or
    one time.
    """
    if mode == 0:
        yield


for _ in should_execute(0):
    print('this prints')
for _ in should_execute(1):
    print('this does not')
Another slightly hacky option makes use of exec. This is handy because it can be modified to do arbitrary things (e.g. memoization of context-blocks):
from contextlib import contextmanager


@contextmanager
def skippable_context_exec(skip):
    """Context manager whose body is skipped when *skip* is true, via ``exec``.

    Yields a source string that the body must ``exec`` first.  The string
    marks the context as entered and, when *skip* is true, raises a
    ValueError carrying a marker message that is swallowed here.
    """
    SKIP_STRING = 'Skipping Context Exception'
    # Remember the outer state so nested uses restore it on exit.
    old_value = getattr(skippable_context_exec, 'is_execed', False)
    skippable_context_exec.is_execed = False
    command = "skippable_context_exec.is_execed=True; "+("raise ValueError('{}')".format(SKIP_STRING) if skip else '')
    try:
        yield command
    except ValueError as err:
        # Only the marker exception is swallowed; other ValueErrors propagate.
        if SKIP_STRING not in str(err):
            raise
    finally:
        assert skippable_context_exec.is_execed, "You never called exec in your context block."
        skippable_context_exec.is_execed = old_value


print('=== Running with skip disabled ===')
with skippable_context_exec(skip=False) as command:
    exec(command)
    print('Entering this block')
print('... Done')
print('=== Running with skip enabled ===')
with skippable_context_exec(skip=True) as command:
    exec(command)
    print('... But this will never be printed')
print('... Done')
Would be nice to have something that gets rid of the exec without weird side effects, so if you can think of a way I'm all ears. The current lead answer to this question appears to do that but has some issues.

Opening multiple (an unspecified number) of files at once and ensuring they are correctly closed

I am aware that I can open multiple files with something like,
with open('a', 'rb') as a, open('b', 'rb') as b:
But I have a situation where I have a list of files to open and am wondering what the preferred method is of doing the same when the number of files is unknown in advance. Something like,
with [ open(f, 'rb') for f in files ] as fs:
(but this fails with an AttributeError since list doesn't implement __exit__)
I don't mind using something like,
try:
fs = [ open(f, 'rb') for f in files ]
....
finally:
for f in fs:
f.close()
But am not sure what will happen if some files throw when trying to open them. Will fs be properly defined, with the files that did manage to open, in the finally block?
No, your code wouldn't initialise fs unless all open() calls completed successfully. This should work though:
fs = []
try:
for f in files:
fs.append(open(f, 'rb'))
....
finally:
for f in fs:
f.close()
Note also that f.close() could fail so you may want to catch and ignore (or otherwise handle) any failures there.
Sure, why not? Here's a recipe that should do it. Create a context manager 'pool' that can enter an arbitrary number of contexts (by calling its enter() method); they will all be cleaned up at the end of the suite.
class ContextPool(object):
    """Context manager that enters a dynamic number of other context managers.

    Managers registered through ``enter()`` are exited in reverse order when
    the pool's ``with`` block ends.  Every registered manager is exited even
    if an earlier ``__exit__`` raises, and an exception is suppressed when one
    of the managers asks for it by returning a true value.
    """

    def __init__(self):
        self._pool = []

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        import sys

        received_exc = exc_type is not None
        exc_details = (exc_type, exc_value, exc_tb)
        suppressed = False
        pending_raise = False
        while self._pool:
            close = self._pool.pop()
            try:
                if close(*exc_details):
                    # This manager handled the exception: the remaining ones
                    # see a clean exit.
                    suppressed = True
                    pending_raise = False
                    exc_details = (None, None, None)
            except BaseException:
                # Keep unwinding; the newest failure is what gets raised.
                exc_details = sys.exc_info()
                pending_raise = True
        if pending_raise:
            raise exc_details[1]
        return received_exc and suppressed

    def enter(self, context):
        """Enter *context*, register its exit, and return what ``__enter__`` returned."""
        close = context.__exit__
        result = context.__enter__()
        # Registered only after __enter__ succeeded, so a failed entry is
        # never "exited".
        self._pool.append(close)
        return result
For example:
>>> class StubContextManager(object):
... def __init__(self, name):
... self.__name = name
... def __repr__(self):
... return "%s(%r)" % (type(self).__name__, self.__name)
...
... def __enter__(self):
... print "called %r.__enter__()" % (self)
...
... def __exit__(self, *args):
... print "called %r.__exit__%r" % (self, args)
...
>>> with ContextPool() as pool:
... pool.enter(StubContextManager("foo"))
... pool.enter(StubContextManager("bar"))
... 1/0
...
called StubContextManager('foo').__enter__()
called StubContextManager('bar').__enter__()
called StubContextManager('bar').__exit__(<type 'exceptions.ZeroDivisionError'>, ZeroDivisionError('integer division or modulo by zero',), <traceback object at 0x02958648>)
called StubContextManager('foo').__exit__(<type 'exceptions.ZeroDivisionError'>, ZeroDivisionError('integer division or modulo by zero',), <traceback object at 0x02958648>)
Traceback (most recent call last):
File "<pyshell#67>", line 4, in <module>
1/0
ZeroDivisionError: integer division or modulo by zero
>>>
Caveats: context managers aren't supposed to raise exceptions in their __exit__() methods, but if they do, this recipe doesn't do the cleanup for all the context managers. Similarly, even if every context manager indicates that an exception should be ignored (by returning True from their exit methods), this will still allow the exception to be raised.
The class ExitStack from the contextlib module provides the functionality you are looking for.
The canonical use-case that is mentioned in the documentation is managing a dynamic number of files.
with ExitStack() as stack:
files = [stack.enter_context(open(fname)) for fname in filenames]
# All opened files will automatically be closed at the end of
# the with statement, even if attempts to open files later
# in the list raise an exception
Errors can occur when attempting to open a file, when attempting to read from a file, and (very rarely) when attempting to close a file.
So a basic error handling structure might look like:
# Open, read and close in one guarded sequence: any failure while opening,
# reading or closing is reported by the single handler below.
try:
    stream = open(path)
    try:
        data = stream.read()
    finally:
        stream.close()
except EnvironmentError as exception:
    print('ERROR: %s' % str(exception))
else:
    print('SUCCESS')
    # process data
This ensures that close will always be called if the stream variable exists. If stream doesn't exist, then open must have failed, and so there is no file to close (in which case, the except block will be executed immediately).
Do you really need to have the files open in parallel, or can they be processed sequentially? If the latter, then something like the above file-processing code should be put in a function, which is then called for each path in the list.
Thanks for all your answers. Taking inspiration from all of you, I have come up with the following. I think (hope) it works as I intended. I wasn't sure whether to post it as an answer or an addition to the question, but thought an answer was more appropriate as then if it fails to do what I'd asked it can be commented on appropriately.
It can be used for example like this ..
with contextlist( [open, f, 'rb'] for f in files ) as fs:
....
or like this ..
f_lock = threading.Lock()
with contextlist( f_lock, ([open, f, 'rb'] for f in files) ) as (lock, *fs):
....
And here it is,
import inspect
import collections
import collections.abc
import sys
import traceback


class contextlist:
    """Enter an arbitrary number of context managers as a single one.

    Each positional argument is either a context manager, a sequence
    ``[factory, arg, ...]`` that is called on entry to create one, or a
    generator yielding such items.  ``__enter__`` returns the list of entered
    context managers, in order.
    """

    def __init__(self, *contexts):
        self._args = []
        for ctx in contexts:
            if inspect.isgenerator(ctx):
                # Generators are expanded in place into their items.
                self._args.extend(ctx)
            else:
                self._args.append(ctx)

    def __enter__(self):
        if hasattr(self, '_ctx'):
            raise RuntimeError("cannot reenter contextlist")
        s_ctx = self._ctx = []
        try:
            for ctx in self._args:
                # collections.Sequence was removed in Python 3.10; the ABC
                # lives in collections.abc.
                if isinstance(ctx, collections.abc.Sequence):
                    ctx = ctx[0](*ctx[1:])
                ctx.__enter__()
                # Recorded only once entered, so it is never exited otherwise.
                s_ctx.append(ctx)
            return s_ctx
        except BaseException:
            # Unwind whatever was entered so far, then report the failure.
            self.__exit__(*sys.exc_info())
            raise

    def __exit__(self, *exc_info):
        if not hasattr(self, '_ctx'):
            raise RuntimeError("cannot exit from unentered contextlist")
        if not exc_info:
            exc_info = (None, None, None)
        errors = []
        for ctx in reversed(self._ctx):
            try:
                # __exit__ takes (type, value, traceback); calling it without
                # arguments fails for ordinary context managers.
                ctx.__exit__(*exc_info)
            except Exception:
                errors.append(traceback.format_exc())
        del self._ctx
        if errors:
            raise Exception('\n> '*2+(''.join(errors)).replace('\n','\n> '))

Where do I change this Python code snippet to save the temp file in the tmp-folder?

This is a code example implementing a file lock, so that only one instance of the application can be open at a time. It currently works, but saves the lock file in the home folder (Ubuntu). If the application crashes, the lock file does not get removed, which is not good.
I can not easily see where I should change the code to save it in the c:/tmp-folder instead?
#!/usr/bin/python
# -*- coding: utf-8 -*-
#implements a lockfile if program already is open
import os
import socket
from fcntl import flock
class flock(object):
    '''Class to handle creating and removing (pid) lockfiles'''

    # custom exceptions
    class FileLockAcquisitionError(Exception):
        pass

    class FileLockReleaseError(Exception):
        pass

    # convenience callables for formatting
    def addr(self):
        '''Return this process's "pid#host" identity, as stored in the lockfile'''
        return '%d#%s' % (self.pid, self.host)

    def fddr(self):
        '''Return "<path pid#host>" describing this process's claim'''
        return '<%s %s>' % (self.path, self.addr())

    def pddr(self, lock):
        '''Return "<path pid#host>" for a lock dict read from the lockfile'''
        return '<%s %s#%s>' % (self.path, lock['pid'], lock['host'])

    def __init__(self, path, debug=None):
        self.pid = os.getpid()
        self.host = socket.gethostname()
        self.path = path
        self.debug = debug  # set this to get status messages

    def acquire(self):
        '''Acquire a lock, returning self if successful, False otherwise

        Raises FileLockAcquisitionError if the lockfile cannot be written.'''
        if self.islocked():
            if self.debug:
                lock = self._readlock()
                print('Previous lock detected: %s' % self.pddr(lock))
            return False
        try:
            with open(self.path, 'w') as fh:
                fh.write(self.addr())
        except (IOError, OSError):
            # Best effort: do not leave a half-written lockfile behind.
            if os.path.isfile(self.path):
                try:
                    os.unlink(self.path)
                except OSError:
                    pass
            # Raise an exception instance; raising a (class, message) tuple
            # is not a valid raise.
            raise self.FileLockAcquisitionError(
                'Error acquiring lock: %s' % self.fddr())
        if self.debug:
            print('Acquired lock: %s' % self.fddr())
        return self

    def release(self):
        '''Release lock, returning self

        Raises FileLockReleaseError if our lockfile cannot be removed.'''
        if self.ownlock():
            try:
                os.unlink(self.path)
            except OSError:
                raise self.FileLockReleaseError(
                    'Error releasing lock: %s' % self.fddr())
            if self.debug:
                print('Released lock: %s' % self.fddr())
        return self

    def _readlock(self):
        '''Internal method to read lock info'''
        try:
            with open(self.path) as fh:
                data = fh.read().rstrip().split('#')
            lock = {}
            lock['pid'], lock['host'] = data
            return lock
        except (IOError, OSError, ValueError):
            # Missing or malformed lockfile: return a placeholder record with
            # an empty host, which never matches this process in ownlock().
            return {'pid': 8**10, 'host': ''}

    def islocked(self):
        '''Check if we already have a lock'''
        import errno

        lock = self._readlock()
        try:
            # Signal 0 only checks that the process exists.
            os.kill(int(lock['pid']), 0)
        except OSError as exc:
            # EPERM means the process exists but belongs to another user.
            if exc.errno != errno.EPERM:
                return False
        except (ValueError, OverflowError):
            return False
        return (lock['host'] == self.host)

    def ownlock(self):
        '''Check if we own the lock'''
        lock = self._readlock()
        return (self.fddr() == self.pddr(lock))

    def __del__(self):
        '''Magic method to clean up lock when program exits'''
        self.release()
#now testing to see if file is locked = other instance of this program is running already
# acquire() yields the lock object on success and False when another live
# instance already holds the lock.
lock = flock('tmp.lock', True).acquire()
if not lock:
    print('locked!')
    exit()
else:
    print('doing stuff')
#end of lockfile
Use tempfile
At the end of the script:
lock = flock('tmp.lock', True).acquire()
The 'tmp.lock' is the path to the file in the current directory. Change it to the path you need, i.e. 'c:/tmp-folder/tmp.lock'.
lock = flock('c:/tmp-folder/tmp.lock', True).acquire()
However, as @g19fanatic notes: are you on a Windows ('c:/...') or Linux (Ubuntu) system?

Categories