How critical is pylint's no-self-use? - python

As an example I have a Django custom management command which periodically (APScheduler + CronTrigger) sends tasks to Dramatiq.
Why is the following code with separate functions:
def get_crontab(options):
    """Returns crontab whether from options or settings"""
    crontab = options.get("crontab")
    if crontab is None:
        if not hasattr(settings, "REMOVE_TOO_OLD_CRONTAB"):
            raise ImproperlyConfigured("Whether set settings.REMOVE_TOO_OLD_CRONTAB or use --crontab argument")
        crontab = settings.REMOVE_TOO_OLD_CRONTAB
    return crontab

def add_cron_job(scheduler: BaseScheduler, actor, crontab):
    """Adds cron job which triggers Dramatiq actor"""
    module_path = actor.fn.__module__
    actor_name = actor.fn.__name__
    trigger = CronTrigger.from_crontab(crontab)
    job_path = f"{module_path}:{actor_name}.send"
    job_name = f"{module_path}.{actor_name}"
    scheduler.add_job(job_path, trigger=trigger, name=job_name)

def run_scheduler(scheduler):
    """Runs scheduler in a blocking way"""
    def shutdown(signum, frame):
        scheduler.shutdown()

    signal.signal(signal.SIGINT, shutdown)
    signal.signal(signal.SIGTERM, shutdown)
    scheduler.start()

class Command(BaseCommand):
    help = "Periodically removes too old publications from the RSS feed"

    def add_arguments(self, parser: argparse.ArgumentParser):
        parser.add_argument("--crontab", type=str)

    def handle(self, *args, **options):
        scheduler = BlockingScheduler()
        add_cron_job(scheduler, tasks.remove_too_old_publications, get_crontab(options))
        run_scheduler(scheduler)
better than this code with methods?
class Command(BaseCommand):
    help = "Periodically removes too old publications from the RSS feed"

    def add_arguments(self, parser: argparse.ArgumentParser):
        parser.add_argument("--crontab", type=str)

    def get_crontab(self, options):
        """Returns crontab whether from options or settings"""
        crontab = options.get("crontab")
        if crontab is None:
            if not hasattr(settings, "REMOVE_TOO_OLD_CRONTAB"):
                raise ImproperlyConfigured(
                    "Whether set settings.REMOVE_TOO_OLD_CRONTAB or use --crontab argument"
                )
            crontab = settings.REMOVE_TOO_OLD_CRONTAB
        return crontab

    def handle(self, *args, **options):
        scheduler = BlockingScheduler()
        self.add_cron_job(scheduler, tasks.remove_too_old_publications, self.get_crontab(options))
        self.run_scheduler(scheduler)

    def add_cron_job(self, scheduler: BaseScheduler, actor, crontab):
        """Adds cron job which triggers Dramatiq actor"""
        module_path = actor.fn.__module__
        actor_name = actor.fn.__name__
        trigger = CronTrigger.from_crontab(crontab)
        job_path = f"{module_path}:{actor_name}.send"
        job_name = f"{module_path}.{actor_name}"
        scheduler.add_job(job_path, trigger=trigger, name=job_name)

    def run_scheduler(self, scheduler):
        """Runs scheduler in a blocking way"""
        def shutdown(signum, frame):
            scheduler.shutdown()

        signal.signal(signal.SIGINT, shutdown)
        signal.signal(signal.SIGTERM, shutdown)
        scheduler.start()
This code is used in a single place and will not be reused.
StackOverflow requires more details, so:
The second code is the version that I originally wrote. After that, I ran Prospector with Pylint and, among other useful messages, I got pylint: no-self-use / Method could be a function (col 4). To solve this issue I rewrote my code as in the first example. But I still don't understand why it is better this way.

At least in this case, it is not better. Pylint is notifying you that "self" is unused, just like it would notify you about an unused variable or import.
A couple of other options for fixing the pylint message would be to actually use "self" in the methods, or to add the staticmethod (or classmethod) decorator. Examples for both are below. See the docs for staticmethod and the difference between staticmethod and classmethod.
Since this is a Django command and you likely won't have multiple instances of the class, or other classes that inherit Command (which would, for example, override the methods), or anything else that would benefit from the functions being inside the class, pick the one you find most readable/easiest to change.
And just for completeness, StackExchange Code Review could have further insight into which is best, if any.
Example that uses self; the main difference is that the scheduler is created in __init__ and not passed as an argument to the methods that use it:
class Command(BaseCommand):
    help = "Periodically removes too old publications from the RSS feed"

    def __init__(self):
        super().__init__()
        self.scheduler = BlockingScheduler()

    def add_arguments(self, parser: argparse.ArgumentParser):
        parser.add_argument("--crontab", type=str)

    def handle(self, *args, **options):
        self.add_cron_job(tasks.remove_too_old_publications, self.get_crontab(options))
        self.run_scheduler()

    # ...

    def run_scheduler(self):
        """Runs scheduler in a blocking way"""
        def shutdown(signum, frame):
            self.scheduler.shutdown()

        signal.signal(signal.SIGINT, shutdown)
        signal.signal(signal.SIGTERM, shutdown)
        self.scheduler.start()
Example that uses staticmethod, where the only difference is the staticmethod decorator and that the decorated methods don't have a self argument:
class Command(BaseCommand):
    help = "Periodically removes too old publications from the RSS feed"

    def add_arguments(self, parser: argparse.ArgumentParser):
        parser.add_argument("--crontab", type=str)

    def handle(self, *args, **options):
        scheduler = BlockingScheduler()
        self.add_cron_job(scheduler, tasks.remove_too_old_publications, self.get_crontab(options))
        self.run_scheduler(scheduler)

    # ...

    @staticmethod
    def run_scheduler(scheduler):
        """Runs scheduler in a blocking way"""
        def shutdown(signum, frame):
            scheduler.shutdown()

        signal.signal(signal.SIGINT, shutdown)
        signal.signal(signal.SIGTERM, shutdown)
        scheduler.start()
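And if you would rather use classmethod, the shape is almost the same. This is only a sketch (same imports as the examples above); cls is only worth having if you actually need the class object for something:
class Command(BaseCommand):
    help = "Periodically removes too old publications from the RSS feed"

    # ...

    @classmethod
    def run_scheduler(cls, scheduler: BaseScheduler):
        """Runs scheduler in a blocking way"""
        def shutdown(signum, frame):
            scheduler.shutdown()

        signal.signal(signal.SIGINT, shutdown)
        signal.signal(signal.SIGTERM, shutdown)
        scheduler.start()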

Related

How to make an AutoReloader in python

I'm writing a class named Reloader that receives a main_function (or any callable) and a FileWatcher instance.
FileWatcher is just a class that watches files and invokes a callback function if any of the files are modified.
When the run() method of an instance of the Reloader is invoked it starts a Thread whose target is main_function and it also runs the FileWatcher.
What I want to achieve is that whenever any of the files is modified I can reload the main_function.
(this is similar to Django's autoreloader)
Here's the Reloader class:
class Reloader:
    def __init__(self, watcher: FileWatcher, func: callable, *args, **kwargs) -> None:
        self.func = func
        self.args = args
        self.kwargs = kwargs
        self.watcher = watcher
        self.watcher.set_callback(self._callback)
        self.thread = Thread(target=self._main)
        self.thread.daemon = True

    def _callback(self, file):
        print(f'{file} was modified.')
        self.thread.join()
        self.thread = Thread(target=self._main)
        self.thread.daemon = True
        self.thread.start()

    def _main(self):
        self.func(*self.args, **self.kwargs)

    def run(self):
        self.thread.start()
        self.watcher.run()
The whole thing works except that it doesn't reload the main function. It just reruns it.
If I change the file which contains the main_function, the changes are not seen!
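For what it's worth, Python caches imported modules, so starting a new thread just calls the same already-loaded function object again. One possible direction, as a rough sketch only, is to reload the module that defines main_function in the callback and rebind self.func before starting the new thread. The module name target_module below is hypothetical:
import importlib
from threading import Thread
import target_module  # hypothetical module that defines main_function

def _callback(self, file):
    # drop-in replacement for Reloader._callback
    print(f'{file} was modified.')
    importlib.reload(target_module)            # re-execute the module so edits are picked up
    self.func = target_module.main_function    # rebind to the freshly loaded function object
    self.thread = Thread(target=self._main)
    self.thread.daemon = True
    self.thread.start()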

call function x number of times and execute it from another script

I am running 2 python scripts, say main.py and test.py
In main.py I am executing the get_details function "x" number of times every 30 seconds.
NOTE: I want to execute funcA, funcB, funcC in sequence. The issue I am facing here is that when I run test.py, it first runs funcC(), even though I am calling funcA() first.
test.py
def funcA():
    # do something
    funcB()

def funcB():
    # do something
    funcC()

def funcC():
    # here i want to execute script main.py
    # My attempt 1:
    import subprocess
    import sys
    theproc = subprocess.Popen([sys.executable, "main.py"])
    theproc.communicate()
    # ------OR-----------
    # My attempt 2:
    execfile("main.py")
main.py
import threading

def get_details(a, b, c):
    #do something ...

class RepeatEvery(threading.Thread):
    def __init__(self, interval, func, *args, **kwargs):
        threading.Thread.__init__(self)
        self.interval = interval  # seconds between calls
        self.func = func  # function to call
        self.args = args  # optional positional argument(s) for call
        self.kwargs = kwargs  # optional keyword argument(s) for call
        self.runable = True

    def run(self):
        while self.runable:
            self.func(*self.args, **self.kwargs)
            time.sleep(self.interval)

    def stop(self):
        self.runable = False

thread = RepeatEvery(30, get_details, "arg1", "arg2", "arg3")
print "starting"
thread.start()
thread.join(21)  # allow thread to execute a while...
I want to execute the script main.py only after all functions (funcA, funcB) have executed properly. But in my case, main.py executes first, and then control goes back to test.py and it executes funcA() and funcB().
What am I missing here?
Okay. I rewrote your code so it would work as you said it should.
main.py...
# Good design for small classes: keep global functions separate for people who want
# to explore the type, but not everything that comes along with it.
# I moved the global functions and code execution from top and bottom to test.py
import threading
import time  # You forgot to import time.

class RepeatEvery(threading.Thread):
    def __init__(self, interval, func, *args, **kwargs):
        threading.Thread.__init__(self)
        self.interval = interval  # seconds between calls
        self.func = func  # function to call
        self.args = args  # optional positional argument(s) for call
        self.kwargs = kwargs  # optional keyword argument(s) for call
        self.runable = True

    def run(self):
        while self.runable:
            self.func(*self.args, **self.kwargs)
            time.sleep(self.interval)

    def stop(self):
        self.runable = False

    """ We couuuld have done this, but why bother? It is hard to work with.
    def get_details(self, a, b, c):
        #do something else as a function of the class...
    """
test.py...
import main  # File where the class lives.

def funcA():
    # do something
    print("In A...")  # Helps us observe scope.
    funcB()

def funcB():
    # do something
    print("In B...")  # scope
    funcC()

def funcC():
    # here i want to execute script main.py
    # My attempt 1:
    print("In C...")  # scope
    run_main()  # Reached C, lets run main now...

# This is one way to allow a function to be accessible to your class.
def get_details(a, b, c):
    # do something else as a function of test.py operating on
    # a RepeatEvery object...
    pass

def run_main():  # Renamed from main so it doesn't shadow the imported main module. It houses our opening code.
    thread = main.RepeatEvery(30, get_details, "arg1", "arg2", "arg3")
    print("starting")
    thread.start()
    thread.join(21)  # allow thread to execute a while...

funcA()
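If you would rather keep main.py as a separate script instead of importing it, a cleaned-up version of your "attempt 1" should also work, since the subprocess call blocks until main.py exits. A sketch, not tested against your setup:
import subprocess
import sys

def funcC():
    print("In C...")
    # Runs main.py and waits for it to finish; raises CalledProcessError on a non-zero exit.
    subprocess.check_call([sys.executable, "main.py"])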

How-To: Python TimedRotatingFileHandle for multiple process-instances and files?

I just got thrown into the deep end with my new contract. The current system uses the python logging module to do timed log-file rotation. The problem is that the log-file of the process running as a daemon gets rotated correctly, while the other log-file of the process instances that get created and destroyed when done does not rotate. Ever. I now have to find a solution to this problem. After 2 days of research on the internet and the python documentation I'm only halfway out of the dark. Since I'm new to the logging module I can't see the answer to the problem; I'm probably looking with my eyes closed!
The process is started with:
python /admin/bin/fmlog.py -l 10 -f /tmp/fmlog/fmapp_log.log -d
where:
-l 10 => DEBUG logging-level
-f ... => Filename to log to for app-instance
-d => run as daemon
The following shows a heavily edited version of my code:
#!/usr/bin/env python
from comp.app import app, yamlapp
...
from comp.utils.log4_new import *

# Exceptions handling class
class fmlogException(compException): pass

class fmlog(app):
    # Fmlog application class
    def __init__(self, key, config, **kwargs):
        # Initialise the required variables
        app.__init__(self, key, config, **kwargs)
        self._data = {'sid': self._id}
        ...

    def process(self, tid=None):
        if tid is not None:
            self.logd("Using thread '%d'." % (tid), data=self._data)
        # Run the fmlog process
        self.logi("Processing this '%s'" % (filename), data=self._data)
        ...

    def __doDone__(self, success='Failure', msg='', exception=None):
        ...
        self.logd("Process done!")

if __name__ == '__main__':
    def main():
        with yamlapp(filename=config, cls=fmlog, configcls=fmlogcfg, sections=sections, loglevel=loglevel, \
                     logfile=logfile, excludekey='_dontrun', sortkey='_priority', usethreads=threads, maxthreads=max, \
                     daemon=daemon, sleep=sleep) as a:
            a.run()
    main()
The yamlapp process (a sub-class of app) is instantiated and runs as a daemon until manually stopped. This process creates one or more instances of the fmlog class and calls the process() function when needed (certain conditions are met). Up to x instances can be created per thread if the yamlapp process is run in thread-mode.
The app process code:
#!/usr/bin/env python
...
from comp.utils.log4_new import *

class app(comp.base.comp, logconfig, log):
    def __init__(self, cls, **kwargs):
        self.__setdefault__('_configcls', configitem)
        self.__setdefault__('_daemon', True)
        self.__setdefault__('_maxthreads', 5)
        self.__setdefault__('_usethreads', False)
        ...
        comp.base.comp.__init__(self, **kwargs)
        logconfig.__init__(self, prog(), **getlogkwargs(**kwargs))
        log.__init__(self, logid=prog())

    def __enter__(self):
        self.logi(msg="Starting application '%s:%s' '%d'..." % (self._cls.__name__, \
                  self.__class__.__name__, os.getpid()))
        return self

    def ...

    def run(self):
        ...
        if self._usethreads:
            ...
        while True:
            self.logd(msg="Start of run iteration...")
            if not self._usethreads:
                while not self._q.empty():
                    item = self._q.get()
                    try:
                        item.process()
            self.logd(msg="End of run iteration...")
            time.sleep(self._sleep)
The logging config and setup is done via the log4_new.py classes:
#!/usr/bin/env python
import logging
import logging.handlers
import re

class logconfig(comp):
    def __init__(self, logid, **kwargs):
        comp.__init__(self, **kwargs)
        self.__setdefault__('_logcount', 20)
        self.__setdefault__('_logdtformat', None)
        self.__setdefault__('_loglevel', DEBUG)
        self.__setdefault__('_logfile', None)
        self.__setdefault__('_logformat', '[%(asctime)-15s][%(levelname)5s] %(message)s')
        self.__setdefault__('_loginterval', 'S')
        self.__setdefault__('_logintervalnum', 30)
        self.__setdefault__('_logsuffix', '%Y%m%d%H%M%S')
        self._logid = logid
        self.__loginit__()

    def __loginit__(self):
        format = logging.Formatter(self._logformat, self._logdtformat)
        if self._logfile:
            hnd = logging.handlers.TimedRotatingFileHandler(self._logfile, when=self._loginterval, interval=self._logintervalnum, backupCount=self._logcount)
            hnd.suffix = self._logsuffix
            hnd.extMatch = re.compile(strftoregex(self._logsuffix))
        else:
            hnd = logging.StreamHandler()
        hnd.setFormatter(format)
        l = logging.getLogger(self._logid)
        for h in l.handlers:
            l.removeHandler(h)
        l.setLevel(self._loglevel)
        l.addHandler(hnd)

class log():
    def __init__(self, logid):
        self._logid = logid

    def __log__(self, msg, level=DEBUG, data=None):
        l = logging.getLogger(self._logid)
        l.log(level, msg, extra=data)

    def logd(self, msg, **kwargs):
        self.__log__(level=DEBUG, msg=msg, **kwargs)

    def ...

    def logf(self, msg, **kwargs):
        self.__log__(level=FATAL, msg=msg, **kwargs)

def getlogkwargs(**kwargs):
    logdict = {}
    for key, value in kwargs.iteritems():
        if key.startswith('log'): logdict[key] = value
    return logdict
Logging is done as expected: logs from yamlapp (the sub-class of app) are written to fmapp_log.log, and logs from fmlog are written to fmlog.log.
The problem is that fmapp_log.log is rotated as expected, but fmlog.log is never rotated. How do I solve this? I know the process must run continuously for the rotation to happen, which is why only one logger is used. I suspect another handler must be created for the fmlog process, one which is never destroyed when the process exits.
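(To illustrate that last point with a tiny standalone demo, which is hypothetical and not part of the framework above: TimedRotatingFileHandler only checks whether to roll over when a record is emitted, against a rollover time computed when the handler is created, so a process that logs a few records and exits before the interval has passed will typically never rotate its file.)
import logging
import logging.handlers
import time

logger = logging.getLogger("rotation-demo")
handler = logging.handlers.TimedRotatingFileHandler("demo.log", when="S", interval=30, backupCount=5)
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)

logger.debug("short-lived process: one record, then exit -> no rollover happens")

# A long-lived process, by contrast, rolls over on the first emit after the interval:
# while True:
#     logger.debug("still alive")
#     time.sleep(10)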
Requirements:
The app (framework or main) log and the fmlog (process) log must be to different files.
Both log-files must be time-rotated.
Hopefully someone will understand the above and be able to give me a couple of pointers.

Thread start hook in Python

Is there a way to run an arbitrary method whenever a new thread is started in Python (2.7)? My goal is to use setproctitle to set an appropriate title for each spawned thread.
Just inherit from threading.Thread and use this class instead of Thread - as long as you have control over the Threads.
import threading

class MyThread(threading.Thread):
    def __init__(self, callable, *args, **kwargs):
        super(MyThread, self).__init__(*args, **kwargs)
        self._call_on_start = callable

    def start(self):
        self._call_on_start()
        super(MyThread, self).start()
Just as a coarse sketch.
Edit
From the comments the need arose to kind of "inject" the new behavior into an existing application. Let's assume you have a script that itself imports other libraries. These libraries use the threading module:
Before importing any other modules, first execute this:
import threading
import time

class MyThread(threading.Thread):
    _call_on_start = None

    def __init__(self, callable_=None, *args, **kwargs):
        super(MyThread, self).__init__(*args, **kwargs)
        if callable_ is not None:
            self._call_on_start = callable_

    def start(self):
        if self._call_on_start is not None:
            self._call_on_start()
        super(MyThread, self).start()

def set_thread_title():
    print "Set thread title"

# Assign the hook itself (wrapped in staticmethod so attribute access on instances
# doesn't try to bind it); calling set_thread_title() here would run it once and store None.
MyThread._call_on_start = staticmethod(set_thread_title)
threading.Thread = MyThread

def calculate_something():
    time.sleep(5)
    print sum(range(1000))

t = threading.Thread(target=calculate_something)
t.start()
time.sleep(2)
t.join()
As subsequent imports only do a lookup in sys.modules, all other libraries using this should be using our new class now. I regard this as a hack, and it might have strange side effects. But at least it's worth a try.
Please note: threading.Thread is not the only way to implement concurrency in Python; there are other options like multiprocessing etc. These will be unaffected here.
Edit 2
I just took a look at the library you cited and it's all about processes, not Threads! So, just do a :%s/threading/multiprocessing/g and :%s/Thread/Process/g and things should be fine.
Use threading.setprofile. You give it your callback, and Python installs it (via sys.setprofile()) in every new thread started through the threading module, just before the thread's run() is called.
See the documentation for threading.setprofile.
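A rough sketch of that approach; the hook fires inside each new thread on its first profile event, and clearing it again with sys.setprofile(None) is an attempt to avoid paying profiling overhead for the rest of the thread's life:
import sys
import threading

def on_thread_start(frame, event, arg):
    # Runs in the new thread on its first profiling event;
    # this is where setproctitle.setproctitle(...) could go.
    print("thread started: %s" % threading.current_thread().name)
    sys.setprofile(None)  # stop profiling this thread afterwards

threading.setprofile(on_thread_start)

t = threading.Thread(target=lambda: sum(range(1000)))
t.start()
t.join()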

celery task and customize decorator

I'm working on a project using django and celery (django-celery). Our team decided to wrap all data access code within (app-name)/manager.py (NOT wrapped into Managers like the django way), and to let the code in (app-name)/task.py deal only with assembling and performing tasks with celery (so we don't have a django ORM dependency in that layer).
In my manager.py, I have something like this:
def get_tag(tag_name):
    ctype = ContentType.objects.get_for_model(Photo)
    try:
        tag = Tag.objects.get(name=tag_name)
    except ObjectDoesNotExist:
        return Tag.objects.none()
    return tag

def get_tagged_photos(tag):
    ctype = ContentType.objects.get_for_model(Photo)
    return TaggedItem.objects.filter(content_type__pk=ctype.pk, tag__pk=tag.pk)

def get_tagged_photos_count(tag):
    return get_tagged_photos(tag).count()
In my task.py, I'd like to wrap them into tasks (and then maybe use these tasks to compose more complicated tasks), so I wrote this decorator:
import manager  # the module within the same app containing the data access functions

class mfunc_to_task(object):
    def __init__(self, mfunc_type='get'):
        self.mfunc_type = mfunc_type

    def __call__(self, f):
        def wrapper_f(*args, **kwargs):
            callback = kwargs.pop('callback', None)
            mfunc = getattr(manager, f.__name__)
            result = mfunc(*args, **kwargs)
            if callback:
                if self.mfunc_type == 'get':
                    subtask(callback).delay(result)
                elif self.mfunc_type == 'get_or_create':
                    subtask(callback).delay(result[0])
                else:
                    subtask(callback).delay()
            return result
        return wrapper_f
then (still in task.py):
@task
@mfunc_to_task()
def get_tag():
    pass

@task
@mfunc_to_task()
def get_tagged_photos():
    pass

@task
@mfunc_to_task()
def get_tagged_photos_count():
    pass
Things work fine without @task.
But after applying that @task decorator (on top, as the celery documentation instructs), things just start to fall apart. Apparently, every time mfunc_to_task.__call__ gets called, the same task.get_tag function gets passed as f. So I end up with the same wrapper_f every time, and now the only thing I can do is get a single tag.
I'm new to decorators. Can anyone help me understand what went wrong here, or point out other ways to achieve this? I really hate to write the same task-wrapping code for every one of my data access functions.
Not quite sure why passing arguments won't work?
If you use this example:
@task()
def add(x, y):
    return x + y
Let's add some logging to the MyCoolTask:
from celery import task
from celery.registry import tasks
import logging
import celery

logger = logging.getLogger(__name__)

class MyCoolTask(celery.Task):
    def __call__(self, *args, **kwargs):
        """In celery task this function call the run method, here you can
        set some environment variable before the run of the task"""
        logger.info("Starting to run")
        return self.run(*args, **kwargs)

    def after_return(self, status, retval, task_id, args, kwargs, einfo):
        # exit point of the task whatever is the state
        logger.info("Ending run")
        pass
and create an extended class (extending MyCoolTask, but now with arguments):
class AddTask(MyCoolTask):
    def run(self, x, y):
        if x and y:
            result = add(x, y)
            logger.info('result = %d' % result)
            return result
        else:
            logger.error('No x or y in arguments')

tasks.register(AddTask)
and make sure you pass the kwargs as json data:
{"x":8,"y":9}
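(Called from Python code rather than as raw JSON, that would be roughly AddTask().apply_async(kwargs={"x": 8, "y": 9}), although the exact call style depends on your celery version.)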
I get the result:
[2013-03-05 17:30:25,853: INFO/MainProcess] Starting to run
[2013-03-05 17:30:25,855: INFO/MainProcess] result = 17
[2013-03-05 17:30:26,739: INFO/MainProcess] Ending run
[2013-03-05 17:30:26,741: INFO/MainProcess] Task iamscheduler.tasks.AddTask[6a62641d-16a6-44b6-a1cf-7d4bdc8ea9e0] succeeded in 0.888684988022s: 17
Instead of using a decorator, why don't you create a base class that extends celery.Task?
In this way all your tasks can extend your customized task class, where you can implement your personal behavior by using the __call__ and after_return methods.
You can also define common methods and objects for all your tasks.
class MyCoolTask(celery.Task):
    def __call__(self, *args, **kwargs):
        """In celery task this function call the run method, here you can
        set some environment variable before the run of the task"""
        return self.run(*args, **kwargs)

    def after_return(self, status, retval, task_id, args, kwargs, einfo):
        # exit point of the task whatever is the state
        pass
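As a side note on why the original decorator collapsed everything into one task: my guess is that, because wrapper_f does not copy f's metadata, every decorated function ends up named "wrapper_f", so celery registers them all under the same task name and the last registration wins. If that is the cause, preserving the metadata with functools.wraps should let each task register under its own name. A sketch of the question's decorator with that one change (the subtask import path may differ between celery versions):
import functools
from celery import subtask  # assumption: celery 3.x import path
import manager

class mfunc_to_task(object):
    def __init__(self, mfunc_type='get'):
        self.mfunc_type = mfunc_type

    def __call__(self, f):
        @functools.wraps(f)  # keep f.__name__/__module__ so each wrapped task gets its own name
        def wrapper_f(*args, **kwargs):
            callback = kwargs.pop('callback', None)
            mfunc = getattr(manager, f.__name__)
            result = mfunc(*args, **kwargs)
            if callback:
                subtask(callback).delay(result)
            return result
        return wrapper_f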
