Live Profiling of a Python Server

I want to know where the Python interpreter spends the most time. I use this on a live Django application, but it should work for any long-running Python process.
I'm answering my own question below.

import os, re, sys, time, datetime, collections, thread, threading, atexit, traceback

u'''
debug_live.start(seconds_float) starts a monitor thread which prints
the stacktrace of all threads into a logfile.

You can report which lines are executed the most with this script:

app_foo_d@server:~$ python djangotools/utils/debug_live.py -h
usage: debug_live.py [-h] [--most-common N] {sum-all-frames,sum-last-frame}

Read stacktrace log

positional arguments:
  {sum-all-frames,sum-last-frame}

optional arguments:
  -h, --help            show this help message and exit
  --most-common N       Display the N most common lines in the stacktraces

---------------------------------

You can start the watching thread in your django middleware like this:

class FOOMiddleware:
    def __init__(self):
        u'This code gets executed once after the start of the wsgi worker process. Not for every request!'
        seconds=getattr(settings, 'debug_live_interval', None)
        if seconds:
            seconds=float(seconds)
            from djangotools.utils import debug_live
            debug_live.start(seconds)

# settings.py
debug_live_interval=0.3 # every 0.3 seconds

# Inspired by http://code.google.com/p/modwsgi/wiki/DebuggingTechniques

You can get a simple report of the log file of stacktraces like below. The lines
which are not from django are marked with "<====". That's most likely your code,
and it could be a bottleneck.

python ..../debug_live.py read
 1971 File: "/home/foo_bar_p/django/core/handlers/wsgi.py", line 272, in __call__
          response = self.get_response(request)
 1812 File: "/home/foo_bar_p/django/core/handlers/base.py", line 111, in get_response
          response = callback(request, *callback_args, **callback_kwargs)
 1725 File: "/home/foo_bar_p/django/db/backends/postgresql_psycopg2/base.py", line 44, in execute
          return self.cursor.execute(query, args)
 1724 File: "/home/foo_bar_p/django/db/models/sql/compiler.py", line 735, in execute_sql
          cursor.execute(sql, params)
 1007 File: "/home/foo_bar_p/django/db/models/sql/compiler.py", line 680, in results_iter
          for rows in self.execute_sql(MULTI):
  796 File: "/home/foo_bar_p/django/db/models/query.py", line 273, in iterator
          for row in compiler.results_iter():
  763 File: "/home/foo_bar_p/foo/utils/ticketutils.py", line 135, in __init__    <====
          filter=type_filter(root_node=self.root_node)
  684 File: "/home/foo_bar_p/django/db/models/query.py", line 334, in count
          return self.query.get_count(using=self.db)
  679 File: "/home/foo_bar_p/django/db/models/sql/query.py", line 367, in get_aggregation
          result = query.get_compiler(using).execute_sql(SINGLE)
  677 File: "/home/foo_bar_p/django/db/models/sql/query.py", line 401, in get_count
          number = obj.get_aggregation(using=using)[None]
'''
from django.conf import settings

outfile = os.path.expanduser('~/tmp/debug_live.log')

other_code=re.compile(r'/(django|python...)/')

def stacktraces():
    code=[]
    now=datetime.datetime.now()
    pid=os.getpid()
    my_thread_id=thread.get_ident()
    for thread_id, stack in sys._current_frames().items():
        if thread_id==my_thread_id:
            continue # Don't print this monitor thread
        code.append("\n\n#START date: %s\n# ProcessId: %s\n# ThreadID: %s" % (now, pid, thread_id))
        for filename, lineno, name, line in traceback.extract_stack(stack):
            code.append('File: "%s", line %d, in %s' % (filename, lineno, name))
            if line:
                code.append(" %s" % (line.strip()))
        code.append('#END')
    if not code:
        return
    fd=open(outfile, 'at')
    fd.write('\n'.join(code))
    fd.close()

def monitor(interval):
    while monitor_thread:
        stacktraces()
        time.sleep(interval)

monitor_thread=None

def exiting():
    global monitor_thread
    monitor_thread=None

def start(interval):
    global monitor_thread
    if monitor_thread:
        return
    assert not os.path.islink(outfile), outfile # well known temporary name.... symlink attack...
    monitor_thread = threading.Thread(target=monitor, args=[interval])
    monitor_thread.setDaemon(True)
    atexit.register(exiting)
    monitor_thread.start()

def read_logs(args):
    # The outfile can be huge, don't read the whole file into memory.
    counter=collections.Counter()
    cur_stack=[]
    py_line=''
    code_line=''
    if args.action=='sum-all-frames':
        sum_all_frames=True
    else:
        sum_all_frames=False
    for line in open(outfile):
        if line.startswith('#END'):
            if sum_all_frames:
                frames=cur_stack
            else:
                frames=cur_stack[-1:]
            counter.update(frames)
            cur_stack=[]
            continue
        if line[0] in '\n#':
            continue
        if line.startswith('File:'):
            py_line=line.rstrip()
            continue
        if line.startswith(' '):
            code_line=line.rstrip()
            if not (py_line, code_line) in cur_stack:
                # If there is a recursion, count the line only once per stacktrace
                cur_stack.append((py_line, code_line))
            continue
        print 'ERROR unparsed', line
    for (py, code), c in counter.most_common(args.most_common):
        if not other_code.search(py):
            py='%s <====' % py
        print '% 5d %s\n %s' % (c, py, code)

def main():
    import argparse
    parser=argparse.ArgumentParser(description='Read stacktrace log')
    parser.add_argument('action', choices=['sum-all-frames', 'sum-last-frame'])
    parser.add_argument('--most-common', metavar='N', default=30, type=int, help='Display the N most common lines in the stacktraces')
    args=parser.parse_args()
    return read_logs(args)

if __name__=='__main__':
    main()
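For reference, here is a minimal Python 3 sketch of the same sampling idea (not part of the original script, and with a simplified log format): a daemon thread periodically appends the stack of every other thread to a log file.

import sys
import threading
import time
import traceback

def _sample(logfile, interval):
    # Periodically dump the stack of every thread except this sampler thread.
    me = threading.get_ident()
    while True:
        with open(logfile, 'at') as fd:
            for thread_id, frame in sys._current_frames().items():
                if thread_id == me:
                    continue
                fd.write('# ThreadID: %s\n' % thread_id)
                fd.write(''.join(traceback.format_stack(frame)))
                fd.write('#END\n')
        time.sleep(interval)

def start(logfile='/tmp/debug_live_py3.log', interval=0.3):
    t = threading.Thread(target=_sample, args=(logfile, interval), daemon=True)
    t.start()
    return t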

Related

too many files open error with multiprocessing

I have code that uses multiprocessing over about 10,000 files on a 12-core vCPU machine on Ubuntu.
def process_file(name):
    inp = open(name)
    out = open(name.split('.')[0]+'wikiout.txt','a')
    for row in inp:
        row = row.strip()
        sent_text = nltk.sent_tokenize(text)
        for sent in sent_text:
            # process sentence
    inp.close()
    out.close()

if __name__ == '__main__':
    processes = []
    for i in 'ABCDEF':
        for j in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
            for k in range(100):
                filename = os.path.join(os.path.dirname(__file__), (i + j + '/' + 'wiki_' + str(k) + '.txt'))
                p = multiprocessing.Process(target=process_file, args=(filename,))
                processes.append(p)
                p.start()
    for process in processes:
        process.join()
For some reason I get this issue
File "wikirules.py", line 37, in <module>
p.start()
File "/usr/lib/python3.8/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/usr/lib/python3.8/multiprocessing/context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/usr/lib/python3.8/multiprocessing/context.py", line 277, in _Popen
return Popen(process_obj)
File "/usr/lib/python3.8/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/usr/lib/python3.8/multiprocessing/popen_fork.py", line 69, in _launch
child_r, parent_w = os.pipe()
OSError: [Errno 24] Too many open files
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
File "wikirules.py", line 13, in process_file
File "/usr/local/lib/python3.8/dist-packages/nltk/tokenize/__init__.py", line 106, in sent_tokenize
File "/usr/local/lib/python3.8/dist-packages/nltk/data.py", line 752, in load
File "/usr/local/lib/python3.8/dist-packages/nltk/data.py", line 877, in _open
File "/usr/local/lib/python3.8/dist-packages/nltk/data.py", line 327, in open
OSError: [Errno 24] Too many open files: '/root/nltk_data/tokenizers/punkt/PY3/english.pickle'
Any clue why this might be happening? I'm still new to multiprocessing. Shouldn't this open no more than 12 files at once?
Your code is trying to run
len('ABCDEF') * len('ABCD...Z') * len(range(100)) = 6 * 26 * 100 = 15,600
operating system processes simultaneously.
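Each child needs at least one pipe from os.pipe() plus whatever files the worker opens, so this quickly exhausts the per-process file descriptor limit (often a soft limit of 1024 on Linux). As an illustration only, you can inspect that limit with the standard resource module (Unix only):

import resource

# Soft and hard limits on open file descriptors for the current process.
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
print(soft, hard)  # e.g. 1024 4096 -- far below ~15,600 simultaneous children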
The multiprocessing module contains relatively low-level primitives for working with processes. For basic tasks the standard library offers a safer and more convenient option, the concurrent.futures module, which provides pool implementations for both threads and processes and is especially useful for "embarrassingly parallel" workloads.
Here is an example of how the code from your question could be transformed using concurrent.futures and some other Python features such as generators, context managers and the pathlib module.
import concurrent.futures as futures
import itertools
import pathlib

import nltk

BASE_PATH = pathlib.Path(__file__).parent.absolute()

def filename_generator():
    """produce filenames sequence"""
    for i, j, k in itertools.product("ABCDEF", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", range(100)):
        yield BASE_PATH / f"{i}{j}/wiki_{k}.txt"

def worker(filename: pathlib.Path):
    """do all the job"""
    out_filename = filename.with_suffix('.wikiout.txt')
    with open(filename) as inp, open(out_filename, "a") as out:
        for row in inp:
            text = row.strip()
            sent_text = nltk.sent_tokenize(text)
            for sent in sent_text:
                """process sentence"""

def main():
    with futures.ProcessPoolExecutor() as pool:
        # mapping future->filename, useful in case of error
        task_to_filename = {pool.submit(worker, f): f for f in filename_generator()}
        for f in futures.as_completed(task_to_filename):
            try:
                f.result()
            except Exception as e:
                filename = task_to_filename[f]
                print(f"{filename} processing failed: {e}")

if __name__ == "__main__":
    main()
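Note that ProcessPoolExecutor() without arguments creates roughly as many worker processes as os.cpu_count() reports, so on the 12-core machine only about 12 files are processed at any moment. If you want to cap it explicitly, passing max_workers should work, e.g.:

with futures.ProcessPoolExecutor(max_workers=12) as pool:  # cap worker count explicitly
    task_to_filename = {pool.submit(worker, f): f for f in filename_generator()}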

How to create Google Cloud Dataflow Wordcount custom template in Python?

I can't create a custom Google Cloud Dataflow template using the wordcount example following the instructions here: https://cloud.google.com/dataflow/docs/guides/templates/creating-templates
I get an error relating to the RuntimeValueProvider being inaccessible. What am I doing wrong?
My main function wordcount.py:
"""A word-counting workflow."""
from __future__ import absolute_import
import argparse
import logging
import re
from past.builtins import unicode
import apache_beam as beam
from apache_beam.io import ReadFromText
from apache_beam.io import WriteToText
from apache_beam.metrics import Metrics
from apache_beam.metrics.metric import MetricsFilter
from apache_beam.options.pipeline_options import PipelineOptions, GoogleCloudOptions
from apache_beam.options.pipeline_options import SetupOptions
class WordExtractingDoFn(beam.DoFn):
"""Parse each line of input text into words."""
def __init__(self):
self.words_counter = Metrics.counter(self.__class__, 'words')
self.word_lengths_counter = Metrics.counter(self.__class__, 'word_lengths')
self.word_lengths_dist = Metrics.distribution(
self.__class__, 'word_len_dist')
self.empty_line_counter = Metrics.counter(self.__class__, 'empty_lines')
def process(self, element):
"""Returns an iterator over the words of this element.
The element is a line of text. If the line is blank, note that, too.
Args:
element: the element being processed
Returns:
The processed element.
"""
text_line = element.strip()
if not text_line:
self.empty_line_counter.inc(1)
words = re.findall(r'[\w\']+', text_line, re.UNICODE)
for w in words:
self.words_counter.inc()
self.word_lengths_counter.inc(len(w))
self.word_lengths_dist.update(len(w))
return words
def run(argv=None):
"""Main entry point; defines and runs the wordcount pipeline."""
class WordcountOptions(PipelineOptions):
#classmethod
def _add_argparse_args(cls, parser):
# Use add_value_provider_argument for arguments to be templatable
# Use add_argument as usual for non-templatable arguments
parser.add_value_provider_argument(
'--input',
default='gs://wordcount_custom_template/input/example.txt',
help='Path of the file to read from')
parser.add_value_provider_argument(
'--output',
required=True,
default='gs//wordcount_custom_template/output/count',
help='Output file to write results to.')
pipeline_options = PipelineOptions(['--output', 'some/output_path'])
pipeline_options.view_as(SetupOptions).save_main_session = True
p = beam.Pipeline(options=pipeline_options)
wordcount_options = pipeline_options.view_as(WordcountOptions)
# Read the text file[pattern] into a PCollection.
lines = p | 'read' >> ReadFromText(wordcount_options.input)
# Count the occurrences of each word.
def count_ones(word_ones):
(word, ones) = word_ones
return (word, sum(ones))
counts = (lines
| 'split' >> (beam.ParDo(WordExtractingDoFn())
.with_output_types(unicode))
| 'pair_with_one' >> beam.Map(lambda x: (x, 1))
| 'group' >> beam.GroupByKey()
| 'count' >> beam.Map(count_ones))
# Format the counts into a PCollection of strings.
def format_result(word_count):
(word, count) = word_count
return '%s: %d' % (word, count)
output = counts | 'format' >> beam.Map(format_result)
# Write the output using a "Write" transform that has side effects.
# pylint: disable=expression-not-assigned
output | 'write' >> WriteToText(wordcount_options.output)
result = p.run()
result.wait_until_finish()
# Do not query metrics when creating a template which doesn't run
if (not hasattr(result, 'has_job') # direct runner
or result.has_job): # not just a template creation
empty_lines_filter = MetricsFilter().with_name('empty_lines')
query_result = result.metrics().query(empty_lines_filter)
if query_result['counters']:
empty_lines_counter = query_result['counters'][0]
logging.info('number of empty lines: %d', empty_lines_counter.result)
word_lengths_filter = MetricsFilter().with_name('word_len_dist')
query_result = result.metrics().query(word_lengths_filter)
if query_result['distributions']:
word_lengths_dist = query_result['distributions'][0]
logging.info('average word length: %d', word_lengths_dist.result.mean)
if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
run()
My template creation code:
#!/usr/bin/env bash
python wordcount.py \
--runner DataflowRunner \
--project $PROJECT \
--staging_location gs://wordcount_custom_template/staging \
--temp_location gs://wordcount_custom_template/temp \
--template_location gs://wordcount_custom_template/template/wordcount_template
The error I receive:
raise error.RuntimeValueProviderError('%s not accessible' % obj)
apache_beam.error.RuntimeValueProviderError: RuntimeValueProvider(option: input, type: str, default_value: 'gs://wordcount_custom_template/input/example.txt') not accessible
I don't really understand what this error message means, as gs://wordcount_custom_template/input/example.txt is accessible.
Full stacktrace:
INFO:root:Missing pipeline option (runner). Executing pipeline using the default runner: DirectRunner.
INFO:root:==================== <function annotate_downstream_side_inputs at 0x108e5fa28> ====================
INFO:root:==================== <function lift_combiners at 0x108e5ff50> ====================
INFO:root:==================== <function expand_gbk at 0x108e5fde8> ====================
INFO:root:==================== <function sink_flattens at 0x108e5fe60> ====================
INFO:root:==================== <function greedily_fuse at 0x108e5f848> ====================
INFO:root:==================== <function sort_stages at 0x108e5faa0> ====================
INFO:root:Running (ref_AppliedPTransform_read/Read_3)+((ref_AppliedPTransform_split_4)+((ref_AppliedPTransform_pair_with_one_5)+(group/Write)))
INFO:root:start <DataOutputOperation group/Write >
INFO:root:start <DoOperation pair_with_one output_tags=['out']>
INFO:root:start <DoOperation split output_tags=['out']>
INFO:root:start <ReadOperation read/Read source=SourceBundle(weight=1.0, source=<apache_beam.io.textio._TextSource object at 0x108cfcd50>, start_position=None, stop_position=None)>
Traceback (most recent call last):
File "wordcount.py", line 121, in <module>
run()
File "wordcount.py", line 100, in run
result = p.run()
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/pipeline.py", line 369, in run
self.to_runner_api(), self.runner, self._options).run(False)
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/pipeline.py", line 382, in run
return self.runner.run_pipeline(self)
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/runners/direct/direct_runner.py", line 129, in run_pipeline
return runner.run_pipeline(pipeline)
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 215, in run_pipeline
return self.run_via_runner_api(pipeline.to_runner_api())
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 218, in run_via_runner_api
return self.run_stages(*self.create_stages(pipeline_proto))
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 837, in run_stages
pcoll_buffers, safe_coders).process_bundle.metrics
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 938, in run_stage
self._progress_frequency).process_bundle(data_input, data_output)
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 1110, in process_bundle
result_future = self._controller.control_handler.push(process_bundle)
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/runners/portability/fn_api_runner.py", line 1003, in push
response = self.worker.do_instruction(request)
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 185, in do_instruction
request.instruction_id)
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 202, in process_bundle
processor.process_bundle(instruction_id)
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/runners/worker/bundle_processor.py", line 286, in process_bundle
op.start()
File "apache_beam/runners/worker/operations.py", line 227, in apache_beam.runners.worker.operations.ReadOperation.start
File "apache_beam/runners/worker/operations.py", line 228, in apache_beam.runners.worker.operations.ReadOperation.start
File "apache_beam/runners/worker/operations.py", line 229, in apache_beam.runners.worker.operations.ReadOperation.start
File "apache_beam/runners/worker/operations.py", line 231, in apache_beam.runners.worker.operations.ReadOperation.start
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/io/filebasedsource.py", line 197, in get_range_tracker
return self._get_concat_source().get_range_tracker(start_position,
File "/Users/chris/.pyenv/versions/cl2/lib/python2.7/site-packages/apache_beam/options/value_provider.py", line 123, in _f
raise error.RuntimeValueProviderError('%s not accessible' % obj)
apache_beam.error.RuntimeValueProviderError: RuntimeValueProvider(option: input, type: str, default_value: 'gs://wordcount_custom_template/input/example.txt') not accessible
Another thing I don't understand: how can it be that I specify the DataflowRunner, yet the DirectRunner is called, as shown in the stacktrace?
I successfully generated the pipeline after I modified run(argv) to pick up the args from the command line:
parser = argparse.ArgumentParser()
known_args, pipeline_args = parser.parse_known_args(argv)
pipeline_options = PipelineOptions(pipeline_args)
So I think the problem is that argv is not passed to your program correctly.
Also, if you'd like to make output a template argument, do not mark it as required.
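As a rough sketch of how that fits into the question's code (my reconstruction, not the exact code from the answer), the hard-coded PipelineOptions(['--output', 'some/output_path']) inside run() would be replaced with options built from argv:

def run(argv=None):
  """Build and run the wordcount pipeline, forwarding unknown args to Beam."""
  parser = argparse.ArgumentParser()
  # parse_known_args separates your own args from the Beam/Dataflow options
  known_args, pipeline_args = parser.parse_known_args(argv)

  pipeline_options = PipelineOptions(pipeline_args)
  pipeline_options.view_as(SetupOptions).save_main_session = True
  p = beam.Pipeline(options=pipeline_options)
  wordcount_options = pipeline_options.view_as(WordcountOptions)  # WordcountOptions as defined in the question
  # ... the rest of the pipeline stays the same ...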

Python 3 script that passes each member of an array to a function

I'm trying to write a small Python 3 utility script that checks whether a file exists on my server.
I have the code below, which has a big array of string values that I pass to a simple function that returns the URL and the response code.
However, when I run it I get errors that I don't even know where to start with:
$ python ReturnPath.py
Traceback (most recent call last):
File "ReturnPath.py", line 86, in <module>
checkResponse(u)
File "ReturnPath.py", line 5, in checkResponse
code = urllib.request.urlopen(url).getcode()
File "C:\Program Files\Python37\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Program Files\Python37\lib\urllib\request.py", line 510, in open
req = Request(fullurl, data)
File "C:\Program Files\Python37\lib\urllib\request.py", line 328, in __init__
self.full_url = url
File "C:\Program Files\Python37\lib\urllib\request.py", line 354, in full_url
self._parse()
File "C:\Program Files\Python37\lib\urllib\request.py", line 383, in _parse
raise ValueError("unknown url type: %r" % self.full_url)
ValueError: unknown url type: '"https://myserver.org/Media/CharacterAvatarImages/ae275ecb-183e-4e8d-8465-9d6d36c1323f.jpg"'
Here is my code:
import urllib.request

def checkResponse(url):
    code = urllib.request.urlopen(url).getcode()
    print(url + " = " + code)
    return

arrCases = []
arrCases.extend([
    "https://myserver.org/Media/CharacterAvatarImages/ae275ecb-183e-4e8d-8465-9d6d36c1323f.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/3ea92fa3-1ef0-4358-b38d-bb04e653aa53.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/7958a0e3-171b-46b5-875e-970368389bdf.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/e9a6cb00-6811-4b47-9aac-88480578dd44.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/73df88c3-b829-4519-9523-2bbe1f2c8549.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/61aa614b-5c95-487c-b4e3-783231b43677.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/8be7811f-18dc-4a81-a557-8b81605e3452.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/56539acb-2b1b-4410-a4bc-ac2eb0dc00fa.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/8bcf93fc-b435-4fd4-9c82-4aba78c58529.jpg",
])

for u in arrCases:
    checkResponse(u)
What am I doing wrong?
The ValueError in your traceback shows the URL string itself still contains literal quote characters ('"https://..."'), so urllib cannot parse it. You have to catch errors from such broken URLs. I also increased speed through multiprocessing.Pool.
import urllib.request
from urllib.error import HTTPError, URLError
import multiprocessing

def checkResponse(url):
    try:
        code = urllib.request.urlopen(url, timeout=1).getcode()
    except (HTTPError, URLError) as error:
        print(url, " = ", error)
    else:
        print(url, " = ", code)
    return

arrCases = []
arrCases.extend([
    "https://i.stack.imgur.com/DsNOB.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/ae275ecb-183e-4e8d-8465-9d6d36c1323f.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/3ea92fa3-1ef0-4358-b38d-bb04e653aa53.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/7958a0e3-171b-46b5-875e-970368389bdf.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/e9a6cb00-6811-4b47-9aac-88480578dd44.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/73df88c3-b829-4519-9523-2bbe1f2c8549.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/61aa614b-5c95-487c-b4e3-783231b43677.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/8be7811f-18dc-4a81-a557-8b81605e3452.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/56539acb-2b1b-4410-a4bc-ac2eb0dc00fa.jpg",
    "https://myserver.org/Media/CharacterAvatarImages/8bcf93fc-b435-4fd4-9c82-4aba78c58529.jpg",
])

if __name__ == '__main__':
    # The __main__ guard is required for multiprocessing on Windows (spawn start method).
    with multiprocessing.Pool(processes=4) as pool:
        pool.map(checkResponse, arrCases)
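Since checking URLs is I/O-bound rather than CPU-bound, a thread pool is arguably a better fit than processes here; a minimal variation (my suggestion, not part of the original answer) using concurrent.futures with the same checkResponse function:

from concurrent.futures import ThreadPoolExecutor

if __name__ == '__main__':
    # Threads avoid process start-up cost and work fine for blocking network I/O.
    with ThreadPoolExecutor(max_workers=8) as executor:
        executor.map(checkResponse, arrCases)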

Sometimes pathos.multiprocessing.Pool can't be terminated correctly

I am trying to use pathos.multiprocessing.Pool in my project.
However, I run into the following problem when I terminate the Pool.
I use CentOS 6.5; I'm not sure whether it is caused by pathos.multiprocessing.Pool or something else. Can anyone help me with it?
Traceback (most recent call last):
File "/usr/local/lib/python2.7/threading.py", line 801, in __bootstrap_inner
self.run()
File "/usr/local/lib/python2.7/threading.py", line 1073, in run
self.function(*self.args, **self.kwargs)
File "receiver.py", line 132, in kill_clients
pool.terminate()
File "/usr/local/lib/python2.7/site-packages/multiprocess/pool.py", line 465, in terminate
self._terminate()
File "/usr/local/lib/python2.7/site-packages/multiprocess/util.py", line 207, in __call__
res = self._callback(*self._args, **self._kwargs)
File "/usr/local/lib/python2.7/site-packages/multiprocess/pool.py", line 513, in _terminate_pool
p.terminate()
File "/usr/local/lib/python2.7/site-packages/multiprocess/process.py", line 137, in terminate
self._popen.terminate()
File "/usr/local/lib/python2.7/site-packages/multiprocess/forking.py", line 174, in terminate
os.kill(self.pid, signal.SIGTERM)
OSError: [Errno 3] No such process
The weird thing is that it works well at the beginning, but when the 4th job is received, this problem appears.
class Receiver:
    def __init__(self):
        ....
        self.results={}

    def kill_clients(self, client_list, pool):
        for client in client_list:
            client.kill()
        pool.terminate()

    def process_result(self, result):
        if result is None:
            self.results = {}
            return
        res = result.split(':')
        if len(res) != 4:
            raise Exception("result with wrong format: %s" % result)
        self.results['%s_%s' % (res[0], res[1])] = {"code": res[3], "msg": res[4]}
        ...

    def handler(self, job):
        self.lg.debug("Receive job in rtmp_start_handler.")
        self.lg.debug("<%s>" % str(job))

        # each client corresponding one process
        cli_counts = job['count']
        pool = Pool(processes=cli_counts)
        clients = []
        try:
            for i in xrange(cli_counts):
                rtmp_cli = RtmpClient(job['case'], i)
                clients.append(rtmp_cli)
            [pool.apply_async(client.run, callback=self.process_result)
                for client in clients]
            pool.close()
            sleep(1)
            self.lg.debug("All clients are started.")
            t = Timer(
                job['timeout'],
                self.kill_clients,
                args=(clients, pool)
            )
            t.start()
            self.lg.debug("Timer is started. timeout %s s" % job['timeout'])
            pool.join()
        except Exception, e:
            self.lg.warning("Exception occurred: %s" % e)
            self.lg.warning(format_exc())
            return "0"

        # here the self.results shall be ready
        return self.parse_results()
The OSError is not caused by the Pool but by an issue in my own program.
When I use Popen to create a subprocess and exec ffmpeg, it exits immediately (due to another problem), so when I try to kill the subprocess, it no longer exists. That's why the OSError is raised.
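A minimal sketch of that kind of guard (my illustration, not the original fix): check whether the child is still running before signalling it, and tolerate the race where it exits in between.

import errno
import os
import signal
import subprocess

proc = subprocess.Popen(['ffmpeg', '-version'])  # hypothetical child process

if proc.poll() is None:  # still running?
    try:
        os.kill(proc.pid, signal.SIGTERM)
    except OSError as e:
        if e.errno != errno.ESRCH:  # ESRCH: the process is already gone, ignore it
            raise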

Fabric - Test SSH connection to multiple hosts

I have a Python script that uses the fabric library to test SSH connections to multiple hosts. I want to gather all the results in one list:
...
import fabric
from fabric.api import *

results = []

@parallel
def test_connection():
    global results
    try:
        run('ls')
        results += "%s: SUCCESS" % env.host
    except Exception as e:
        results += "%s: FAILURE. Exception: %e" % (env.host, e)

if __name__ == '__main__':
    tasks.execute(test_connection)
    print results
When I execute the script, I get the following:
Traceback (most recent call last):
File "./test_ssh.py", line 99, in <module>
tasks.execute(test_connection)
File "/Library/Python/2.7/site-packages/fabric/tasks.py", line 387, in execute
multiprocessing
File "/Library/Python/2.7/site-packages/fabric/tasks.py", line 277, in _execute
return task.run(*args, **kwargs)
File "/Library/Python/2.7/site-packages/fabric/tasks.py", line 174, in run
return self.wrapped(*args, **kwargs)
File "./test_ssh.py", line 96, in test_connection
results += "%s: FAILURE. Exception: %e" % (env.host, e)
UnboundLocalError: local variable 'results' referenced before assignment
I think it is because test_connection runs in its own context, so it doesn't have access to results.
Is there another way I can gather my results, then?
The trick is that you can actually return results from parallel execution:
@parallel
def test_connection():
    try:
        run('ls')
        return True
    except Exception:
        return False
Now when you call your task, you will get:
result = execute(test_connection)
results = [ ('HOST %s succeeded' % key) if value else ('HOST %s failed' % key) for key, value in result.items() ]
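Putting it together, a minimal sketch (host names are placeholders) showing that execute returns a dictionary mapping each host to the task's return value:

from fabric.api import env, execute, parallel, run

env.hosts = ['host1.example.com', 'host2.example.com']  # placeholder hosts

@parallel
def test_connection():
    try:
        run('ls')
        return True
    except Exception:
        return False

if __name__ == '__main__':
    result = execute(test_connection)  # {host: return value}
    for host, ok in result.items():
        print('%s: %s' % (host, 'SUCCESS' if ok else 'FAILURE'))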
