What I need is a simple hack for running a function synchronously if Celery is not active.
What I tried is:
is_celery_working returns False although Celery and Redis are both running (I started them with celery -A project worker -l debug and redis-server respectively). Also, get_celery_worker_status always returns an error status.
I am using celery with Django.
from project.celery import app

def is_celery_working():
    result = app.control.broadcast('ping', reply=True, limit=1)
    return bool(result)  # True if at least one worker replied

def sync_async(func):
    if is_celery_working():
        return func.delay
    else:
        return func

sync_async(some_func)(*its_args, **its_kwargs)
def get_celery_worker_status():
    error_key = 'error'
    try:
        from celery.task.control import inspect
        insp = inspect()
        d = insp.stats()
        if not d:
            d = {error_key: 'No running Celery workers were found.'}
    except IOError as e:
        from errno import errorcode
        msg = "Error connecting to the backend: " + str(e)
        if len(e.args) > 0 and errorcode.get(e.args[0]) == 'ECONNREFUSED':
            msg += ' Check that the RabbitMQ server is running.'
        d = {error_key: msg}
    except ImportError as e:
        d = {error_key: str(e)}
    return d
def sync_async(func):
    status = get_celery_worker_status()
    if 'error' not in status:
        return func.delay
    else:
        return func

sync_async(some_func)(*its_args, **its_kwargs)
Your simple is_celery_working function looks correct. If you're getting False, you may want to increase your timeout to 5 or 10 seconds using the optional timeout parameter.
def is_celery_working():
    result = app.control.broadcast('ping', reply=True, limit=1, timeout=5.0)
    return bool(result)  # True if at least one worker replied
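Celery also exposes app.control.ping(), a convenience wrapper around the same broadcast; a minimal sketch using the same app as above:

def is_celery_working():
    # ping() returns a list of {worker_name: {'ok': 'pong'}} replies, or [] if no worker answered
    return bool(app.control.ping(timeout=5.0))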
def sync_async(func, *args, **kwargs):
    try:
        func.delay(*args, **kwargs)
    except Exception as error:
        print('Celery not active', error)
        func(*args, **kwargs)
This just raises an error if the Redis server is not running, and then falls back to a direct call. It worked fine for me, since I assume that if Redis is down, Celery is stopped as well.
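For reuse, the same try/except fallback can be wrapped up as a small helper; a minimal sketch, where some_task stands in for any Celery task:

def run_or_fallback(task):
    """Dispatch task.delay(...), falling back to a synchronous call if the broker is down."""
    def wrapper(*args, **kwargs):
        try:
            return task.delay(*args, **kwargs)
        except Exception as error:
            # broker unreachable (e.g. Redis down): run in-process instead
            print('Celery not active, running synchronously:', error)
            return task(*args, **kwargs)
    return wrapper

run_or_fallback(some_task)(1, 2, key='value')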
We are using:
platform linux -- Python 3.9.5, pytest-6.2.5, py-1.10.0, pluggy-0.13.1
plugins: forked-1.4.0, xdist-2.5.0, pytest_check-1.0.4, teamcity-messages-1.29, anyio-3.3.4, testrail-2.9.1, dependency-0.5.1
When trying to execute pytest with xdist on a remote Windows host using --dist=loadfile, the test run hangs.
The command:
python3 -m pytest -vv --dist=loadfile --tx ssh=admin@test-host-ip --rsyncdir /tmp/autotests_rsync C:\\users\\admin\\pyexecnetcache\\autotests_rsync\\autotests\\testsuite\\positive
The hang occurs in a test that uses a waiter to get a value from a PostgreSQL DB via the SQLAlchemy ORM.
We pass a value from the test suite into the following test:
start_time = Waiter.wait_new(lambda: DbTestData.get_session_records_column_by_record_id(
                                 DbTestData.start_time, record_id)[0][0],
                             check_func=CheckFunctions.check_none,
                             error_message=f"Error")
assert start_time is not None, f"Record start_time in db = {start_time}, expected not None"
The DB helper runs its queries through this function:

def query(*args):
    session = SessionHolder.get_session()
    result = session.query(*args)
    session.commit()
    return result
and the value is polled with this waiter:
@staticmethod
def wait_new(func: Callable, check_func: Callable = CheckFunctions.check_empty, timeout_value: int = 20,
             timeout_interval: int = 1, error_message: str = ""):
    print(f"Func = {func}")
    value = waiter_exception
    exc_raise_if_fail = TestWaiterException()
    timeout = 0
    in_while = True
    Logger.utils_logger.debug(f"timeout_value = {timeout_value}, timeout_interval = {timeout_interval}")
    while in_while:
        print("in_while loop")
        try:
            print("Trying to execute func")
            value = func()
        except Exception as ex:
            print("Exception")
            if timeout == timeout_value:
                exc_raise_if_fail.with_traceback(sys.exc_info()[2])
                exc_raise_if_fail.txt += ": " + ex.args[0]
                in_while = False
            value = waiter_exception
            Logger.utils_logger.debug("Exception", exc_info=True)
        finally:
            print("Finally")
            Logger.utils_logger.debug(f"Current value: {value}")
            if (timeout > timeout_value) or (value != waiter_exception and not check_func(value)):
                print("Break")
                break
            else:
                print("Else")
                timeout += timeout_interval
                time.sleep(timeout_interval)
    if value == waiter_exception:
        Logger.utils_logger.critical(f"{exc_raise_if_fail.txt}, {error_message}")
        raise exc_raise_if_fail
    return value
It hangs permanently while executing the waiter, but only when we use the xdist plugin.
We also added close_all_sessions for the SQL queries, but it made no difference.
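One thing worth checking (a sketch only, not a confirmed fix): reusing one long-lived session across the waiter's polls means each read may come from a stale transaction snapshot. A per-call session makes every poll open and close its own transaction. The connection URL and the SessionRecord model here are hypothetical placeholders:

from contextlib import contextmanager
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine("postgresql://user:password@db-host/db-name")  # placeholder URL
Session = sessionmaker(bind=engine)

@contextmanager
def fresh_session():
    """Yield a new session per query so each poll sees the latest committed data."""
    session = Session()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()

# e.g. inside the polled func:
# with fresh_session() as s:
#     rows = s.query(SessionRecord.start_time).filter_by(id=record_id).all()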
I'm using Redis sentinel with three nodes. One is the master and the other two are slaves.
I have gone through this, but it expects the error to occur infrequently and to be retryable; my approach might be wrong here.
Here I am handling the reconfiguration of the nodes.
import redis
from redis.sentinel import Sentinel

# Initialized on system boot
slave_node_1: redis.Redis = None
slave_node_2: redis.Redis = None
master_node: redis.Redis = None

# Handles the reconfiguration of the nodes.
def reconfig_redis_nodes():
    sentinel = Sentinel([
        (REDIS_HOST_0, SENTINEL_PORT),
        (REDIS_HOST_1, SENTINEL_PORT),
        (REDIS_HOST_2, SENTINEL_PORT)
    ], sentinel_kwargs={'password': REDIS_SENTINEL_PASSWORD})
    host, port = sentinel.discover_master(REDIS_MASTER_NAME)
    globals()['master_node'] = redis.Redis(host=host, port=port, db=REDIS_DB, username=REDIS_USER,
                                           password=REDIS_PASSWORD, decode_responses=True)
    slave_nodes = sentinel.discover_slaves(REDIS_MASTER_NAME)
    try:
        host, port = slave_nodes[0]
        globals()['slave_node_1'] = redis.Redis(host=host, port=port, db=REDIS_DB, username=REDIS_USER,
                                                password=REDIS_PASSWORD, decode_responses=True)
    except IndexError:
        pass  # fewer than one replica discovered
    try:
        host, port = slave_nodes[1]
        globals()['slave_node_2'] = redis.Redis(host=host, port=port, db=REDIS_DB, username=REDIS_USER,
                                                password=REDIS_PASSWORD, decode_responses=True)
    except IndexError:
        pass  # fewer than two replicas discovered
# Decorator to handle config change
def handle_redis_failover_master_switch(func):
    def inner(*args, **kwargs):
        retries = 0
        max_retry = 5
        while True:
            try:
                return func(*args, **kwargs)
            except Exception as e:
                reconfig_redis_nodes()
                retries += 1
                if retries > max_retry:
                    logger.critical(str(e))
                    raise
    return inner

# And this is the Redis method I am using to set the lock.
@handle_redis_failover_master_switch
def setnx(key: str, value: str, ttl_secs: int = 10):
    return master_node.set(key, value, nx=True, ex=ttl_secs)
When I call these functions manually from the shell, they work fine. But once deployed (about 10 requests per second), Redis starts throwing "Redis is loading the dataset in memory".
What is the cause of the issue here and how can I handle it gracefully?
Is it a bad idea to use Sentinel for a locking system?
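For comparison, redis-py's Sentinel client can route commands through master_for(), which re-resolves the master via Sentinel on reconnects, and the loading phase after a restart or failover surfaces as redis.exceptions.BusyLoadingError, which can be retried explicitly. A minimal sketch, reusing the REDIS_* settings from the question:

import time

from redis.sentinel import Sentinel
from redis.exceptions import BusyLoadingError, ConnectionError

sentinel = Sentinel(
    [(REDIS_HOST_0, SENTINEL_PORT), (REDIS_HOST_1, SENTINEL_PORT), (REDIS_HOST_2, SENTINEL_PORT)],
    sentinel_kwargs={'password': REDIS_SENTINEL_PASSWORD},
)
# master_for returns a proxy that asks Sentinel for the current master on each reconnect.
master = sentinel.master_for(
    REDIS_MASTER_NAME, db=REDIS_DB, username=REDIS_USER,
    password=REDIS_PASSWORD, decode_responses=True,
)

def setnx_with_retry(key, value, ttl_secs=10, retries=5, delay=0.5):
    """SET NX EX with a bounded retry while a restarted node is still loading its dataset."""
    for attempt in range(retries):
        try:
            return master.set(key, value, nx=True, ex=ttl_secs)
        except (BusyLoadingError, ConnectionError):
            if attempt == retries - 1:
                raise
            time.sleep(delay)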
I have an electron app frontend (zerorpc-node) communicating with a python backend (zerorpc-python) that needs to:
a) be able to send requests to the backend [standard zerorpc call]
b) be able to run multiple backend processes simultaneously [I followed the architecture in https://github.com/0rpc/zerorpc-node/issues/96]
c) be able to cancel a backend process at will [not sure how to do this with current architecture]
Any guidance on how to architect a solution to (c) would be great. If necessary I am willing to switch away from zerorpc if it is limiting, but if the solution involves using zerorpc, that's fantastic.
I ended up using gipc to spin up processes. The cancellation mechanism relies on the fact that when a gipc process is terminated, the pipe closes. The full API is more complicated; this is the core of what I ended up with:
import logging
import traceback

import gipc
from gevent import lock


class ZerorpcService:
    def __init__(self):
        self.participant_id = None
        self.extraction_methods = []
        # maps pid to the running gipc process
        self.processes = {}
        self.mutex = lock.Semaphore()

    def _launch_process(self, function, kwargs):
        """Launches a new process."""
        started = False
        try:
            # required argument: the caller-supplied process id
            pid = kwargs["pid"]
            # start an independent gipc process, communicating via a pipe
            with gipc.pipe() as (r, w):
                with self.mutex:
                    if pid in self.processes:
                        return_value = {'status': 1, 'error': 'pid already exists', "report": True}
                        return
                    proc = gipc.start_process(self._process_wrapper, args=(function, kwargs, w))
                    self.processes[pid] = proc
                    started = True
                # wait for the process to send something over the pipe
                return_value = r.get()
        except EOFError:
            # happens when we terminate a process, because that closes the pipe
            return_value = {'status': 1, 'error': "pid {} terminated".format(pid), "report": False}
        except Exception as error:
            logging.exception(error)
            return_value = {'status': 1, 'error': str(error), 'traceback': traceback.format_exc(), "report": True}
        finally:
            # remove the pid from the map; returning here also covers the early bare return above
            with self.mutex:
                if started:
                    del self.processes[pid]
            return return_value

    @staticmethod
    def _process_wrapper(function, kwargs, pipe):
        """
        Executes function with kwargs and formats the result into a dict.
        Wraps it in error handling.
        Routes the return value through the provided pipe.
        """
        return_val = {'status': 0}
        try:
            raw_val = function(**kwargs)
            if raw_val is not None:
                return_val = raw_val
        except Exception as error:
            logging.exception(error)
            return_val = {'status': 1, 'error': str(error), 'traceback': traceback.format_exc(), "report": True}
        finally:
            pipe.put(return_val)

    def cancel_process(self, pid):
        if pid in self.processes:
            with self.mutex:
                process = self.processes[pid]
                if process.is_alive():
                    process.terminate()
            return {'status': 0}
        else:
            return {'status': 1, 'error': 'pid {} not found'.format(pid), "traceback": traceback.format_exc(),
                    "report": True}
So, I have a Celery system set up where I dynamically create a cloud VM instance for each task; once the task completes, the VM instance deletes itself. To accomplish this, I create a new queue and assign the worker on the newly created instance to that queue, so that tasks can be sent to specific instances. This works with 1 or 2 simultaneous tasks, but with more than that, Celery's result.get method just waits indefinitely. I am using Celery version 4.2.1 (windowlicker).
Here is my Celery config.py file:
"""A module that configures Celery"""
from os import environ
from utils.loggerFactory import make_logger
LOGGER = make_logger(__name__)
LOGGER.info('Celery initalizing...')
REDIS_BACKEND_HOST = None
if 'RedisDNS' in environ:
REDIS_BACKEND_HOST = environ['RedisDNS']
LOGGER.info('Set Redis instance hostname to {}'.format(REDIS_BACKEND_HOST))
else:
LOGGER.warning('Couldn\'t fetch RedisDNS, defaulting to localhost...')
REDIS_BACKEND_HOST = 'localhost'
BROKER_URL = 'redis://{}'.format(REDIS_BACKEND_HOST)
CELERY_RESULT_BACKEND = 'redis://{}'.format(REDIS_BACKEND_HOST)
CELERY_TRACK_STARTED = True
CELERY_TASK_CREATE_MISSING_QUEUES = True
CELERY_TASK_IGNORE_RESULT = False
LOGGER.info('Init complete')
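For context, this config module is presumably pulled in when the Celery app is created, along the lines of the following (an assumption, not shown in the original):

from celery import Celery

APP = Celery('project')
APP.config_from_object('config')  # the module above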
Here is the main code for executing tasks:
if ENV != 'development':
    # Create a new compute instance
    try:
        created_instance_name = create_worker_compute_instance(
            task_info['computeInstanceType'])
    except Exception as exc:
        LOGGER.error(
            '[{}] Couldn\'t create compute instance: {}'.format(request_id, str(exc)))
        try:
            LOGGER.info(
                '[{}] Saving exception into redis...'.format(request_id))
            result = json.loads(REDIS_CLIENT.get(request_id))
            result['response'] = generate_response(
                'Error: Couldn\'t create compute instance: {}'.format(str(exc)), None, 500)
            result['code'] = 500
            result['canDel'] = True
            REDIS_CLIENT.set(request_id, json.dumps(result))
        except Exception as exc:
            LOGGER.error(
                '[{}] Couldn\'t save exception into redis: {}'.format(request_id, str(exc)))
            report_exception(ENV, exc)
        report_exception(ENV, exc)
        return

    celery_queue_name = 'queue-{}'.format(created_instance_name)
    LOGGER.info('[{}] Adding new Celery queue {}'.format(
        request_id, celery_queue_name))
    try:
        APP.control.add_consumer(celery_queue_name, reply=False, destination=[
            'worker1@{}'.format(created_instance_name)])
    except Exception as exc:
        LOGGER.error('[{}] Couldn\'t add queue {}: {}'.format(
            request_id, celery_queue_name, str(exc)))
        try:
            LOGGER.info('[{}] Saving exception into redis...'.format(request_id))
            result = json.loads(REDIS_CLIENT.get(request_id))
            result['response'] = generate_response(
                'Error: Couldn\'t add queue {}: {}'.format(celery_queue_name, str(exc)), None, 500)
            result['code'] = 500
            result['canDel'] = True
            REDIS_CLIENT.set(request_id, json.dumps(result))
        except Exception as exc:
            LOGGER.error(
                '[{}] Couldn\'t save exception into redis: {}'.format(request_id, str(exc)))
            report_exception(ENV, exc)
        report_exception(ENV, exc)
        return
    LOGGER.info('[{}] Queue added'.format(request_id))
else:
    celery_queue_name = 'celery'

# Execute the task
LOGGER.info('[{}] Executing task...'.format(request_id))
async_result = run_task.apply_async(
    args=(data, task_info, SERVICE_ACCOUNT_FILE_DATA), queue=celery_queue_name)
LOGGER.info('[{}] Waiting for task to complete...'.format(request_id))
task_result = None
try:
    task_result = async_result.get()
except Exception as exc:
    LOGGER.error(
        '[{}] Couldn\'t execute task {}: {}'.format(request_id, task, str(exc)))
    try:
        LOGGER.info('[{}] Saving exception into redis...'.format(request_id))
        result = json.loads(REDIS_CLIENT.get(request_id))
        result['response'] = generate_response('Error: Couldn\'t execute task {}: {}'.format(
            task, str(exc)), None, 500)
        result['code'] = 500
        result['canDel'] = True
        REDIS_CLIENT.set(request_id, json.dumps(result))
    except Exception as exc:
        LOGGER.error(
            '[{}] Couldn\'t save exception into redis: {}'.format(request_id, str(exc)))
        report_exception(ENV, exc)
    report_exception(ENV, exc)
    return

LOGGER.info('[{}] Task executed successfully'.format(request_id))
task_result['message'] = 'Ok, task {} executed successfully'.format(task)
try:
    LOGGER.info('[{}] Saving result into redis...'.format(request_id))
    result = json.loads(REDIS_CLIENT.get(request_id))
    result['response'] = generate_response(None, task_result, 0)
    result['code'] = 200
    result['canDel'] = True
    REDIS_CLIENT.set(request_id, json.dumps(result))
except Exception as exc:
    LOGGER.error(
        '[{}] Couldn\'t save result into redis: {}'.format(request_id, str(exc)))
    report_exception(ENV, exc)
Edit:
A small diagram giving a broad overview of the system was attached here (image not reproduced).
Ok, it seems the issue was with APP.control.add_consumer(celery_queue_name, reply=False, destination=['worker1@{}'.format(created_instance_name)]). Even though that command returned successfully, the worker still hadn't been added to the queue.
I managed to fix the issue by including the queue name in the worker startup command with the -Q parameter.
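So the startup command on each new instance ends up looking something like this (the app name "project" is a placeholder; the queue and node names must match what the dispatch code computes):

celery -A project worker -l info -Q queue-<created_instance_name> -n worker1@<created_instance_name>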
I am trying to write some simple loops to control objects in Pygazebo, but alas it only ever calls the method once and then the loop appears to block.
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 2 12:52:50 2015

@author: skylion
"""
import trollius  # NOTE: Trollius requires protobuf from Google
from trollius import From
import pygazebo
import pygazebo.msg.joint_cmd_pb2
import time
def apply_joint_force(world_name, robot_name, joint_name, force, duration=-1):
    @trollius.coroutine
    def joint_force_loop():
        manager = yield From(pygazebo.connect())
        print("connected")
        publisher = yield From(
            manager.advertise('/gazebo/' + world_name + '/' + robot_name + '/joint_cmd',
                              'gazebo.msgs.JointCmd'))
        message = pygazebo.msg.joint_cmd_pb2.JointCmd()
        message.name = robot_name + '::' + joint_name  # format: name_of_robot + '::name_of_joint'
        message.force = force
        # t_end = time.time() + duration  # the time at which the controller should stop
        while True:  # time.time() < t_end or duration == -1:
            try:
                yield From(publisher.publish(message))
                yield From(trollius.sleep(1.0))
            except:
                pass  # nothing
        print("Connection closed")

    wait_net_service('localhost', 11345)
    loop = trollius.new_event_loop()
    loop.run_until_complete(joint_force_loop())
    raise
def wait_net_service(server, port, timeout=None):
    """ Wait for network service to appear
        @param timeout: in seconds, if None or 0 wait forever
        @return: True or False; if timeout is None may return only True or
        throw an unhandled network exception
    """
    import socket
    import errno

    s = socket.socket()
    if timeout:
        from time import time as now
        # time module is needed to calc timeout shared between two exceptions
        end = now() + timeout
    while True:
        try:
            if timeout:
                next_timeout = end - now()
                if next_timeout < 0:
                    return False
                else:
                    s.settimeout(next_timeout)
            s.connect((server, port))
            time.sleep(1)
        except socket.timeout as err:
            # this exception occurs only if timeout is set
            if timeout:
                return False
        except socket.error as err:
            # catch timeout exception from underlying network library;
            # this one is different from socket.timeout
            if type(err.args) != tuple or (err.args[0] != errno.ETIMEDOUT and err.args[0] != errno.ECONNREFUSED):
                raise err
        else:
            s.close()
            return True
I thought @coroutine-decorated functions were supposed to run asynchronously? Do I just misunderstand how to use this code, or am I doing something else wrong? This is my first time with concurrency in Python, btw.
Also this is how I am calling that function:
counter = 0
for joint_def in self.all_joint_props:
    print("each joint_def")
    apply_joint_force(world_name, robot_name, "hingejoint" + str(counter), joint_def[2])
    counter += 1  # advance to the next joint
    # print("Applying joint force")
Any idea why it keeps blocking the thread? Should I be using a different method for this? Any help would be appreciated.
So, the answer is quite simple really. You have to queue up the multiple Trollius tasks you want to run as a list before starting the loop, and combine that with trollius.wait() so the coroutines run concurrently instead of each one blocking the loop.
Here is my code so far:
tasks = []
for joint_name in joint_names:
    tasks.append(trollius.Task(joint_force_loop(world_name, robot_name, joint_name, force, duration)))

loop = trollius.get_event_loop()
loop.run_until_complete(trollius.wait(tasks))
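For this to work, joint_force_loop has to be lifted out of apply_joint_force into a standalone coroutine that takes its parameters explicitly; a sketch built from the question's own code:

@trollius.coroutine
def joint_force_loop(world_name, robot_name, joint_name, force, duration=-1):
    manager = yield From(pygazebo.connect())
    publisher = yield From(
        manager.advertise('/gazebo/' + world_name + '/' + robot_name + '/joint_cmd',
                          'gazebo.msgs.JointCmd'))
    message = pygazebo.msg.joint_cmd_pb2.JointCmd()
    message.name = robot_name + '::' + joint_name
    message.force = force
    # publish the force command once a second until the task is cancelled
    while True:
        yield From(publisher.publish(message))
        yield From(trollius.sleep(1.0))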