I'm using Redis Sentinel with three nodes: one is the master and the other two are slaves.
I have gone through this, but it expects that the error occurs infrequently and can be retried; my approach might be wrong here.
Here is how I handle the reconfiguration of the nodes.
import redis

# Initialize on system boot
slave_node_1: redis.Redis = None
slave_node_2: redis.Redis = None
master_node: redis.Redis = None

# Handle reconfiguration of the nodes after a failover.
def reconfig_redis_nodes():
    sentinel = redis.Sentinel([
        (REDIS_HOST_0, SENTINEL_PORT),
        (REDIS_HOST_1, SENTINEL_PORT),
        (REDIS_HOST_2, SENTINEL_PORT)
    ], sentinel_kwargs={'password': REDIS_SENTINEL_PASSWORD})
    host, port = sentinel.discover_master(REDIS_MASTER_NAME)
    globals()['master_node'] = redis.Redis(host=host, port=port, db=REDIS_DB, username=REDIS_USER, password=REDIS_PASSWORD, decode_responses=True)
    slave_nodes = sentinel.discover_slaves(REDIS_MASTER_NAME)
    try:
        host, port = slave_nodes[0]
        globals()['slave_node_1'] = redis.Redis(host=host, port=port, db=REDIS_DB, username=REDIS_USER, password=REDIS_PASSWORD, decode_responses=True)
    except IndexError:
        pass
    try:
        host, port = slave_nodes[1]
        globals()['slave_node_2'] = redis.Redis(host=host, port=port, db=REDIS_DB, username=REDIS_USER, password=REDIS_PASSWORD, decode_responses=True)
    except IndexError:
        pass
# Decorator to handle config changes after a failover.
def handle_redis_failover_master_switch(func):
    def inner(*args, **kwargs):
        retries = 0
        max_retry = 5
        while True:
            try:
                return func(*args, **kwargs)
            except Exception as e:
                reconfig_redis_nodes()
                retries += 1
                if retries > max_retry:
                    logger.critical(str(e))
                    raise  # re-raise the original exception, keeping its traceback
    return inner
And this is the Redis method I am using to set a lock:
@handle_redis_failover_master_switch
def setnx(key: str, value: str, ttl_secs: int = 10):
    return master_node.set(key, value, nx=True, ex=ttl_secs)
When I manually called these functions from the shell, they worked fine. But once deployed (at about 10 requests per second), Redis started throwing the error "Redis is loading the dataset in memory".
What is the cause of the issue here, and how can I handle it gracefully?
Is it a bad idea to use Sentinel for a locking system?
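For what it's worth, redis-py can do the master rediscovery for you, and the loading state surfaces as a dedicated exception, so the retry can be narrower than a bare except Exception. A minimal sketch, assuming redis-py 4.x (setnx_with_retry and the backoff values are purely illustrative):

import time
import redis
from redis.sentinel import Sentinel

# master_for() returns a client whose connection pool asks Sentinel for the
# current master, so a failover doesn't require manual reconfiguration.
sentinel = Sentinel(
    [(REDIS_HOST_0, SENTINEL_PORT),
     (REDIS_HOST_1, SENTINEL_PORT),
     (REDIS_HOST_2, SENTINEL_PORT)],
    sentinel_kwargs={'password': REDIS_SENTINEL_PASSWORD})
master = sentinel.master_for(
    REDIS_MASTER_NAME, db=REDIS_DB, username=REDIS_USER,
    password=REDIS_PASSWORD, decode_responses=True)

def setnx_with_retry(key: str, value: str, ttl_secs: int = 10, max_retries: int = 5):
    for attempt in range(max_retries):
        try:
            return master.set(key, value, nx=True, ex=ttl_secs)
        except redis.exceptions.BusyLoadingError:
            # Raised for the "Redis is loading the dataset in memory" reply;
            # back off briefly while the node finishes loading.
            time.sleep(0.5 * (attempt + 1))
    raise RuntimeError('Redis still loading after {} retries'.format(max_retries))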
I am attempting to port some old Java code to Python.
I am using pymqi to connect to a queue manager and query for all message flow statistics topics using the topic string: $SYS/Broker/+/StatisticsAccounting/Archive/#
When using the existing Java program, messages are read from the topic without issue.
The new Python code is able to connect and query the topic without issue, but it always gives the message
Reason 2033: FAILED: MQRC_NO_MSG_AVAILABLE
Stats messages are published by the broker for each message flow every 10 minutes, and I have left the new code running for over 30 minutes without ever receiving a message.
I've also tried setting
get_opts['WaitInterval'] = pymqi.CMQC.MQWI_UNLIMITED
and sitting around for 20 minutes rather than using a loop, but no luck.
Is there any IIB server config that might be impacting the messages I am able to see, or are there other options I should be using within the client?
import pymqi
queue_manager = 'MYQM'
channel = 'MYAPP.SVRCONN'
host = 'MYHOST'
port = 'MYPORT'
topic_string = '$SYS/Broker/+/StatisticsAccounting/Archive/#'
conn_info = '%s(%s)' % (host, port)
user = ""
password = ""
qmgr = pymqi.QueueManager(None)
qmgr.connect_tcp_client(queue_manager, pymqi.CD(), channel, conn_info, user, password)
sub_desc = pymqi.SD()
sub_desc['Options'] = pymqi.CMQC.MQSO_CREATE + pymqi.CMQC.MQSO_RESUME + pymqi.CMQC.MQSO_MANAGED
sub_desc.set_vs('SubName', 'apptest')
sub_desc.set_vs('ObjectString', topic_string)
sub = pymqi.Subscription(qmgr)
sub.sub(sub_desc=sub_desc)
get_opts = pymqi.GMO(Options=pymqi.CMQC.MQGMO_WAIT)
get_opts['WaitInterval'] = 10000
md = pymqi.md()
keep_running = True
while keep_running:
    try:
        # Reset the MsgId, CorrelId & GroupId so that we can reuse
        # the same 'md' object again.
        md.MsgId = pymqi.CMQC.MQMI_NONE
        md.CorrelId = pymqi.CMQC.MQCI_NONE
        md.GroupId = pymqi.CMQC.MQGI_NONE
        message = sub.get(None, md, get_opts)
        print('Have message from Queue')
        print(message)
    except pymqi.MQMIError as e:
        if e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
            print("no message?")
            print(e)
        else:
            # Some other error condition.
            raise
    except (UnicodeDecodeError, ValueError) as e:
        print('Message is not valid json')
        print(e)
        print(message)
        continue
    except KeyboardInterrupt:
        print('Have received a keyboard interrupt')
        keep_running = False

sub.close(sub_close_options=0, close_sub_queue=True)
qmgr.disconnect()
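One thing worth ruling out on the client side is the get options: combining MQGMO_WAIT with MQGMO_FAIL_IF_QUIESCING and MQGMO_NO_SYNCPOINT is the usual pattern for a subscriber's gets. A minimal sketch of that variant (the constants are pymqi's standard CMQC names; this is a guess to try, not a confirmed fix):

# OR the flags together; NO_SYNCPOINT keeps each get out of a unit of work,
# and FAIL_IF_QUIESCING lets the get return promptly on queue manager shutdown.
get_opts = pymqi.GMO(
    Options=pymqi.CMQC.MQGMO_WAIT
    | pymqi.CMQC.MQGMO_NO_SYNCPOINT
    | pymqi.CMQC.MQGMO_FAIL_IF_QUIESCING)
get_opts['WaitInterval'] = 10000  # milliseconds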
Hello, I'm working on a simple Python SSH tunnel script, but I always receive a "Could not resolve hostname" error, although it works if I run the command manually. This is my code:
#!/usr/bin/env python
import subprocess
import time
import tempfile

class TunnelSSH():
    def __init__(self, ssh_user: str, ssh_password: str, ssh_host: str, ssh_port: int,
                 local_tunnel_port: int, remote_tunnel_host: str, remote_tunnel_port: int):
        self.ssh_user = ssh_user
        self.ssh_password = ssh_password
        self.ssh_host = ssh_host
        self.ssh_port = ssh_port
        self.local_tunnel_port = local_tunnel_port
        self.remote_tunnel_port = remote_tunnel_port
        self.remote_tunnel_host = remote_tunnel_host
        _socket_file = tempfile.NamedTemporaryFile()
        _socket_file.close()
        self.socket = _socket_file.name
        self.connected = False

    def start(self):
        ssh_conection = ['ssh', '-CN',
                         f'"{self.ssh_user}:{self.ssh_password}"@{self.ssh_host} -p {self.ssh_port}',
                         f'-L {self.local_tunnel_port}:{self.remote_tunnel_host}:{self.remote_tunnel_port}',
                         f'-S {self.socket}',
                         '-o ExitOnForwardFailure=True'
                         ]
        if not self.connected:
            status = subprocess.call(ssh_conection)
            self._check_connection(status)
            time.sleep(self.retry_sleep)
        else:
            raise Exception('Tunnel is open')

    def stop(self):
        if self.connected:
            if self._send_control_command('exit') != 0:
                raise Exception('SSH tunnel failed to exit')
            self.connected = False

    def _check_connection(self, status) -> None:
        """Check connection status and set connected to True if the tunnel is open"""
        if status != 0:
            raise Exception(f'SSH tunnel failed status: {status}')
        if self._send_control_command('check'):
            raise Exception('SSH tunnel failed to check')
        self.connected = True

    def _send_control_command(self, ctl_cmd: str):
        call = ['ssh', f'-S {self.socket}', f'-O {ctl_cmd}', f'-l {self.ssh_user}', f'{self.ssh_host}']
        return subprocess.check_call(call)
if __name__ == "__main__":
    tunnel = TunnelSSH(ssh_user='...',
                       ssh_password='...',
                       ssh_host='...',
                       ssh_port=...,
                       local_tunnel_port=...,
                       remote_tunnel_host='...',
                       remote_tunnel_port=...
                       )
    retry = 10  # times
    wait_for_retry = 5  # s
    for i in range(retry):
        print(f'Connection attempt: {i}')
        try:
            tunnel.start()
        except Exception as err:
            tunnel.stop()
            print(err)
        time.sleep(wait_for_retry)
    print(f'Connected: {tunnel.connected}')
subprocess.call expects a list of arguments. When ssh_conection is formed, several arguments are slapped together, so e.g. this part gets quoted into a single argument:
'"{self.ssh_user}:{self.ssh_password}"@{self.ssh_host} -p {self.ssh_port}'
Fix: properly split the arguments:
...
ssh_conection = ['ssh', '-CN',
                 f'{self.ssh_user}:{self.ssh_password}@{self.ssh_host}',  # will be quoted automatically
                 '-p', f'{self.ssh_port}',
                 '-L', f'{self.local_tunnel_port}:{self.remote_tunnel_host}:{self.remote_tunnel_port}',
                 '-S', f'{self.socket}',
                 '-o', 'ExitOnForwardFailure=True'
                 ]
...
What hinted at the problem: IP addresses are used directly, and 'cannot be resolved' on an IP address means it is being interpreted as a symbolic name, which makes the mistake easier to spot.
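To make the failure mode concrete, here is a small sketch (the host and port are made-up placeholders) showing how the two argument lists differ; shlex.split is a convenient way to split a command string the way a shell would:

import shlex

# Correct: one list element per argument, so ssh sees '-p' and '2222' separately.
good = ['ssh', '-p', '2222', 'user@203.0.113.5']

# Wrong: ssh receives the single argument 'user@203.0.113.5 -p 2222' and tries
# to resolve the whole string as a hostname -> "Could not resolve hostname".
bad = ['ssh', 'user@203.0.113.5 -p 2222']

# shlex.split turns a shell-style command string into a proper argv list.
print(shlex.split('ssh -p 2222 user@203.0.113.5'))
# ['ssh', '-p', '2222', 'user@203.0.113.5']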
What I need is a simple hack for running a function synchronously if Celery is not active.
What I tried is:
is_celery_working returns False although Celery and Redis are both running (started with celery -A project worker -l debug and redis-server respectively). Also, get_celery_worker_status always ends up with an error in the status.
I am using Celery with Django.
from project.celery import app

def is_celery_working():
    result = app.control.broadcast('ping', reply=True, limit=1)
    return bool(result)  # True if at least one result

def sync_async(func):
    if is_celery_working():
        return func.delay
    else:
        return func

sync_async(some_func)(*its_args, **its_kwargs)
def get_celery_worker_status():
    error_key = 'error'
    try:
        from celery.task.control import inspect
        insp = inspect()
        d = insp.stats()
        if not d:
            d = {error_key: 'No running Celery workers were found.'}
    except IOError as e:
        from errno import errorcode
        msg = "Error connecting to the backend: " + str(e)
        if len(e.args) > 0 and errorcode.get(e.args[0]) == 'ECONNREFUSED':
            msg += ' Check that the RabbitMQ server is running.'
        d = {error_key: msg}
    except ImportError as e:
        d = {error_key: str(e)}
    return d
def sync_async(func):
    status = get_celery_worker_status()
    if 'error' not in status:
        return func.delay
    else:
        return func

sync_async(some_func)(*its_args, **its_kwargs)
Your simple is_celery_working function looks correct. If you're getting False, you may want to increase your timeout to 5 or 10 seconds using the optional timeout parameter.
def is_celery_working():
    result = app.control.broadcast('ping', reply=True, limit=1, timeout=5.0)
    return bool(result)  # True if at least one result
def sync_async(func, *args, **kwargs):
    try:
        func.delay(*args, **kwargs)
    except Exception as error:
        print('Celery not active', error)
        func(*args, **kwargs)
This relies on getting an error if the Redis server is not reachable. It worked fine for me, on the assumption that if Redis is down then Celery is effectively stopped.
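As a side note, Celery also exposes a ping helper that wraps the same broadcast, which reads a bit more directly; a minimal sketch (the 5-second timeout is just an example):

def is_celery_working(timeout=5.0):
    # control.ping broadcasts a ping and collects replies until the timeout
    # expires; an empty list means no worker answered in time.
    return bool(app.control.ping(timeout=timeout))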
So, I have a Celery system set up where I dynamically create a cloud VM instance for each task; once the task completes, the VM instance deletes itself. To accomplish this I create a new queue and assign the worker on the newly created instance to that queue, so that tasks can be sent to specific instances. This works with one or two simultaneous tasks, but if I try more than that, Celery's result.get method just waits indefinitely. I am using Celery version 4.2.1 (windowlicker).
Here is my Celery config.py file:
"""A module that configures Celery"""
from os import environ
from utils.loggerFactory import make_logger
LOGGER = make_logger(__name__)
LOGGER.info('Celery initalizing...')
REDIS_BACKEND_HOST = None
if 'RedisDNS' in environ:
REDIS_BACKEND_HOST = environ['RedisDNS']
LOGGER.info('Set Redis instance hostname to {}'.format(REDIS_BACKEND_HOST))
else:
LOGGER.warning('Couldn\'t fetch RedisDNS, defaulting to localhost...')
REDIS_BACKEND_HOST = 'localhost'
BROKER_URL = 'redis://{}'.format(REDIS_BACKEND_HOST)
CELERY_RESULT_BACKEND = 'redis://{}'.format(REDIS_BACKEND_HOST)
CELERY_TRACK_STARTED = True
CELERY_TASK_CREATE_MISSING_QUEUES = True
CELERY_TASK_IGNORE_RESULT = False
LOGGER.info('Init complete')
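For context, a settings module like this is usually wired into the app with config_from_object; a minimal sketch (the app name 'tasks' and the module path 'config' are assumptions about this project's layout):

from celery import Celery

APP = Celery('tasks')
# Load the uppercase settings defined in the config module above.
APP.config_from_object('config')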
Here is the main code for executing tasks:
if ENV != 'development':
    # Create a new compute instance
    try:
        created_instance_name = create_worker_compute_instance(
            task_info['computeInstanceType'])
    except Exception as exc:
        LOGGER.error(
            '[{}] Couldn\'t create compute instance: {}'.format(request_id, str(exc)))
        try:
            LOGGER.info(
                '[{}] Saving exception into redis...'.format(request_id))
            result = json.loads(REDIS_CLIENT.get(request_id))
            result['response'] = generate_response(
                'Error: Couldn\'t create compute instance: {}'.format(str(exc)), None, 500)
            result['code'] = 500
            result['canDel'] = True
            REDIS_CLIENT.set(request_id, json.dumps(result))
        except Exception as exc:
            LOGGER.error(
                '[{}] Couldn\'t save exception into redis: {}'.format(request_id, str(exc)))
            report_exception(ENV, exc)
        report_exception(ENV, exc)
        return

    celery_queue_name = 'queue-{}'.format(created_instance_name)
    LOGGER.info('[{}] Adding new Celery queue {}'.format(
        request_id, celery_queue_name))
    try:
        APP.control.add_consumer(celery_queue_name, reply=False, destination=[
            'worker1@{}'.format(created_instance_name)])
    except Exception as exc:
        LOGGER.error('[{}] Couldn\'t add queue {}: {}'.format(
            request_id, celery_queue_name, str(exc)))
        try:
            LOGGER.info('[{}] Saving exception into redis...'.format(request_id))
            result = json.loads(REDIS_CLIENT.get(request_id))
            result['response'] = generate_response(
                'Error: Couldn\'t add queue {}: {}'.format(celery_queue_name, str(exc)), None, 500)
            result['code'] = 500
            result['canDel'] = True
            REDIS_CLIENT.set(request_id, json.dumps(result))
        except Exception as exc:
            LOGGER.error(
                '[{}] Couldn\'t save exception into redis: {}'.format(request_id, str(exc)))
            report_exception(ENV, exc)
        report_exception(ENV, exc)
        return
    LOGGER.info('[{}] Queue added'.format(request_id))
else:
    celery_queue_name = 'celery'

# Execute the task
LOGGER.info('[{}] Executing task...'.format(request_id))
async_result = run_task.apply_async(
    args=(data, task_info, SERVICE_ACCOUNT_FILE_DATA), queue=celery_queue_name)
LOGGER.info('[{}] Waiting for task to complete...'.format(request_id))
task_result = None
try:
    task_result = async_result.get()
except Exception as exc:
    LOGGER.error(
        '[{}] Couldn\'t execute task {}: {}'.format(request_id, task, str(exc)))
    try:
        LOGGER.info('[{}] Saving exception into redis...'.format(request_id))
        result = json.loads(REDIS_CLIENT.get(request_id))
        result['response'] = generate_response('Error: Couldn\'t execute task {}: {}'.format(
            task, str(exc)), None, 500)
        result['code'] = 500
        result['canDel'] = True
        REDIS_CLIENT.set(request_id, json.dumps(result))
    except Exception as exc:
        LOGGER.error(
            '[{}] Couldn\'t save exception into redis: {}'.format(request_id, str(exc)))
        report_exception(ENV, exc)
    report_exception(ENV, exc)
    return

LOGGER.info('[{}] Task executed successfully'.format(request_id))
task_result['message'] = 'Ok, task {} executed successfully'.format(
    task)
try:
    LOGGER.info('[{}] Saving result into redis...'.format(request_id))
    result = json.loads(REDIS_CLIENT.get(request_id))
    result['response'] = generate_response(
        None, task_result, 0)
    result['code'] = 200
    result['canDel'] = True
    REDIS_CLIENT.set(request_id, json.dumps(result))
except Exception as exc:
    LOGGER.error(
        '[{}] Couldn\'t save result into redis: {}'.format(request_id, str(exc)))
    report_exception(ENV, exc)
    return
Edit:
Here is a small diagram for a broad overview of the system: [diagram omitted]
Ok, it seems that the issue is with APP.control.add_consumer(celery_queue_name, reply=False, destination=['worker1@{}'.format(created_instance_name)]). Even though that command returns successfully, the worker still hasn't been added to the queue.
I managed to fix the issue by including the queue name in the worker startup command with the -Q parameter.
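For illustration, the worker startup line on the new instance would then look something like this (the app module proj is a placeholder; the node and queue names follow the worker1@<instance> and queue-<instance> format used above):

celery -A proj worker -n worker1@$(hostname) -Q queue-$(hostname)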
I am trying to write some simple loops to control objects in Pygazebo, but it only ever calls the method once, and then the loop appears to block.
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 2 12:52:50 2015

@author: skylion
"""
import trollius  # NOTE: Trollius requires protobuf from Google
from trollius import From

import pygazebo
import pygazebo.msg.joint_cmd_pb2
import time

def apply_joint_force(world_name, robot_name, joint_name, force, duration=-1):
    @trollius.coroutine
    def joint_force_loop():
        manager = yield From(pygazebo.connect())
        print("connected")
        publisher = yield From(
            manager.advertise('/gazebo/' + world_name + '/' + robot_name + '/joint_cmd',
                              'gazebo.msgs.JointCmd'))
        message = pygazebo.msg.joint_cmd_pb2.JointCmd()
        message.name = robot_name + '::' + joint_name  # format should be: name_of_robot + '::name_of_joint'
        message.force = force
        # t_end = time.time() + duration  # The time that you want the controller to stop
        while True:  # time.time() < t_end or duration == -1:
            try:
                yield From(publisher.publish(message))
                yield From(trollius.sleep(1.0))
            except:
                pass  # Nothing
        print("Connection closed")

    wait_net_service('localhost', 11345)
    loop = trollius.new_event_loop()
    loop.run_until_complete(joint_force_loop())
    raise
def wait_net_service(server, port, timeout=None):
    """ Wait for network service to appear
        @param timeout: in seconds, if None or 0 wait forever
        @return: True or False; if timeout is None may return only True or
                 throw an unhandled network exception
    """
    import socket
    import errno

    s = socket.socket()
    if timeout:
        from time import time as now
        # time module is needed to calc timeout shared between two exceptions
        end = now() + timeout

    while True:
        try:
            if timeout:
                next_timeout = end - now()
                if next_timeout < 0:
                    return False
                else:
                    s.settimeout(next_timeout)
            s.connect((server, port))
            time.sleep(1)
        except socket.timeout as err:
            # this exception occurs only if timeout is set
            if timeout:
                return False
        except socket.error as err:
            # catch timeout exception from underlying network library
            # this one is different from socket.timeout
            if type(err.args) != tuple or (err.args[0] != errno.ETIMEDOUT and err.args[0] != errno.ECONNREFUSED):
                raise err
        else:
            s.close()
            return True
I thought @coroutines were supposed to be wrapped asynchronously? Do I just misunderstand how to use this code, or am I doing something else wrong? This is my first time with concurrency in Python, by the way.
Also, this is how I am calling that function:
counter = 0
for joint_def in self.all_joint_props:
    print("each joint_def")
    apply_joint_force(world_name, robot_name, "hingejoint" + str(counter), joint_def[2])
    # print("Applying joint force")
Any idea why it keeps blocking the thread? Should I be using a different approach? Any help would be appreciated.
So, the answer is quite simple, really. You have to queue up the multiple trollius.Task objects you want to run as a list before starting the loop, and combine that with trollius.wait() so that no single task blocks the others.
Here is my code so far:
tasks = []
for joint_name in joint_names:
    tasks.append(trollius.Task(joint_force_loop(world_name, robot_name, joint_name, force, duration)))

loop = trollius.get_event_loop()
loop.run_until_complete(trollius.wait(tasks))
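For reference, the same pattern can also be written with gather; trollius is a port of asyncio, so this assumes trollius.gather behaves like asyncio.gather:

# gather schedules every coroutine and waits until all of them complete.
loop = trollius.get_event_loop()
loop.run_until_complete(trollius.gather(*[
    joint_force_loop(world_name, robot_name, joint_name, force, duration)
    for joint_name in joint_names
]))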