Pubsublite message acknowledgement not working - python

I'm using Google Pub/Sub Lite with a small dummy topic (single partition, a few messages) and the Python client library. I do the standard SubscriberClient.subscribe with a callback. The callback places each message in a queue. When a message is taken out of the queue for consumption, its ack is called. When I want to stop, I call subscribe_future.cancel(); subscribe_future.result() and discard any unconsumed messages left in the queue.
Say I know the topic has 30 messages. I consume 10 of them before stopping. Then I start a new SubscriberClient on the same subscription and receive messages. I expect to receive messages starting with the 11th, but I receive them starting with the first. So the previous subscriber has ack'd the first 10, but it's as if the server never received the acknowledgements.
I thought maybe the acks need some time to reach the server, so I waited 2 minutes before starting the second subscriber. That didn't help.
Then I thought maybe the subscriber object buffers the ack calls and I need to "flush" them before cancelling, but I couldn't find anything about that.
What am I missing? Thanks.
Here's the code. If you have a Pub/Sub Lite account, the code is executable after you fill in the credentials. The code shows two issues; one is the subject of this question, the other is asked here.
# Using python 3.8
from __future__ import annotations
import logging
import pickle
import queue
import time
import uuid
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
from typing import Union, Optional
from google.api_core.exceptions import AlreadyExists
from google.cloud.pubsub_v1.types import BatchSettings
from google.cloud.pubsublite import AdminClient, PubSubMessage
from google.cloud.pubsublite import Reservation as GCPReservation
from google.cloud.pubsublite import Subscription as GCPSubscription
from google.cloud.pubsublite import Topic as GCPTopic
from google.cloud.pubsublite.cloudpubsub import (PublisherClient,
SubscriberClient)
from google.cloud.pubsublite.types import (BacklogLocation, CloudZone,
LocationPath,
ReservationPath, SubscriptionPath,
TopicPath,
)
from google.cloud.pubsublite.types import FlowControlSettings
from google.oauth2.service_account import Credentials
logging.getLogger('google.cloud').setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
FORMAT = '[%(asctime)s.%(msecs)03d %(name)s] %(message)s'
logging.basicConfig(format=FORMAT, level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S')
class Account:
def __init__(self,
project_id: str,
region: str,
zone: str,
credentials: Credentials,
):
self.project_id = project_id
self.region = region
self.zone = CloudZone.parse(zone)
self.credentials = credentials
self.client = AdminClient(region=region, credentials=credentials)
def location_path(self) -> LocationPath:
return LocationPath(self.project_id, self.zone)
def reservation_path(self, name: str) -> ReservationPath:
return ReservationPath(self.project_id, self.region, name)
def topic_path(self, name: str) -> TopicPath:
return TopicPath(self.project_id, self.zone, name)
def subscription_path(self, name: str) -> SubscriptionPath:
return SubscriptionPath(self.project_id, self.zone, name)
def create_reservation(self, name: str, *, capacity: int = 32) -> None:
path = self.reservation_path(name)
reservation = GCPReservation(name=str(path),
throughput_capacity=capacity)
self.client.create_reservation(reservation)
# logger.info('reservation %s created', name)
def create_topic(self,
name: str,
*,
partition_count: int = 1,
partition_size_gib: int = 30,
reservation_name: str = 'default') -> Topic:
# A topic name can not be reused within one hour of deletion.
top_path = self.topic_path(name)
res_path = self.reservation_path(reservation_name)
topic = GCPTopic(
name=str(top_path),
partition_config=GCPTopic.PartitionConfig(count=partition_count),
retention_config=GCPTopic.RetentionConfig(
per_partition_bytes=partition_size_gib * 1024 * 1024 * 1024),
reservation_config=GCPTopic.ReservationConfig(
throughput_reservation=str(res_path)))
self.client.create_topic(topic)
# logger.info('topic %s created', name)
return Topic(name, self)
def delete_topic(self, name: str) -> None:
path = self.topic_path(name)
self.client.delete_topic(path)
# logger.info('topic %s deleted', name)
def get_topic(self, name: str) -> Topic:
return Topic(name, self)
class Topic:
def __init__(self, name: str, account: Account):
self.account = account
self.name = name
self._path = self.account.topic_path(name)
def create_subscription(self,
name: str,
*,
pos: str = None) -> Subscription:
path = self.account.subscription_path(name)
if pos is None or pos == 'beginning':
starting_offset = BacklogLocation.BEGINNING
elif pos == 'end':
starting_offset = BacklogLocation.END
else:
raise ValueError(
'Argument pos only accepts one of two values - "beginning" or "end"'
)
Conf = GCPSubscription.DeliveryConfig
subscription = GCPSubscription(
name=str(path),
topic=str(self._path),
delivery_config=Conf(delivery_requirement=Conf.DeliveryRequirement.DELIVER_IMMEDIATELY))
self.account.client.create_subscription(subscription, starting_offset)
# logger.info('subscription %s created for topic %s', name, self.name)
return Subscription(name, self)
def delete_subscription(self, name: str) -> None:
path = self.account.subscription_path(name)
self.account.client.delete_subscription(path)
# logger.info('subscription %s deleted from topic %s', name, self.name)
def get_subscription(self, name: str):
return Subscription(name, self)
@contextmanager
def get_publisher(self, **kwargs):
with Publisher(self, **kwargs) as pub:
yield pub
class Publisher:
    def __init__(self, topic: Topic, *, batch_size: int = 100):
        self.topic = topic
        self._batch_config = {
            'max_bytes': 3 * 1024 * 1024,  # 3 Mb
            'max_messages': batch_size,
        }
        self._messages: queue.Queue = None
        # (the __enter__/__exit__ plumbing driving the PublisherClient was truncated in the original post)

    def put(self, data) -> None:
        self._messages.put(data)
class Subscription:
def __init__(self, name: str, topic: Topic):
self.topic = topic
self.name = name
self._path = topic.account.subscription_path(name)
@contextmanager
def get_subscriber(self, *, backlog=None):
with Subscriber(self, backlog=backlog) as sub:
yield sub
class Subscriber:
def __init__(self, subscription: Subscription, backlog: int = None):
self.subscription = subscription
self._backlog = backlog or 100
self._cancel_requested: bool = None
self._messages: queue.Queue = None
self._pool: ThreadPoolExecutor = None
self._NOMORE = object()
self._subscribe_task = None
def __enter__(self):
self._pool = ThreadPoolExecutor(1).__enter__()
self._messages = queue.Queue(self._backlog)
messages = self._messages
def callback(msg: PubSubMessage):
logger.info('got %s', pickle.loads(msg.data))
messages.put(msg)
def _subscribe():
flowcontrol = FlowControlSettings(
messages_outstanding=self._backlog,
bytes_outstanding=1024 * 1024 * 10)
subscriber = SubscriberClient(credentials=self.subscription.topic.account.credentials)
with subscriber:
fut = subscriber.subscribe(self.subscription._path, callback, flowcontrol)
logger.info('subscribe sent to gcp')
while True:
if self._cancel_requested:
fut.cancel()
fut.result()
while True:
while not messages.empty():
try:
_ = messages.get_nowait()
except queue.Empty:
break
try:
messages.put_nowait(self._NOMORE)
break
except queue.Full:
continue
break
time.sleep(0.003)
self._subscribe_task = self._pool.submit(_subscribe)
return self
def __exit__(self, *args, **kwargs):
if self._pool is not None:
if self._subscribe_task is not None:
self._cancel_requested = True
while True:
z = self._messages.get()
if z is self._NOMORE:
break
self._subscribe_task.result()
self._subscribe_task = None
self._messages = None
self._pool.__exit__(*args, **kwargs)
self._pool = None
def get(self, timeout=None):
if timeout is not None and timeout == 0:
msg = self._messages.get_nowait()
else:
msg = self._messages.get(block=True, timeout=timeout)
data = pickle.loads(msg.data)
msg.ack()
return data
def get_account() -> Account:
return Account(project_id='--fill-in-proj-id--',
region='us-central1',
zone='us-central1-a',
credentials='--fill-in-creds--')
# This test shows that it takes extremely long to get the first message
# in `subscribe`.
def test1(account):
name = 'test-' + str(uuid.uuid4())
topic = account.create_topic(name)
try:
with topic.get_publisher() as p:
p.put(1)
p.put(2)
p.put(3)
sub = topic.create_subscription(name)
try:
with sub.get_subscriber() as s:
t0 = time.time()
logger.info('getting the first message')
z = s.get()
t1 = time.time()
logger.info(' got the first message')
print(z)
print('getting the first msg took', t1 - t0, 'seconds')
finally:
topic.delete_subscription(name)
finally:
account.delete_topic(name)
def test2(account):
name = 'test-' + str(uuid.uuid4())
topic = account.create_topic(name)
N = 30
try:
with topic.get_publisher(batch_size=1) as p:
for i in range(N):
p.put(i)
sub = topic.create_subscription(name)
try:
with sub.get_subscriber() as s:
for i in range(10):
z = s.get()
assert z == i
# The following block shows that the subscriber
# resets to the first message, instead of picking up
# where the previous block left off, as expected.
with sub.get_subscriber() as s:
for i in range(10, 20):
z = s.get()
try:
assert z == i
except AssertionError as e:
print(z, '!=', i)
return
finally:
topic.delete_subscription(name)
finally:
account.delete_topic(name)
if __name__ == '__main__':
a = get_account()
try:
a.create_reservation('default')
except AlreadyExists:
pass
test1(a)
print('')
test2(a)

I found a solution. Before cancelling the "subscribe" future, I need to sleep a little to allow acknowledgements to be flushed (i.e. sent out). In particular, google.cloud.pubsublite.cloudpubsub.internal.make_subscriber._DEFAULT_FLUSH_SECONDS (value 0.1) appears to be the interval to watch. I need to sleep a little longer than this to be sure.
This is a bug in the google package. "Cancelling" the future should mean abandoning unprocessed messages, while acknowledgements that have already been submitted should still be sent out. This bug may have gone unnoticed because duplicate message delivery is not an error.
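For reference, a minimal sketch of that workaround as a hypothetical helper (cancel_with_flush and margin are names I made up; _DEFAULT_FLUSH_SECONDS is an internal constant and may move or change between library versions):

import time
from google.cloud.pubsublite.cloudpubsub.internal import make_subscriber

def cancel_with_flush(subscribe_future, margin: float = 2.0):
    """Cancel a pubsublite subscribe future, but first sleep a bit longer than
    the internal ack flush interval so already-submitted acks reach the server."""
    time.sleep(make_subscriber._DEFAULT_FLUSH_SECONDS * margin)  # ~0.1 s by default
    subscribe_future.cancel()
    subscribe_future.result()

In the question's _subscribe loop this would replace the bare fut.cancel(); fut.result() once self._cancel_requested is set.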

I was not able to recreate your issue, but I think you should check the way it's handled in the official documentation on using Cloud Pub/Sub Lite.
This is the code I extracted and updated from the Receiving messages sample, and it works as intended: it gets the messages from the Lite topic and acknowledges them so they are not delivered again. If rerun, it only gets data if there is data to pull. I added the code so you can check whether something differs from your code.
consumer.py
from concurrent.futures._base import TimeoutError
from google.cloud.pubsublite.cloudpubsub import SubscriberClient
from google.cloud.pubsublite.types import (
CloudRegion,
CloudZone,
FlowControlSettings,
SubscriptionPath,
MessageMetadata,
)
from google.cloud.pubsub_v1.types import PubsubMessage
# TODO(developer):
project_number = project-number
cloud_region = "us-central1"
zone_id = "a"
subscription_id = "sub-id"
timeout = 90
location = CloudZone(CloudRegion(cloud_region), zone_id)
subscription_path = SubscriptionPath(project_number, location, subscription_id)
per_partition_flow_control_settings = FlowControlSettings(
messages_outstanding=1000,
bytes_outstanding=10 * 1024 * 1024,
)
def callback(message: PubsubMessage):
message_data = message.data.decode("utf-8")
metadata = MessageMetadata.decode(message.message_id)
print(f"Received {message_data} of ordering key {message.ordering_key} with id {metadata}.")
message.ack()
# SubscriberClient() must be used in a `with` block or have __enter__() called before use.
with SubscriberClient() as subscriber_client:
streaming_pull_future = subscriber_client.subscribe(
subscription_path,
callback=callback,
per_partition_flow_control_settings=per_partition_flow_control_settings,
)
print(f"Listening for messages on {str(subscription_path)}...")
try:
streaming_pull_future.result(timeout=timeout)
except (TimeoutError, KeyboardInterrupt):
streaming_pull_future.cancel()
assert streaming_pull_future.done()
The only way I hit your scenario is when I use different subscriptions. But in that regard, when different subscriptions pull messages from the topic, each one receives the same stored messages, as explained in Receiving messages from Lite subscriptions.
Consider this:
Check your subscription's delivery configuration; see the sketch after this list. You can use the Create and manage Lite subscriptions page for guidance.
Check whether your code and the official samples follow the same structure. In my case, I checked the following samples:
Create a Lite reservation
Create a Lite topic
Create a Lite subscription
Publishing messages
Receiving messages
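For the first point, a rough sketch of how to inspect the delivery requirement using the question's own Account class (this assumes AdminClient.get_subscription is available, as in the admin samples; show_delivery_config is just an illustrative name):

def show_delivery_config(account, subscription_name):
    # account.client is the AdminClient created in the question's Account class
    path = account.subscription_path(subscription_name)
    sub = account.client.get_subscription(path)
    # DELIVER_IMMEDIATELY vs DELIVER_AFTER_STORED controls when messages are
    # handed to subscribers, not whether acknowledged messages are redelivered
    print(sub.delivery_config.delivery_requirement)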

Related

How to make async call inside a sync operation in locust for WebSocket Testing?

This is my WebSocket library similar to https://github.com/SvenskaSpel/locust-plugins/blob/master/locust_plugins/users/socketio.py
class WebsocketClient:
abstract = True
message_regex = re.compile(r"(\d*)(.*)")
description_regex = re.compile(r"<([0-9]+)>$")
def connect(self, endpoint, header={}):
self.ws = websocket.create_connection(endpoint, header=header)
Logger.log_message('Connection Established successfully')
Logger.log_message('status code: {}'.format(self.ws.getstatus()))
gevent.spawn(self.receive())
Logger.log_message('spawning started....')
def receive(self):
while True:
message = self.ws.recv()
Logger.log_message('WSR Original message: {}'.format(message))
self.on_message(message)
def receive_1(self, ):
message = self.ws.recv()
Logger.log_message('WSR Original message: {}'.format(message))
self.on_message(message)
def on_message(self, message): # override this method in your subclass for custom handling
m = self.message_regex.match(message)
# response_time = 0 # unknown
time_stamp = 0
if m is None:
# uh oh...
raise Exception(f"got no matches for {self.message_regex} in {message}")
code = m.group(1)
json_string = m.group(2)
if code == "0":
event_type = 'open'
current_timestamp = time.time()
json_obj = json.loads(json_string)
Logger.log_message('WSR: {}'.format(json_obj))
Logger.log_message('time_stamp: {}'.format(current_timestamp))
name = '{} | {}'.format(code, event_type)
time_stamp = current_timestamp
elif code == "2":
name = "2 | heartbeat"
elif code == "40" or code == "42":
current_timestamp = time.time()
json_obj = json.loads(json_string)
Logger.log_message('WSR: {}'.format(json_obj))
Logger.log_message('time_stamp: {}'.format(current_timestamp))
event_type = json_obj[0]
name = '{} | {}'.format(code, event_type)
time_stamp = current_timestamp
else:
print("Received unexpected message: {}".format(message))
Logger.log_message("Received unexpected message: {}".format(message), LogType.ERROR)
return
events.request.fire(
request_type="WSR",
name=name,
response_time=None,
response_length=len(message),
context=None,
exception=None
)
def send(self, body):
start_at = time.time()
if body == "2" or body == "3":
name = "{} heartbeat".format(body)
else:
# hoping this is a subscribe type message, try to detect name
m = re.search(r'(\d*)\["([a-z]*)"', body)
Logger.log_message(m)
code = m.group(1)
event_type = m.group(2)
name = '{} | {}'.format(code, event_type)
Logger.log_message("WSS - payload: {}".format(body))
self.ws.send(body)
events.request.fire(
request_type="WSS",
name=name,
response_time=int((time.time() - start_at) * 1000000),
response_length=len(body),
context=None,
exception=None
)
def sleep_with_heartbeat(self, seconds):
while seconds >= 0:
gevent.sleep(min(15, seconds))
seconds -= 15
self.send('3')
def close(self):
self.ws.close()
class CustomWebsocketLocust(User):
abstract = True
def __init__(self, *args, **kwargs):
super(CustomWebsocketLocust, self).__init__(*args, **kwargs)
self.client = WebsocketClient()
My Test code is like this:
But when I am running this locust file, it gets stuck in the first task and the second task is never executed.
I thought gevent.spawn() would create a background greenlet that keeps listening for all events while the other synchronous calls happen concurrently in the Locust thread, but it is not working as expected.
Any idea how to solve this issue?
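One thing worth noting (a hedged observation, not a tested fix): gevent.spawn expects a callable plus its arguments, but gevent.spawn(self.receive()) calls receive() immediately in the current greenlet, so the infinite recv loop runs inline and the task never returns. A minimal sketch of the spawning part only (the receive_greenlet attribute name is just illustrative):

def connect(self, endpoint, header={}):
    self.ws = websocket.create_connection(endpoint, header=header)
    Logger.log_message('Connection Established successfully')
    Logger.log_message('status code: {}'.format(self.ws.getstatus()))
    # pass the method itself; gevent runs it inside the new greenlet
    # (the blocking ws.recv() stays cooperative because Locust applies
    # gevent monkey patching at startup)
    self.receive_greenlet = gevent.spawn(self.receive)
    Logger.log_message('spawning started....')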

(Python Kombu) Consuming and producing using the same channel (for RabbitMQ Direct Reply-to)

Trying to implement Direct Reply-to (RabbitMQ docs), but having issues consuming and producing on the same channel (which is a requirement for using Direct Reply-to).
Here's what I have tried:
Base Class:
from typing import Callable
from threading import Thread
from kombu import Connection, Exchange, Queue
from middleware.settings import ResourceSettings
settings = ResourceSettings()
class MiddlewareBrokerServiceBase(object):
RABBITMQ_EXCHANGE_NAME = "exchange-1"
def __init__(
self,
*,
queue_name: str,
username: str = None,
password: str = None,
host: str = None,
**kwargs
):
if not username:
username = settings.rabbitmq_username
if not password:
password = settings.rabbitmq_password
if not host:
host = settings.rabbitmq_host
self.username = username
self.password = password
self.host = host
self.queue_name = queue_name
self.exchange = self._create_exchange()
self.queue = self._create_queue()
self.connection = self._create_connection()
def _create_exchange(self):
return Exchange(self.RABBITMQ_EXCHANGE_NAME, 'topic', durable=True)
def _create_queue(self):
return Queue(self.queue_name, exchange=self.exchange, routing_key=self.queue_name)
def _create_connection(self):
return Connection(f'amqp://{self.username}:{self.password}@{self.host}/{settings.rabbitmq_vhost}')
@classmethod
def _start_rabbitmq_thread(cls, target: Callable):
rmq_thread = Thread(target=target)
rmq_thread.start()
Producer:
from typing import Callable
from kombu import Queue
from middleware.daemon.rabbitmq_service import MiddlewareBrokerServiceBase
class MiddlewareBrokerProducer(MiddlewareBrokerServiceBase):
def __init__(self, *, on_reply: Callable = None, **kwargs):
self.on_reply = on_reply
super().__init__(**kwargs)
self.channel = self.connection.channel()
self.reply_queue = None
if on_reply:
self.reply_queue = self._get_reply_queue()
self._start_rabbitmq_thread(self._reply_consumer_thread)
def _get_reply_queue(self):
return Queue(name='amq.rabbitmq.reply-to', exchange='', routing_key='amq.rabbitmq.reply-to', exclusive=True, auto_delete=True, channel=self.channel)
def _get_publish_base_args(self):
args = {'exchange': self.exchange,
'routing_key': self.queue.routing_key,
'declare': [self.queue]}
if self.on_reply:
args['reply_to'] = 'amq.rabbitmq.reply-to'
return args
def _on_reply(self, a):
print(f'Got message {a}')
if self.on_reply:
self.on_reply(a)
def _reply_consumer_thread(self):
print('Starting fast-reply consumer..')
with self.channel.Consumer(queues=[self.reply_queue], no_ack=True, on_message=self._on_reply) as consumer:
consumer.consume(no_ack=True)
while True:
try:
self.connection.drain_events(timeout=1)
except TimeoutError:
continue
def publish_message(self, message: str):
publish_args = self._get_publish_base_args()
producer = self.channel.Producer(serializer='json')
producer.publish(message, **publish_args)
Running the producer with MiddlewareBrokerProducer().publish_message('New alert') raises a timeout error at the line producer.publish(message, **publish_args) of the publish_message method.
With some troubleshooting I have noticed that this works as expected if the _reply_consumer_thread method of the producer is changed as follows:
def _reply_consumer_thread(self):
print('Starting fast-reply consumer..')
with self.channel.Consumer(queues=[self.reply_queue], no_ack=True, on_message=self._on_reply) as consumer:
while True:
consumer.consume(no_ack=True)
But this causes 100% CPU utilization, which could be mitigated by adding a time.sleep(1), though I doubt that is the right way to fix the issue.
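For completeness, the sleep-throttled variant mentioned above would look roughly like this (it only stops the loop from pegging a core; it does not address whether draining on the producer's channel is the right design):

import time

def _reply_consumer_thread(self):
    print('Starting fast-reply consumer..')
    with self.channel.Consumer(queues=[self.reply_queue], no_ack=True,
                               on_message=self._on_reply) as consumer:
        while True:
            consumer.consume(no_ack=True)
            time.sleep(1)  # throttle the polling loop instead of busy-spinning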
Any help appreciated.

What could be the reason Python says a port is being used when it's not?

No matter what port I set, Python says it's being used. I don't understand why. I'm using Twisted.
Sat Aug 26 12:49:31 2017 - (/usr/lib/python2.7/dist-packages/twisted/internet/tcp.py:980) Couldn't listen on any:4444: [Errno 98] Address already in use.
I'm not sure what code portions you need or information so if you need anything let me know.
server.py
import glob
import uuid
from modules import util
from modules import db as _db
from modules import LOG
from objects.user import User
from objects.room import Room
from objects.message import Message
from objects.Table import Table
from autobahn.twisted.websocket import WebSocketServerProtocol, \
WebSocketServerFactory, \
listenWS
def hexc(e):
et, ev, tb = sys.exc_info()
if not tb:
return str(e)
while tb:
lineno = tb.tb_lineno
fn = tb.tb_frame.f_code.co_filename
tb = tb.tb_next
return "(%s:%i) %s" % (fn, lineno, str(e))
class oChat(WebSocketServerProtocol):
_rooms = []
_userlist = Table()
_commands = Table()
_commands.user = Table()
db = _db.db('/home/chat/database.db')
def onOpen(self):
self.loadUserCommands()
self.loadSysCommands()
def getLevel(self, user):
if user.mod:
return 1
elif user.owner:
return 2
else:
return 0
def add(self, object):
if object not in self._rooms:
self._rooms.append(object)
def get(self, name):
for room in self._rooms:
if room.name == name:
return room
def execFile(self, f, dict):
with open(f, "r") as file:
try:
exec (compile(file.read(), f, 'exec'), dict)
except:
execfile(f, dict)
file.close()
return dict
def config(self, value):
config = {}
self.execFile("configuration/config.conf", config)
return config[value]
def getCommand(self, name):
name = name.lower()
if self._commands.has_key(name):
if not self._commands[name].disabled:
return self._commands[name]
def getUserCommand(self, name):
name = name.lower()
if self._commands.user.has_key(name):
if not self._commands.user[name].disabled:
return self._commands.user[name]
def setCommand(self, name, desc, func, disabled=False):
name = name.lower()
self._commands[name] = Table()
self._commands[name].desc = desc
self._commands[name].func = func
self._commands[name].disabled = disabled
def setUserCommand(self, name, desc, func, disabled=False, level=0):
name = name.lower()
self._commands.user[name] = Table()
self._commands.user[name].desc = desc
self._commands.user[name].func = func
self._commands.user[name].level = level
self._commands.user[name].disabled = disabled
def reload(self):
try:
self.loadSysCommands()
self.loadUserCommands()
except Exception as e:
print hexc(e)
def make_user(self, *args):
return User(*args)
def make_room(self, *args):
return Room(*args)
def make_message(self, *args):
return Message(*args)
def loadUserCommands(self):
files = glob.glob("protocol/user/*.py")
for file in files:
b = self.execFile(file, {})
b['init'](self)
def loadSysCommands(self):
files = glob.glob("protocol/*.py")
for file in files:
b = self.execFile(file, {})
b['init'](self)
def joinRoom(self, room, user, args):
has_sym = util.has_symbles(args, False)
room.removeUser(user, self)
room._sendCommand("uc", str(room.getCount(self)))
if args in self.db.getRooms():
room.addUser(user, self)
user.setNewRoom(room.name)
self.add(room)
room._sendCommand("uc", str(room.getCount(self)))
return True
else:
args = args.replace(" ", "-")
if not has_sym and user.status == "logged_in":
self.db.addRoom(args, user.name)
room = Room(args, self)
self.add(room)
user.setNewRoom(args)
room.addUser(user, self)
self.db.setTitle(room.name, user.name, room.name)
room._sendCommand('title', room.name)
room._sendCommand("uc", str(room.getCount(self)))
return True
else:
return False
def onConnect(self, req):
self.id = uuid.uuid4().hex
User(self.id).setIdent(db._ident(str(self.peer.split(":", 2)[1])))
User(self.id).setConnection(self.id, self)
msg = "[CONNECT] IP(%s) IDENTITY(%s)" % (str(self.peer.split(":", 2)[1]), User(self.id).ident)
print(LOG.Log(msg))
def onMessage(self, payload, isBinary):
data = payload.decode('utf8').split("::", 1)
user = User(self.id).get()
room = self.get(user.roomname)
if not room: room = Room(user.roomname.lower(), self)
try: room.check(user, self.db)
except: pass
print LOG.Log(payload.decode("utf8"))
if len(data) > 1:
cmd, args = data[0], data[1]
else:
cmd, args = data[0], ""
if cmd == "bmsg":
if args.startswith(self.config("USER_PROTOCOL_SEP")):
data = args.split(self.config("USER_PROTOCOL_SEP"), 1)
data = data[1].split(" ", 1)
if len(data) > 1:
cmd, args = data[0], data[1]
else:
cmd, args = data[0], ""
key = cmd
cmd = self.getUserCommand(key)
msg = Message(room.name, user.name, args, ident=user.ident)
if cmd and self.getLevel(user) >= cmd.level: # user protocol
try: cmd.func(self, room, user, msg, args)
except Exception as e: user._sendCommand('sysmsg', 'er', hexc(e))
else:
if not user.banned:
key = cmd
msg = Message(room.name, user.name, args, ident=user.ident) # just storing the message the bmsg.py handles sending
msg.createMessage(self.db, True)
cmd = self.getCommand(key)
if cmd: # main protocol bmsg
if user.status == 'logged_in': cmd.func(self, room, user, msg, args)
else: user._sendCommand('sysmsg', 'er', 'login or register')
else:
user._sendCommand('sysmsg', 'banned', 'you are banned') # set on sending live msg only
else:
key = cmd
cmd = self.getCommand(key)
if cmd: # main protocol other
msg = Message(room.name, user.name, args, ident=user.ident, storeMsg=False)
try: cmd.func(self, room, user, msg, args)
except Exception as e: user._sendCommand("sysmsg", "er", hexc(e))
if __name__ == '__main__':
try:
import sys
from twisted.internet import reactor,ssl
contextFactory = ssl.DefaultOpenSSLContextFactory('/etc/letsencrypt/live/otku.ga/privkey.pem',
'/etc/letsencrypt/live/otku.ga/fullchain.pem')
factory = WebSocketServerFactory(u"wss://otku.ga:4444")
factory.protocol = oChat
listenWS(factory, contextFactory)
#log.startLogging(sys.stdout)
#factory.setProtocolOptions(maxConnections=2)
reactor.listenTCP(4444, factory)
reactor.run()
except KeyboardInterrupt:
print("[ERR] KBI")
except Exception as e:
LOG.Log(hexc(e), 'er')
I don't have any errors other than the port being in use when it's not.
If you need the whole server, I'll provide a zip with requirements.txt.
You’re setting the server up to listen twice – once with listenWS and once with reactor.listenTCP. Remove reactor.listenTCP, as you want listenWS to call reactor.listenSSL for you.
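A sketch of what the corrected __main__ block from the question would look like under that suggestion:

if __name__ == '__main__':
    from twisted.internet import reactor, ssl
    contextFactory = ssl.DefaultOpenSSLContextFactory('/etc/letsencrypt/live/otku.ga/privkey.pem',
                                                      '/etc/letsencrypt/live/otku.ga/fullchain.pem')
    factory = WebSocketServerFactory(u"wss://otku.ga:4444")
    factory.protocol = oChat
    listenWS(factory, contextFactory)    # already binds port 4444 (reactor.listenSSL under the hood)
    # reactor.listenTCP(4444, factory)   # removed: this second bind caused "Address already in use"
    reactor.run()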
There are various reasons this might happen [general solutions]:
Reason 1: You may have tried running your application on one of the reserved ports [0-1024], so another application might actually be using the port.
Reason 2: You may have terminated the application abruptly, so the instruction that closes the socket (e.g. socket.close()) never got called and the socket is still open somewhere.
Reason 3: Is that your only error message? Does it say anything about admin permissions? Have you tried running with admin permissions?

multiprocessing - processes won't join?

TL;DR - the consumer processes finish but do not join, no errors are raised, and the script runs forever, stuck in limbo on the join statement.
I am aiming to speed up a data retrieval process, however I do not know how many 'tasks' (pieces of data to retrieve) there might be. So I made a modified version of the poison pill method so that the task recognizes when it is no longer retrieving information, and triggers the poison pill if statement.
I have posted a proof, which is a working example of my poison pill method, and a full script, which as the name implies is the full script. (both should be able to run as is)
proof:
import multiprocessing
class Task:
def __init__(self, number):
self.number = number
def __call__(self):
"""Find officer and company data and combine and save it"""
try:
# 'gather some data!'
self.result = self.number*2
print(self.number)
# 'fake' finding no data
if self.result >= 8:
raise NameError
except NameError:
# become poison pill once latest is done
self.result = None
def output(self):
return self.result
class Consumer(multiprocessing.Process):
"""Handle process and re-queue complete tasks"""
def __init__(self, waiting_queue, complete_queue):
multiprocessing.Process.__init__(self)
self.waiting_queue = waiting_queue
self.complete_queue = complete_queue
def run(self):
"""process tasks until queue is empty"""
proc_name = self.name
while True:
current_task = self.waiting_queue.get()
current_task()
if current_task.output() is None:
print('{}: Exiting, poison pill reached'.format(proc_name))
self.waiting_queue.task_done()
break
self.waiting_queue.task_done()
self.complete_queue.put(current_task)
print('{}: complete'.format(proc_name))
class Shepard:
"""Handle life cycle of Consumers, Queues and Tasks"""
def __init__(self):
pass
def __call__(self, start_point):
# initialize queues
todo = multiprocessing.JoinableQueue()
finished = multiprocessing.JoinableQueue()
# start consumers
num_consumers = multiprocessing.cpu_count() * 2
consumers = [Consumer(todo, finished) for i in range(num_consumers)]
for q in consumers:
q.start()
# decide on (max) end limit (make much longer than the suspected amount of data to be gathered)
start = int(start_point)
max_record_range = 100
end = start + max_record_range
# Enqueue jobs
for i in range(start, end):
todo.put(Task(i))
print('Processes joining')
# wait for processes to join
for p in consumers:
p.join()
print('Processes joined')
# process results - UNFINISHED
pass
# return results - UNFINISHED
return 'results!'
if __name__ == '__main__':
# load start points:
start_points = {'cat1': 1, 'cat2': 3, 'cat3': 4}
master = Shepard()
cat1 = master(start_points['cat1'])
print('cat1 done')
cat2 = master(start_points['cat2'])
print('cat2 done')
cat3 = master(start_points['cat3'])
So here is the full script:
import time
import requests
import sys
import json
import pandas as pd
import multiprocessing
import queue
class CompaniesHouseRequest:
"""Retreive information from Companies House"""
def __init__(self, company, catagory_url=''):
"""Example URL: '/officers'"""
self.company = str(company)
self.catagory_url = str(catagory_url)
def retrieve(self, key='Rn7RLDV9Tw9v4ShDCotjDtJFBgp1Lr4d-9GRYZMo'):
"""retrieve data from Companies House"""
call = 'https://api.companieshouse.gov.uk/company/' + self.company + self.catagory_url
retrieve_complete = False
while retrieve_complete is False:
resp = requests.get(call, auth=requests.auth.HTTPBasicAuth(key, ''))
code = resp.status_code
if code == 404:
print(resp.status_code)
raise NameError('Company not found')
elif code == 200:
try:
self.data = json.loads(resp.content.decode('UTF8'))
retrieve_complete = True
except json.decoder.JSONDecodeError:
print('Decode Error in Officers!')
else:
print("Error:", sys.exc_info()[0])
print('Retrying')
time.sleep(5)
return self.data
class Company:
"""Retrieve and hold company details"""
def __init__(self, company_number):
self.company_number = company_number
def __call__(self):
"""Create request and process data"""
# make request
req = CompaniesHouseRequest(self.company_number)
data = req.retrieve()
# extract data
try:
line = [self.company_number,
data['company_name'],
data['registered_office_address'].get('premises', ''),
data['registered_office_address'].get('address_line_1', ''),
data['registered_office_address'].get('address_line_2', ''),
data['registered_office_address'].get('country', ''),
data['registered_office_address'].get('locality', ''),
data['registered_office_address'].get('postal_code', ''),
data['registered_office_address'].get('region', '')]
except KeyError:
line = ['' for i in range(0, 9)]
# save as pandas dataframe
return pd.DataFrame([line], columns=['company_number', 'company_name', 'company_address_premises',
'company_address_line_1', 'company_address_line_2',
'company_address_country', 'company_address_locality',
'company_address_postcode', 'company_address_region'])
def name_splitter(name):
split = name.split(', ')
if len(split) > 2:
return [split[2], split[1], split[0]]
else:
return ['', split[1], split[0]]
class Officers:
"""Retrieve and hold officers details"""
def __init__(self, company_number):
self.company_number = company_number
def __call__(self):
"""Create request and process data"""
# make request
req = CompaniesHouseRequest(self.company_number, '/officers')
data = req.retrieve()
# extract data
for officer in data['items']:
if officer['officer_role'] == 'director':
name = name_splitter(officer['name'])
line = [name[0],
name[1],
name[2],
officer.get('occupation'),
officer.get('country_of_residence'),
officer.get('nationality'),
officer.get('appointed_on', ''),
officer['address'].get('premises', ''),
officer['address'].get('address_line_1', ''),
officer['address'].get('address_line_2', ''),
officer['address'].get('country', ''),
officer['address'].get('locality', ''),
officer['address'].get('postal_code', ''),
officer['address'].get('region', '')]
break
director_count = sum(map(lambda x: x['officer_role'] == 'director', data['items']))
if director_count > 1:
line += [True]
elif director_count == 1:
line += [False]
else:
line = ['no directors'] * 3 + [''] * 12
return pd.DataFrame([line], columns=['title', 'first_name', 'surname', 'occupation', 'country_of_residence',
'nationality', 'appointed_on',
'address_premises', 'address_line_1', 'address_line_2',
'address_country', 'address_locality', 'address_postcode',
'address_region', 'multi_director'])
class Task:
def __init__(self, prefix, company_number):
self.prefix = prefix
self.company_number = company_number
def __call__(self):
"""Find officer and company data and combine and save it"""
comp_id = self.prefix + str(self.company_number)
print(comp_id)
try:
# initialise company class
comp = Company(comp_id)
# initialise officer class
off = Officers(comp_id)
# retrieve and concatenate
self.result = pd.concat([comp(), off()], axis=1)
except NameError:
# become poison pill once latest is done
self.result = None
def output(self):
return self.result
class Consumer(multiprocessing.Process):
"""Handle process and re-queue complete tasks"""
def __init__(self, waiting_queue, complete_queue):
multiprocessing.Process.__init__(self)
self.waiting_queue = waiting_queue
self.complete_queue = complete_queue
def run(self):
"""process tasks until queue is empty"""
proc_name = self.name
while True:
current_task = self.waiting_queue.get()
current_task()
if current_task.output() is None:
print('{}: Exiting, poison pill reached'.format(proc_name))
self.waiting_queue.task_done()
break
self.waiting_queue.task_done()
self.complete_queue.put(current_task)
print('{}: complete'.format(proc_name))
class Shepard:
"""Handle life of Consumers, Queues and Tasks"""
def __init__(self):
pass
def __call__(self, prefix, start_point):
# initialize queues
todo = multiprocessing.JoinableQueue()
finished = multiprocessing.JoinableQueue()
# start consumers
num_consumers = multiprocessing.cpu_count() * 2
consumers = [Consumer(todo, finished) for i in range(num_consumers)]
for q in consumers:
q.start()
# decide on (max) end limit
start = int(start_point)
max_record_range = 1000
end = start + max_record_range
# Enqueue jobs
for i in range(start, end):
todo.put(Task(prefix, i))
print('Processes joining')
# wait for processes to join
for p in consumers:
p.join()
print('Processes joined')
# process results - UNFINISHED
pass
# return results - UNFINISHED
return 'results!'
if __name__ == '__main__':
# paths to data
data_directory = r'C:\Users\hdewinton\OneDrive - Advanced Payment Solutions\Python\Corporate DM\data'
base = r'\base'
# load start points:
init = {"England": 10926071, "Scotland": 574309, "Ireland": 647561}
# gather data for each catagory
master = Shepard()
ireland = master('NI', init['Ireland'])
scotland = master('SC', init['Scotland'])
england = master('', init['England'])
TL;DR - the consequence (getting stuck in limbo while the consumers fail to join) can be fixed by changing this:
finished = multiprocessing.JoinableQueue()
to this:
manager = multiprocessing.Manager()
finished = manager.Queue()
Details - "When an object is put on a queue, the object is pickled and a background thread later flushes the pickled data to an underlying pipe. This has some consequences which are a little surprising, but should not cause any practical difficulties – if they really bother you then you can instead use a queue created with a manager." from the documentation
The second queue, of finished items, triggers one of the aforementioned surprising consequences if a certain number of tasks are added to it. Below the limit there are no problems; above the limit the consequence occurs. This does not occur in the dummy because the second queue, while present, is not used. The limit depends on the size and complexity of the Task objects, so I reckon this has something to do with the flushing of pickled data only occurring after a certain volume of data is reached - that volume of data triggers the consequence.
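To make the quoted behaviour concrete, here is a tiny standalone repro (the payload size is just an assumption large enough to exceed the pipe buffer): the child's feeder thread cannot finish flushing the pickled data, so join() hangs until the parent drains the queue.

import multiprocessing

def worker(q):
    q.put('x' * 10_000_000)  # large pickled object sits in the feeder thread

if __name__ == '__main__':
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=worker, args=(q,))
    p.start()
    p.join(timeout=2)
    print('still alive after join attempt:', p.is_alive())  # True: stuck until we drain
    q.get()   # drain the queue...
    p.join()  # ...and now the join completes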
Addendum - Another error appears once the fix has been implemented: a pipe error occurs because the consumers of the todo queue are terminated before the queue is empty, leaving the pipe within the queue object with no connection object to send data to. This triggers a WinError 232. Not to worry though: the pipe error can be fixed by emptying the queue before exiting the consumers.
Simply add this to the consumers class run method:
while not self.waiting_queue.empty():
try:
self.waiting_queue.get(timeout=0.001)
except:
pass
self.waiting_queue.close()
This removes every element from the queue. Make sure it comes after the main while loop, and the pipe error should not occur because the consumers will empty the queue before terminating.

Client timeout with asyncio.Protocol

I've been writing a MUD in Python using asyncio.Protocol, but I have a problem when users close their client (typically a terminal, since you connect via telnet) without disconnecting properly.
The server doesn't recognize the user as disconnected, and they remain in the game.
The problem only occurs when the client is connected remotely (for some reason, maybe someone can explain...) it doesn't happen when connecting from the localhost.
Is there a neat way to check whether a user is still actually connected (without additional software client-side), or failing that, how do I incorporate a timeout?
My Protocol looks something like this currently:
class User(Protocol):
def connection_made(self, transport):
self.transport = transport
self.addr = transport.get_extra_info('peername')
self.authd = False
self.name = None
self.admin = False
self.room = None
self.table = None
self.db = None
self.flags = []
print("Connected: {}".format(self.addr))
server.connected.append(self)
actions['help'](self, ['welcome'])
self.get_prompt()
def data_received(self, data):
msg = data.decode().strip()
args = msg.split()
if self.authd is False:
actions['login'](self, args)
return
if msg:
if args[0] in self.db.aliases:
args[0] = str(self.db.aliases[args[0]])
msg = ' '.join(args)
args = msg.split()
if msg[0] in server.channels:
ch = db.session.query(db.models.Channel).get(msg[0])
if msg[1] =='#':
channels.send_to_channel(self, ch, msg[2:], do_emote=True)
else:
channels.send_to_channel(self, ch, msg[1:])
self.get_prompt()
return
if args[0] in actions:
if self.is_frozen():
self.send_to_self("You're frozen solid!")
else:
actions[args[0]](self, args[1:] if len(args) > 1 else None)
self.get_prompt()
return
self.send_to_self("Huh?")
else:
if self.table is not None:
actions['table'](self, None)
elif self.room is not None:
actions['look'](self, None)
def send_to_self(self, msg):
msg = "\r\n" + msg
msg = colourify(msg)
self.transport.write(msg.encode())
@staticmethod
def send_to_user(user, msg):
msg = "\r\n"+msg
msg = colourify(msg)
user.transport.write(msg.encode())
@staticmethod
def send_to_users(users, msg):
msg = "\r\n"+msg
msg = colourify(msg)
for user in users:
user.transport.write(msg.encode())
def connection_lost(self, ex):
print("Disconnected: {}".format(self.addr))
server.connected.remove(self)
if self.authd:
self.save()
server.users.remove(self)
self.room.occupants.remove(self)
Note: I've chopped a lot of superfluous stuff out. If you want the full code, it's here.
You may schedule a new timeout handler on every data_received() call (cancelling the previous timeout handler, of course). I found the approach too cumbersome, though.
Or, as an option, switch to asyncio streams: you can use asyncio.wait_for or the brand-new, not-yet-released asyncio.timeout.
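A rough sketch of the first suggestion, using loop.call_later to arm an idle timer that data_received() resets (the timeout value and the _idle_timer attribute name are just illustrative):

import asyncio
from asyncio import Protocol

IDLE_TIMEOUT = 300  # seconds; pick whatever suits the game

class User(Protocol):
    def connection_made(self, transport):
        self.transport = transport
        self._idle_timer = asyncio.get_event_loop().call_later(
            IDLE_TIMEOUT, self.transport.close)

    def data_received(self, data):
        # any traffic from the client resets the idle timer
        self._idle_timer.cancel()
        self._idle_timer = asyncio.get_event_loop().call_later(
            IDLE_TIMEOUT, self.transport.close)
        # ... existing message handling goes here ...

    def connection_lost(self, ex):
        self._idle_timer.cancel()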
