Trying to consume messages with python multiprocessing

Trying to consume messages with python multiprocessing - python

I am not able to consume messages with the below code. I am able to consume if I just directly consOne.startLoop(). What am I missing here. Appreciate the help.
from confluent_kafka import Consumer, KafkaError, KafkaException, TopicPartition
from multiprocessing import Process
import sys
idlist = []
def setConfig(bootstrapServers, groupId, autoOffsetReset):
consumerConf = {}
consumerConf['bootstrap.servers'] = bootstrapServers
consumerConf['group.id'] = groupId
consumerConf['auto.offset.reset'] = autoOffsetReset
print(consumerConf)
return consumerConf
def createConsumer(consumerConf, topic):
consumer = Consumer(consumerConf)
consumer.subscribe([topic])
print("consumer subscribed to topic {}".format(topic))
return consumer
# self.consumer.assign([TopicPartition(topic, partition)])
def startLoop(consumer):
try:
while True:
message = consumer.poll(1.0)
if message is None:
print("none")
continue
elif message.error():
if message.error().code == KafkaError._PARTITION_EOF:
sys.stderr.write('EOF Partition - {} '.format(message.partition()))
else:
sys.stderr.write('Consumer Error on Topic - {} '.format(message.topic()))
sys.stderr.write('''-- topic - {}
-- partition - {}
-- offset - {}'''.format(
message.topic(), message.partition(), message.offset()))
else:
print('Received message: {}'.format(message.value().decode('utf-8')))
handleMessage(message.value())
except KeyboardInterrupt:
sys.stderr.write('Kafka Exception raised - {} '.format(message.topic()))
sys.exit(1)
finally:
consumer.close()
# body of the message or (message.vlue())
def handleMessage(body):
global idlist
idlist.append(body)
print(idlist)
if __name__ === '__main__':
config = setConfig('localhost:9092', groupId='group',
autoOffsetReset='smallest')
consOne = createConsumer(config, 'test')
# consOne.startLoop() Works!
processOne = Process(target=startLoop, args=(consOne, ), group=None)
# doesn't work :(
processOne.start()
processOne.join()
consumer = Consumer({'bootstrap.servers':'localhost:9092', 'group.id':'group', 'auto.offset.reset':'smallest'})
consumer.subscribe(['test'])
def startLoop():
try:
global consumer
print(consumer)
while True:
message = consumer.poll(1.0)
if message is None:
print("none")
continue
elif message.error():
if message.error().code == KafkaError._PARTITION_EOF:
sys.stderr.write('EOF Partition - {} '.format(message.partition()))
else:
sys.stderr.write('Consumer Error on Topic - {} '.format(message.topic()))
sys.stderr.write('''-- topic - {}
-- partition - {}
-- offset - {}'''.format(
message.topic(), message.partition(), message.offset()))
else:
print('Received message: {}'.format(message.value().decode('utf-8')))
# handleMessage(message.value())
except KeyboardInterrupt:
sys.stderr.write('Kafka Exception raised - {} '.format(message.topic()))
sys.exit(1)
finally:
consumer.close()
if __name__ == '__main__':
processOne = Process(target=startLoop, group=None)
# still consumes message with startLoop() but not with processOne.start()
# startLoop()
processOne.start()
processOne.join()

Probably you use multiprocessing in the wrong way. An example of the official document.
Make sure that the main module can be safely imported by a new Python interpreter without causing unintended side effects (such a starting a new process). Safe importing of main module | Programming guidelines
So, It is necessary to start a process in if __name__ == '__main__':.

Related

Winrt API Python getting Bluetooth signal strength

I am trying to measure Bluetooth signal strength using winrt APi in python using winsdk. My workflow is to measure Bluetooth Signal strength of a device that is already connected with my Windows machine. I followed the guideline from Pywinrt documentation found here:
Here is my code snippet:
import asyncio
import winsdk.windows.devices.enumeration as e
import winsdk.windows.devices.bluetooth as bl
async def scan():
sig_strength = "System.Devices.Aep.SignalStrength"
additionalProperties = [sig_strength]
watcher = e.DeviceInformation.create_watcher(bl.BluetoothDevice.get_device_selector(), additionalProperties)
received_queue = asyncio.Queue()
def added_w(device_watcher, device_info_update):
if(device_info_update.name == "my_device"):
print("found!")
for value, key in enumerate(device_info_update.properties):
if key == "System.Devices.Aep.SignalStrength":
print("signal strength: {}".format(value) )
def updated_w(device_watcher, device_info_update):
print("update for {} with kind {}".format(device_info_update.id, device_info_update.kind))
def removed_w(device_watcher, device_info_update):
pass
def stopped_w(device_watcher, device_info_update):
pass
received_token = watcher.add_added(
lambda s, e: event_loop.call_soon_threadsafe(added_w, s, e)
)
updated_token = watcher.add_updated(
lambda s, e: event_loop.call_soon_threadsafe(updated_w, s, e)
)
removed_token = watcher.add_removed(
lambda s, e: event_loop.call_soon_threadsafe(removed_w, s, e)
)
event_loop = asyncio.get_running_loop()
stopped_future = event_loop.create_future()
def handle_stopped(sender, event_args):
stopped_future.set_result(event_args)
try:
print("scanning...")
watcher.start()
# this is the consumer for the received event queue
async def print_received():
while True:
event_args = await received_queue.get()
print(
"received:",
event_args.bluetooth_address.to_bytes(6, "big").hex(":"),
event_args.raw_signal_strength_in_d_bm, "dBm",
)
printer_task = asyncio.create_task(print_received())
# since the print task is an infinite loop, we have to cancel it when we don't need it anymore
stopped_future.add_done_callback(printer_task.cancel)
# scan for 30 seconds or until an unexpected stopped event (due to error)
done, pending = await asyncio.wait(
[stopped_future, printer_task], timeout=30, return_when=asyncio.FIRST_COMPLETED
)
if stopped_future in done:
print("unexpected stopped event", stopped_future.result().error)
else:
print("stopping...")
watcher.stop()
await stopped_future
finally:
# event handler are removed in a finally block to ensure we don't leak
watcher.remove_received(received_token)
watcher.remove_stopped(handle_stopped)
asyncio.run(scan())
However, I only get a fixed RSSI value 8 in my print in added_w function.
Any help on potential solution would be greatly appreciated!

Can´t consume messages from topic

I have the following code:
from confluent_kafka.admin import AdminClient, NewTopic
a = AdminClient({'bootstrap.servers': 'localhost:9092'})
new_topics = [NewTopic(topic, num_partitions=3, replication_factor=1) for topic in ["topic1", "topic2"]]
fs = a.create_topics(new_topics)
for topic, f in fs.items():
try:
f.result()
print("Topic {} created".format(topic))
except Exception as e:
print("Failed to create topic {}: {}".format(topic, e))
Creating the topics worked fine.
This is my producer:
from confluent_kafka import Producer
p = Producer({'bootstrap.servers': 'localhost:9092'})
some_data_source = ["hello", "wuff"]
def delivery_report(err, msg):
if err is not None:
print('Message delivery failed: {}'.format(err))
else:
print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))
for data in some_data_source:
p.poll(0)
p.produce('mytopic', data.encode('utf-8'), callback=delivery_report)
p.flush()
Message delivered to mytopic [0]
Message delivered to mytopic [0]
Consumer:
from confluent_kafka import Consumer
c = Consumer({
'bootstrap.servers': 'localhost:9092',
'group.id': 'mygroup',
'auto.offset.reset': 'earliest'
})
c.subscribe(['topic1'])
while True:
msg = c.poll(1.0)
print(msg)
if msg is None:
continue
if msg.error():
print("Consumer error: {}".format(msg.error()))
continue
print('Received message: {}'.format(msg.value().decode('utf-8')))
c.close()
When I subscribe to the topic (which works), I only get None every second. Am I doing something wrong here? Does it has to do something with 'group.id': 'mygroup'? Can anyone help me?

Your producer code is writing to mytopic topic. Which doesn't match your create script or what your consumer has subscribed to.
Also, if you don't want it to print None, then move the print statement inside the if statement since poll function can return None
As commented, you may also want to try further debugging with CLI tools

How to add configuration setting for sasl.mechanism PLAIN (API) and GSSAPI (Kerberos) authentication in python script

Need some help to set the configuration for sasl.mechanism PLAIN (API) and GSSAPI (Kerberos) authentication.
We are using confluent Kafka here, there are two scripts, one a python script and the second one is a bash script which calls the python one. You can find the script below.
Thanks for the help in advance!
import json
import os
import string
import random
import socket
import uuid
import re
from datetime import datetime
import time
import hashlib
import math
import sys
from functools import cache
from confluent_kafka import Producer, KafkaError, KafkaException
topic_name = os.environ['TOPIC_NAME']
partition_count = int(os.environ['PARTITION_COUNT'])
message_key_template = json.loads(os.environ['KEY_TEMPLATE'])
message_value_template = json.loads(os.environ['VALUE_TEMPLATE'])
message_header_template = json.loads(os.environ['HEADER_TEMPLATE'])
bootstrap_servers = os.environ['BOOTSTRAP_SERVERS']
perf_counter_batch_size = int(os.environ.get('PERF_COUNTER_BATCH_SIZE', 100))
messages_per_aggregate = int(os.environ.get('MESSAGES_PER_AGGREGATE', 1))
max_message_count = int(os.environ.get('MAX_MESSAGE_COUNT', sys.maxsize))
def error_cb(err):
""" The error callback is used for generic client errors. These
errors are generally to be considered informational as the client will
automatically try to recover from all errors, and no extra action
is typically required by the application.
For this example however, we terminate the application if the client
is unable to connect to any broker (_ALL_BROKERS_DOWN) and on
authentication errors (_AUTHENTICATION). """
print("Client error: {}".format(err))
if err.code() == KafkaError._ALL_BROKERS_DOWN or \
err.code() == KafkaError._AUTHENTICATION:
# Any exception raised from this callback will be re-raised from the
# triggering flush() or poll() call.
raise KafkaException(err)
def acked(err, msg):
if err is not None:
print("Failed to send message: %s: %s" % (str(msg), str(err)))
producer_configs = {
'bootstrap.servers': bootstrap_servers,
'client.id': socket.gethostname(),
'error_cb': error_cb
}
# TODO: Need to support sasl.mechanism PLAIN (API) and GSSAPI (Kerberos) authentication.
# TODO: Need to support truststores for connecting to private DCs.
producer = Producer(producer_configs)
# generates a random value if it is not cached in the template_values dictionary
def get_templated_value(term, template_values):
if not term in template_values:
template_values[term] = str(uuid.uuid4())
return template_values[term]
def fill_template_value(value, template_values):
str_value = str(value)
template_regex = '{{(.+?)}}'
templated_terms = re.findall(template_regex, str_value)
for term in templated_terms:
str_value = str_value.replace(f"{{{{{term}}}}}", get_templated_value(term, template_values))
return str_value
def fill_template(template, templated_terms):
# TODO: Need to address metadata field, as it's treated as a string instead of a nested object.
return {field: fill_template_value(value, templated_terms) for field, value in template.items()}
#cache
def get_partition(lock_id):
bits = 128
bucket_size = 2**bits / partition_count
partition = (int(hashlib.md5(lock_id.encode('utf-8')).hexdigest(), 16) / bucket_size)
return math.floor(partition)
sequence_number = int(time.time() * 1000)
sequence_number = 0
message_count = 0
producing = True
start_time = time.perf_counter()
aggregate_message_counter = 0
# cache for templated term values so that they match across the different templates
templated_values = {}
try:
while producing:
sequence_number += 1
aggregate_message_counter += 1
message_count += 1
if aggregate_message_counter % messages_per_aggregate == 0:
# reset templated values
templated_values = {}
else:
for term in list(templated_values):
if term not in ['aggregateId', 'tenantId']:
del(templated_values[term])
# Fill in templated field values
message_key = fill_template(message_key_template, templated_values)
message_value = fill_template(message_value_template, templated_values)
message_header = fill_template(message_header_template, templated_values)
ts = datetime.utcnow().isoformat()[:-3]+'Z'
message_header['timestamp'] = ts
message_header['sequence_number'] = str(sequence_number)
message_value['timestamp'] = ts
message_value['sequenceNumber'] = sequence_number
lock_id = message_header['lock_id']
partition = get_partition(lock_id) # partition by lock_id, since key could be random, but a given aggregate_id should ALWAYS resolve to the same partition, regardless of key.
# Send message
producer.produce(topic_name, partition=partition, key=json.dumps(message_key), value=json.dumps(message_value), headers=message_header, callback=acked)
if sequence_number % perf_counter_batch_size == 0:
producer.flush()
end_time = time.perf_counter()
total_duration = end_time - start_time
messages_per_second=(perf_counter_batch_size/total_duration)
print(f'{messages_per_second} messages/second')
# reset start time
start_time = time.perf_counter()
if message_count >= max_message_count:
break
except Exception as e:
print(f'ERROR: %s' % e)
sys.exit(1)
finally:
producer.flush()

IBM Stats subscription topic always returns Reason 2033: FAILED: MQRC_NO_MSG_AVAILABLE

I am attempting to port some old java code to python.
I am using pymqi to connect to a queue manager and query for all messageflow statistics topics using the topic string: $SYS/Broker/+/StatisticsAccounting/Archive/#
When using the existing java program messages are read from the topic without issue.
When using the new python code it is able to connect and query the topic without issue but always gives the message
Reason 2033: FAILED: MQRC_NO_MSG_AVAILABLE
Stats messages are published by the broker for each messageflow every 10 minutes, and I have left the new code running for over 30minutes, never having received a message.
I've also tried setting
get_opts['WaitInterval'] = pymqi.CMQC.MQWI_UNLIMITED
and sitting around for 20minutes rather than using a loop, but no luck.
Is there any IIB server config that might be impacting the messages that I am able to see, or are there other options I should be using within the client?
import pymqi
queue_manager = 'MYQM'
channel = 'MYAPP.SVRCONN'
host = 'MYHOST'
port = 'MYPORT'
topic_string = '$SYS/Broker/+/StatisticsAccounting/Archive/#'
conn_info = '%s(%s)' % (host, port)
user = ""
password = ""
qmgr = pymqi.QueueManager(None)
qmgr.connect_tcp_client(queue_manager, pymqi.CD(), channel, conn_info, user, password)
sub_desc = pymqi.SD()
sub_desc['Options'] = pymqi.CMQC.MQSO_CREATE + pymqi.CMQC.MQSO_RESUME + pymqi.CMQC.MQSO_MANAGED
sub_desc.set_vs('SubName', 'apptest')
sub_desc.set_vs('ObjectString', topic_string)
sub = pymqi.Subscription(qmgr)
sub.sub(sub_desc=sub_desc)
get_opts = pymqi.GMO(Options=pymqi.CMQC.MQGMO_WAIT)
get_opts['WaitInterval'] = 10000
md = pymqi.md()
keep_running = True
while keep_running:
try:
# Reset the MsgId, CorrelId & GroupId so that we can reuse
# the same 'md' object again.
md.MsgId = pymqi.CMQC.MQMI_NONE
md.CorrelId = pymqi.CMQC.MQCI_NONE
md.GroupId = pymqi.CMQC.MQGI_NONE
message = sub.get(None, md, get_opts)
print('Have message from Queue')
print(message)
except pymqi.MQMIError as e:
if e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
print("no message?")
print(e)
pass
else:
# Some other error condition.
raise
except (UnicodeDecodeError, ValueError) as e:
print('Message is not valid json')
print(e)
print(message)
continue
except KeyboardInterrupt:
print('Have received a keyboard interrupt')
keep_running = False
sub.close(sub_close_options=0,close_sub_queue=True)
qmgr.disconnect()

Processing messages within a time window using Kafka

This code:
from confluent_kafka import Consumer, KafkaError
settings = {
'bootstrap.servers': 'localhost:9092',
'group.id': 'mygroup',
'client.id': 'client-1',
'enable.auto.commit': True,
'session.timeout.ms': 6000,
'default.topic.config': {'auto.offset.reset': 'smallest'}
}
c = Consumer(settings)
c.subscribe(['mytopic'])
try:
while True:
msg = c.poll(0.1)
if msg is None:
continue
elif not msg.error():
print('Received message: {0}'.format(msg.value()))
elif msg.error().code() == KafkaError._PARTITION_EOF:
print('End of partition reached {0}/{1}'
.format(msg.topic(), msg.partition()))
else:
print('Error occured: {0}'.format(msg.error().str()))
except KeyboardInterrupt:
pass
finally:
c.close()
is taken from https://www.confluent.io/blog/introduction-to-apache-kafka-for-python-programmers
I'm attempting to update this code so that the topic is polled every second but stats for all messages within a window of 1 minute are processed.
This is how I plan to solve:
replace msg = c.poll(0.1) with msg = c.poll(1)
introduce a new variable i which will maintain the current number of messages for the given minute.
Create a new class SharedQueue to store the data to be processed:
class SharedQueue:
data_queue = deque(maxlen=1000000)
def append_data_queue(self, msg):
self.data_queue.append(msg)
def get_data_queue(self, record_key, record_value, timestamp):
return self.append_data_queue
With the changes, the code becomes:
from confluent_kafka import Consumer, KafkaError
settings = {
'bootstrap.servers': 'localhost:9092',
'group.id': 'mygroup',
'client.id': 'client-1',
'enable.auto.commit': True,
'session.timeout.ms': 6000,
'default.topic.config': {'auto.offset.reset': 'smallest'}
}
sq = SharedQueue()
c = Consumer(settings)
c.subscribe(['mytopic'])
try:
i = 0
while True:
i = i + 1
msg = c.poll(1)
sq.append_data_queue(msg)
if msg is None:
continue
elif not msg.error():
print('Received message: {0}'.format(msg.value()))
elif msg.error().code() == KafkaError._PARTITION_EOF:
print('End of partition reached {0}/{1}'
.format(msg.topic(), msg.partition()))
else:
print('Error occured: {0}'.format(msg.error().str()))
if i == 60:
//process the last 60 items of the queue.
i = 0
except KeyboardInterrupt:
pass
But this is not a good solution as poll can return immediately if there are records available.
How can I implement processing of messages received within a time window? Am I on the right track implementing a queue?

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Trying to consume messages with python multiprocessing - python

Related

Winrt API Python getting Bluetooth signal strength

Can´t consume messages from topic

How to add configuration setting for sasl.mechanism PLAIN (API) and GSSAPI (Kerberos) authentication in python script

IBM Stats subscription topic always returns Reason 2033: FAILED: MQRC_NO_MSG_AVAILABLE

Processing messages within a time window using Kafka

Categories

Resources