Can't consume messages from topic - python

I have the following code:
from confluent_kafka.admin import AdminClient, NewTopic

a = AdminClient({'bootstrap.servers': 'localhost:9092'})

new_topics = [NewTopic(topic, num_partitions=3, replication_factor=1) for topic in ["topic1", "topic2"]]
fs = a.create_topics(new_topics)

for topic, f in fs.items():
    try:
        f.result()
        print("Topic {} created".format(topic))
    except Exception as e:
        print("Failed to create topic {}: {}".format(topic, e))
Creating the topics worked fine.
This is my producer:
from confluent_kafka import Producer

p = Producer({'bootstrap.servers': 'localhost:9092'})
some_data_source = ["hello", "wuff"]

def delivery_report(err, msg):
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))

for data in some_data_source:
    p.poll(0)
    p.produce('mytopic', data.encode('utf-8'), callback=delivery_report)

p.flush()
Message delivered to mytopic [0]
Message delivered to mytopic [0]
Consumer:
from confluent_kafka import Consumer

c = Consumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'mygroup',
    'auto.offset.reset': 'earliest'
})

c.subscribe(['topic1'])

while True:
    msg = c.poll(1.0)
    print(msg)
    if msg is None:
        continue
    if msg.error():
        print("Consumer error: {}".format(msg.error()))
        continue
    print('Received message: {}'.format(msg.value().decode('utf-8')))

c.close()
When I subscribe to the topic (which works), I only get None every second. Am I doing something wrong here? Does it have something to do with 'group.id': 'mygroup'? Can anyone help me?

Your producer code is writing to the mytopic topic, which doesn't match your create script or what your consumer has subscribed to.
Also, if you don't want it to print None, move the print statement inside the if block, since the poll function can return None.
As commented, you may also want to try further debugging with the CLI tools.
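For example, a minimal fix might look like the sketch below (assuming the data really should go to mytopic; alternatively, change the produce() call to one of the topics you created):

from confluent_kafka import Consumer

c = Consumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'mygroup',
    'auto.offset.reset': 'earliest'
})

# Subscribe to the topic the producer actually writes to.
c.subscribe(['mytopic'])

while True:
    msg = c.poll(1.0)
    if msg is None:
        # No message arrived within the timeout; poll again instead of printing None.
        continue
    if msg.error():
        print("Consumer error: {}".format(msg.error()))
        continue
    print('Received message: {}'.format(msg.value().decode('utf-8')))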

Related

How to implement exactly-once on Apache Kafka using Python?

I am trying to implement an exactly-once concept using the confluent-kafka library, but I couldn't succeed. Here is my producer code:
from confluent_kafka import Producer, SerializingProducer
import socket, json, random, time

conf = {'bootstrap.servers': "localhost:9092,localhost:9093,localhost:9094",
        'client.id': socket.gethostname(),
        'message.send.max.retries': 2,
        'enable.idempotence': True,
        'isolation.level': 'read_committed'}

producer = Producer(conf)
producer.produce("test_topic2", key="pc", value="ok")
producer.flush()
Here is my consumer code:
from confluent_kafka import Consumer, TopicPartition

conf = {'bootstrap.servers': 'localhost:9092,localhost:9093,localhost:9094',
        'group.id': "foo2",
        'auto.offset.reset': 'earliest',
        'enable.idempotence': True,
        'isolation.level': 'read_committed'}

consumer = Consumer(conf)
topic = "test_topic2"
# consumer.subscribe([topic], on_assign=my_on_assign)
consumer.subscribe([topic])

# Poll for new messages from Kafka and print them.
try:
    while True:
        msg = consumer.poll()
        if msg is None:
            print("Waiting...")
        elif msg.error():
            print("ERROR: {}".format(msg.error()))
        else:
            # Extract the (optional) key and value, and print.
            print(f"{msg.topic()} key = {msg.key()} value = {msg.value().decode('utf-8'):12} "
                  f"{msg.partition()} {msg.offset()}")
except KeyboardInterrupt:
    pass
finally:
    # Leave group and commit final offsets
    consumer.close()
And this is my output (respectively: topic name, key, value, partition and offset):
test_topic2 key = b'pc' value = ok 2 3
test_topic2 key = b'pc' value = ok 2 4
As I understand it, when I send the same key and value, only one of them should show up; here, for example, I should only get the message at offset 3. So my question is: am I implementing the wrong architecture in my code, or am I misunderstanding the concept? Thanks in advance.
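Note that enable.idempotence only prevents duplicates caused by the producer's own internal retries; two separate produce() calls, even with the same key and value, are still two distinct records, so offsets 3 and 4 are expected. For exactly-once semantics across produce-consume cycles the transactional API is usually used. A minimal sketch with confluent-kafka, where the transactional.id value is a hypothetical example, could look like this:

from confluent_kafka import Producer

producer = Producer({
    'bootstrap.servers': 'localhost:9092,localhost:9093,localhost:9094',
    'transactional.id': 'example-transactional-producer',  # hypothetical id, not from the original post
    'enable.idempotence': True
})

producer.init_transactions()
producer.begin_transaction()
try:
    producer.produce("test_topic2", key="pc", value="ok")
    producer.commit_transaction()
except Exception:
    producer.abort_transaction()
    raise

A consumer with isolation.level set to read_committed will then only see messages from committed transactions, but it still will not deduplicate records produced by separate, independent calls.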

IBM Stats subscription topic always returns Reason 2033: FAILED: MQRC_NO_MSG_AVAILABLE

I am attempting to port some old Java code to Python.
I am using pymqi to connect to a queue manager and query for all message flow statistics topics using the topic string: $SYS/Broker/+/StatisticsAccounting/Archive/#
When using the existing Java program, messages are read from the topic without issue.
The new Python code is able to connect and subscribe to the topic without issue, but it always gives the message
Reason 2033: FAILED: MQRC_NO_MSG_AVAILABLE
Stats messages are published by the broker for each message flow every 10 minutes, and I have left the new code running for over 30 minutes without ever receiving a message.
I've also tried setting
get_opts['WaitInterval'] = pymqi.CMQC.MQWI_UNLIMITED
and sitting around for 20 minutes rather than using a loop, but no luck.
Is there any IIB server config that might be impacting the messages I am able to see, or are there other options I should be using within the client?
import pymqi

queue_manager = 'MYQM'
channel = 'MYAPP.SVRCONN'
host = 'MYHOST'
port = 'MYPORT'
topic_string = '$SYS/Broker/+/StatisticsAccounting/Archive/#'
conn_info = '%s(%s)' % (host, port)
user = ""
password = ""

qmgr = pymqi.QueueManager(None)
qmgr.connect_tcp_client(queue_manager, pymqi.CD(), channel, conn_info, user, password)

sub_desc = pymqi.SD()
sub_desc['Options'] = pymqi.CMQC.MQSO_CREATE + pymqi.CMQC.MQSO_RESUME + pymqi.CMQC.MQSO_MANAGED
sub_desc.set_vs('SubName', 'apptest')
sub_desc.set_vs('ObjectString', topic_string)

sub = pymqi.Subscription(qmgr)
sub.sub(sub_desc=sub_desc)

get_opts = pymqi.GMO(Options=pymqi.CMQC.MQGMO_WAIT)
get_opts['WaitInterval'] = 10000

md = pymqi.md()
keep_running = True

while keep_running:
    try:
        # Reset the MsgId, CorrelId & GroupId so that we can reuse
        # the same 'md' object again.
        md.MsgId = pymqi.CMQC.MQMI_NONE
        md.CorrelId = pymqi.CMQC.MQCI_NONE
        md.GroupId = pymqi.CMQC.MQGI_NONE
        message = sub.get(None, md, get_opts)
        print('Have message from Queue')
        print(message)
    except pymqi.MQMIError as e:
        if e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
            print("no message?")
            print(e)
        else:
            # Some other error condition.
            raise
    except (UnicodeDecodeError, ValueError) as e:
        print('Message is not valid json')
        print(e)
        print(message)
        continue
    except KeyboardInterrupt:
        print('Have received a keyboard interrupt')
        keep_running = False

sub.close(sub_close_options=0, close_sub_queue=True)
qmgr.disconnect()

Processing messages within a time window using Kafka

This code:
from confluent_kafka import Consumer, KafkaError

settings = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'mygroup',
    'client.id': 'client-1',
    'enable.auto.commit': True,
    'session.timeout.ms': 6000,
    'default.topic.config': {'auto.offset.reset': 'smallest'}
}

c = Consumer(settings)
c.subscribe(['mytopic'])

try:
    while True:
        msg = c.poll(0.1)
        if msg is None:
            continue
        elif not msg.error():
            print('Received message: {0}'.format(msg.value()))
        elif msg.error().code() == KafkaError._PARTITION_EOF:
            print('End of partition reached {0}/{1}'
                  .format(msg.topic(), msg.partition()))
        else:
            print('Error occurred: {0}'.format(msg.error().str()))
except KeyboardInterrupt:
    pass
finally:
    c.close()
is taken from https://www.confluent.io/blog/introduction-to-apache-kafka-for-python-programmers
I'm attempting to update this code so that the topic is polled every second, but stats for all messages within a window of 1 minute are processed.
This is how I plan to solve it:
replace msg = c.poll(0.1) with msg = c.poll(1)
introduce a new variable i which will maintain the current number of messages for the given minute.
Create a new class SharedQueue to store the data to be processed:
from collections import deque

class SharedQueue:
    data_queue = deque(maxlen=1000000)

    def append_data_queue(self, msg):
        self.data_queue.append(msg)

    def get_data_queue(self, record_key, record_value, timestamp):
        return self.append_data_queue
With the changes, the code becomes:
from confluent_kafka import Consumer, KafkaError

settings = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'mygroup',
    'client.id': 'client-1',
    'enable.auto.commit': True,
    'session.timeout.ms': 6000,
    'default.topic.config': {'auto.offset.reset': 'smallest'}
}

sq = SharedQueue()
c = Consumer(settings)
c.subscribe(['mytopic'])

try:
    i = 0
    while True:
        i = i + 1
        msg = c.poll(1)
        sq.append_data_queue(msg)
        if msg is None:
            continue
        elif not msg.error():
            print('Received message: {0}'.format(msg.value()))
        elif msg.error().code() == KafkaError._PARTITION_EOF:
            print('End of partition reached {0}/{1}'
                  .format(msg.topic(), msg.partition()))
        else:
            print('Error occurred: {0}'.format(msg.error().str()))
        if i == 60:
            # process the last 60 items of the queue.
            i = 0
except KeyboardInterrupt:
    pass
But this is not a good solution, as poll() can return immediately when records are available, so 60 iterations do not necessarily correspond to 60 seconds.
How can I implement processing of messages received within a time window? Am I on the right track implementing a queue?
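One possible sketch, not from the original post: instead of counting poll() calls, track elapsed wall-clock time and flush whatever has accumulated once 60 seconds have passed, regardless of how quickly poll() returns. Here process() is a hypothetical placeholder for your own aggregation logic:

import time
from collections import deque
from confluent_kafka import Consumer

c = Consumer({'bootstrap.servers': 'localhost:9092',
              'group.id': 'mygroup',
              'auto.offset.reset': 'earliest'})
c.subscribe(['mytopic'])

window = deque()
window_start = time.monotonic()

try:
    while True:
        msg = c.poll(1.0)
        if msg is not None and not msg.error():
            window.append(msg)

        # Flush based on elapsed time, not on the number of poll() calls.
        if time.monotonic() - window_start >= 60:
            process(list(window))  # process() is a placeholder for your aggregation logic
            window.clear()
            window_start = time.monotonic()
except KeyboardInterrupt:
    pass
finally:
    c.close()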

Trying to consume messages with python multiprocessing

I am not able to consume messages with the code below. I am able to consume if I just call consOne.startLoop() directly. What am I missing here? Appreciate the help.
from confluent_kafka import Consumer, KafkaError, KafkaException, TopicPartition
from multiprocessing import Process
import sys

idlist = []

def setConfig(bootstrapServers, groupId, autoOffsetReset):
    consumerConf = {}
    consumerConf['bootstrap.servers'] = bootstrapServers
    consumerConf['group.id'] = groupId
    consumerConf['auto.offset.reset'] = autoOffsetReset
    print(consumerConf)
    return consumerConf

def createConsumer(consumerConf, topic):
    consumer = Consumer(consumerConf)
    consumer.subscribe([topic])
    print("consumer subscribed to topic {}".format(topic))
    return consumer
    # self.consumer.assign([TopicPartition(topic, partition)])

def startLoop(consumer):
    try:
        while True:
            message = consumer.poll(1.0)
            if message is None:
                print("none")
                continue
            elif message.error():
                if message.error().code() == KafkaError._PARTITION_EOF:
                    sys.stderr.write('EOF Partition - {} '.format(message.partition()))
                else:
                    sys.stderr.write('Consumer Error on Topic - {} '.format(message.topic()))
                    sys.stderr.write('''-- topic - {}
                    -- partition - {}
                    -- offset - {}'''.format(
                        message.topic(), message.partition(), message.offset()))
            else:
                print('Received message: {}'.format(message.value().decode('utf-8')))
                handleMessage(message.value())
    except KeyboardInterrupt:
        sys.stderr.write('Kafka Exception raised - {} '.format(message.topic()))
        sys.exit(1)
    finally:
        consumer.close()

# body of the message or (message.value())
def handleMessage(body):
    global idlist
    idlist.append(body)
    print(idlist)

if __name__ == '__main__':
    config = setConfig('localhost:9092', groupId='group',
                       autoOffsetReset='smallest')
    consOne = createConsumer(config, 'test')
    # consOne.startLoop() Works!
    processOne = Process(target=startLoop, args=(consOne, ), group=None)
    # doesn't work :(
    processOne.start()
    processOne.join()
A second attempt, with the consumer created at module level:

consumer = Consumer({'bootstrap.servers': 'localhost:9092', 'group.id': 'group', 'auto.offset.reset': 'smallest'})
consumer.subscribe(['test'])

def startLoop():
    try:
        global consumer
        print(consumer)
        while True:
            message = consumer.poll(1.0)
            if message is None:
                print("none")
                continue
            elif message.error():
                if message.error().code() == KafkaError._PARTITION_EOF:
                    sys.stderr.write('EOF Partition - {} '.format(message.partition()))
                else:
                    sys.stderr.write('Consumer Error on Topic - {} '.format(message.topic()))
                    sys.stderr.write('''-- topic - {}
                    -- partition - {}
                    -- offset - {}'''.format(
                        message.topic(), message.partition(), message.offset()))
            else:
                print('Received message: {}'.format(message.value().decode('utf-8')))
                # handleMessage(message.value())
    except KeyboardInterrupt:
        sys.stderr.write('Kafka Exception raised - {} '.format(message.topic()))
        sys.exit(1)
    finally:
        consumer.close()

if __name__ == '__main__':
    processOne = Process(target=startLoop, group=None)
    # still consumes message with startLoop() but not with processOne.start()
    # startLoop()
    processOne.start()
    processOne.join()
Probably you are using multiprocessing in the wrong way. From the official documentation:
Make sure that the main module can be safely imported by a new Python interpreter without causing unintended side effects (such as starting a new process). Safe importing of main module | Programming guidelines
So it is necessary to start the process inside if __name__ == '__main__':.
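Beyond the __main__ guard, a pattern that often helps here is to create the Consumer inside the child process rather than in the parent, since the consumer holds network state that does not transfer cleanly to another process. A rough sketch under that assumption, reusing the topic and settings from the question:

from multiprocessing import Process
from confluent_kafka import Consumer

def consumeLoop(conf, topic):
    # Build the consumer inside the child process, not in the parent.
    consumer = Consumer(conf)
    consumer.subscribe([topic])
    try:
        while True:
            message = consumer.poll(1.0)
            if message is None or message.error():
                continue
            print('Received message: {}'.format(message.value().decode('utf-8')))
    finally:
        consumer.close()

if __name__ == '__main__':
    conf = {'bootstrap.servers': 'localhost:9092',
            'group.id': 'group',
            'auto.offset.reset': 'smallest'}
    processOne = Process(target=consumeLoop, args=(conf, 'test'))
    processOne.start()
    processOne.join()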

Consuming and replying to separate queues | Pika implementation

I am struggling to solve my issue; I hope someone from the community can help me here.
Our requirements are locked and can't be changed, as the producer publishing to the queues is controlled by a different team.
The producer, which is written in Java, declares three queues (TASK, RESPONSE, TASK_RESPONSE) and listens on them with the help of the Spring framework.
A hashmap is sent to the TASK and TASK_RESPONSE queues from the Java AMQP client (producer).
We need to consume these hashmaps and send the responses as follows:
If the TASK queue is processed, the response needs to be sent on the RESPONSE queue incrementally.
If the TASK_RESPONSE queue is processed, the response needs to be sent on the TASK_RESPONSE queue incrementally (RPC mode).
Now, we need to consume and publish this in Python, since we need to do some background processing on the tasks.
I tried to work with Celery and Dramatiq, but was not able to figure out how it can be done with them, so I tried writing it myself (with the help of tutorials available online).
The problem is, I am able to consume the messages but not able to reply to the RESPONSE queue. Here is my code:
from collections import OrderedDict
from concurrent.futures import ThreadPoolExecutor
import pika
import datetime
import logging
import json
from logging import StreamHandler
from time import sleep
from random import randint
from pika import SelectConnection
from settings import *

logging.basicConfig(handlers=[StreamHandler()], level=logging.INFO, format=logging.BASIC_FORMAT)
_logger = logging.getLogger(__name__)


class QueueConsumer(object):
    """The consumer class to manage connections to the AMQP server/queue"""

    def __init__(self, queue, logger, parameters, thread_id=0):
        self.channel = None
        self.connection = None
        self.queue_name_task = queue['task']
        self.queue_name_response = queue['response']
        self.logger = logger
        self.consumer_id = 'Consumer Thread: %d' % (thread_id,)
        self.parameters = pika.ConnectionParameters(**parameters)

    def consume(self):
        try:
            self.connection = SelectConnection(parameters=self.parameters, on_open_callback=self._on_connected)
            self.connection.ioloop.start()
        except Exception as e:
            self.logger.error('{} {}'.format(self.consumer_id, str(e)))
            self.connection.close()
            self.connection.ioloop.start()

    def _on_connected(self, connection):
        connection.channel(on_open_callback=self._on_channel_open)

    def _on_channel_open(self, channel):
        self.channel = channel
        try:
            # Declare Task Queue
            self.channel.queue_declare(queue=self.queue_name_task,
                                       exclusive=False,
                                       durable=True,
                                       auto_delete=False,
                                       callback=self._on_queue_declared)
            self.logger.info("{} Opened Channel....".format(self.consumer_id))
            # Declare Task Response Queue
            self.channel.queue_declare(queue=self.queue_name_response,
                                       exclusive=False,
                                       durable=True,
                                       auto_delete=False)
            self.logger.info("{} Opened Channel....".format(self.consumer_id))
        except Exception as e:
            self.logger.error('{} {}'.format(self.consumer_id, str(e)))

    def _on_queue_declared(self, frame):
        self.logger.debug('{} ... declaring queue'.format(self.consumer_id))
        self.channel.basic_qos(prefetch_count=1)
        try:
            self.channel.basic_consume(queue=self.queue_name_task,
                                       on_message_callback=self.handle_delivery,
                                       auto_ack=True)
            self.logger.info("{} Declared queue...".format(self.consumer_id))
        except Exception as e:
            self.logger.error('{} crashing:--> {}'.format(self.consumer_id, str(e)))

    def handle_delivery(self, channel, method, header, body):
        try:
            start_time = datetime.datetime.now()
            _logger.info("Received...")
            _logger.info("Content: %s" % body)
            req = json.loads(self.decode(body))
            # Do something
            sleep(randint(10, 20))
            time_taken = datetime.datetime.now() - start_time
            log_msg = "[{}] Time Taken: {}.{}".format(req['bar']['baz'], time_taken.seconds, time_taken.microseconds)
            _logger.info(log_msg)
            # Publish the result to another queue.
            try:
                self.channel.basic_publish(exchange='',
                                           routing_key=self.queue_name_response,
                                           properties=pika.BasicProperties(),
                                           body=log_msg)
                _logger.info("Message Published...\t(%s)" % self.queue_name_response)
            except Exception as e:
                self.logger.error('{} Message publishing failed:--> {}'.format(self.consumer_id, str(e)))
        except Exception as err:
            _logger.exception(err)

    def decode(self, body):
        try:
            _body = body.decode('utf-8')
        except AttributeError:
            _body = body
        return _body


if __name__ == "__main__":
    pika_parameters = OrderedDict([
        ('host', TF_BROKER_HOST),
        ('port', TF_BROKER_PORT),
        ('virtual_host', TF_BROKER_VHOST)
    ])
    queue = {'task': TF_IAAS_TASK_QUEUE, 'response': TF_IAAS_REPLY_QUEUE}
    try:
        with ThreadPoolExecutor(max_workers=TF_IAAS_THREAD_SIZE, thread_name_prefix=TF_IAAS_THREAD_PREFIX) as executor:
            start = 1
            for thread_id in range(start, (TF_IAAS_THREAD_SIZE + start)):
                executor.submit(QueueConsumer(queue, _logger, pika_parameters, thread_id).consume)
    except Exception as err:
        _logger.exception(err)
Publish Messages On RabbitMQ
import pika
import json
import random
import datetime
from faker import Faker
from random import randint

fake = Faker('en_US')

if __name__ == '__main__':
    try:
        connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
        channel = connection.channel()
        channel.queue_declare(queue='tf_task', durable=True)
        started_at = datetime.datetime.now()
        properties = pika.BasicProperties(delivery_mode=2)
        for i in range(0, 10000):
            body = {
                'foo': randint(i, i + 100),
                'bar': {
                    'baz': fake.name(),
                    'poo': float(random.randrange(155 + i, 389 + i)) / 100
                }
            }
            channel.basic_publish(exchange='',
                                  routing_key='tf_task',
                                  body=json.dumps(body),
                                  properties=properties)
            if i % 10000 == 0:
                duration = datetime.datetime.now() - started_at
                print(i, duration.total_seconds())
        print(" [x] Sent 'Hello World!'")
        connection.close()
        now = datetime.datetime.now()
        duration = now - started_at
        print(duration.total_seconds())
    except Exception as e:
        print(e)
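Not from the original post, but for the RPC-style requirement one common approach is to echo the incoming message's correlation_id on the reply so the Java side can match request and response. A minimal sketch with pika's BlockingConnection, assuming queue names TASK and RESPONSE as described above and that the producer sets a correlation_id; do_work() is a hypothetical placeholder for the background processing:

import json
import pika

def do_work(payload):
    # Hypothetical placeholder for the real background processing.
    return {'status': 'done', 'input': payload}

def on_task(channel, method, properties, body):
    result = do_work(json.loads(body))
    # Reply on the RESPONSE queue, echoing the correlation_id for request/response matching.
    channel.basic_publish(
        exchange='',
        routing_key='RESPONSE',
        properties=pika.BasicProperties(correlation_id=properties.correlation_id),
        body=json.dumps(result))
    channel.basic_ack(delivery_tag=method.delivery_tag)

if __name__ == '__main__':
    connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
    channel = connection.channel()
    channel.basic_consume(queue='TASK', on_message_callback=on_task)
    channel.start_consuming()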
