how to stop publishing kafka producer after x minutes - python

I am trying to stop publishing the tweets in Kafka producer python script after x minutes.
Below is main code.
I have tried time.sleep(), foo function with terminate(), etc are not working.
Please suggest to run code for x minutes and then stop(sys.exit()).
# twitter authorization
auth = OAuthHandler(API_KEY, API_KEY_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
# init tweepy
api = tweepy.API(auth)
# init kafka producer
producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
value_serializer=lambda x: dumps(x).encode('utf-8'),
api_version=(0, 10, 1))
# Step 1: Creating a StreamListener: override tweepy.StreamListener to add logic to on_status
class MyStreamListener(tweepy.Stream):
def on_status(self, tweet):
length = len(tweet.text.split(' '))
if (tweet.lang != 'en') or (length <= 10):
pass
print("==filtered==")
else:
message = {
"text": tweet.text,
"created_at": process_time(tweet.created_at),
}
# write to kafka topic
producer.send(topic_name, value=message)
#NEED TO ADD code to publish producer only for 5 minutes and exit()
sys.exit()
# Step 2: Creating a Stream
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener)
# Step 3: Starting a Stream
myStream.filter(track=keywords_to_track)

The solution really has nothing to do with Kafka.
Track the time you start the listener and when you want it to end.
import sys
from datetime import datetime, timedelta
class MyStreamListener(tweepy.Stream):
def __init__(self, stop_at):
this.stop_at = stop_at
def on_status(self, tweet):
if datetime.now() >= stop_at:
print("Reached time limit!")
sys.exit()
else:
# process the tweet
end = datetime.now() + timedelta(minutes=5)
myStreamListener = MyStreamListener(end)
myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener)

Related

How to add configuration setting for sasl.mechanism PLAIN (API) and GSSAPI (Kerberos) authentication in python script

Need some help to set the configuration for sasl.mechanism PLAIN (API) and GSSAPI (Kerberos) authentication.
We are using confluent Kafka here, there are two scripts, one a python script and the second one is a bash script which calls the python one. You can find the script below.
Thanks for the help in advance!
import json
import os
import string
import random
import socket
import uuid
import re
from datetime import datetime
import time
import hashlib
import math
import sys
from functools import cache
from confluent_kafka import Producer, KafkaError, KafkaException
topic_name = os.environ['TOPIC_NAME']
partition_count = int(os.environ['PARTITION_COUNT'])
message_key_template = json.loads(os.environ['KEY_TEMPLATE'])
message_value_template = json.loads(os.environ['VALUE_TEMPLATE'])
message_header_template = json.loads(os.environ['HEADER_TEMPLATE'])
bootstrap_servers = os.environ['BOOTSTRAP_SERVERS']
perf_counter_batch_size = int(os.environ.get('PERF_COUNTER_BATCH_SIZE', 100))
messages_per_aggregate = int(os.environ.get('MESSAGES_PER_AGGREGATE', 1))
max_message_count = int(os.environ.get('MAX_MESSAGE_COUNT', sys.maxsize))
def error_cb(err):
""" The error callback is used for generic client errors. These
errors are generally to be considered informational as the client will
automatically try to recover from all errors, and no extra action
is typically required by the application.
For this example however, we terminate the application if the client
is unable to connect to any broker (_ALL_BROKERS_DOWN) and on
authentication errors (_AUTHENTICATION). """
print("Client error: {}".format(err))
if err.code() == KafkaError._ALL_BROKERS_DOWN or \
err.code() == KafkaError._AUTHENTICATION:
# Any exception raised from this callback will be re-raised from the
# triggering flush() or poll() call.
raise KafkaException(err)
def acked(err, msg):
if err is not None:
print("Failed to send message: %s: %s" % (str(msg), str(err)))
producer_configs = {
'bootstrap.servers': bootstrap_servers,
'client.id': socket.gethostname(),
'error_cb': error_cb
}
# TODO: Need to support sasl.mechanism PLAIN (API) and GSSAPI (Kerberos) authentication.
# TODO: Need to support truststores for connecting to private DCs.
producer = Producer(producer_configs)
# generates a random value if it is not cached in the template_values dictionary
def get_templated_value(term, template_values):
if not term in template_values:
template_values[term] = str(uuid.uuid4())
return template_values[term]
def fill_template_value(value, template_values):
str_value = str(value)
template_regex = '{{(.+?)}}'
templated_terms = re.findall(template_regex, str_value)
for term in templated_terms:
str_value = str_value.replace(f"{{{{{term}}}}}", get_templated_value(term, template_values))
return str_value
def fill_template(template, templated_terms):
# TODO: Need to address metadata field, as it's treated as a string instead of a nested object.
return {field: fill_template_value(value, templated_terms) for field, value in template.items()}
#cache
def get_partition(lock_id):
bits = 128
bucket_size = 2**bits / partition_count
partition = (int(hashlib.md5(lock_id.encode('utf-8')).hexdigest(), 16) / bucket_size)
return math.floor(partition)
sequence_number = int(time.time() * 1000)
sequence_number = 0
message_count = 0
producing = True
start_time = time.perf_counter()
aggregate_message_counter = 0
# cache for templated term values so that they match across the different templates
templated_values = {}
try:
while producing:
sequence_number += 1
aggregate_message_counter += 1
message_count += 1
if aggregate_message_counter % messages_per_aggregate == 0:
# reset templated values
templated_values = {}
else:
for term in list(templated_values):
if term not in ['aggregateId', 'tenantId']:
del(templated_values[term])
# Fill in templated field values
message_key = fill_template(message_key_template, templated_values)
message_value = fill_template(message_value_template, templated_values)
message_header = fill_template(message_header_template, templated_values)
ts = datetime.utcnow().isoformat()[:-3]+'Z'
message_header['timestamp'] = ts
message_header['sequence_number'] = str(sequence_number)
message_value['timestamp'] = ts
message_value['sequenceNumber'] = sequence_number
lock_id = message_header['lock_id']
partition = get_partition(lock_id) # partition by lock_id, since key could be random, but a given aggregate_id should ALWAYS resolve to the same partition, regardless of key.
# Send message
producer.produce(topic_name, partition=partition, key=json.dumps(message_key), value=json.dumps(message_value), headers=message_header, callback=acked)
if sequence_number % perf_counter_batch_size == 0:
producer.flush()
end_time = time.perf_counter()
total_duration = end_time - start_time
messages_per_second=(perf_counter_batch_size/total_duration)
print(f'{messages_per_second} messages/second')
# reset start time
start_time = time.perf_counter()
if message_count >= max_message_count:
break
except Exception as e:
print(f'ERROR: %s' % e)
sys.exit(1)
finally:
producer.flush()

Consuming and replying to separate queues | Pika implementation

I am struggling to solve my issue, hope anyone from the community can help me here.
Our requirement is locked and can't be changed as the producer publishing the queues is controlled by a different team.
Producer which is written in JAVA declares three queues (TASK, RESPONSE, TASK_RESPONSE) and listens on them with the help of spring framework.
A hashmap is sent to the TASK and TASK_RESPONSE queue from the java AMQP client (Producer).
We need to consume these hashmaps and send the responses as follows.
If the queue TASK is processed, the response needs to be sent on RESPONSE queue incrementally.
If the queue TASK_RESPONSE is processed, the response needs to be sent on TASK_RESPONSE queue incrementally (RPC mode).
Now, we need to consume and publish this in python since we need to do some background processing on the tasks.
I tried to work with celery and dramatiq, but was not able to figure out how it can be done with them, so I tried writing myself (with the help of tutorials available online)
Problem is, I am able to consume the messages but not able to reply_to the RESPONSE queue. Here is my code.
from collections import OrderedDict
from concurrent.futures import ThreadPoolExecutor
import pika
import datetime
import logging
import json
from logging import StreamHandler
from time import sleep
from random import randint
from pika import SelectConnection
from settings import *
logging.basicConfig(handlers=[StreamHandler()], level=logging.INFO, format=logging.BASIC_FORMAT)
_logger = logging.getLogger(__name__)
class QueueConsumer(object):
"""The consumer class to manage connections to the AMQP server/queue"""
def __init__(self, queue, logger, parameters, thread_id=0):
self.channel = None
self.connection = None
self.queue_name_task = queue['task']
self.queue_name_response = queue['response']
self.logger = logger
self.consumer_id = 'Consumer Thread: %d' % (thread_id,)
self.parameters = pika.ConnectionParameters(**parameters)
def consume(self):
try:
self.connection = SelectConnection(parameters=self.parameters, on_open_callback=self._on_connected)
self.connection.ioloop.start()
except Exception as e:
self.logger.error('{} {}'.format(self.consumer_id, str(e)))
self.connection.close()
self.connection.ioloop.start()
def _on_connected(self, connection):
connection.channel(on_open_callback=self._on_channel_open)
def _on_channel_open(self, channel):
self.channel = channel
try:
# Declare Task Queue
self.channel.queue_declare(queue=self.queue_name_task,
exclusive=False,
durable=True,
auto_delete=False,
callback=self._on_queue_declared)
self.logger.info("{} Opened Channel....".format(self.consumer_id))
# Declare Task Response Queue
self.channel.queue_declare(queue=self.queue_name_response,
exclusive=False,
durable=True,
auto_delete=False)
self.logger.info("{} Opened Channel....".format(self.consumer_id))
except Exception as e:
self.logger.error('{} {}'.format(self.consumer_id, str(e)))
def _on_queue_declared(self, frame):
self.logger.debug('{} ... declaring queue'.format(self.consumer_id))
self.channel.basic_qos(prefetch_count=1)
try:
self.channel.basic_consume(queue=self.queue_name_task,
on_message_callback=self.handle_delivery,
auto_ack=True)
self.logger.info("{} Declared queue...".format(self.consumer_id))
except Exception as e:
self.logger.error('{} crashing:--> {}'.format(self.consumer_id, str(e)))
def handle_delivery(self, channel, method, header, body):
try:
start_time = datetime.datetime.now()
_logger.info("Received...")
_logger.info("Content: %s" % body)
req = json.loads(self.decode(body))
# Do something
sleep(randint(10, 20))
time_taken = datetime.datetime.now() - start_time
log_msg = "[{}] Time Taken: {}.{}".format(req['bar']['baz'], time_taken.seconds, time_taken.microseconds)
_logger.info(log_msg)
# Publish the result to another queue.
try:
self.channel.basic_publish(exchange='',
routing_key=self.queue_name_response,
properties=pika.BasicProperties(),
body=log_msg)
_logger.info("Message Published...\t(%s)" % self.queue_name_response)
except Exception as e:
self.logger.error('{} Message publishing failed:--> {}'.format(self.consumer_id, str(e)))
except Exception as err:
_logger.exception(err)
def decode(self, body):
try:
_body = body.decode('utf-8')
except AttributeError:
_body = body
return _body
if __name__ == "__main__":
pika_parameters = OrderedDict([
('host', TF_BROKER_HOST),
('port', TF_BROKER_PORT),
('virtual_host', TF_BROKER_VHOST)
])
queue = {'task': TF_IAAS_TASK_QUEUE, 'response': TF_IAAS_REPLY_QUEUE}
try:
with ThreadPoolExecutor(max_workers=TF_IAAS_THREAD_SIZE, thread_name_prefix=TF_IAAS_THREAD_PREFIX) as executor:
start = 1
for thread_id in range(start, (TF_IAAS_THREAD_SIZE + start)):
executor.submit(QueueConsumer(queue, _logger, pika_parameters, thread_id).consume)
except Exception as err:
_logger.exception(err)
Publish Messages On RabbitMQ
import pika
import json
import random
import datetime
from faker import Faker
from random import randint
fake = Faker('en_US')
if __name__ == '__main__':
try:
connection = pika.BlockingConnection(pika.ConnectionParameters(
host='localhost'))
channel = connection.channel()
channel.queue_declare(queue='tf_task', durable=True)
started_at = datetime.datetime.now()
properties = pika.BasicProperties(delivery_mode=2)
for i in range(0, 10000):
body = {
'foo': randint(i, i+100),
'bar': {
'baz': fake.name(),
'poo': float(random.randrange(155+i, 389+i))/100
}
}
channel.basic_publish(exchange='',
routing_key='tf_task',
body=json.dumps(body),
properties=properties)
if i%10000 == 0:
duration = datetime.datetime.now() - started_at
print(i, duration.total_seconds())
print(" [x] Sent 'Hello World!'")
connection.close()
now = datetime.datetime.now()
duration = now - started_at
print(duration.total_seconds())
except Exception as e:
print(e)

How to run 2 differents loops in 2 differents threads?

I'm doing a telemetry application using Azure IoT Hub, Azure IoT SDK in Python and a raspberry pi with temperature and humidity sensors.
Humidity + Temperature sensors => Rasperry Pi => Azure IoT Hub
In my first implementation thanks azure examples, I used one loop that collect data from the temperature sensor and the humidity sensor, and send them to Azure IoT Hub in the same time every 60 second.
>>> 1 Loop every 60s = Collect data & send data of temperature and humidity
Now I would like to send them with different frequencies, I mean :
One loop will collect the data of the temperature sensor and send it to Azure IoT Hub every 60 seconds;
Whereas a second loop will collect the data of the humidity sensor and send it to Azure IoT Hub every 600 seconds.
>>> 1 Loop every 60s= Collect data & send data of temperature
>>> 2 Loop every 600s= Collect data & send data of humidity
I think the tool I need is multi-threading, but I don't understand which library or structure I have to implement in my case.
Here is the code provided by Azure, including one loop that handles temperature and humidity at the same time. Reading the data and sending to Azure every 60 seconds.
import random
import time
import sys
# Using the Python Device SDK for IoT Hub:
from iothub_client import IoTHubClient, IoTHubClientError,
IoTHubTransportProvider, IoTHubClientResult
from iothub_client import IoTHubMessage, IoTHubMessageDispositionResult,
IoTHubError, DeviceMethodReturnValue
# The device connection string to authenticate the device with your IoT hub.
CONNECTION_STRING = "{Your IoT hub device connection string}"
# Using the MQTT protocol.
PROTOCOL = IoTHubTransportProvider.MQTT
MESSAGE_TIMEOUT = 10000
# Define the JSON message to send to IoT Hub.
TEMPERATURE = 20.0
HUMIDITY = 60
MSG_TXT = "{\"temperature\": %.2f,\"humidity\": %.2f}"
def send_confirmation_callback(message, result, user_context):
print ( "IoT Hub responded to message with status: %s" % (result) )
def iothub_client_init():
# Create an IoT Hub client
client = IoTHubClient(CONNECTION_STRING, PROTOCOL)
return client
def iothub_client_telemetry_sample_run():
try:
client = iothub_client_init()
print ( "IoT Hub device sending periodic messages, press Ctrl-C to exit" )
#******************LOOP*******************************
while True:
# Build the message with simulated telemetry values.
temperature = TEMPERATURE + (random.random() * 15)
humidity = HUMIDITY + (random.random() * 20)
msg_txt_formatted = MSG_TXT % (temperature, humidity)
message = IoTHubMessage(msg_txt_formatted)
# Send the message.
print( "Sending message: %s" % message.get_string() )
client.send_event_async(message, send_confirmation_callback, None)
time.sleep(60)
except IoTHubError as iothub_error:
print ( "Unexpected error %s from IoTHub" % iothub_error )
return
except KeyboardInterrupt:
print ( "IoTHubClient sample stopped" )
if __name__ == '__main__':
print ( "IoT Hub Quickstart #1 - Simulated device" )
print ( "Press Ctrl-C to exit" )
iothub_client_telemetry_sample_run()
I would like to use the same structure of functions, including two loops that handles temperature and humidity, one every 60s and one every 600s.
while True:
# Build the message with simulated telemetry values.
temperature = TEMPERATURE + (random.random() * 15)
msg_txt_formatted1 = MSG_TXT1 % (temperature)
message1 = IoTHubMessage(msg_txt_formatted1)
# Send the message.
print( "Sending message: %s" % message1.get_string() )
client.send_event_async(message1, send_confirmation_callback, None)
time.sleep(60)
while True:
# Build the message with simulated telemetry values.
humidity = HUMIDITY + (random.random() * 20)
msg_txt_formatted2 = MSG_TXT2 % (humidity)
message2 = IoTHubMessage(msg_txt_formatted2)
# Send the message.
print( "Sending message: %s" % message2.get_string() )
client.send_event_async(message2, send_confirmation_callback, None)
time.sleep(600)
How can I do that? How to call those loops with multi-threading or another method?
It may be simpler to do something like
while True:
loop_b()
for _ in range(10):
loop_a()
time.sleep(60)
or even
while True:
time.sleep(1)
now = time.time()
if now % 60 == 0:
loop_a()
if now % 600 == 0:
loop_b()
But if you really want to use threads, then:
import threading
class LoopAThread(threading.Thread):
def run(self):
loop_a()
class LoopBThread(threading.Thread):
def run(self):
loop_b()
...
thread_a = LoopAThread()
thread_b = LoopBThread()
thread_a.start()
thread_b.start()
thread_a.join()
thread_b.join()
Here are two competing approaches to consider
Don't bother with threads at all. Just have one loop that sleeps every 60 seconds like you have now. Keep track of the last time you sent humidity data. If 600 seconds has passed, then send it. Otherwise, skip it and go to sleep for 60 seconds. Something like this:
from datetime import datetime, timedelta
def iothub_client_telemetry_sample_run():
last_humidity_run = None
humidity_period = timedelta(seconds=600)
client = iothub_client_init()
while True:
now = datetime.now()
send_temperature_data(client)
if not last_humidity_run or now - last_humidity_run >= humidity_period:
send_humidity_data(client)
last_humidity_run = now
time.sleep(60)
Rename iothub_client_telemetry_sample_run to temperature_thread_func or something like it. Create a separate function that looks just like it for humidity. Spawn two threads from the main function of your program. Set them to daemon mode so they shutdown when the user exits
from threading import Thread
def temperature_thread_func():
client = iothub_client_init()
while True:
send_temperature_data(client)
time.sleep(60)
def humidity_thread_func():
client = iothub_client_init()
while True:
send_humidity_data(client)
time.sleep(600)
if __name__ == '__main__':
temp_thread = Thread(target=temperature_thread_func)
temp_thread.daemon = True
humidity_thread = Thread(target=humidity_thread_func)
humidity_thread.daemon = True
input('Polling for data. Press a key to exit')
Notes:
If you decide to use threads, consider using an
event
to terminate them cleanly.
time.sleep is not a precise way to keep
time. You might need a different timing mechanism if the samples need
to be taken at precise moments.

Getting no msgs when reading Azure Event Hub with Python using EventHubClient

I have an Azure Event Hub that has messages in it. I wrote the msgs with a Python app and can see the correct msg count in the Event Hub GUI. But I cannot seem to read the msg with Python. My code is below. It runs with no errors but gets zero results.
Oddly, after I run this code, the Event Hub GUI shows that all the msgs (a couple thousand) were outgoing, indicating that my program actually got them. But the code never displays them.
Any help appreciated!
The result is always...
Msg offset: <azure.eventhub.common.Offset object at 0x102fc4e10>
Msg seq: 0
Msg body: 0
Received 1 messages in 0.11292386054992676 seconds
++++++++++++
# pip install azure-eventhub
import logging
import time
from azure.eventhub import EventHubClient, Receiver, Offset
logger = logging.getLogger("azure")
# URL of the event hub, amqps://<mynamespace>.servicebus.windows.net/myeventhub
ADDRESS = "amqps://chc-eh-ns.servicebus.windows.net/chc-eh"
# Access tokens for event hub namespace, from Azure portal for namespace
USER = "RootManageSharedAccessKey"
KEY = "XXXXXXXXXXXXXXXXXXXXXXXXXX"
# Additional setup to receive events
CONSUMER_GROUP = "$default" # our view of the event hub, useful when there is more than one consumer at same time
PARTITION = "0" # which stream within event hub
OFFSET = Offset("-1") # get all msgs in event hub. msgs are never removed, they just expire per event hub settings
PREFETCH = 100 # not sure exactly what this does ??
# Initialize variables
total = 0
last_sn = -1
last_offset = -1
client = EventHubClient(ADDRESS, debug=False, username=USER, password=KEY)
try:
receiver = client.add_receiver(CONSUMER_GROUP, PARTITION, prefetch=PREFETCH, offset=OFFSET)
client.run()
start_time = time.time()
for event_data in receiver.receive(timeout=100):
last_offset = event_data.offset
last_sn = event_data.sequence_number
print("Msg offset: " + str(last_offset))
print("Msg seq: " + str(last_sn))
print("Msg body: " + event_data.body_as_str())
total += 1
end_time = time.time()
client.stop()
run_time = end_time - start_time
print("\nReceived {} messages in {} seconds".format(total, run_time))
except KeyboardInterrupt:
pass
finally:
client.stop()
Got it! The code sample I copied was wrong. You have to get batches of messages then iterate over each batch. Here is the correct code...
# pip install azure-eventhub
import time
from azure.eventhub import EventHubClient, Offset
# URL of the event hub, amqps://<mynamespace>.servicebus.windows.net/myeventhub
ADDRESS = "amqps://chc-eh-ns.servicebus.windows.net/chc-eh"
# Access tokens for event hub namespace, from Azure portal for namespace
USER = "RootManageSharedAccessKey"
KEY = "XXXXXXXXXXXX"
# Additional setup to receive events
CONSUMER_GROUP = "$default" # our view of the event hub, useful when there is more than one consumer at same time
PARTITION = "0" # which stream within event hub
OFFSET = Offset("-1") # get all msgs in event hub. msgs are never removed, they just expire per event hub settings
PREFETCH = 100 # batch size ??
# Initialize variables
total = 0
last_sn = -1
last_offset = -1
client = EventHubClient(ADDRESS, debug=False, username=USER, password=KEY)
try:
receiver = client.add_receiver(CONSUMER_GROUP, PARTITION, prefetch=PREFETCH, offset=OFFSET)
client.run()
start_time = time.time()
batch = receiver.receive(timeout=5000)
while batch:
for event_data in batch:
last_offset = event_data.offset
last_sn = event_data.sequence_number
print("Msg offset: " + str(last_offset))
print("Msg seq: " + str(last_sn))
print("Msg body: " + event_data.body_as_str())
total += 1
batch = receiver.receive(timeout=5000)
end_time = time.time()
client.stop()
run_time = end_time - start_time
print("\nReceived {} messages in {} seconds".format(total, run_time))
except KeyboardInterrupt:
pass
finally:
client.stop()

Processing event hub data using python

I am using azure event hub python SDK to send to and receive messages from event hub following this link.https://github.com/Azure/azure-event-hubs-python/tree/develop. I can successfully send and receive messages. But how do i parse the messages and retrieve the data from the event data object. Please find the code below.
import os
import sys
#import logging
from azure.eventhub import EventHubClient, Receiver, Offset
ADDRESS = 'sb://####.servicebus.windows.net/#####'
USER = '##########'
KEY = '##################################'
CONSUMER_GROUP = "$default"
OFFSET = Offset("-1")
PARTITION = "1"
total = 0
last_sn = -1
last_offset = "-1"
try:
if not ADDRESS:
raise ValueError("No EventHubs URL supplied.")
client = EventHubClient(ADDRESS, debug=False, username=USER, password=KEY)
receiver = client.add_receiver(CONSUMER_GROUP, PARTITION, prefetch=5000,
offset=OFFSET)
client.run()
try:
batched_events = receiver.receive(timeout=20)
except:
raise
finally:
client.stop()
for event_data in batched_events:
last_offset = event_data.offset.value
last_sn = event_data.sequence_number
total += 1
print("Partition {}, Received {}, sn={} offset={}".format(
PARTITION,
total,
last_sn,
last_offset))
except KeyboardInterrupt:
pass
if i try to view the event_data received i can see the below message.
event_data
<azure.eventhub.common.EventData at 0xd4f1358>
event_data.message
<uamqp.message.Message at 0xd4f1240>
Any help on the above on how to parse this message to extract the data
As of 1.1.0, there are new utility methods to extract the actual data of the message:
body_as_str
body_as_json
So, what used to be
import json
event_obj = json.loads(next(event_data.body).decode('UTF-8'))
Is now:
event_obj = event_data.body_as_json()
For people using the Event Hub version 5.2.0 -- latest as of today (GitHub, Reference Docs), it's the same as the 1.1.0 version, i.e. use body_as_str() or body_as_json(). But the client has changed -- there's an EventHubProducerClient and an EventHubConsumerClient in the new version. To print the body of an event received:
from azure.eventhub import EventHubConsumerClient
connection_str = '<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>'
consumer_group = '<< CONSUMER GROUP >>'
eventhub_name = '<< NAME OF THE EVENT HUB >>'
client = EventHubConsumerClient.from_connection_string(
connection_str, consumer_group, eventhub_name=eventhub_name
)
def on_event_batch(partition_context, events):
partition_context.update_checkpoint()
for e in events:
print(e.body_as_str())
with client:
client.receive_batch(
on_event_batch=on_event_batch,
starting_position="-1", # "-1" is from the beginning of the partition.
)

Categories