AWS EC2: How to prevent infinite loop for instance status check? - python

I have the following python boto3 code with a potentially infinite while-loop. Generally, after a few minutes the while-loop succeeds. However, if something fails on the AWS side the program could hang for an indefinite period.
I am sure that this is not the most appropriate way to do this.
# credentials stored in ../.aws/credentials
# region stored in ../.aws/config
# builtins
from time import sleep
# plugins
import boto3
# Assign server instance IDs.
cye_production_web_server_2 = 'i-FAKE-ID'
# Setup EC2 client
ec2 = boto3.client('ec2')
# Start the second web server.
start_response = ec2.start_instances(
InstanceIds=[cye_production_web_server_2, ],
DryRun=False
)
# Report the instance id and the state returned by the start request.
print(
'instance id:',
start_response['StartingInstances'][0]['InstanceId'],
'is',
start_response['StartingInstances'][0]['CurrentState']['Name']
)
# Wait until status is 'ok'
# NOTE(review): the loop condition has no attempt limit or deadline, so it only
# ends once the reported system status equals 'ok'.
status = None
while status != 'ok':
status_response = ec2.describe_instance_status(
DryRun=False,
InstanceIds=[cye_production_web_server_2, ],
)
# NOTE(review): indexing [0] assumes the response always lists the instance —
# TODO confirm what the API returns while the instance is still starting.
status = status_response['InstanceStatuses'][0]['SystemStatus']['Status']
sleep(5) # 5 second throttle
print(status_response)
print('status is', status.capitalize())

Implement a counter in the loop and fail after so many attempts
# Poll the system status, giving up after a fixed number of attempts.
status = None
status_response = None
counter = 5
while status != 'ok' and counter > 0:
    status_response = ec2.describe_instance_status(
        DryRun=False,
        InstanceIds=[cye_production_web_server_2, ],
    )
    # The list can be empty (by default only running instances are
    # described), so only read the status when an entry is present.
    instance_statuses = status_response['InstanceStatuses']
    if instance_statuses:
        status = instance_statuses[0]['SystemStatus']['Status']
    counter -= 1
    # Throttle only when another attempt will actually follow.
    if status != 'ok' and counter > 0:
        sleep(5)  # 5 second throttle
print(status_response)
# str() keeps this line safe when no status was ever reported (None).
print('status is', str(status).capitalize())
if status != 'ok':
    # Fail explicitly instead of silently continuing with an unready instance.
    raise RuntimeError(
        'instance %s did not reach system status ok'
        % cye_production_web_server_2
    )

You could try doing it in a for loop instead with a fixed number of tries.
For example:
MAX_RETRIES = 5
status = None
# Try until status is 'ok', at most MAX_RETRIES times.
for attempt in range(MAX_RETRIES):
    status_response = ec2.describe_instance_status(
        DryRun=False,
        InstanceIds=[cye_production_web_server_2, ],
    )
    # The list can be empty (by default only running instances are
    # described); treat that as "no status yet" rather than raising IndexError.
    instance_statuses = status_response['InstanceStatuses']
    status = (
        instance_statuses[0]['SystemStatus']['Status']
        if instance_statuses else None
    )
    if status == 'ok':
        break
    # No point in sleeping after the final attempt.
    if attempt < MAX_RETRIES - 1:
        sleep(5)  # 5 second throttle
else:
    # Loop ended without a break: every attempt was used up.
    print('status is still', status, 'after', MAX_RETRIES, 'attempts')

Using a timeout might be a better idea:
import time

# Overall time budget for the status check, in minutes.
TIMEOUT_MINUTES = 10

systemstatus = False
timeout = time.time() + 60 * TIMEOUT_MINUTES
while not systemstatus:
    status = ec2.describe_instance_status(
        DryRun=False,
        InstanceIds=[instance_id],
    )
    # The list can be empty (by default only running instances are
    # described), so check for an entry before indexing.
    instance_statuses = status['InstanceStatuses']
    if instance_statuses and \
            instance_statuses[0]['SystemStatus']['Status'] == 'ok':
        systemstatus = True
    elif time.time() > timeout:
        # Deadline passed: leave the loop with systemstatus still False.
        break
    else:
        time.sleep(10)

Related

Selenium Web driver, python: Test passes in local but fails in GitHub CI due to AssertionError

I've run into a problem: one of my tests passes locally but fails in CI.
I have a progress bar for starting a server. It shows the positions 0%, 50% and 100%, the status messages "Server requested", "Spawning server..." and "Server ready at", and a status log.
I have rewritten this test case a few times but the result is the same: I get an AssertionError because one of the expected values cannot be caught in CI.
I don't have much experience with Selenium and I'm stuck on how to fix this problem. I'd appreciate any help!
# Keep checking while the URL contains '/spawn-pending/' or is_displayed()
# reports the progress bar.
while '/spawn-pending/' in browser.current_url or is_displayed(
browser, SpawningPageLocators.PROGRESS_BAR
):
# checking text messages that the server is starting to up
texts = browser.find_elements(*SpawningPageLocators.TEXT_SERVER)
texts_list = []
for text in texts:
# NOTE(review): the bare expression below discards its value; only the
# append on the following line uses the element text.
text.text
texts_list.append(text.text)
assert str(texts_list[0]) == SpawningPageLocators.TEXT_SERVER_STARTING
assert str(texts_list[1]) == SpawningPageLocators.TEXT_SERVER_REDIRECT
# checking progress of the servers starting (messages, % progress and events log))
logs_list = []
# NOTE(review): the except clause matching this try is not visible in the excerpt.
try:
progress_bar = browser.find_element(*SpawningPageLocators.PROGRESS_BAR)
progress = browser.find_elements(*SpawningPageLocators.PROGRESS_MESSAGE)
logs = browser.find_elements(*SpawningPageLocators.PROGRESS_LOG)
for status_message in progress:
progress_messages = status_message.text
# Take the value of the first declaration in the bar's inline style
# (text between the first ':' and the first ';').
percent = (
progress_bar.get_attribute('style')
.split(';')[0]
.split(':')[1]
.strip()
)
# Collect the text of every log element, including empty strings.
for i in range(len(logs)):
progress_log = logs[i].text
logs_list.append(progress_log)
if progress_messages == "":
assert percent == "0%"
assert int(len(logs_list)) == 0
if progress_messages == "Server requested":
assert percent == "0%"
assert int(len(logs_list)) == 1
> assert str(logs_list[0]) == "Server requested"
E AssertionError: assert '' == 'Server requested'
E - Server requested
After I rewrote it to:
# Keep checking while the URL contains '/spawn-pending/'.
while '/spawn-pending/' in browser.current_url:
# Read the progress message first; the bar and logs are read later in the
# same iteration.
progress_message = browser.find_element(
*SpawningPageLocators.PROGRESS_MESSAGE
).text
# checking text messages that the server is starting to up
texts = browser.find_elements(*SpawningPageLocators.TEXT_SERVER)
texts_list = []
for text in texts:
# NOTE(review): the bare expression below discards its value; only the
# append on the following line uses the element text.
text.text
texts_list.append(text.text)
assert str(texts_list[0]) == SpawningPageLocators.TEXT_SERVER_STARTING
assert str(texts_list[1]) == SpawningPageLocators.TEXT_SERVER_REDIRECT
# checking progress of the servers starting (messages, % progress and events log))
logs_list = []
try:
progress_bar = browser.find_element(*SpawningPageLocators.PROGRESS_BAR)
progress = browser.find_element(*SpawningPageLocators.PROGRESS_MESSAGE)
logs = browser.find_elements(*SpawningPageLocators.PROGRESS_LOG)
# Take the value of the first declaration in the bar's inline style.
percent = (
progress_bar.get_attribute('style').split(';')[0].split(':')[1].strip()
)
for log in logs:
# only include non-empty log messages
# avoid partially-created elements
if log.text:
logs_list.append(log.text)
# Stop checking when an element is missing or has gone stale.
except (NoSuchElementException, StaleElementReferenceException):
break
expected_messages = [
"Server requested",
"Spawning server...",
f"Server ready at {app.base_url}user/{user.name}/",
]
if progress_message:
assert progress_message in expected_messages
if logs_list:
# race condition: progress_message _should_
# be the last log message, but it _may_ be the next one
assert progress_message
# NOTE(review): percent was read after progress_message and the logs were read
# after percent, so the values compared below may come from different moments
# of the page — presumably the source of the CI-only failure; confirm.
if len(logs_list) < 2:
> assert percent == "0%"
Got: AssertionError: assert '50%' == '0%' on the line `assert percent == "0%"`

Device twin Reported properties are not updating properly with python sdk

I have divided my software update process into several stages (download, unzip, pre-install, install, post-install), and I set a reported property at every stage.
However, these properties are not updated during the installation: I cannot see the reported property change in the device twin on the Azure portal while the update runs. Only at the end of the installation do I receive the callback responses for all of the "reported" properties that were set.
I am working on software update with Azure IOT using python device sdk.
For this I have modified the sample given in the SDK (i.e. the iothub_client_sample_class.py file). I am using the device twin to update the software. I created a "desired" property for "software_version" in the device twin. Once the "desired" property for "software_version" is changed, the software update process starts. The software update process performs various operations, so I have divided it into several stages. I send a "reported" property to IoT Hub for every stage, but these "reported" properties are not updated in sequence in the device twin on the Azure portal.
import random
import time
import sys
import iothub_client
import json
from iothub_client import IoTHubClient, IoTHubClientError, IoTHubTransportProvider
from iothub_client import IoTHubMessage, IoTHubMessageDispositionResult, IoTHubError, DeviceMethodReturnValue
from iothub_client_args import get_iothub_opt, OptionError
# HTTP options
# Because it can poll "after 9 seconds" polls will happen effectively
# at ~10 seconds.
# Note that for scalability, the default value of minimumPollingTime
# is 25 minutes. For more information, see:
# https://azure.microsoft.com/documentation/articles/iot-hub-devguide/#messaging
TIMEOUT = 241000
MINIMUM_POLLING_TIME = 9
# messageTimeout - the maximum time in milliseconds until a message times out.
# The timeout period starts at IoTHubClient.send_event_async.
# By default, messages do not expire.
MESSAGE_TIMEOUT = 1000
RECEIVE_CONTEXT = 0
AVG_WIND_SPEED = 10.0
MIN_TEMPERATURE = 20.0
MIN_HUMIDITY = 60.0
MESSAGE_COUNT = 5
RECEIVED_COUNT = 0
# Context value handed to set_device_twin_callback by HubManager.
TWIN_CONTEXT = 0
METHOD_CONTEXT = 0
# global counters
RECEIVE_CALLBACKS = 0
SEND_CALLBACKS = 0
BLOB_CALLBACKS = 0
TWIN_CALLBACKS = 0
SEND_REPORTED_STATE_CALLBACKS = 0
METHOD_CALLBACKS = 0
# True until device_twin_callback has handled its first payload.
firstTime = True
# Set by main(); start_software_update() sends reported state through it.
hub_manager = None
PROTOCOL = IoTHubTransportProvider.MQTT
CONNECTION_STRING = "XXXXXX"
# Version that incoming "software_version" values are compared against.
base_version = '1.0.0.000'
# Return value the stage functions use to signal success.
SUCCESS = 0
# NOTE(review): firstTime is assigned a second time here with the same value.
firstTime = True
def downloadImage(url):
    """Download the update package from *url*; return 0 on success.

    Placeholder: the real download code is not included in this listing.
    """
    # Code for downloading the package from url
    return 0


def unzipPackage():
    """Unzip the downloaded package; return 0 on success (placeholder)."""
    # code for unzipping the package
    return 0


def readPackageData():
    """Read the package data; return 0 on success (placeholder)."""
    # code reading package data
    return 0


def pre_install():
    """Install the package's dependencies; return 0 on success (placeholder)."""
    # code for installing dependencies
    return 0


def install():
    """Install the main package; return 0 on success (placeholder)."""
    # code for installing main package
    return 0


def post_install():
    """Verify the installation; return 0 on success (placeholder)."""
    # code for verifying installation
    return 0
def _report_update_status(update_status, user_context):
    """Send ``{"updateStatus":<update_status>}`` as a reported property.

    *user_context* is passed through to the send call unchanged. The call is
    followed by a one second pause, as in every stage of the update.
    """
    # Compact separators reproduce the payload format used elsewhere in the
    # script: no spaces after ':' or ','.
    reported_state = json.dumps(
        {"updateStatus": update_status}, separators=(",", ":"))
    hub_manager.send_reported_state(
        reported_state, len(reported_state), user_context)
    time.sleep(1)


def start_software_update(url, message):
    """Run every update stage in order, reporting progress after each one.

    Args:
        url: location of the package, handed to ``downloadImage``.
        message: the twin payload that triggered the update (currently unused).

    Returns:
        True when all stages returned ``SUCCESS``; False as soon as one stage
        fails (later stages are then skipped and nothing more is reported).
    """
    print("Starting software update process!!!!")
    _report_update_status("softwareUpdateinprogress", 1003)

    # (stage callable, status reported on success, send context,
    #  message printed on success, message printed on failure)
    stages = (
        (lambda: downloadImage(url), "downloadComplete", 1004,
         "Downlaod Phase Done!!!", "Download Phase failed!!!!"),
        (unzipPackage, "UnzipComplete", 1005,
         "Unzip Package Done!!!", "Unzip package failed!!!"),
        (readPackageData, "ReadPackageData", 1006,
         "Reading package json data", "Failed Reading package!!!"),
        (pre_install, "PreInstallComplete", 1007,
         "pre_install state successful!!!", "pre_install failed!!!!"),
        (install, "InstallComplete", 1008,
         "install sucessful!!!", "install failed!!!"),
        (post_install, "SoftwareUpdateComplete", 1009,
         "post install sucessful!!!", "post install failed!!!"),
    )
    for run_stage, done_status, context, done_text, failed_text in stages:
        if run_stage() != SUCCESS:
            print(failed_text)
            return False
        _report_update_status(done_status, context)
        print(done_text)
    return True
def _run_software_update(url, message):
    """Run the software update to completion and print the overall outcome."""
    if start_software_update(url, message):
        print("software Update Successful!!!")
    else:
        print("software Update Unsuccessful!!!")


def device_twin_callback(update_state, payload, user_context):
    """Handle a device twin payload delivered by the IoT Hub client.

    The first payload only records ``desired.software_version`` as the base
    version. Every later payload whose ``software_version`` differs from the
    base version starts a software update using the payload's ``url``.

    Args:
        update_state: update state value supplied by the client.
        payload: JSON text of the twin document or update.
        user_context: context registered together with the callback.
    """
    global TWIN_CALLBACKS
    global firstTime
    global base_version
    # Local import: the script's import block does not include threading.
    import threading

    print("\nTwin callback called with:\nupdateStatus = %s\npayload = %s\ncontext = %s" % (update_state, payload, user_context))
    TWIN_CALLBACKS += 1
    print("Total calls confirmed: %d\n" % TWIN_CALLBACKS)
    message = json.loads(payload)

    if firstTime:
        base_version = message["desired"]["software_version"]
        print("Set firstTime to false %s" % base_version)
        firstTime = False
        return

    if message["software_version"] != base_version:
        # Run the multi-stage update off the callback so the callback returns
        # immediately.
        # NOTE(review): presumably the client delivers callbacks and sends
        # queued reported-state updates from the same worker thread, so a
        # callback that blocks for the whole update would hold back every
        # per-stage report until it returns — confirm against the SDK docs.
        updater = threading.Thread(
            target=_run_software_update, args=(message["url"], message))
        updater.daemon = True
        updater.start()
def send_reported_state_callback(status_code, user_context):
    """Print the confirmation for one reported-state send and count it.

    Args:
        status_code: numeric status delivered with the confirmation.
        user_context: context value that was passed to the send call.
    """
    global SEND_REPORTED_STATE_CALLBACKS
    confirmation = (
        "Confirmation for reported state received with:\nstatus_code = [%d]\ncontext = %s"
        % (status_code, user_context)
    )
    print(confirmation)
    SEND_REPORTED_STATE_CALLBACKS = SEND_REPORTED_STATE_CALLBACKS + 1
    running_total = " Total calls confirmed: %d" % SEND_REPORTED_STATE_CALLBACKS
    print(running_total)
class HubManager(object):
    """Owns the IoTHubClient and registers the device twin callback on it."""

    def __init__(self, connection_string,
                 protocol=IoTHubTransportProvider.MQTT):
        """Create the client, apply its options and register the twin callback.

        Args:
            connection_string: device connection string given to IoTHubClient.
            protocol: transport provider to use; MQTT when omitted.
        """
        self.client_protocol = protocol
        self.client = IoTHubClient(connection_string, protocol)
        client = self.client

        # These two options are applied for the HTTP transport only.
        http_only_options = (
            ("timeout", TIMEOUT),
            ("MinimumPollingTime", MINIMUM_POLLING_TIME),
        )
        if protocol == IoTHubTransportProvider.HTTP:
            for option_name, option_value in http_only_options:
                client.set_option(option_name, option_value)

        # set the time until a message times out
        client.set_option("messageTimeout", MESSAGE_TIMEOUT)
        # some embedded platforms need certificate information
        # self.set_certificates()
        client.set_device_twin_callback(device_twin_callback, TWIN_CONTEXT)

    def send_reported_state(self, reported_state, size, user_context):
        """Forward a reported-state document to the client.

        send_reported_state_callback is registered as the confirmation
        callback and receives *user_context*.
        """
        self.client.send_reported_state(
            reported_state,
            size,
            send_reported_state_callback,
            user_context,
        )
def main(connection_string, protocol):
    """Create the hub manager, report the initial status, then idle.

    Args:
        connection_string: device connection string for the hub manager.
        protocol: transport provider for the hub manager.

    The function sleeps in one second steps until interrupted from the
    keyboard; an IoTHubError is printed and ends the function.
    """
    global hub_manager
    initial_state = '{"updateStatus":"waitingforupdate"}'
    try:
        print("\nPython %s\n" % sys.version)
        print("IoT Hub Client for Python")
        hub_manager = HubManager(connection_string, protocol)
        print("Starting the IoT Hub Python sample using protocol %s..." % hub_manager.client_protocol)
        hub_manager.send_reported_state(initial_state, len(initial_state), 1002)
        # Idle here; twin payloads are handled in device_twin_callback.
        while True:
            time.sleep(1)
    except IoTHubError as iothub_error:
        print("Unexpected error %s from IoTHub" % iothub_error)
        return
    except KeyboardInterrupt:
        print("IoTHubClient sample stopped")
if __name__ == '__main__':
    # Let command-line options override the built-in connection string and
    # protocol before starting the sample.
    try:
        (CONNECTION_STRING, PROTOCOL) = get_iothub_opt(sys.argv[1:], CONNECTION_STRING)
    except OptionError as option_error:
        print(option_error)
        # No usage() helper is defined or imported in this script, so the
        # hint is printed inline instead of raising NameError here.
        # NOTE(review): option letters assumed from the SDK's
        # iothub_client_args helper — confirm.
        print("Usage: %s -p <protocol> -c <connectionstring>" % sys.argv[0])
        sys.exit(1)
    main(CONNECTION_STRING, PROTOCOL)
Expected Result: The "reported" property for every stage in software update should update properly in device twin in azure portal.
Actual Result: The "reported" property for each stage in software update process is not updating properly.

Getting no msgs when reading Azure Event Hub with Python using EventHubClient

I have an Azure Event Hub that has messages in it. I wrote the msgs with a Python app and can see the correct msg count in the Event Hub GUI. But I cannot seem to read the msg with Python. My code is below. It runs with no errors but gets zero results.
Oddly, after I run this code, the Event Hub GUI shows that all the msgs (a couple thousand) were outgoing, indicating that my program actually got them. But the code never displays them.
Any help appreciated!
The result is always...
Msg offset: <azure.eventhub.common.Offset object at 0x102fc4e10>
Msg seq: 0
Msg body: 0
Received 1 messages in 0.11292386054992676 seconds
++++++++++++
# pip install azure-eventhub
import logging
import time
from azure.eventhub import EventHubClient, Receiver, Offset
logger = logging.getLogger("azure")
# URL of the event hub, amqps://<mynamespace>.servicebus.windows.net/myeventhub
ADDRESS = "amqps://chc-eh-ns.servicebus.windows.net/chc-eh"
# Access tokens for event hub namespace, from Azure portal for namespace
USER = "RootManageSharedAccessKey"
KEY = "XXXXXXXXXXXXXXXXXXXXXXXXXX"
# Additional setup to receive events
CONSUMER_GROUP = "$default" # our view of the event hub, useful when there is more than one consumer at same time
PARTITION = "0" # which stream within event hub
OFFSET = Offset("-1") # get all msgs in event hub. msgs are never removed, they just expire per event hub settings
PREFETCH = 100 # not sure exactly what this does ??
# Initialize variables
total = 0
last_sn = -1
last_offset = -1
client = EventHubClient(ADDRESS, debug=False, username=USER, password=KEY)
try:
receiver = client.add_receiver(CONSUMER_GROUP, PARTITION, prefetch=PREFETCH, offset=OFFSET)
client.run()
start_time = time.time()
# NOTE(review): receive() is called exactly once here, so only what that single
# call returns is ever processed.
for event_data in receiver.receive(timeout=100):
last_offset = event_data.offset
last_sn = event_data.sequence_number
print("Msg offset: " + str(last_offset))
print("Msg seq: " + str(last_sn))
print("Msg body: " + event_data.body_as_str())
total += 1
end_time = time.time()
# NOTE(review): the finally clause below calls client.stop() again.
client.stop()
run_time = end_time - start_time
print("\nReceived {} messages in {} seconds".format(total, run_time))
except KeyboardInterrupt:
pass
finally:
client.stop()
Got it! The code sample I copied was wrong. You have to get batches of messages then iterate over each batch. Here is the correct code...
# pip install azure-eventhub
import time
from azure.eventhub import EventHubClient, Offset

# URL of the event hub, amqps://<mynamespace>.servicebus.windows.net/myeventhub
ADDRESS = "amqps://chc-eh-ns.servicebus.windows.net/chc-eh"
# Access tokens for event hub namespace, from Azure portal for namespace
USER = "RootManageSharedAccessKey"
KEY = "XXXXXXXXXXXX"
# Additional setup to receive events
CONSUMER_GROUP = "$default"  # our view of the event hub, useful when there is more than one consumer at same time
PARTITION = "0"  # which stream within event hub
OFFSET = Offset("-1")  # get all msgs in event hub. msgs are never removed, they just expire per event hub settings
PREFETCH = 100  # batch size ??
# Passed to every receive() call below.
RECEIVE_TIMEOUT = 5000

# Initialize variables
total = 0
last_sn = -1
last_offset = -1

client = EventHubClient(ADDRESS, debug=False, username=USER, password=KEY)
try:
    receiver = client.add_receiver(CONSUMER_GROUP, PARTITION, prefetch=PREFETCH, offset=OFFSET)
    client.run()
    start_time = time.time()
    # Keep asking for batches until receive() returns an empty one.
    batch = receiver.receive(timeout=RECEIVE_TIMEOUT)
    while batch:
        for event_data in batch:
            last_offset = event_data.offset
            last_sn = event_data.sequence_number
            print("Msg offset: " + str(last_offset))
            print("Msg seq: " + str(last_sn))
            print("Msg body: " + event_data.body_as_str())
            total += 1
        batch = receiver.receive(timeout=RECEIVE_TIMEOUT)
    run_time = time.time() - start_time
    print("\nReceived {} messages in {} seconds".format(total, run_time))
except KeyboardInterrupt:
    pass
finally:
    # Single shutdown point: runs on success, on Ctrl-C and on errors, so the
    # client is stopped exactly once.
    client.stop()

Python thread blocks after getting from Queue

I have a list of emails from the same domain that I want to validate. First, I check whether the domain allows verification or is a catch-all (accepts every email user as valid). E.g. assuming [a@domain.com, b@domain.com] is the list, I check whether foo-fake@domain.com would be valid; if it is, I return from the function.
If not, I use multiple threads to verify all emails in the list. If all fail, return the last item. If one is valid while other threads are still running, stop them and return the valid one.
from Queue import Queue
import threading
import smtplib
class MailWorker(threading.Thread):
    """Worker thread that SMTP-probes addresses taken from a queue.

    Every ``(email, host)`` pair read from ``in_que`` produces exactly one
    ``(email, status, code)`` tuple on ``out_que``, whatever the outcome, so
    a consumer waiting on ``out_que`` is never left without an answer.

    Result codes: the SMTP reply code of ``RCPT TO`` (250 means accepted),
    -2 after repeated unexpected disconnects, -1 for any other SMTP or
    socket error.
    """

    # Kept for callers that still set it; the worker loop does not read it.
    kill = False
    # Total attempts made for replies that are worth retrying.
    MAX_ATTEMPTS = 3
    # Seconds to wait between attempts.
    RETRY_DELAY = 2
    # Socket timeout, in seconds, for the SMTP connection.
    SMTP_TIMEOUT = 20
    # Envelope sender used for the probe.
    SENDER = "info@example.com"

    def __init__(self, in_que, out_que):
        """Store the work queue (``in_que``) and result queue (``out_que``)."""
        super(MailWorker, self).__init__()
        self.in_que = in_que
        self.out_que = out_que

    def run(self):
        """Process ``(email, host)`` pairs from ``in_que`` forever."""
        while True:
            email, host = self.in_que.get()
            try:
                self.test_email(email, host)
            finally:
                # Always acknowledge the item so in_que.join() can return
                # even if the probe raised something unexpected.
                self.in_que.task_done()

    def _rcpt_code(self, email, host):
        """Open one SMTP session and return the reply code of ``RCPT TO``.

        The connection is always closed before returning or raising.
        """
        import socket

        server = smtplib.SMTP(timeout=self.SMTP_TIMEOUT)
        try:
            server.connect(host, 25)
            server.ehlo_or_helo_if_needed()
            server.docmd('mail from:', "<{}>".format(self.SENDER))
            code, _response = server.docmd('rcpt to:', "<{}>".format(email))
            return code
        finally:
            try:
                server.quit()
            except (smtplib.SMTPException, socket.error):
                # quit() fails when the session is already gone; just drop
                # whatever is left of the socket.
                server.close()

    def test_email(self, email, host, retry=0):
        """Probe *email* on *host* and put one result tuple on ``out_que``.

        Args:
            email: address to test with ``RCPT TO``.
            host: SMTP host to connect to (port 25).
            retry: number of attempts already counted as used.

        A 421 reply and an unexpected disconnect are retried until
        ``MAX_ATTEMPTS`` is reached; everything else is final.
        """
        import socket
        import time

        attempt = retry
        while True:
            try:
                code = self._rcpt_code(email, host)
            except smtplib.SMTPServerDisconnected:
                status, code = "SMTP Disconnected", -2
            except (smtplib.SMTPException, socket.error) as error:
                status, code = "SMTP error: {}".format(error), -1
                break
            else:
                if code != 421:
                    status = 'valid' if code == 250 else "Unknown"
                    break
                status = "Service not available"
            attempt += 1
            if attempt >= self.MAX_ATTEMPTS:
                break
            time.sleep(self.RETRY_DELAY)
        self.out_que.put((email, status, code))
def _start_workers(count, in_que, out_que):
    """Start *count* daemon MailWorker threads sharing the two queues."""
    for _ in range(count):
        worker = MailWorker(in_que, out_que)
        worker.daemon = True
        worker.start()


def check_email(emails, domain, index=0, is_main=False):
    """Find a deliverable address among *emails* on *domain*.

    Unless *is_main* is true, a junk address is probed first; if the server
    accepts it the domain is reported as a catch-all. Otherwise all addresses
    are probed by worker threads and the first accepted one (code 250) is
    returned; if none is accepted, the last result received is returned.

    Args:
        emails: list of addresses on the same domain.
        domain: domain part used to pick the SMTP host and build the probe.
        index: index returned when no per-address index is available.
        is_main: skip the catch-all probe when true.

    Returns:
        ``(email, status, code, index)``. For a catch-all domain this is
        ``(emails, "catchall", -99, index)``. ``email`` is None when no
        result was received at all.
    """
    try:
        from Queue import Empty  # Python 2 module name, as imported by the file
    except ImportError:
        from queue import Empty

    # Upper bound, in seconds, on waiting for one result; prevents blocking
    # forever if a worker dies without reporting.
    result_timeout = 120

    if not emails:
        return None, None, None, index

    if 'brighthouse' in domain:
        host = 'brighthouse-co-uk.mail.protection.outlook.com'
    else:
        host = 'eu-smtp-inbound-2.mimecast.com'

    if not is_main:
        # Catch-all probe: an address that cannot exist; if the server
        # accepts it, per-address checks are meaningless.
        in_que, out_que = Queue(), Queue()
        _start_workers(1, in_que, out_que)
        in_que.put(('JUNK_EMAIL_CANT_BE_REAL_fewfwewefew@' + domain, host))
        try:
            email, status, code = out_que.get(timeout=result_timeout)
        except Empty:
            return None, "No result from catch-all probe", -1, index
        if code == 250:
            print('Domain is a Catch-All. email: %s host: %s' % (emails[0], host))
            return emails, "catchall", -99, index
        if code == -1:
            return email, status, code, index

    # Unbounded queues: all work is enqueued before any worker starts, so a
    # size limit would block the producer on long lists.
    in_que, out_que = Queue(), Queue()
    for email_address in emails:
        in_que.put((email_address, host))
    _start_workers(min(10, len(emails)), in_que, out_que)

    email = status = code = None
    # One result is expected per queued address; stop early on the first
    # accepted address. Remaining workers are daemon threads and are simply
    # abandoned.
    for _ in emails:
        try:
            email, status, code = out_que.get(timeout=result_timeout)
        except Empty:
            break
        if code == 250:
            break
    if email in emails:
        index = emails.index(email)
    return email, status, code, index
# One list of candidate addresses per domain.
emails_list = [
    ['fred@brighthouse.co.uk', 'joe.fred@brighthouse.co.uk', 'joe@brighthouse.co.uk'],
    ['fred@cqs.com', 'joe.fred@cqs.com', 'joe@cqs.com'],
]
for emails in emails_list:
    # The domain is everything after the last '@' of the first address.
    domain = emails[0].rsplit('@', 1)[1]
    print(check_email(emails, domain))
My expectation is for the next list item to run.
The result with the above code is:
# CODE FREEZES and DOESN'T RUN SECOND item IN EMAILS_LIST -- SEE output
"""
<MailWorker(Thread-1, initial)>
********************
Thread-1
1 in queue size
0 out queue size
********************
Contet = rubbish_fQ94hEAi93#brighthouse.co.uk valid 250
Domain is a Catch-All. email: fred#brighthouse.co.uk host: brighthouse-co-uk.mail.protection.outlook.com
(['fred#brighthouse.co.uk', 'joe.fred#brighthouse.co.uk', 'joe#brighthouse.co.uk'], 'catchall', -99, 0)
<MailWorker(Thread-2, initial)>
********************
Thread-2
0 in queue size
0 out queue size
******************** <---- code blocks here
"""

boto3 wait_until_running doesn't work as desired

I'm trying to write a script using boto3 to start an instance and wait until it is started. As per the documentation of wait_until_running, it should wait until the instance is fully started (I'm assuming the status checks should be OK). Unfortunately this only works for wait_until_stopped; in the case of wait_until_running it just starts the instance and doesn't wait until it is completely started. I'm not sure whether I'm doing something wrong here or whether this is a bug in boto3.
Here is the code:
import boto3
ec2 = boto3.resource('ec2',region_name="ap-southeast-2")
ec2_id = 'i-xxxxxxxx'
# Resource handle for the instance to start.
instance = ec2.Instance(id=ec2_id)
print("starting instance " + ec2_id)
instance.start()
# NOTE(review): presumably this returns once the instance state is 'running',
# which can be before the instance status checks pass — confirm in boto3 docs.
instance.wait_until_running()
print("instance started")
Thanks to @Mark B and @Madhurya Gandi, here is the solution that worked in my case:
import socket
import time

import boto3
from botocore.exceptions import ClientError

retries = 10
retry_delay = 10
retry_count = 0
ec2 = boto3.resource('ec2', region_name="ap-southeast-2")
ec2_id = 'i-xxxxxxxx'
instance = ec2.Instance(id=ec2_id)
try:
    print("starting instance " + ec2_id)
    instance.start()
    instance.wait_until_running()
    # Probe port 22 until it accepts a connection or the attempts run out.
    while retry_count <= retries:
        retry_count += 1
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            # Bound each connection attempt instead of using the OS default.
            sock.settimeout(retry_delay)
            result = sock.connect_ex((instance.public_ip_address, 22))
        finally:
            sock.close()
        if result == 0:
            print("Instance is UP & accessible on port 22, the IP address is: ", instance.public_ip_address)
            break
        print("instance is still down retrying . . . ")
        time.sleep(retry_delay)
    else:
        # Loop ended without a break: the port never opened.
        print("giving up: port 22 not reachable after", retry_count, "attempts")
except ClientError as e:
    print('Error', e)
I believe this approach waits until the instance status shows 2/2 checks passed:
Boto3 Doc:
instance-status.reachability - Filters on instance status where the name is reachability (passed | failed | initializing | insufficient-data ).
import boto3

# Wait with the 'instance_status_ok' waiter, restricted to instances whose
# reachability status is "passed".
client = boto3.client('ec2')
waiter = client.get_waiter('instance_status_ok')
reachability_passed = {
    "Name": "instance-status.reachability",
    "Values": ["passed"],
}
waiter.wait(InstanceIds=["instanceID"], Filters=[reachability_passed])
[1]: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Waiter.InstanceStatusOk
I believe the right way to do it is as follows:
# Wait for the instance, passing an explicit filter on the 'running' state name.
running_state_filter = {
    'Name': 'instance-state-name',
    'Values': ['running'],
}
instance.wait_until_running(Filters=[running_state_filter])
tested and worked for me
I've tried instance.wait_until_running(). It took time for the instance to reach the running state. According to the Amazon docs, instances take a minimum of 60 seconds to spin up. Here's sample code that worked for me. Hope it helps!
import time

# Request between 1 and 5 instances (MinCount=1, MaxCount=5).
ec2.create_instances(ImageId='<ami-image-id>', MinCount=1, MaxCount=5)
# Fixed pause to give the instances time to start.
time.sleep(60)
# print your instances

Categories