Building a Lambda function in YAML - issue - Python

I'm building a CloudFormation deployment that includes a Lambda function written in Python 3.9. However, when I build the function, it will not let me keep the single quotes. This hasn't been an issue for most of the script, as I simply import json and double quotes (") work fine, but one section requires the single quotes.
Here is the code:
import boto3
import json


def lambda_handler(event, context):
    client = client_obj()
    associated = associated_list(client)
    response = client.list_resolver_query_log_configs(
        MaxResults=1,
    )
    config = response['ResolverQueryLogConfigs'][0]['Id']
    ec2 = boto3.client('ec2')
    vpc = ec2.describe_vpcs()
    vpcs = vpc['Vpcs']
    for v in vpcs:
        if v['VpcId'] not in associated:
            client.associate_resolver_query_log_config(
                ResolverQueryLogConfigId=f"{config}",
                ResourceId=f"{v['VpcId']}"
            )
        else:
            print(f"{v['VpcId']} is already linked.")


def client_obj():
    client = boto3.client('route53resolver')
    return client


def associated_list(client_object):
    associated = list()
    assoc = client_object.list_resolver_query_log_config_associations()
    for element in assoc['ResolverQueryLogConfigAssociations']:
        associated.append(element['ResourceId'])
    return associated
Any section that includes f"{v['VpcId']}" requires the single quotes inside the [] for the script to run properly. Since YAML requires the script to be wrapped in single quotes for packaging, how can I fix this?
Example in YAML from another script:
CreateIAMUser:
  Type: 'AWS::Lambda::Function'
  Properties:
    Code:
      ZipFile: !Join
        - |+

        - - import boto3
          - 'import json'
          - 'from botocore.exceptions import ClientError'
          - ''
          - ''
          - 'def lambda_handler(event, context):'
          - '    iam_client = boto3.client("iam")'
          - ''
          - '    account_id = boto3.client("sts").get_caller_identity()["Account"]'
          - ''
I imagine I could re-arrange the script to avoid this, but I would like to use this opportunity to learn something new if possible.

Not sure what you are trying to do with the !Join, but usually you just use a pipe (a literal block scalar) in YAML for that. The block keeps every line, quotes included, exactly as written, so no per-line quoting is needed:
Code:
  ZipFile: |
    import boto3
    import json

    def lambda_handler(event, context):
        client = client_obj()
        associated = associated_list(client)
        response = client.list_resolver_query_log_configs(
            MaxResults=1,
        )
        config = response['ResolverQueryLogConfigs'][0]['Id']
        ec2 = boto3.client('ec2')
        vpc = ec2.describe_vpcs()
        vpcs = vpc['Vpcs']
        for v in vpcs:
            if v['VpcId'] not in associated:
                client.associate_resolver_query_log_config(
                    ResolverQueryLogConfigId=f"{config}",
                    ResourceId=f"{v['VpcId']}"
                )
            else:
                print(f"{v['VpcId']} is already linked.")

    def client_obj():
        client = boto3.client('route53resolver')
        return client

    def associated_list(client_object):
        associated = list()
        assoc = client_object.list_resolver_query_log_config_associations()
        for element in assoc['ResolverQueryLogConfigAssociations']:
            associated.append(element['ResourceId'])
        return associated
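If you want to convince yourself that the block scalar really keeps the single quotes intact, here is a quick check with PyYAML (assuming it is installed; the snippet below is a trimmed-down version of the template above):

import yaml  # pip install pyyaml

snippet = """
Code:
  ZipFile: |
    config = response['ResolverQueryLogConfigs'][0]['Id']
    print(f"{v['VpcId']} is already linked.")
"""

parsed = yaml.safe_load(snippet)
# The quotes come back exactly as written inside the block scalar.
print(parsed['Code']['ZipFile'])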

Related

How to mock list of response objects from boto3?

I'd like to get all archives from a specific directory in an S3 bucket, like the following:
import boto3


def get_files_from_s3(bucket_name, s3_prefix):
    files = []
    s3_resource = boto3.resource("s3")
    bucket = s3_resource.Bucket(bucket_name)
    response = bucket.objects.filter(Prefix=s3_prefix)
    for obj in response:
        if obj.key.endswith('.zip'):
            # get all archives
            files.append(obj.key)
    return files
My question is about testing it; because I'd like to mock the list of objects in the response to be able to iterate on it. Here is what I tried:
from unittest.mock import patch
from dataclasses import dataclass


@dataclass
class MockZip:
    key = 'file.zip'


@patch('module.boto3')
def test_get_files_from_s3(self, mock_boto3):
    bucket = mock_boto3.resource('s3').Bucket(self.bucket_name)
    response = bucket.objects.filter(Prefix=S3_PREFIX)
    response.return_value = [MockZip()]
    files = module.get_files_from_s3(BUCKET_NAME, S3_PREFIX)
    self.assertEqual(['file.zip'], files)
I get an assertion error like this: E AssertionError: ['file.zip'] != []
Does anyone have a better approach? I used a dataclass, but I don't think that is the problem; I suspect I get an empty list because the mocked response is not iterable. So how can I mock it to be a list of mock objects instead of just a MagicMock?
Thanks
You could use moto, an open-source library specifically built to mock boto3 calls. It allows you to work directly with boto3, without having to worry about setting up mocks manually.
The test function that you're currently using would look like this:
import os

import boto3
import pytest
from moto import mock_s3


@pytest.fixture(scope='function')
def aws_credentials():
    """Mocked AWS Credentials, to ensure we're not touching AWS directly"""
    os.environ['AWS_ACCESS_KEY_ID'] = 'testing'
    os.environ['AWS_SECRET_ACCESS_KEY'] = 'testing'
    os.environ['AWS_SECURITY_TOKEN'] = 'testing'
    os.environ['AWS_SESSION_TOKEN'] = 'testing'


@mock_s3
def test_get_files_from_s3(self, aws_credentials):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(self.bucket_name)
    # Create the bucket first, as we're interacting with an empty mocked 'AWS account'
    bucket.create()
    # Create some example files that are representative of what the S3 bucket would look like in production
    client = boto3.client('s3', region_name='us-east-1')
    client.put_object(Bucket=self.bucket_name, Key="file.zip", Body="...")
    client.put_object(Bucket=self.bucket_name, Key="file.nonzip", Body="...")
    # Retrieve the files again using whatever logic
    files = module.get_files_from_s3(BUCKET_NAME, S3_PREFIX)
    self.assertEqual(['file.zip'], files)
Full documentation for Moto can be found here:
http://docs.getmoto.org/en/latest/index.html
Disclaimer: I am a maintainer for Moto.
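If you would rather stay with unittest.mock instead of pulling in moto, here is a minimal sketch of one way to wire it up (assuming the function under test lives in a module importable as module, as in the question). Since @patch('module.boto3') replaces the whole boto3 module with a MagicMock, the fake list has to be attached to the filter call through the chain of return_value attributes, not to an object you build separately inside the test:

from unittest.mock import patch
from dataclasses import dataclass

import module  # hypothetical: the module that defines get_files_from_s3


@dataclass
class MockZip:
    key: str = 'file.zip'


@patch('module.boto3')
def test_get_files_from_s3(mock_boto3):
    # boto3.resource('s3').Bucket(...).objects.filter(...) inside the function
    # under test resolves to this chained return_value, so make it return an
    # iterable of fake objects.
    mock_bucket = mock_boto3.resource.return_value.Bucket.return_value
    mock_bucket.objects.filter.return_value = [MockZip()]

    files = module.get_files_from_s3('my-bucket', 'some/prefix')

    assert files == ['file.zip']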

Passing Lambda Variable to Glue Script AWS

I have a lambda function that looks like this:
import boto3

client = boto3.client('glue')
glueJobName = "Python Glue Script"
inputtedData = "A1B2C3D4E5"
school = "testing"


def lambda_handler(event, context):
    response = client.start_job_run(JobName=glueJobName, Arguments={'==inputtedData': inputtedData, '--school': school})
    return response
This starts my Glue job, which contains a script. However, I want to pass the arguments 'inputtedData' and 'school' to this script, so that when the script starts, these variables are passed into my syncData function like this:
def syncAttendance(inputtedData, school):
    schoolName = school
    Data = inputtedData
    print(schoolName, Data)
    syncData(inputtedData, school)
How do I receive these variables in the glue script?
You need to use the getResolvedOptions function, as follows:
import sys
from awsglue.utils import getResolvedOptions
options = ['inputtedData', 'school']
args = getResolvedOptions(sys.argv, options)
syncData(args['inputtedData'], args['school'])
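One detail worth calling out: Glue passes job arguments to the script as command-line flags, so the keys in Arguments need the '--' prefix for getResolvedOptions to find them; the '==inputtedData' key in the Lambda snippet above would therefore not resolve. A minimal sketch of the Lambda side with corrected keys (job name and sample values taken from the question):

import boto3

client = boto3.client('glue')


def lambda_handler(event, context):
    # Keys carry the '--' prefix here; getResolvedOptions looks them up
    # on the Glue side without the dashes.
    response = client.start_job_run(
        JobName='Python Glue Script',
        Arguments={
            '--inputtedData': 'A1B2C3D4E5',
            '--school': 'testing',
        },
    )
    return response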

How to pass a Jinja2 to HiveQL using Python

I'm using Gcloud Composer as my Airflow. When I try to use Jinja in my HQL code, it does not translate it correctly.
I know that the HiveOperator has a Jinja translator as I'm used to it, but the DataProcHiveOperator doesn't.
I've tried to use HiveConf directly in my HQL files, but when setting those values for my partition (i.e. INSERT INTO TABLE abc PARTITION (ds = ${hiveconf:ds})), it doesn't work.
I have also added the following to my HQL file:
SET ds=to_date(current_timestamp());
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
But it didn't work, as Hive transforms the expression above into a STRING.
So my idea was to combine both operators to get the Jinja translator working, but when I do that, I get the following error: ERROR - submit() takes from 3 to 4 positional arguments but 5 were given.
I'm not very familiar with Python, and any help would be great. See below for the code of the operator I'm trying to build.
Header of the Python File (please note that the file contains other Operators not mentioned in this question):
import ntpath
import os
import re
import time
import uuid
from datetime import timedelta
from airflow.contrib.hooks.gcp_dataproc_hook import DataProcHook
from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook
from airflow.exceptions import AirflowException
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.version import version
from googleapiclient.errors import HttpError
from airflow.utils import timezone
from airflow.utils.operator_helpers import context_to_airflow_vars
Modified DataProcHiveOperator:
class DataProcHiveOperator(BaseOperator):
    template_fields = ['query', 'variables', 'job_name', 'cluster_name', 'dataproc_jars']
    template_ext = ('.q',)
    ui_color = '#0273d4'

    @apply_defaults
    def __init__(
            self,
            query=None,
            query_uri=None,
            hiveconfs=None,
            hiveconf_jinja_translate=False,
            variables=None,
            job_name='{{task.task_id}}_{{ds_nodash}}',
            cluster_name='cluster-1',
            dataproc_hive_properties=None,
            dataproc_hive_jars=None,
            gcp_conn_id='google_cloud_default',
            delegate_to=None,
            region='global',
            job_error_states=['ERROR'],
            *args,
            **kwargs):
        super(DataProcHiveOperator, self).__init__(*args, **kwargs)
        self.gcp_conn_id = gcp_conn_id
        self.delegate_to = delegate_to
        self.query = query
        self.query_uri = query_uri
        self.hiveconfs = hiveconfs or {}
        self.hiveconf_jinja_translate = hiveconf_jinja_translate
        self.variables = variables
        self.job_name = job_name
        self.cluster_name = cluster_name
        self.dataproc_properties = dataproc_hive_properties
        self.dataproc_jars = dataproc_hive_jars
        self.region = region
        self.job_error_states = job_error_states

    def prepare_template(self):
        if self.hiveconf_jinja_translate:
            self.query_uri = re.sub(
                r"(\$\{(hiveconf:)?([ a-zA-Z0-9_]*)\})", r"{{ \g<3> }}", self.query_uri)

    def execute(self, context):
        hook = DataProcHook(gcp_conn_id=self.gcp_conn_id,
                            delegate_to=self.delegate_to)
        job = hook.create_job_template(self.task_id, self.cluster_name, "hiveJob",
                                       self.dataproc_properties)

        if self.query is None:
            job.add_query_uri(self.query_uri)
        else:
            job.add_query(self.query)

        if self.hiveconf_jinja_translate:
            self.hiveconfs = context_to_airflow_vars(context)
        else:
            self.hiveconfs.update(context_to_airflow_vars(context))

        job.add_variables(self.variables)
        job.add_jar_file_uris(self.dataproc_jars)
        job.set_job_name(self.job_name)

        job_to_submit = job.build()
        self.dataproc_job_id = job_to_submit["job"]["reference"]["jobId"]

        hook.submit(hook.project_id, job_to_submit, self.region, self.job_error_states)
I would like to be able to use Jinja templating inside my HQL code to allow partition automation on my data pipeline.
P.S.: I'll use the Jinja templating mostly for the partition datestamp.
Does anyone know what the error message I'm getting means, and how to solve it?
ERROR - submit() takes from 3 to 4 positional arguments but 5 were given
Thank you!
It is because of the 5th argument, job_error_states, which is only in master and not in the current stable release (1.10.1).
Source Code for 1.10.1 -> https://github.com/apache/incubator-airflow/blob/76a5fc4d2eb3c214ca25406f03b4a0c5d7250f71/airflow/contrib/hooks/gcp_dataproc_hook.py#L219
So remove that parameter and it should work.
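Concretely, with the operator code above, that means the last line of execute() drops the extra argument (a sketch assuming the rest of the method stays unchanged):

# In Airflow 1.10.1, DataProcHook.submit() accepts only
# (project_id, job, region), so job_error_states is not passed here.
hook.submit(hook.project_id, job_to_submit, self.region)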

AWS Lambda is not reading environment variables

I am writing a python script to query the Qualys API for vulnerability metadata. I am executing it as a lambda function in AWS. I have set my environment variables in the console, but when I execute my function, I am getting the following error:
module initialization error: name 'QUALYS_USERNAME' is not defined
I am using the os module to read them in my handler function:
import os
import requests
import time
import lxml
import tinys3
from lxml import etree


def lambda_handler(event, context):
    QUALYS_USERNAME = os.environ('QUALYS_USERNAME')
    QUALYS_PASSWORD = os.environ('QUALYS_PASSWORD')
    ACCESS_KEY = os.environ('ACCESS_KEY')
    SECRET_KEY = os.environ('SECRET_KEY')

    s = requests.Session()
    s.headers.update({'X-Requested-With': QUALYS_USERNAME})

    def login(s):
        payload = {'action': 'login', 'username': QUALYS_USERNAME,
                   'password': QUALYS_PASSWORD}
        r = s.post('https://qualysapi.qualys.com/api/2.0/fo/session/',
                   data=payload)

    def launchReport(s, polling_delay=120):
        payload = {'action': 'launch', 'template_id': 'X',
                   'output_format': 'xml', 'report_title': 'X'}
        r = s.post('https://qualysapi.qualys.com/api/2.0/fo/report/', data=payload)
        global extract_id
        extract_id = etree.fromstring(r.content).find('.//VALUE').text
        print("Report ID = %s" % extract_id)
        time.sleep(polling_delay)
        return extract_id

    def bucket_upload(s):
        conn = tinys3.Connection(ACCESS_KEY, SECRET_KEY)
        payload = {'action': 'fetch', 'id': extract_id}
        r = s.post('https://qualysapi.qualys.com/api/2.0/fo/report/',
                   data=payload)
        os.chdir('/tmp')
        with open(extract_id + '.xml', 'w') as file:
            file.write(r.content)
        f = open(extract_id + '.xml', 'rb')
        conn.upload(extract_id + '.xml', f, 'X-bucket')

    login(s)
    launchReport(s)
    bucket_upload(s)
Here are my defined environment variables in Lambda:
[Screenshot: environment variables in the Lambda console]
I am not sure why I am getting this error.
You need to access the environment variables as a dictionary, not as a function call.
QUALYS_USERNAME = os.environ["QUALYS_USERNAME"]
QUALYS_PASSWORD = os.environ["QUALYS_PASSWORD"]
Potential cause: the inline code, outside of the functions, is running before the event handler.
Unrelated: rather than use environment variables for credentials, you should use IAM roles for AWS credentials and use Parameter Store for other, non-AWS, credentials.
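For the Parameter Store suggestion, here is a minimal sketch of what reading the Qualys credentials could look like with boto3 (the parameter names below are hypothetical, and the Lambda's role would need ssm:GetParameter plus decrypt permissions):

import boto3

ssm = boto3.client('ssm')


def get_qualys_credentials():
    # '/qualys/username' and '/qualys/password' are made-up parameter names;
    # WithDecryption=True is required for SecureString parameters.
    username = ssm.get_parameter(Name='/qualys/username',
                                 WithDecryption=True)['Parameter']['Value']
    password = ssm.get_parameter(Name='/qualys/password',
                                 WithDecryption=True)['Parameter']['Value']
    return username, password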

How to create a signed cloudfront URL with Python?

I would like to know how to create a signed URL for CloudFront. The current working solution is unsecured, and I would like to switch the system to secure URLs.
I have tried using boto 2.5.2 and Django 1.4.
Is there a working example on how to use the boto.cloudfront.distribution.create_signed_url method? Or any other solution that works?
I have tried the following code using the boto 2.5.2 API:
def get_signed_url():
    import boto, time, pprint
    from boto import cloudfront
    from boto.cloudfront import distribution

    AWS_ACCESS_KEY_ID = 'YOUR_AWS_ACCESS_KEY_ID'
    AWS_SECRET_ACCESS_KEY = 'YOUR_AWS_SECRET_ACCESS_KEY'
    KEYPAIR_ID = 'YOUR_KEYPAIR_ID'
    KEYPAIR_FILE = 'YOUR_FULL_PATH_TO_FILE.pem'
    CF_DISTRIBUTION_ID = 'E1V7I3IOVHUU02'

    my_connection = boto.cloudfront.CloudFrontConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    distros = my_connection.get_all_streaming_distributions()
    oai = my_connection.create_origin_access_identity('my_oai', 'An OAI for testing')
    distribution_config = my_connection.get_streaming_distribution_config(CF_DISTRIBUTION_ID)
    distribution_info = my_connection.get_streaming_distribution_info(CF_DISTRIBUTION_ID)
    my_distro = boto.cloudfront.distribution.Distribution(connection=my_connection, config=distribution_config, domain_name=distribution_info.domain_name, id=CF_DISTRIBUTION_ID, last_modified_time=None, status='Active')

    s3 = boto.connect_s3()
    BUCKET_NAME = "YOUR_S3_BUCKET_NAME"
    bucket = s3.get_bucket(BUCKET_NAME)
    object_name = "FULL_URL_TO_MP4_EXCLUDING_S3_URL_DOMAIN_NAME EG( my/path/video.mp4)"
    key = bucket.get_key(object_name)
    key.add_user_grant("READ", oai.s3_user_id)

    SECS = 8000
    OBJECT_URL = 'FULL_S3_URL_TO_FILE.mp4'
    my_signed_url = my_distro.create_signed_url(OBJECT_URL, KEYPAIR_ID, expire_time=time.time() + SECS, valid_after_time=None, ip_address=None, policy_url=None, private_key_file=KEYPAIR_FILE, private_key_string=KEYPAIR_ID)
Everything seems fine until the create_signed_url method, which returns an error.
Exception Value: Only specify the private_key_file or the private_key_string not both
Omit the private_key_string:
my_signed_url = my_distro.create_signed_url(OBJECT_URL, KEYPAIR_ID,
expire_time=time.time() + SECS, private_key_file=KEYPAIR_FILE)
That parameter is used to pass the actual contents of the private key file, as a string. The comments in the source explain that only one of private_key_file or private_key_string should be passed.
You can also omit all the kwargs which are set to None, since None is the default.
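If you end up on boto3/botocore rather than boto 2, here is a minimal sketch of the equivalent using botocore's CloudFrontSigner (the key-pair ID, key path, and URL below are placeholders, and the rsa package is one of several ways to do the signing):

import datetime

import rsa  # pip install rsa
from botocore.signers import CloudFrontSigner

KEYPAIR_ID = 'YOUR_KEYPAIR_ID'
KEYPAIR_FILE = 'YOUR_FULL_PATH_TO_FILE.pem'
OBJECT_URL = 'https://dxxxxxxxxxxxxx.cloudfront.net/my/path/video.mp4'


def rsa_signer(message):
    # Sign the CloudFront policy with the private key of the CloudFront key pair
    with open(KEYPAIR_FILE, 'rb') as key_file:
        private_key = rsa.PrivateKey.load_pkcs1(key_file.read())
    return rsa.sign(message, private_key, 'SHA-1')


signer = CloudFrontSigner(KEYPAIR_ID, rsa_signer)
signed_url = signer.generate_presigned_url(
    OBJECT_URL,
    date_less_than=datetime.datetime.utcnow() + datetime.timedelta(seconds=8000))
print(signed_url)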
