I've got code that downloads a file from an S3 bucket using boto3.
# foo.py
import boto3

def dl(src_f, dest_f):
    s3 = boto3.resource('s3')
    s3.Bucket('mybucket').download_file(src_f, dest_f)
I'd now like to write a unit test for dl() using pytest and by mocking the interaction with AWS using the stubber available in botocore.
import os

import boto3
import pytest
from botocore.stub import Stubber, ANY

from foo import dl

@pytest.fixture
def s3_client():
    yield boto3.client("s3")

def test_dl(s3_client):
    with Stubber(s3_client) as stubber:
        params = {"Bucket": ANY, "Key": ANY}
        response = {"Body": "lorem"}
        stubber.add_response(SOME_OBJ, response, params)
        dl('bucket_file.txt', 'tmp/bucket_file.txt')
        assert os.path.isfile('tmp/bucket_file.txt')
I'm not sure about the right approach for this. How do I add bucket_file.txt to the stubbed response? What object do I need to add_response() to (shown as SOME_OBJ)?
Have you considered using moto?
Your code could stay exactly the way it is right now:
# foo.py
def dl(src_f, dest_f):
    s3 = boto3.resource('s3')
    s3.Bucket('mybucket').download_file(src_f, dest_f)
and the test:
import os

import boto3
from moto import mock_s3

from foo import dl

@mock_s3
def test_dl():
    s3 = boto3.client('s3', region_name='us-east-1')
    # We need to create the bucket since this is all in Moto's 'virtual' AWS account
    s3.create_bucket(Bucket='mybucket')
    s3.put_object(Bucket='mybucket', Key='bucket_file.txt', Body='')
    dl('bucket_file.txt', 'bucket_file.txt')
    assert os.path.isfile('bucket_file.txt')
The intention of the code becomes a bit more obvious, since you simply work with S3 as usual, except that there is no real S3 behind the method calls.
I'd like to get all archives from a specific directory in an S3 bucket, like the following:
import boto3

def get_files_from_s3(bucket_name, s3_prefix):
    files = []
    s3_resource = boto3.resource("s3")
    bucket = s3_resource.Bucket(bucket_name)
    response = bucket.objects.filter(Prefix=s3_prefix)
    for obj in response:
        if obj.key.endswith('.zip'):
            # get all archives
            files.append(obj.key)
    return files
My question is about testing it; I'd like to mock the list of objects in the response so that I can iterate over it. Here is what I tried:
from unittest.mock import patch
from dataclasses import dataclass

@dataclass
class MockZip:
    key = 'file.zip'

@patch('module.boto3')
def test_get_files_from_s3(self, mock_boto3):
    bucket = mock_boto3.resource('s3').Bucket(self.bucket_name)
    response = bucket.objects.filter(Prefix=S3_PREFIX)
    response.return_value = [MockZip()]

    files = module.get_files_from_s3(BUCKET_NAME, S3_PREFIX)
    self.assertEqual(['file.zip'], files)
I get an assertion error like this: E AssertionError: ['file.zip'] != []
Does anyone have a better approach? I used a dataclass but I don't think that is the problem; I guess I get an empty list because the response is not iterable. So how can I mock it to be a list of mock objects instead of just a MagicMock?
Thanks
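For what it's worth, the plain unittest.mock route can also be made to work; the empty list comes from setting return_value on the result of an already-called filter() instead of on the mocked method itself. A rough sketch under that assumption, reusing MockZip, module, BUCKET_NAME and S3_PREFIX from above:

@patch('module.boto3')
def test_get_files_from_s3(self, mock_boto3):
    # Configure filter() on the patched resource so that the production code's
    # bucket.objects.filter(...) call returns an iterable of mock objects.
    mock_bucket = mock_boto3.resource.return_value.Bucket.return_value
    mock_bucket.objects.filter.return_value = [MockZip()]

    files = module.get_files_from_s3(BUCKET_NAME, S3_PREFIX)
    self.assertEqual(['file.zip'], files)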
You could use moto, which is an open-source library specifically built to mock boto3 calls. It allows you to work directly with boto3, without having to worry about setting up mocks manually.
The test function that you're currently using would look like this:
import os

import boto3
import pytest
from moto import mock_s3

@pytest.fixture(scope='function')
def aws_credentials():
    """Mocked AWS Credentials, to ensure we're not touching AWS directly"""
    os.environ['AWS_ACCESS_KEY_ID'] = 'testing'
    os.environ['AWS_SECRET_ACCESS_KEY'] = 'testing'
    os.environ['AWS_SECURITY_TOKEN'] = 'testing'
    os.environ['AWS_SESSION_TOKEN'] = 'testing'

@mock_s3
def test_get_files_from_s3(self, aws_credentials):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(self.bucket_name)
    # Create the bucket first, as we're interacting with an empty mocked 'AWS account'
    bucket.create()

    # Create some example files that are representative of what the S3 bucket would look like in production
    client = boto3.client('s3', region_name='us-east-1')
    client.put_object(Bucket=self.bucket_name, Key="file.zip", Body="...")
    client.put_object(Bucket=self.bucket_name, Key="file.nonzip", Body="...")

    # Retrieve the files again using whatever logic
    files = module.get_files_from_s3(BUCKET_NAME, S3_PREFIX)
    self.assertEqual(['file.zip'], files)
Full documentation for Moto can be found here:
http://docs.getmoto.org/en/latest/index.html
Disclaimer: I am a maintainer for Moto.
I have created a function in AWS lambda which looks like this:
import boto3
import numpy as np
import pandas as pd
import s3fs
from io import StringIO

def test(event=None, context=None):
    # creating a pandas dataframe from an api
    # placing 2 csv files in S3 bucket
This function queries an external API and places 2 CSV files in an S3 bucket. I want to trigger this function from Airflow, and I have found this code:
import boto3, json, typing

def invokeLambdaFunction(*, functionName:str=None, payload:typing.Mapping[str, str]=None):
    if functionName == None:
        raise Exception('ERROR: functionName parameter cannot be NULL')
    payloadStr = json.dumps(payload)
    payloadBytesArr = bytes(payloadStr, encoding='utf8')
    client = boto3.client('lambda')
    response = client.invoke(
        FunctionName=functionName,
        InvocationType="RequestResponse",
        Payload=payloadBytesArr
    )
    return response

if __name__ == '__main__':
    payloadObj = {"something" : "1111111-222222-333333-bba8-1111111"}
    response = invokeLambdaFunction(functionName='test', payload=payloadObj)
    print(f'response:{response}')
But as I understand it, this code snippet does not connect to S3. Is this the right approach to trigger an AWS Lambda function from Airflow, or is there a better way?
I would advise using the AwsLambdaHook:
https://airflow.apache.org/docs/stable/_api/airflow/contrib/hooks/aws_lambda_hook/index.html#module-airflow.contrib.hooks.aws_lambda_hook
And you can check a test showing how it is used to trigger a Lambda function:
https://github.com/apache/airflow/blob/master/tests/providers/amazon/aws/hooks/test_lambda_function.py
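For illustration, here is a minimal sketch of how the hook might be wired into a PythonOperator callable. The function name and payload are carried over from the question, the region is an assumption, and the constructor and invoke_lambda() signatures are those of the contrib hook linked above:

import json

from airflow.contrib.hooks.aws_lambda_hook import AwsLambdaHook

def trigger_lambda(**context):
    # Build the hook against the Lambda function defined earlier ('test')
    hook = AwsLambdaHook(
        function_name='test',
        region_name='us-east-1',  # assumed region
        invocation_type='RequestResponse',
    )
    # The payload must already be JSON-encoded; it is passed straight through to boto3's invoke()
    response = hook.invoke_lambda(payload=json.dumps({"something": "1111111-222222-333333-bba8-1111111"}))
    return response['StatusCode']

This callable could then be attached to a PythonOperator task in your DAG, so the Lambda invocation runs as a regular Airflow task.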
I want to add tags to the files as I upload them to S3. Boto3 supports specifying tags with the put_object method; however, considering the expected file size, I am using the upload_file function, which handles multipart uploads. But this function rejects 'Tagging' as a keyword argument.
import boto3

client = boto3.client('s3', region_name='us-west-2')
client.upload_file('test.mp4', 'bucket_name', 'test.mp4',
                   ExtraArgs={'Tagging': 'type=test'})
ValueError: Invalid extra_args key 'Tagging', must be one of: ACL, CacheControl, ContentDisposition, ContentEncoding, ContentLanguage, ContentType, Expires, GrantFullControl, GrantRead, GrantReadACP, GrantWriteACP, Metadata, RequestPayer, ServerSideEncryption, StorageClass, SSECustomerAlgorithm, SSECustomerKey, SSECustomerKeyMD5, SSEKMSKeyId, WebsiteRedirectLocation
I found a way to make this work by using the S3 transfer manager directly and modifying its allowed keyword list.
from s3transfer import S3Transfer
import boto3

client = boto3.client('s3', region_name='us-west-2')
transfer = S3Transfer(client)
transfer.ALLOWED_UPLOAD_ARGS.append('Tagging')
transfer.upload_file('test.mp4', 'bucket_name', 'test.mp4',
                     extra_args={'Tagging': 'type=test'})
Even though this works, I don't think this is the best way. It might create other side effects. Currently I am not able to find the correct way to achieve this. Any advice would be great. Thanks.
The Tagging directive is now supported by boto3. You can do the following to add tags:
import boto3
from urllib import parse

s3 = boto3.client("s3")

tags = {"key1": "value1", "key2": "value2"}

s3.upload_file(
    "file_path",
    "bucket",
    "key",
    ExtraArgs={"Tagging": parse.urlencode(tags)},
)
The S3 Customization Reference — Boto 3 Docs documentation lists valid values for extra_args as:
ALLOWED_UPLOAD_ARGS = ['ACL', 'CacheControl', 'ContentDisposition', 'ContentEncoding', 'ContentLanguage', 'ContentType', 'Expires', 'GrantFullControl', 'GrantRead', 'GrantReadACP', 'GrantWriteACP', 'Metadata', 'RequestPayer', 'ServerSideEncryption', 'StorageClass', 'SSECustomerAlgorithm', 'SSECustomerKey', 'SSECustomerKeyMD5', 'SSEKMSKeyId', 'WebsiteRedirectLocation']
Therefore, this does not appear to be a valid way to specify a tag.
It appears that you might need to call put_object_tagging() to add the tag(s) after creating the object.
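For example, a rough sketch of that two-step approach (the file, bucket and tag values are taken from the question; the Tagging structure follows the put_object_tagging API):

import boto3

client = boto3.client('s3', region_name='us-west-2')

# Managed multipart upload, without tags...
client.upload_file('test.mp4', 'bucket_name', 'test.mp4')

# ...then attach the tag set to the newly created object
client.put_object_tagging(
    Bucket='bucket_name',
    Key='test.mp4',
    Tagging={'TagSet': [{'Key': 'type', 'Value': 'test'}]},
)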
I am doing unit testing with Python mock. I've gone through blogs and the Python docs related to mocking, but I'm confused about how to mock this test case.
Here is the snippet for which I want to write a test case.
The goal is to test the method set_contents_from_string() using mock.
def write_to_customer_registry(customer):
    # establish a connection with S3
    conn = _connect_to_s3()
    # build customer registry dict and convert it to json
    customer_registry_dict = json.dumps(build_customer_registry_dict(customer))
    # attempt to access requested bucket
    bucket = _get_customer_bucket(conn)
    s3_key = _get_customer_key(bucket, customer)
    s3_key.set_metadata('Content-Type', 'application/json')
    s3_key.set_contents_from_string(customer_registry_dict)
    return s3_key
As you are testing some private methods, I have added them to a module, which I called s3.py, that contains your code:
import json

def _connect_to_s3():
    raise

def _get_customer_bucket(conn):
    raise

def _get_customer_key(bucket, customer):
    raise

def build_customer_registry_dict(cust):
    raise

def write_to_customer_registry(customer):
    # establish a connection with S3
    conn = _connect_to_s3()
    # build customer registry dict and convert it to json
    customer_registry_dict = json.dumps(build_customer_registry_dict(customer))
    # attempt to access requested bucket
    bucket = _get_customer_bucket(conn)
    s3_key = _get_customer_key(bucket, customer)
    s3_key.set_metadata('Content-Type', 'application/json')
    s3_key.set_contents_from_string(customer_registry_dict)
    return s3_key
Next, in another module, test_s3.py, I tested your code, taking into account that for unit tests all interactions with third parties, such as network calls to S3, should be patched:
from unittest.mock import MagicMock, Mock, patch
from s3 import write_to_customer_registry
import json

@patch('json.dumps', return_value={})
@patch('s3._get_customer_key')
@patch('s3.build_customer_registry_dict')
@patch('s3._get_customer_bucket')
@patch('s3._connect_to_s3')
def test_write_to_customer_registry(connect_mock, get_bucket_mock, build_customer_registry_dict_mock, get_customer_key_mock, json_mock):
    customer = MagicMock()
    connect_mock.return_value = 'connection'
    get_bucket_mock.return_value = 'bucket'
    get_customer_key_mock.return_value = MagicMock()

    write_to_customer_registry(customer)

    assert connect_mock.call_count == 1
    assert get_bucket_mock.call_count == 1
    assert get_customer_key_mock.call_count == 1
    get_bucket_mock.assert_called_with('connection')
    get_customer_key_mock.assert_called_with('bucket', customer)
    get_customer_key_mock.return_value.set_metadata.assert_called_with('Content-Type', 'application/json')
    get_customer_key_mock.return_value.set_contents_from_string.assert_called_with({})
As you can see from the tests, I am not testing that set_contents_from_string is doing what it is supposed to do (since that should already be tested by the boto library) but that it is being called with the proper arguments.
If you still doubt that the boto library properly tests such a call, you can always check it yourself in the boto GitHub or boto3 GitHub repositories.
Something else you could test is that you are handling the different exceptions and edge cases in your code properly.
Finally, you can find more about patching and mocking in the docs. Usually the section about where to patch is really useful.
Some other resources are this blog post with Python mock gotchas, or this blog post I wrote myself (shameless self plug) after answering related pytest, patching and mocking questions on Stack Overflow.
I came up with a solution that worked for me. Posting it here in case it is helpful for someone.
def setup(self):
    self.customer = Customer.objects.create('tiertranstests')
    self.customer.save()

def test_build_customer_registry(self):
    mock_connection = Mock()
    mock_bucket = Mock()
    mock_s3_key = Mock()
    customer_registry_dict = json.dumps(build_customer_registry_dict(self.customer))

    # Patch S3 connection and Key class of registry method
    with patch('<path>.customer_registry.S3Connection', Mock(return_value=mock_connection)),\
         patch('<path>.customer_registry.Key', Mock(return_value=mock_s3_key)):
        mock_connection.get_bucket = Mock(return_value=mock_bucket)
        mock_s3_key.set_metadata.return_value = None
        mock_s3_key.set_contents_from_string = Mock(return_value=customer_registry_dict)

        write_to_customer_registry(self.customer)

        mock_s3_key.set_contents_from_string.assert_called_once_with(customer_registry_dict)
I am writing test cases for a quick class to find / fetch keys from s3, using boto3. I have used moto in the past to test boto (not 3) code but am trying to move to boto3 with this project, and running into an issue:
class TestS3Actor(unittest.TestCase):
    @mock_s3
    def setUp(self):
        self.bucket_name = 'test_bucket_01'
        self.key_name = 'stats_com/fake_fake/test.json'
        self.key_contents = 'This is test data.'
        s3 = boto3.session.Session().resource('s3')
        s3.create_bucket(Bucket=self.bucket_name)
        s3.Object(self.bucket_name, self.key_name).put(Body=self.key_contents)
error:
...
File "/Library/Python/2.7/site-packages/botocore/vendored/requests/packages/urllib3/connectionpool.py", line 344, in _make_request
self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
File "/Library/Python/2.7/site-packages/botocore/vendored/requests/packages/urllib3/connectionpool.py", line 314, in _raise_timeout
if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6
TypeError: __str__ returned non-string (type WantWriteError)
botocore.hooks: DEBUG: Event needs-retry.s3.CreateBucket: calling handler <botocore.retryhandler.RetryHandler object at 0x10ce75310>
It looks like moto is not mocking out the boto3 call correctly - how do I make that work?
What worked for me is setting up the environment with boto before running my mocked tests with boto3.
Here's a working snippet:
import unittest

import boto
from boto.s3.key import Key
from moto import mock_s3
import boto3

class TestS3Actor(unittest.TestCase):
    mock_s3 = mock_s3()

    def setUp(self):
        self.mock_s3.start()
        self.location = "eu-west-1"
        self.bucket_name = 'test_bucket_01'
        self.key_name = 'stats_com/fake_fake/test.json'
        self.key_contents = 'This is test data.'
        s3 = boto.connect_s3()
        bucket = s3.create_bucket(self.bucket_name, location=self.location)
        k = Key(bucket)
        k.key = self.key_name
        k.set_contents_from_string(self.key_contents)

    def tearDown(self):
        self.mock_s3.stop()

    def test_s3_boto3(self):
        s3 = boto3.resource('s3', region_name=self.location)
        bucket = s3.Bucket(self.bucket_name)
        assert bucket.name == self.bucket_name

        # retrieve already setup keys
        keys = list(bucket.objects.filter(Prefix=self.key_name))
        assert len(keys) == 1
        assert keys[0].key == self.key_name

        # update key
        s3.Object(self.bucket_name, self.key_name).put(Body='new')
        key = s3.Object(self.bucket_name, self.key_name).get()
        assert 'new' == key['Body'].read()
When run with py.test test.py you get the following output:
collected 1 items
test.py .
========================================================================================= 1 passed in 2.22 seconds =========================================================================================
According to this information, it looks like streaming upload to s3 using Boto3 S3 Put is not yet supported.
In my case, I used following to successfully upload an object to a bucket:
s3.Object(self.s3_bucket_name, self.s3_key).put(Body=open("file_to_upload", 'rb'))
where "file_to_upload" is your local file to be uploaded to s3 bucket. For your test case, you can just create a temporary file to check this functionality:
test_file = open("test_file.json", "w")
test_file.write("some test contents")
test_file.close()

s3.Object(self.s3_bucket_name, self.s3_key).put(Body=open("test_file.json", 'rb'))
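As an aside not in the original answer: if you need to upload from a true file-like stream rather than a file on disk, boto3's managed upload_fileobj API accepts any readable file-like object and handles the transfer for you. A rough sketch with placeholder bucket and key names:

import io

import boto3

s3 = boto3.resource('s3')

# Any readable file-like object works here; BytesIO stands in for a real stream.
data = io.BytesIO(b"some test contents")
s3.Object("my-bucket", "test_file.json").upload_fileobj(data)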