How to mock list of response objects from boto3? - python

I'd like to get all archives from a specific directory in an S3 bucket, like the following:
import boto3

def get_files_from_s3(bucket_name, s3_prefix):
    files = []
    s3_resource = boto3.resource("s3")
    bucket = s3_resource.Bucket(bucket_name)
    response = bucket.objects.filter(Prefix=s3_prefix)
    for obj in response:
        if obj.key.endswith('.zip'):
            # collect all archives
            files.append(obj.key)
    return files
My question is about testing it: I'd like to mock the list of objects in the response so that I can iterate over it. Here is what I tried:
from unittest.mock import patch
from dataclasses import dataclass

@dataclass
class MockZip:
    key = 'file.zip'

@patch('module.boto3')
def test_get_files_from_s3(self, mock_boto3):
    bucket = mock_boto3.resource('s3').Bucket(self.bucket_name)
    response = bucket.objects.filter(Prefix=S3_PREFIX)
    response.return_value = [MockZip()]

    files = module.get_files_from_s3(BUCKET_NAME, S3_PREFIX)
    self.assertEqual(['file.zip'], files)
I get an assertion error like this: E AssertionError: ['file.zip'] != []
Does anyone have a better approach? I used a dataclass, but I don't think that's the problem; I suspect I get an empty list because the mocked response is not iterable. So how can I mock it to be a list of mock objects instead of just a MagicMock?
Thanks
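For reference, a minimal sketch of one way to make the mocked call chain return an iterable, assuming (as in the question) that the function lives in module and that BUCKET_NAME/S3_PREFIX are test constants: the list has to be set as the return value of filter itself, not of the object that filter returns.
from unittest.mock import MagicMock, patch

@patch('module.boto3')
def test_get_files_from_s3(self, mock_boto3):
    mock_obj = MagicMock()
    mock_obj.key = 'file.zip'
    # whatever the code under test passes to Bucket()/filter(), this list comes back
    mock_bucket = mock_boto3.resource.return_value.Bucket.return_value
    mock_bucket.objects.filter.return_value = [mock_obj]

    files = module.get_files_from_s3(BUCKET_NAME, S3_PREFIX)
    self.assertEqual(['file.zip'], files)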

You could use moto, an open-source library built specifically to mock boto3 calls. It allows you to work directly with boto3, without having to worry about setting up mocks manually.
The test function that you're currently using would look like this:
import os

import boto3
import pytest
from moto import mock_s3


@pytest.fixture(scope='function')
def aws_credentials():
    """Mocked AWS Credentials, to ensure we're not touching AWS directly"""
    os.environ['AWS_ACCESS_KEY_ID'] = 'testing'
    os.environ['AWS_SECRET_ACCESS_KEY'] = 'testing'
    os.environ['AWS_SECURITY_TOKEN'] = 'testing'
    os.environ['AWS_SESSION_TOKEN'] = 'testing'


@mock_s3
def test_get_files_from_s3(self, aws_credentials):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(self.bucket_name)
    # Create the bucket first, as we're interacting with an empty mocked 'AWS account'
    bucket.create()

    # Create some example files that are representative of what the S3 bucket would look like in production
    client = boto3.client('s3', region_name='us-east-1')
    client.put_object(Bucket=self.bucket_name, Key="file.zip", Body="...")
    client.put_object(Bucket=self.bucket_name, Key="file.nonzip", Body="...")

    # Retrieve the files again using whatever logic
    files = module.get_files_from_s3(BUCKET_NAME, S3_PREFIX)
    self.assertEqual(['file.zip'], files)
Full documentation for Moto can be found here:
http://docs.getmoto.org/en/latest/index.html
Disclaimer: I am a maintainer for Moto.

Related

mock boto3 response for downloading file from S3

I've got code that downloads a file from an S3 bucket using boto3.
# foo.py
import boto3

def dl(src_f, dest_f):
    s3 = boto3.resource('s3')
    s3.Bucket('mybucket').download_file(src_f, dest_f)
I'd now like to write a unit test for dl() using pytest and by mocking the interaction with AWS using the stubber available in botocore.
@pytest.fixture
def s3_client():
    yield boto3.client("s3")

from foo import dl

def test_dl(s3_client):
    with Stubber(s3_client) as stubber:
        params = {"Bucket": ANY, "Key": ANY}
        response = {"Body": "lorem"}
        stubber.add_response(SOME_OBJ, response, params)
        dl('bucket_file.txt', 'tmp/bucket_file.txt')
        assert os.path.isfile('tmp/bucket_file.txt')
I'm not sure about the right approach for this. How do I add bucket_file.txt to the stubbed response? What object do I need to pass to add_response() (shown as SOME_OBJ)?
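For what it's worth, the first argument to add_response() is the client method name as a string (e.g. 'get_object'). Note that download_file goes through the transfer manager, which issues head_object and get_object calls under the hood, so stubbing it directly is awkward. Below is a minimal sketch of the general Stubber shape, assuming the code under test were refactored to accept a client and call get_object itself; the helper name and dummy credentials are illustrative.
import io

import boto3
from botocore.response import StreamingBody
from botocore.stub import ANY, Stubber


def get_body(client, bucket, key):
    # hypothetical refactor of dl() that takes the client as an argument
    return client.get_object(Bucket=bucket, Key=key)["Body"].read()


def test_get_body_stubbed():
    client = boto3.client(
        "s3",
        region_name="us-east-1",
        aws_access_key_id="testing",
        aws_secret_access_key="testing",
    )
    payload = b"lorem"
    response = {"Body": StreamingBody(io.BytesIO(payload), len(payload))}
    with Stubber(client) as stubber:
        # first argument: the client method name as a string
        stubber.add_response("get_object", response, {"Bucket": ANY, "Key": ANY})
        assert get_body(client, "mybucket", "bucket_file.txt") == payload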
Have you considered using moto?
Your code could look the same way as it is right now:
# foo.py
def dl(src_f, dest_f):
    s3 = boto3.resource('s3')
    s3.Bucket('mybucket').download_file(src_f, dest_f)
and the test:
import boto3
import os
from moto import mock_s3

from foo import dl


@mock_s3
def test_dl():
    s3 = boto3.client('s3', region_name='us-east-1')
    # We need to create the bucket since this is all in Moto's 'virtual' AWS account
    s3.create_bucket(Bucket='mybucket')
    s3.put_object(Bucket='mybucket', Key='bucket_file.txt', Body='')
    dl('bucket_file.txt', 'bucket_file.txt')
    assert os.path.isfile('bucket_file.txt')
The intention of the code becomes a bit more obvious, since you simply work with S3 as usual, except that there is no real S3 behind the method calls.
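A variant of the same test, assuming pytest's built-in tmp_path fixture, keeps the downloaded file in a temporary directory instead of the working directory:
import boto3
from moto import mock_s3

from foo import dl


@mock_s3
def test_dl(tmp_path):
    s3 = boto3.client('s3', region_name='us-east-1')
    s3.create_bucket(Bucket='mybucket')
    s3.put_object(Bucket='mybucket', Key='bucket_file.txt', Body='lorem')

    dest = tmp_path / 'bucket_file.txt'
    dl('bucket_file.txt', str(dest))
    assert dest.is_file()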

Testing behavior of a mocked S3 bucket using s3fs (by dask) is unexpected

I have a simple function which adds a file to a base S3 location. The base location is given as an environment variable:
os.environ["TEST_BASE"] = "my-bucket/testing/"
The function is:
def add_file(file):
    print(f"In function: {os.getenv('TEST_BASE')}")
    s3 = s3fs.S3FileSystem()
    s3.touch(os.getenv('TEST_BASE') + file)
    print("In function: " + str(s3.ls(os.getenv('TEST_BASE'))))
Now, I want to test its behavior:
with mock_s3():
    with unittest.mock.patch.dict(os.environ, {"TEST_BASE": "foo/bar/"}):
        print(f"TEST_BASE = {os.getenv('TEST_BASE')}")
        s3_conn = boto3.client('s3', 'us-west-2')
        s3_conn.create_bucket(Bucket='foo')
        s3 = s3fs.S3FileSystem()
        s3.touch(os.getenv('TEST_BASE') + 'yoo')
        print(s3.ls(os.getenv("TEST_BASE")))
        add_file('goo')
        print(s3.exists(os.getenv("TEST_BASE") + 'goo'))  # (*)
        print(s3.ls(os.getenv("TEST_BASE")))  # (**)
        print(f"TEST_BASE = {os.getenv('TEST_BASE')}")
Now, the part I cannot understand is that (*) prints True but (**) lists only a single object (yoo). What's going on here?
Likely what is going on is that the s3 instance created inside the mock_s3 block is caching the directory listing for performance reasons. add_file creates the new empty file with a separate S3FileSystem instance, so the outer instance doesn't know that anything has changed. Try inserting s3.invalidate_cache() between add_file and s3.ls.
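A minimal sketch of that fix, dropped into the question's snippet:
add_file('goo')
s3.invalidate_cache()  # discard the outer instance's cached listing
print(s3.exists(os.getenv("TEST_BASE") + 'goo'))  # (*) still True
print(s3.ls(os.getenv("TEST_BASE")))  # (**) should now list both 'yoo' and 'goo'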
In the context of unit testing, I ended up with the following setUp and tearDown:
def setUp(self):
    # assumes self.mock_s3 = moto.mock_s3() is set up elsewhere, e.g. as a class attribute
    self.mock_s3.start()
    s3_conn = boto3.client('s3', 'us-west-2')
    s3_conn.create_bucket(Bucket='bucket')
    self.s3 = s3fs.S3FileSystem()

def tearDown(self):
    self.mock_s3.stop()
and avoided using the decorator. This does not directly address the case described in the question, but since this use case is what led me to the problem, I put it here for reference.
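For completeness, a minimal sketch of a test method that uses this setUp; the 'testing/' prefix is illustrative:
def test_touch_and_list(self):
    self.s3.touch('bucket/testing/yoo')
    assert 'bucket/testing/yoo' in self.s3.ls('bucket/testing/')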

How to get list_blobs to behave like gsutil

I would like to only get the first level of a fake folder structure on GCS.
If I run e.g.:
gsutil ls 'gs://gcp-public-data-sentinel-2/tiles/'
I get a list like this:
gs://gcp-public-data-sentinel-2/tiles/01/
gs://gcp-public-data-sentinel-2/tiles/02/
gs://gcp-public-data-sentinel-2/tiles/03/
gs://gcp-public-data-sentinel-2/tiles/04/
gs://gcp-public-data-sentinel-2/tiles/05/
gs://gcp-public-data-sentinel-2/tiles/06/
gs://gcp-public-data-sentinel-2/tiles/07/
gs://gcp-public-data-sentinel-2/tiles/08/
gs://gcp-public-data-sentinel-2/tiles/09/
gs://gcp-public-data-sentinel-2/tiles/10/
gs://gcp-public-data-sentinel-2/tiles/11/
gs://gcp-public-data-sentinel-2/tiles/12/
gs://gcp-public-data-sentinel-2/tiles/13/
gs://gcp-public-data-sentinel-2/tiles/14/
gs://gcp-public-data-sentinel-2/tiles/15/
...
Running code like the following with the Python API gives me an empty result:
from google.cloud import storage

bucket_name = 'gcp-public-data-sentinel-2'
prefix = 'tiles/'

storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
for blob in bucket.list_blobs(max_results=10, prefix=prefix, delimiter='/'):
    print(blob.name)
If I don't use the delimiter option I get all the results in the bucket which is not very useful.
Maybe not the best way, but, inspired by this comment on the official repository:
iterator = bucket.list_blobs(delimiter='/', prefix=prefix)
response = iterator._get_next_page_response()
for prefix in response['prefixes']:
    print('gs://' + bucket_name + '/' + prefix)
Gives:
gs://gcp-public-data-sentinel-2/tiles/01/
gs://gcp-public-data-sentinel-2/tiles/02/
gs://gcp-public-data-sentinel-2/tiles/03/
gs://gcp-public-data-sentinel-2/tiles/04/
gs://gcp-public-data-sentinel-2/tiles/05/
gs://gcp-public-data-sentinel-2/tiles/06/
gs://gcp-public-data-sentinel-2/tiles/07/
gs://gcp-public-data-sentinel-2/tiles/08/
gs://gcp-public-data-sentinel-2/tiles/09/
gs://gcp-public-data-sentinel-2/tiles/10/
...
If one finds this ticket like me after a long time: currently (google-cloud-storage 2.1.0) one can list the bucket contents using '//' instead of '/'. However, it lists "recursively" down to the actual blob (as it is not a real FS)
Here is a faster way (found in a GitHub thread, posted by @evanj: https://github.com/GoogleCloudPlatform/google-cloud-python/issues/920):
def list_gcs_directories(bucket, prefix):
    iterator = bucket.list_blobs(prefix=prefix, delimiter='/')
    prefixes = set()
    for page in iterator.pages:
        print(page, page.prefixes)
        prefixes.update(page.prefixes)
    return prefixes
You want to call this function as follows:
client = storage.Client()
bucket_name = 'my_bucket_name'
bucket_obj = client.bucket(bucket_name)
list_folders = list_gcs_directories(bucket_obj, prefix='my/prefix/path/within/bucket/')

# Getting rid of the prefix; each returned prefix ends with '/', so take the
# second-to-last path segment to get the folder name
list_folders = [indiv_folder.split('/')[-2] for indiv_folder in list_folders]
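To reproduce the gsutil-style output from the question, the returned prefixes can be turned back into gs:// URLs (a sketch assuming default credentials that can list the public bucket):
bucket_obj = storage.Client().bucket('gcp-public-data-sentinel-2')
for folder in sorted(list_gcs_directories(bucket_obj, prefix='tiles/')):
    print('gs://gcp-public-data-sentinel-2/' + folder)
# gs://gcp-public-data-sentinel-2/tiles/01/
# gs://gcp-public-data-sentinel-2/tiles/02/
# ...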

mock s3 connection and boto.S3key to check set_content_from_string method

I am writing unit tests with python mock. I've gone through blogs and the Python docs related to mocking, but I'm confused about how to mock this test case.
Here is the snippet for which I want to write a test case.
The goal is to test the method set_contents_from_string() using mock.
def write_to_customer_registry(customer):
    # establish a connection with S3
    conn = _connect_to_s3()
    # build customer registry dict and convert it to json
    customer_registry_dict = json.dumps(build_customer_registry_dict(customer))
    # attempt to access requested bucket
    bucket = _get_customer_bucket(conn)
    s3_key = _get_customer_key(bucket, customer)
    s3_key.set_metadata('Content-Type', 'application/json')
    s3_key.set_contents_from_string(customer_registry_dict)
    return s3_key
As you are testing some private methods I have added them to a module which I called s3.py that contains your code:
import json


def _connect_to_s3():
    raise

def _get_customer_bucket(conn):
    raise

def _get_customer_key(bucket, customer):
    raise

def build_customer_registry_dict(cust):
    raise

def write_to_customer_registry(customer):
    # establish a connection with S3
    conn = _connect_to_s3()
    # build customer registry dict and convert it to json
    customer_registry_dict = json.dumps(build_customer_registry_dict(customer))
    # attempt to access requested bucket
    bucket = _get_customer_bucket(conn)
    s3_key = _get_customer_key(bucket, customer)
    s3_key.set_metadata('Content-Type', 'application/json')
    s3_key.set_contents_from_string(customer_registry_dict)
    return s3_key
Next, in another module, test_s3.py, I tested your code, taking into account that for unit tests all interactions with third parties, such as network calls to S3, should be patched:
from unittest.mock import MagicMock, Mock, patch

from s3 import write_to_customer_registry
import json


@patch('json.dumps', return_value={})
@patch('s3._get_customer_key')
@patch('s3.build_customer_registry_dict')
@patch('s3._get_customer_bucket')
@patch('s3._connect_to_s3')
def test_write_to_customer_registry(connect_mock, get_bucket_mock, build_customer_registry_dict_mock, get_customer_key_mock, json_mock):
    customer = MagicMock()

    connect_mock.return_value = 'connection'
    get_bucket_mock.return_value = 'bucket'
    get_customer_key_mock.return_value = MagicMock()

    write_to_customer_registry(customer)

    assert connect_mock.call_count == 1
    assert get_bucket_mock.call_count == 1
    assert get_customer_key_mock.call_count == 1

    get_bucket_mock.assert_called_with('connection')
    get_customer_key_mock.assert_called_with('bucket', customer)
    get_customer_key_mock.return_value.set_metadata.assert_called_with('Content-Type', 'application/json')
    get_customer_key_mock.return_value.set_contents_from_string.assert_called_with({})
As you can see from the test, I am not checking that set_contents_from_string does what it is supposed to do (that should already be tested by the boto library), but that it is called with the proper arguments.
If you doubt that the boto library tests that call properly, you can always check for yourself in the boto or boto3 repositories on GitHub.
Something else you could test is that you are handling the different exceptions and edge cases in your code properly.
Finally, you can find more about patching and mocking in the docs. Usually the section about where to patch is really useful.
Some other resources are this blog post on python mock gotchas, or this blog post I wrote myself (shameless self-plug) after answering related pytest, patching, and mocking questions on Stack Overflow.
I came up with a solution that worked for me. Posting it here in case it helps someone.
def setup(self):
    self.customer = Customer.objects.create('tiertranstests')
    self.customer.save()

def test_build_customer_registry(self):
    mock_connection = Mock()
    mock_bucket = Mock()
    mock_s3_key = Mock()
    customer_registry_dict = json.dumps(build_customer_registry_dict(self.customer))

    # Patch S3 connection and Key class of registry method
    with patch('<path>.customer_registry.S3Connection', Mock(return_value=mock_connection)),\
         patch('<path>.customer_registry.Key', Mock(return_value=mock_s3_key)):
        mock_connection.get_bucket = Mock(return_value=mock_bucket)
        mock_s3_key.set_metadata.return_value = None
        mock_s3_key.set_contents_from_string = Mock(return_value=customer_registry_dict)
        write_to_customer_registry(self.customer)
        mock_s3_key.set_contents_from_string.assert_called_once_with(customer_registry_dict)

how do I test methods using boto3 with moto

I am writing test cases for a quick class to find / fetch keys from s3, using boto3. I have used moto in the past to test boto (not 3) code but am trying to move to boto3 with this project, and running into an issue:
class TestS3Actor(unittest.TestCase):
    @mock_s3
    def setUp(self):
        self.bucket_name = 'test_bucket_01'
        self.key_name = 'stats_com/fake_fake/test.json'
        self.key_contents = 'This is test data.'

        s3 = boto3.session.Session().resource('s3')
        s3.create_bucket(Bucket=self.bucket_name)
        s3.Object(self.bucket_name, self.key_name).put(Body=self.key_contents)
error:
...
File "/Library/Python/2.7/site-packages/botocore/vendored/requests/packages/urllib3/connectionpool.py", line 344, in _make_request
self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
File "/Library/Python/2.7/site-packages/botocore/vendored/requests/packages/urllib3/connectionpool.py", line 314, in _raise_timeout
if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6
TypeError: __str__ returned non-string (type WantWriteError)
botocore.hooks: DEBUG: Event needs-retry.s3.CreateBucket: calling handler <botocore.retryhandler.RetryHandler object at 0x10ce75310>
It looks like moto is not mocking out the boto3 call correctly - how do I make that work?
What worked for me is setting up the environment with boto before running my mocked tests with boto3.
Here's a working snippet:
import unittest

import boto
from boto.s3.key import Key
from moto import mock_s3
import boto3


class TestS3Actor(unittest.TestCase):
    mock_s3 = mock_s3()

    def setUp(self):
        self.mock_s3.start()
        self.location = "eu-west-1"
        self.bucket_name = 'test_bucket_01'
        self.key_name = 'stats_com/fake_fake/test.json'
        self.key_contents = 'This is test data.'

        s3 = boto.connect_s3()
        bucket = s3.create_bucket(self.bucket_name, location=self.location)
        k = Key(bucket)
        k.key = self.key_name
        k.set_contents_from_string(self.key_contents)

    def tearDown(self):
        self.mock_s3.stop()

    def test_s3_boto3(self):
        s3 = boto3.resource('s3', region_name=self.location)
        bucket = s3.Bucket(self.bucket_name)
        assert bucket.name == self.bucket_name

        # retrieve already setup keys
        keys = list(bucket.objects.filter(Prefix=self.key_name))
        assert len(keys) == 1
        assert keys[0].key == self.key_name

        # update key
        s3.Object(self.bucket_name, self.key_name).put(Body='new')
        key = s3.Object(self.bucket_name, self.key_name).get()
        assert 'new' == key['Body'].read()
When run with py.test test.py you get the following output:
collected 1 items
test.py .
========================================================================================= 1 passed in 2.22 seconds =========================================================================================
According to this information, it looks like streaming upload to s3 using Boto3 S3 Put is not yet supported.
In my case, I used the following to successfully upload an object to a bucket:
s3.Object(self.s3_bucket_name, self.s3_key).put(Body=open("file_to_upload", 'rb'))
where "file_to_upload" is your local file to be uploaded to s3 bucket. For your test case, you can just create a temporary file to check this functionality:
test_file = open("test_file.json", "w")
test_file.write("some test contents")
test_file.close()
s3.Object(self.s3_bucket_name, self.s3_key).put(Body=open("test_file.json", 'rb'))
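If the file only exists for the test, a variant that skips the temporary file entirely and passes bytes straight to put() (Body accepts bytes as well as file-like objects):
# same upload without touching the local filesystem
s3.Object(self.s3_bucket_name, self.s3_key).put(Body=b"some test contents")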
