I am writing test cases for a quick class to find/fetch keys from S3 using boto3. I have used moto in the past to test boto (not boto3) code, but I am trying to move to boto3 with this project and running into an issue:
class TestS3Actor(unittest.TestCase):
    @mock_s3
    def setUp(self):
        self.bucket_name = 'test_bucket_01'
        self.key_name = 'stats_com/fake_fake/test.json'
        self.key_contents = 'This is test data.'
        s3 = boto3.session.Session().resource('s3')
        s3.create_bucket(Bucket=self.bucket_name)
        s3.Object(self.bucket_name, self.key_name).put(Body=self.key_contents)
Error:
...
File "/Library/Python/2.7/site-packages/botocore/vendored/requests/packages/urllib3/connectionpool.py", line 344, in _make_request
self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
File "/Library/Python/2.7/site-packages/botocore/vendored/requests/packages/urllib3/connectionpool.py", line 314, in _raise_timeout
if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6
TypeError: __str__ returned non-string (type WantWriteError)
botocore.hooks: DEBUG: Event needs-retry.s3.CreateBucket: calling handler <botocore.retryhandler.RetryHandler object at 0x10ce75310>
It looks like moto is not mocking out the boto3 call correctly - how do I make that work?
What worked for me was setting up the environment with boto before running my mocked tests with boto3.
Here's a working snippet:
import unittest
import boto
from boto.s3.key import Key
from moto import mock_s3
import boto3

class TestS3Actor(unittest.TestCase):
    mock_s3 = mock_s3()

    def setUp(self):
        self.mock_s3.start()
        self.location = "eu-west-1"
        self.bucket_name = 'test_bucket_01'
        self.key_name = 'stats_com/fake_fake/test.json'
        self.key_contents = 'This is test data.'
        s3 = boto.connect_s3()
        bucket = s3.create_bucket(self.bucket_name, location=self.location)
        k = Key(bucket)
        k.key = self.key_name
        k.set_contents_from_string(self.key_contents)

    def tearDown(self):
        self.mock_s3.stop()

    def test_s3_boto3(self):
        s3 = boto3.resource('s3', region_name=self.location)
        bucket = s3.Bucket(self.bucket_name)
        assert bucket.name == self.bucket_name
        # retrieve the keys already set up in setUp
        keys = list(bucket.objects.filter(Prefix=self.key_name))
        assert len(keys) == 1
        assert keys[0].key == self.key_name
        # update the key
        s3.Object(self.bucket_name, self.key_name).put(Body='new')
        key = s3.Object(self.bucket_name, self.key_name).get()
        assert 'new' == key['Body'].read()
When run with py.test test.py you get the following output:
collected 1 items
test.py .
========================================================================================= 1 passed in 2.22 seconds =========================================================================================
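For what it's worth, more recent moto releases mock boto3 calls directly, so (assuming a reasonably current moto) the boto-based setup can be dropped entirely. A minimal sketch of the same setUp using only boto3:

import unittest

import boto3
from moto import mock_s3

class TestS3Boto3Only(unittest.TestCase):
    mock_s3 = mock_s3()

    def setUp(self):
        self.mock_s3.start()
        self.bucket_name = 'test_bucket_01'
        self.key_name = 'stats_com/fake_fake/test.json'
        self.key_contents = 'This is test data.'
        # with a current moto, these boto3 calls are intercepted directly
        s3 = boto3.resource('s3', region_name='us-east-1')
        s3.create_bucket(Bucket=self.bucket_name)
        s3.Object(self.bucket_name, self.key_name).put(Body=self.key_contents)

    def tearDown(self):
        self.mock_s3.stop()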
According to this information, it looks like streaming upload to S3 using the boto3 S3 put is not yet supported.
In my case, I used the following to successfully upload an object to a bucket:
s3.Object(self.s3_bucket_name, self.s3_key).put(Body=open("file_to_upload", 'rb'))
where "file_to_upload" is your local file to be uploaded to s3 bucket. For your test case, you can just create a temporary file to check this functionality:
test_file = open("test_file.json", "w")
test_file.write("some test contents")
test_file.close()
s3.Object(self.s3_bucket_name, self.s3_key).put(Body=open("test_file.json", 'rb'))
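A variant of the same idea, as a sketch, using tempfile so the test leaves nothing on disk (s3, self.s3_bucket_name and self.s3_key as in the snippet above):

import tempfile

with tempfile.NamedTemporaryFile(suffix=".json") as tmp:
    tmp.write(b"some test contents")
    tmp.seek(0)  # rewind so put() reads from the start of the file
    s3.Object(self.s3_bucket_name, self.s3_key).put(Body=tmp)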
Related
I'd like to get all archives from a specific directory of an S3 bucket, like the following:
def get_files_from_s3(bucket_name, s3_prefix):
    files = []
    s3_resource = boto3.resource("s3")
    bucket = s3_resource.Bucket(bucket_name)
    response = bucket.objects.filter(Prefix=s3_prefix)
    for obj in response:
        if obj.key.endswith('.zip'):
            # collect all archives
            files.append(obj.key)
    return files
My question is about testing it: I'd like to mock the list of objects in the response so I can iterate over it. Here is what I tried:
from unittest.mock import patch
from dataclasses import dataclass

@dataclass
class MockZip:
    key = 'file.zip'

@patch('module.boto3')
def test_get_files_from_s3(self, mock_boto3):
    bucket = mock_boto3.resource('s3').Bucket(self.bucket_name)
    response = bucket.objects.filter(Prefix=S3_PREFIX)
    response.return_value = [MockZip()]
    files = module.get_files_from_s3(BUCKET_NAME, S3_PREFIX)
    self.assertEqual(['file.zip'], files)
I get an assertion error like this: E AssertionError: ['file.zip'] != []
Does anyone have a better approach? I used a dataclass, but I don't think that is the problem; I guess I get an empty list because the response is not iterable. So how can I mock it to be a list of mock objects instead of just a MagicMock?
Thanks
You could use moto, an open-source library specifically built to mock boto3 calls. It allows you to work directly with boto3, without having to worry about setting up mocks manually.
The test function that you're currently using would look like this:
import os

import boto3
import pytest
from moto import mock_s3

@pytest.fixture(scope='function')
def aws_credentials():
    """Mocked AWS Credentials, to ensure we're not touching AWS directly"""
    os.environ['AWS_ACCESS_KEY_ID'] = 'testing'
    os.environ['AWS_SECRET_ACCESS_KEY'] = 'testing'
    os.environ['AWS_SECURITY_TOKEN'] = 'testing'
    os.environ['AWS_SESSION_TOKEN'] = 'testing'

@mock_s3
def test_get_files_from_s3(self, aws_credentials):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(self.bucket_name)
    # Create the bucket first, as we're interacting with an empty mocked 'AWS account'
    bucket.create()
    # Create some example files that are representative of what the S3 bucket would look like in production
    client = boto3.client('s3', region_name='us-east-1')
    client.put_object(Bucket=self.bucket_name, Key="file.zip", Body="...")
    client.put_object(Bucket=self.bucket_name, Key="file.nonzip", Body="...")
    # Retrieve the files again using whatever logic
    files = module.get_files_from_s3(BUCKET_NAME, S3_PREFIX)
    self.assertEqual(['file.zip'], files)
Full documentation for Moto can be found here:
http://docs.getmoto.org/en/latest/index.html
Disclaimer: I am a maintainer for Moto.
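For completeness, if you would rather keep hand-rolled mocks, the thing that has to yield a list is the filter(...) call itself, not the response object. A rough sketch, assuming module.py does `import boto3` and defines get_files_from_s3 as shown in the question:

from unittest.mock import MagicMock, patch

import module  # hypothetical: the module holding get_files_from_s3

@patch('module.boto3')
def test_get_files_from_s3_pure_mock(mock_boto3):
    fake_obj = MagicMock()
    fake_obj.key = 'file.zip'
    # bucket.objects.filter(...) must return something iterable
    mock_boto3.resource.return_value.Bucket.return_value.objects.filter.return_value = [fake_obj]
    files = module.get_files_from_s3('some-bucket', 'some/prefix')
    assert files == ['file.zip']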
I'm running a Python unit test on this script, api.py:
def download_models(gcs_model_path, local_path):
    filesystem = gcsfs.GCSFileSystem(project=PROJECT)  # , token='creds.json')
    # download models from gcs
    filesystem.get(gcs_model_path, local_path, recursive=True)
Here is the unit test script, embedding_api_test.py:
class EmbeddingAPITest(unittest.TestCase):
    @mock.patch('gcsfs.GCSFileSystem')
    def test_download_models(self, mock_filesystem):
        mock_filesystem.return_value.get.return_value = []
        download_models('gcs_model_path', 'local_path')
I'm getting this error on my terminal:
_______________________ ERROR collecting qa_api/tests/qa_api_test.py ________________________
qa_api/tests/qa_api_test.py:3: in <module>
from qa_api.api import app
qa_api/api.py:37: in <module>
download_models(GCS_MODEL_PATH, LOCAL_MODEL_PATH)
qa_api/api.py:21: in download_models
filesystem.get(gcs_model_path, local_path, recursive=True)
/opt/anaconda3/lib/python3.7/site-packages/fsspec/spec.py:556: in get
rpaths = self.find(rpath)
/opt/anaconda3/lib/python3.7/site-packages/fsspec/spec.py:371: in find
for path, dirs, files in self.walk(path, maxdepth, **kwargs):
/opt/anaconda3/lib/python3.7/site-packages/fsspec/spec.py:326: in walk
listing = self.ls(path, detail=True, **kwargs)
/opt/anaconda3/lib/python3.7/site-packages/gcsfs/core.py:767: in ls
out = self._list_objects(path)
/opt/anaconda3/lib/python3.7/site-packages/gcsfs/core.py:571: in _list_objects
items, prefixes = self._do_list_objects(path)
/opt/anaconda3/lib/python3.7/site-packages/gcsfs/core.py:604: in _do_list_objects
maxResults=max_results,
/opt/anaconda3/lib/python3.7/site-packages/gcsfs/core.py:504: in _call
raise e
/opt/anaconda3/lib/python3.7/site-packages/gcsfs/core.py:487: in _call
validate_response(r, path)
/opt/anaconda3/lib/python3.7/site-packages/gcsfs/core.py:130: in validate_response
raise HttpError(error)
E gcsfs.utils.HttpError: Anonymous caller does not have storage.objects.list access to the Google Cloud Storage bucket.
It seems I'm mocking it incorrectly but I don't understand why. Would really appreciate any help. Please let me know if more details are required. Thanks!
You should patch 'api.gcsfs.GCSFileSystem'. Note that your traceback also shows download_models being called at import time (api.py:37 runs in <module> during test collection), so that module-level call has to go as well, otherwise the patch is applied too late. E.g.
api.py:
import gcsfs

PROJECT = "teresa.teng"

def download_models(gcs_model_path, local_path):
    filesystem = gcsfs.GCSFileSystem(project=PROJECT)
    filesystem.get(gcs_model_path, local_path, recursive=True)
test_api.py:
import unittest
from unittest import mock

from api import download_models

class EmbeddingAPITest(unittest.TestCase):
    @mock.patch('api.gcsfs.GCSFileSystem')
    def test_download_models(self, mock_filesystem):
        mock_filesystem.return_value.get.return_value = []
        download_models('gcs_model_path', 'local_path')
        mock_filesystem.assert_called_once_with(project="teresa.teng")
        mock_filesystem.return_value.get.assert_called_once_with('gcs_model_path', 'local_path', recursive=True)

if __name__ == '__main__':
    unittest.main()
unit test result:
.
----------------------------------------------------------------------
Ran 1 test in 0.001s
OK
Name Stmts Miss Cover Missing
----------------------------------------------------------------------
src/stackoverflow/64673419/api.py 5 0 100%
src/stackoverflow/64673419/test_api.py 11 0 100%
----------------------------------------------------------------------
TOTAL 16 0 100%
requirements.txt:
gcsfs==0.7.1
I've got code that downloads a file from an S3 bucket using boto3.
# foo.py
import boto3

def dl(src_f, dest_f):
    s3 = boto3.resource('s3')
    s3.Bucket('mybucket').download_file(src_f, dest_f)
I'd now like to write a unit test for dl() using pytest, mocking the interaction with AWS using the stubber available in botocore.
@pytest.fixture
def s3_client():
    yield boto3.client("s3")

from foo import dl

def test_dl(s3_client):
    with Stubber(s3_client) as stubber:
        params = {"Bucket": ANY, "Key": ANY}
        response = {"Body": "lorem"}
        stubber.add_response(SOME_OBJ, response, params)
        dl('bucket_file.txt', 'tmp/bucket_file.txt')
    assert os.path.isfile('tmp/bucket_file.txt')
I'm not sure about the right approach for this. How do I add bucket_file.txt to the stubbed response? What object do I need to pass to add_response() (shown as SOME_OBJ)?
Have you considered using moto?
Your code could look the same way as it is right now:
# foo.py
import boto3

def dl(src_f, dest_f):
    s3 = boto3.resource('s3')
    s3.Bucket('mybucket').download_file(src_f, dest_f)
and the test:
import os

import boto3
from moto import mock_s3

from foo import dl

@mock_s3
def test_dl():
    s3 = boto3.client('s3', region_name='us-east-1')
    # We need to create the bucket since this is all in Moto's 'virtual' AWS account
    s3.create_bucket(Bucket='mybucket')
    s3.put_object(Bucket='mybucket', Key='bucket_file.txt', Body='')
    dl('bucket_file.txt', 'bucket_file.txt')
    assert os.path.isfile('bucket_file.txt')
The intention of the code becomes a bit more obvious, since you simply work with S3 as usual, except that there is no real S3 behind the method calls.
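If you do want to stay with botocore's Stubber, two things matter: download_file goes through the managed transfer layer (a head_object followed by get_object under the hood), and the stubbed client must be the one dl() actually uses, which means injecting it rather than creating a fresh resource inside dl(). As a sketch of the simpler case Stubber handles well, here is a plain get_object stub (Stubber, ANY and StreamingBody are real botocore APIs; the test itself is illustrative):

import io

import boto3
from botocore.response import StreamingBody
from botocore.stub import ANY, Stubber

def test_get_object_stubbed():
    s3 = boto3.client('s3', region_name='us-east-1')
    payload = b'lorem'
    response = {'Body': StreamingBody(io.BytesIO(payload), len(payload))}
    with Stubber(s3) as stubber:
        # the first positional argument is the name of the client method being stubbed
        stubber.add_response('get_object', response, {'Bucket': ANY, 'Key': ANY})
        result = s3.get_object(Bucket='mybucket', Key='bucket_file.txt')
        assert result['Body'].read() == payload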
I am looking for all the methods for moving/copying data from one folder to another in an AWS S3 bucket.
Method 1: Via the AWS CLI (easiest)
Download and install awscli on your instance (I am using Windows, 64-bit, here), run "aws configure" to fill in your configuration, and then just run this single command on cmd:
aws s3 cp s3://from-source/ s3://to-destination/ --recursive
Here cp stands for copy and --recursive copies all files under the prefix.
Method 2: Via the AWS CLI using Python
import os

if os.environ.get('LC_CTYPE', '') == 'UTF-8':
    os.environ['LC_CTYPE'] = 'en_US.UTF-8'

from awscli.clidriver import create_clidriver

driver = create_clidriver()
driver.main('s3 mv s3://staging/AwsTesting/research/ s3://staging/AwsTesting/research_archive/ --recursive'.split())
This too worked perfectly for me.
Method 3: Via boto3 using Python
import boto3

s3 = boto3.resource('s3')
copy_source = {
    'Bucket': 'staging',
    'Key': '/AwsTesting/research/'
}
s3.meta.client.copy(copy_source, 'staging', '/AwsTesting/research_archive/')
From my understanding, I assumed the 'key' for a bucket is just the folder prefix, so I put the folder path there.
Error:
Invalid bucket name "s3://staging": Bucket name must match the regex "^[a-zA-Z0-9.-_]{1,255}$"
I even changed it to the plain bucket name "staging", but no success.
How should I understand bucket connectivity via boto and the concept of a key?
The Key must be the full object key with no leading slash; S3 has no real folders, only key prefixes, so you address individual objects:

import boto3

s3 = boto3.resource('s3')
copy_source = {
    'Bucket': 'staging',
    'Key': 'AwsTesting/research/filename.csv'
}
s3.meta.client.copy(copy_source, 'staging', 'AwsTesting/filename.csv')
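To move everything under a prefix, one sketch (bucket and prefix names taken from the question; the loop itself is illustrative) is to list the objects under the source prefix and copy them one by one:

import boto3

s3 = boto3.resource('s3')
src_prefix = 'AwsTesting/research/'
dst_prefix = 'AwsTesting/research_archive/'
for obj in s3.Bucket('staging').objects.filter(Prefix=src_prefix):
    # swap the prefix, keep the remainder of the key intact
    new_key = dst_prefix + obj.key[len(src_prefix):]
    s3.meta.client.copy({'Bucket': 'staging', 'Key': obj.key}, 'staging', new_key)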
An alternative to using cp with the CLI is sync - https://docs.aws.amazon.com/cli/latest/reference/s3/sync.html
aws s3 sync s3://mybucket s3://mybucket2
It will essentially do the same thing, except that sync only copies objects that are new or have changed, so re-running it is cheaper.
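If you're unsure what it would touch, sync also supports a dry run that only prints the operations:

aws s3 sync s3://mybucket s3://mybucket2 --dryrun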
Use the following snippet, which works for me.
import boto3
from botocore.exceptions import ClientError

# the clients and bucket name come from the surrounding context;
# the bucket value here is a placeholder
s3 = boto3.client('s3')
s3_resource = boto3.resource('s3')
s3_candidate_bucket = 'my-candidate-bucket'

def s3candidateavtarcopy(old, new):
    try:
        response = s3.list_objects_v2(Bucket=s3_candidate_bucket, Prefix=old)
        keycount = response['KeyCount']
        if keycount > 0:
            for key in response['Contents']:
                file = key['Key']
                try:
                    output = file.split(old)
                    newfile = new + output[1]
                    input_source = {'Bucket': s3_candidate_bucket, 'Key': file}
                    s3_resource.Object(s3_candidate_bucket, newfile).copy_from(CopySource=input_source)
                except ClientError as e:
                    print(e.response['Error']['Message'])
                else:
                    print('Success')
        else:
            print('No matching records')
    except ClientError as e:
        print(e.response['Error']['Message'])
    else:
        print('Operation completed')
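Called with the old and new prefixes, for example (prefix names borrowed from the earlier question, purely illustrative):

s3candidateavtarcopy('AwsTesting/research/', 'AwsTesting/research_archive/')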
I am doing unit testing with Python's mock. I've gone through blogs and the Python docs related to mocking, but I am still confused about how to mock this test case.
Here is the snippet for which I want to write a test case.
The goal is to test the method set_contents_from_string() using mock.
def write_to_customer_registry(customer):
    # establish a connection with S3
    conn = _connect_to_s3()
    # build customer registry dict and convert it to json
    customer_registry_dict = json.dumps(build_customer_registry_dict(customer))
    # attempt to access requested bucket
    bucket = _get_customer_bucket(conn)
    s3_key = _get_customer_key(bucket, customer)
    s3_key.set_metadata('Content-Type', 'application/json')
    s3_key.set_contents_from_string(customer_registry_dict)
    return s3_key
As you are testing some private methods, I have added them to a module, which I called s3.py, containing your code:
import json

def _connect_to_s3():
    raise

def _get_customer_bucket(conn):
    raise

def _get_customer_key(bucket, customer):
    raise

def build_customer_registry_dict(cust):
    raise

def write_to_customer_registry(customer):
    # establish a connection with S3
    conn = _connect_to_s3()
    # build customer registry dict and convert it to json
    customer_registry_dict = json.dumps(build_customer_registry_dict(customer))
    # attempt to access requested bucket
    bucket = _get_customer_bucket(conn)
    s3_key = _get_customer_key(bucket, customer)
    s3_key.set_metadata('Content-Type', 'application/json')
    s3_key.set_contents_from_string(customer_registry_dict)
    return s3_key
Next, in another module, test_s3.py, I tested your code, taking into account that for unit tests all interactions with third parties, such as network calls to S3, should be patched:
from unittest.mock import MagicMock, patch

from s3 import write_to_customer_registry

@patch('json.dumps', return_value={})
@patch('s3._get_customer_key')
@patch('s3.build_customer_registry_dict')
@patch('s3._get_customer_bucket')
@patch('s3._connect_to_s3')
def test_write_to_customer_registry(connect_mock, get_bucket_mock, build_customer_registry_dict_mock, get_customer_key_mock, json_mock):
    customer = MagicMock()
    connect_mock.return_value = 'connection'
    get_bucket_mock.return_value = 'bucket'
    get_customer_key_mock.return_value = MagicMock()

    write_to_customer_registry(customer)

    assert connect_mock.call_count == 1
    assert get_bucket_mock.call_count == 1
    assert get_customer_key_mock.call_count == 1
    get_bucket_mock.assert_called_with('connection')
    get_customer_key_mock.assert_called_with('bucket', customer)
    get_customer_key_mock.return_value.set_metadata.assert_called_with('Content-Type', 'application/json')
    get_customer_key_mock.return_value.set_contents_from_string.assert_called_with({})
As you can see from the tests, I am not testing that set_contents_from_string does what it is supposed to do (since that should already be tested by the boto library) but that it is being called with the proper arguments.
If you still doubt that the boto library properly tests that call, you can always check it yourself on boto GitHub or boto3 GitHub.
Something else you could test is that you are handling the different exceptions and edge cases in your code properly.
Finally, you can find more about patching and mocking in the docs. Usually the section about where to patch is really useful.
Some other resources are this blog post with python mock gotchas or this blog post I wrote myself (shameless self plug) after answering related pytest, patching and mocking questions on Stack Overflow.
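As a sketch of that last point, one way to exercise the error path is to give one of the patched helpers a side_effect and assert that the exception surfaces (this assumes write_to_customer_registry simply propagates the error, which is how the s3.py stubs above behave):

import pytest
from unittest.mock import patch

from s3 import write_to_customer_registry

@patch('s3._connect_to_s3', side_effect=ConnectionError('S3 unreachable'))
def test_write_to_customer_registry_propagates_connection_error(connect_mock):
    # the failing connection should bubble up to the caller
    with pytest.raises(ConnectionError):
        write_to_customer_registry('customer-123')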
I came up with a solution that worked for me. Posting it here in case it is helpful for someone.
import json
from unittest.mock import Mock, patch

def setup(self):
    self.customer = Customer.objects.create('tiertranstests')
    self.customer.save()

def test_build_customer_registry(self):
    mock_connection = Mock()
    mock_bucket = Mock()
    mock_s3_key = Mock()
    customer_registry_dict = json.dumps(build_customer_registry_dict(self.customer))
    # Patch S3 connection and Key class of registry method
    with patch('<path>.customer_registry.S3Connection', Mock(return_value=mock_connection)), \
         patch('<path>.customer_registry.Key', Mock(return_value=mock_s3_key)):
        mock_connection.get_bucket = Mock(return_value=mock_bucket)
        mock_s3_key.set_metadata.return_value = None
        mock_s3_key.set_contents_from_string = Mock(return_value=customer_registry_dict)
        write_to_customer_registry(self.customer)
        mock_s3_key.set_contents_from_string.assert_called_once_with(customer_registry_dict)