From b0a280bde2c7a722c8d7e1b278c3b458781ecf6d Mon Sep 17 00:00:00 2001 From: Diego Argueta Date: Tue, 18 Dec 2018 14:08:37 -0800 Subject: [PATCH 1/6] Move S3 storage to SpooledTemporaryFile --- moto/s3/models.py | 42 ++++++++++++++----- tests/test_s3/test_s3.py | 4 +- tests/test_s3/test_server.py | 2 +- .../test_bucket_path_server.py | 6 +-- .../test_s3bucket_path/test_s3bucket_path.py | 4 +- 5 files changed, 40 insertions(+), 18 deletions(-) diff --git a/moto/s3/models.py b/moto/s3/models.py index bb4d7848c..4ce2afb34 100644 --- a/moto/s3/models.py +++ b/moto/s3/models.py @@ -8,6 +8,7 @@ import itertools import codecs import random import string +import tempfile import six @@ -21,6 +22,7 @@ from .utils import clean_key_name, _VersionedKeyStore UPLOAD_ID_BYTES = 43 UPLOAD_PART_MIN_SIZE = 5242880 STORAGE_CLASS = ["STANDARD", "REDUCED_REDUNDANCY", "STANDARD_IA", "ONEZONE_IA"] +DEFAULT_KEY_BUFFER_SIZE = 2 ** 24 class FakeDeleteMarker(BaseModel): @@ -42,9 +44,9 @@ class FakeDeleteMarker(BaseModel): class FakeKey(BaseModel): - def __init__(self, name, value, storage="STANDARD", etag=None, is_versioned=False, version_id=0): + def __init__(self, name, value, storage="STANDARD", etag=None, is_versioned=False, version_id=0, + max_buffer_size=DEFAULT_KEY_BUFFER_SIZE): self.name = name - self.value = value self.last_modified = datetime.datetime.utcnow() self.acl = get_canned_acl('private') self.website_redirect_location = None @@ -56,10 +58,24 @@ class FakeKey(BaseModel): self._is_versioned = is_versioned self._tagging = FakeTagging() + self.value_buffer = tempfile.SpooledTemporaryFile(max_size=max_buffer_size) + self.value = value + @property def version_id(self): return self._version_id + @property + def value(self): + self.value_buffer.seek(0) + return self.value_buffer.read() + + @value.setter + def value(self, new_value): + self.value_buffer.seek(0) + self.value_buffer.truncate() + self.value_buffer.write(new_value) + def copy(self, new_name=None): r = copy.deepcopy(self) if new_name is not None: @@ -83,7 +99,9 @@ class FakeKey(BaseModel): self.acl = acl def append_to_value(self, value): - self.value += value + self.value_buffer.seek(0, os.SEEK_END) + self.value_buffer.write(value) + self.last_modified = datetime.datetime.utcnow() self._etag = None # must recalculate etag if self._is_versioned: @@ -101,11 +119,14 @@ class FakeKey(BaseModel): def etag(self): if self._etag is None: value_md5 = hashlib.md5() - if isinstance(self.value, six.text_type): - value = self.value.encode("utf-8") - else: - value = self.value - value_md5.update(value) + + self.value_buffer.seek(0) + while True: + block = self.value_buffer.read(DEFAULT_KEY_BUFFER_SIZE) + if not block: + break + value_md5.update(block) + self._etag = value_md5.hexdigest() return '"{0}"'.format(self._etag) @@ -132,7 +153,7 @@ class FakeKey(BaseModel): res = { 'ETag': self.etag, 'last-modified': self.last_modified_RFC1123, - 'content-length': str(len(self.value)), + 'content-length': str(self.size), } if self._storage_class != 'STANDARD': res['x-amz-storage-class'] = self._storage_class @@ -150,7 +171,8 @@ class FakeKey(BaseModel): @property def size(self): - return len(self.value) + self.value_buffer.seek(0, os.SEEK_END) + return self.value_buffer.tell() @property def storage_class(self): diff --git a/tests/test_s3/test_s3.py b/tests/test_s3/test_s3.py index 6e339abb6..07137bcfa 100644 --- a/tests/test_s3/test_s3.py +++ b/tests/test_s3/test_s3.py @@ -524,7 +524,7 @@ def test_post_to_bucket(): requests.post("https://foobar.s3.amazonaws.com/", { 'key': 'the-key', - 'file': 'nothing' + 'file': b'nothing' }) bucket.get_key('the-key').get_contents_as_string().should.equal(b'nothing') @@ -538,7 +538,7 @@ def test_post_with_metadata_to_bucket(): requests.post("https://foobar.s3.amazonaws.com/", { 'key': 'the-key', - 'file': 'nothing', + 'file': b'nothing', 'x-amz-meta-test': 'metadata' }) diff --git a/tests/test_s3/test_server.py b/tests/test_s3/test_server.py index 9c8252a04..934dd5a42 100644 --- a/tests/test_s3/test_server.py +++ b/tests/test_s3/test_server.py @@ -72,7 +72,7 @@ def test_s3_server_post_to_bucket(): test_client.post('/', "https://tester.localhost:5000/", data={ 'key': 'the-key', - 'file': 'nothing' + 'file': b'nothing' }) res = test_client.get('/the-key', 'http://tester.localhost:5000/') diff --git a/tests/test_s3bucket_path/test_bucket_path_server.py b/tests/test_s3bucket_path/test_bucket_path_server.py index 434110e87..604adc289 100644 --- a/tests/test_s3bucket_path/test_bucket_path_server.py +++ b/tests/test_s3bucket_path/test_bucket_path_server.py @@ -73,7 +73,7 @@ def test_s3_server_post_to_bucket(): test_client.post('/foobar2', "https://localhost:5000/", data={ 'key': 'the-key', - 'file': 'nothing' + 'file': b'nothing' }) res = test_client.get('/foobar2/the-key', 'http://localhost:5000/') @@ -89,7 +89,7 @@ def test_s3_server_put_ipv6(): test_client.post('/foobar2', "https://[::]:5000/", data={ 'key': 'the-key', - 'file': 'nothing' + 'file': b'nothing' }) res = test_client.get('/foobar2/the-key', 'http://[::]:5000/') @@ -105,7 +105,7 @@ def test_s3_server_put_ipv4(): test_client.post('/foobar2', "https://127.0.0.1:5000/", data={ 'key': 'the-key', - 'file': 'nothing' + 'file': b'nothing' }) res = test_client.get('/foobar2/the-key', 'http://127.0.0.1:5000/') diff --git a/tests/test_s3bucket_path/test_s3bucket_path.py b/tests/test_s3bucket_path/test_s3bucket_path.py index 21d786c61..58ae4db6f 100644 --- a/tests/test_s3bucket_path/test_s3bucket_path.py +++ b/tests/test_s3bucket_path/test_s3bucket_path.py @@ -198,7 +198,7 @@ def test_post_to_bucket(): requests.post("https://s3.amazonaws.com/foobar", { 'key': 'the-key', - 'file': 'nothing' + 'file': b'nothing' }) bucket.get_key('the-key').get_contents_as_string().should.equal(b'nothing') @@ -212,7 +212,7 @@ def test_post_with_metadata_to_bucket(): requests.post("https://s3.amazonaws.com/foobar", { 'key': 'the-key', - 'file': 'nothing', + 'file': b'nothing', 'x-amz-meta-test': 'metadata' }) From 26adaeefa0cb24983aa46bc873a16f857f2bb686 Mon Sep 17 00:00:00 2001 From: Diego Argueta Date: Tue, 18 Dec 2018 14:08:51 -0800 Subject: [PATCH 2/6] Start testing on 3.7 --- .travis.yml | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3a5de0fa2..bf0222afb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,4 @@ +dist: xenial language: python sudo: false services: @@ -5,22 +6,10 @@ services: python: - 2.7 - 3.6 + - 3.7 env: - TEST_SERVER_MODE=false - TEST_SERVER_MODE=true -# Due to incomplete Python 3.7 support on Travis CI ( -# https://github.com/travis-ci/travis-ci/issues/9815), -# using a matrix is necessary -matrix: - include: - - python: 3.7 - env: TEST_SERVER_MODE=false - dist: xenial - sudo: true - - python: 3.7 - env: TEST_SERVER_MODE=true - dist: xenial - sudo: true before_install: - export BOTO_CONFIG=/dev/null - export AWS_SECRET_ACCESS_KEY=foobar_secret From 2cc8784e5c31b95659df5b9fdf4f755ad37c726a Mon Sep 17 00:00:00 2001 From: Diego Argueta Date: Tue, 18 Dec 2018 14:53:52 -0800 Subject: [PATCH 3/6] Restore files modified in non-working fix. --- tests/test_s3/test_s3.py | 4 ++-- tests/test_s3/test_server.py | 2 +- tests/test_s3bucket_path/test_bucket_path_server.py | 6 +++--- tests/test_s3bucket_path/test_s3bucket_path.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_s3/test_s3.py b/tests/test_s3/test_s3.py index 07137bcfa..6e339abb6 100644 --- a/tests/test_s3/test_s3.py +++ b/tests/test_s3/test_s3.py @@ -524,7 +524,7 @@ def test_post_to_bucket(): requests.post("https://foobar.s3.amazonaws.com/", { 'key': 'the-key', - 'file': b'nothing' + 'file': 'nothing' }) bucket.get_key('the-key').get_contents_as_string().should.equal(b'nothing') @@ -538,7 +538,7 @@ def test_post_with_metadata_to_bucket(): requests.post("https://foobar.s3.amazonaws.com/", { 'key': 'the-key', - 'file': b'nothing', + 'file': 'nothing', 'x-amz-meta-test': 'metadata' }) diff --git a/tests/test_s3/test_server.py b/tests/test_s3/test_server.py index 934dd5a42..9c8252a04 100644 --- a/tests/test_s3/test_server.py +++ b/tests/test_s3/test_server.py @@ -72,7 +72,7 @@ def test_s3_server_post_to_bucket(): test_client.post('/', "https://tester.localhost:5000/", data={ 'key': 'the-key', - 'file': b'nothing' + 'file': 'nothing' }) res = test_client.get('/the-key', 'http://tester.localhost:5000/') diff --git a/tests/test_s3bucket_path/test_bucket_path_server.py b/tests/test_s3bucket_path/test_bucket_path_server.py index 604adc289..434110e87 100644 --- a/tests/test_s3bucket_path/test_bucket_path_server.py +++ b/tests/test_s3bucket_path/test_bucket_path_server.py @@ -73,7 +73,7 @@ def test_s3_server_post_to_bucket(): test_client.post('/foobar2', "https://localhost:5000/", data={ 'key': 'the-key', - 'file': b'nothing' + 'file': 'nothing' }) res = test_client.get('/foobar2/the-key', 'http://localhost:5000/') @@ -89,7 +89,7 @@ def test_s3_server_put_ipv6(): test_client.post('/foobar2', "https://[::]:5000/", data={ 'key': 'the-key', - 'file': b'nothing' + 'file': 'nothing' }) res = test_client.get('/foobar2/the-key', 'http://[::]:5000/') @@ -105,7 +105,7 @@ def test_s3_server_put_ipv4(): test_client.post('/foobar2', "https://127.0.0.1:5000/", data={ 'key': 'the-key', - 'file': b'nothing' + 'file': 'nothing' }) res = test_client.get('/foobar2/the-key', 'http://127.0.0.1:5000/') diff --git a/tests/test_s3bucket_path/test_s3bucket_path.py b/tests/test_s3bucket_path/test_s3bucket_path.py index 58ae4db6f..21d786c61 100644 --- a/tests/test_s3bucket_path/test_s3bucket_path.py +++ b/tests/test_s3bucket_path/test_s3bucket_path.py @@ -198,7 +198,7 @@ def test_post_to_bucket(): requests.post("https://s3.amazonaws.com/foobar", { 'key': 'the-key', - 'file': b'nothing' + 'file': 'nothing' }) bucket.get_key('the-key').get_contents_as_string().should.equal(b'nothing') @@ -212,7 +212,7 @@ def test_post_with_metadata_to_bucket(): requests.post("https://s3.amazonaws.com/foobar", { 'key': 'the-key', - 'file': b'nothing', + 'file': 'nothing', 'x-amz-meta-test': 'metadata' }) From f15f006f7834ab69a5ee38ae30955fdc3863562e Mon Sep 17 00:00:00 2001 From: Diego Argueta <620513-dargueta@users.noreply.github.com> Date: Thu, 20 Dec 2018 00:34:04 -0800 Subject: [PATCH 4/6] Hack around text problem in unit tests. Now that payloads are not allowed to be text, some unit tests will cause crashes on Python 3 because the payload sent by requests gets passed to FakeKey as a string instead of raw bytes. I haven't been able to figure out a way around the issue that doesn't get super messy inside s3/responses.py so I'm just converting the value to bytes using the system's default encoding. --- moto/s3/models.py | 7 +++++++ tox.ini | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/moto/s3/models.py b/moto/s3/models.py index 4ce2afb34..525b3c81a 100644 --- a/moto/s3/models.py +++ b/moto/s3/models.py @@ -9,6 +9,7 @@ import codecs import random import string import tempfile +import sys import six @@ -23,6 +24,7 @@ UPLOAD_ID_BYTES = 43 UPLOAD_PART_MIN_SIZE = 5242880 STORAGE_CLASS = ["STANDARD", "REDUCED_REDUNDANCY", "STANDARD_IA", "ONEZONE_IA"] DEFAULT_KEY_BUFFER_SIZE = 2 ** 24 +DEFAULT_TEXT_ENCODING = sys.getdefaultencoding() class FakeDeleteMarker(BaseModel): @@ -74,6 +76,11 @@ class FakeKey(BaseModel): def value(self, new_value): self.value_buffer.seek(0) self.value_buffer.truncate() + + # Hack for working around moto's own unit tests; this probably won't + # actually get hit in normal use. + if isinstance(new_value, six.text_type): + new_value = new_value.encode(DEFAULT_TEXT_ENCODING) self.value_buffer.write(new_value) def copy(self, new_name=None): diff --git a/tox.ini b/tox.ini index 0f3f1466a..454409d72 100644 --- a/tox.ini +++ b/tox.ini @@ -2,6 +2,10 @@ envlist = py27, py36 [testenv] +setenv = + BOTO_CONFIG=/dev/null + AWS_SECRET_ACCESS_KEY=foobar_secret + AWS_ACCESS_KEY_ID=foobar_key deps = -r{toxinidir}/requirements.txt -r{toxinidir}/requirements-dev.txt From 191ad6d778c9f0769726dac312032c86bbbb48f4 Mon Sep 17 00:00:00 2001 From: Diego Argueta Date: Thu, 20 Dec 2018 11:15:15 -0800 Subject: [PATCH 5/6] Make keys pickleable --- moto/s3/models.py | 48 ++++++++++++++++++++++++++++------------ tests/test_s3/test_s3.py | 11 +++++++++ 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/moto/s3/models.py b/moto/s3/models.py index 525b3c81a..767a8332f 100644 --- a/moto/s3/models.py +++ b/moto/s3/models.py @@ -23,7 +23,7 @@ from .utils import clean_key_name, _VersionedKeyStore UPLOAD_ID_BYTES = 43 UPLOAD_PART_MIN_SIZE = 5242880 STORAGE_CLASS = ["STANDARD", "REDUCED_REDUNDANCY", "STANDARD_IA", "ONEZONE_IA"] -DEFAULT_KEY_BUFFER_SIZE = 2 ** 24 +DEFAULT_KEY_BUFFER_SIZE = 16 * 1024 * 1024 DEFAULT_TEXT_ENCODING = sys.getdefaultencoding() @@ -60,7 +60,8 @@ class FakeKey(BaseModel): self._is_versioned = is_versioned self._tagging = FakeTagging() - self.value_buffer = tempfile.SpooledTemporaryFile(max_size=max_buffer_size) + self._value_buffer = tempfile.SpooledTemporaryFile(max_size=max_buffer_size) + self._max_buffer_size = max_buffer_size self.value = value @property @@ -69,19 +70,19 @@ class FakeKey(BaseModel): @property def value(self): - self.value_buffer.seek(0) - return self.value_buffer.read() + self._value_buffer.seek(0) + return self._value_buffer.read() @value.setter def value(self, new_value): - self.value_buffer.seek(0) - self.value_buffer.truncate() + self._value_buffer.seek(0) + self._value_buffer.truncate() # Hack for working around moto's own unit tests; this probably won't # actually get hit in normal use. if isinstance(new_value, six.text_type): new_value = new_value.encode(DEFAULT_TEXT_ENCODING) - self.value_buffer.write(new_value) + self._value_buffer.write(new_value) def copy(self, new_name=None): r = copy.deepcopy(self) @@ -106,8 +107,8 @@ class FakeKey(BaseModel): self.acl = acl def append_to_value(self, value): - self.value_buffer.seek(0, os.SEEK_END) - self.value_buffer.write(value) + self._value_buffer.seek(0, os.SEEK_END) + self._value_buffer.write(value) self.last_modified = datetime.datetime.utcnow() self._etag = None # must recalculate etag @@ -126,10 +127,9 @@ class FakeKey(BaseModel): def etag(self): if self._etag is None: value_md5 = hashlib.md5() - - self.value_buffer.seek(0) + self._value_buffer.seek(0) while True: - block = self.value_buffer.read(DEFAULT_KEY_BUFFER_SIZE) + block = self._value_buffer.read(DEFAULT_KEY_BUFFER_SIZE) if not block: break value_md5.update(block) @@ -178,8 +178,8 @@ class FakeKey(BaseModel): @property def size(self): - self.value_buffer.seek(0, os.SEEK_END) - return self.value_buffer.tell() + self._value_buffer.seek(0, os.SEEK_END) + return self._value_buffer.tell() @property def storage_class(self): @@ -190,6 +190,26 @@ class FakeKey(BaseModel): if self._expiry is not None: return self._expiry.strftime("%a, %d %b %Y %H:%M:%S GMT") + # Keys need to be pickleable due to some implementation details of boto3. + # Since file objects aren't pickleable, we need to override the default + # behavior. The following is adapted from the Python docs: + # https://docs.python.org/3/library/pickle.html#handling-stateful-objects + def __getstate__(self): + state = self.__dict__.copy() + state['value'] = self.value + del state['_value_buffer'] + return state + + def __setstate__(self, state): + self.__dict__.update({ + k: v for k, v in six.iteritems(state) + if k != 'value' + }) + + self._value_buffer = \ + tempfile.SpooledTemporaryFile(max_size=self._max_buffer_size) + self.value = state['value'] + class FakeMultipart(BaseModel): diff --git a/tests/test_s3/test_s3.py b/tests/test_s3/test_s3.py index 6e339abb6..3af214c39 100644 --- a/tests/test_s3/test_s3.py +++ b/tests/test_s3/test_s3.py @@ -8,6 +8,7 @@ from functools import wraps from gzip import GzipFile from io import BytesIO import zlib +import pickle import json import boto @@ -65,6 +66,16 @@ class MyModel(object): s3.put_object(Bucket='mybucket', Key=self.name, Body=self.value) +@mock_s3 +def test_keys_are_pickleable(): + """Keys must be pickleable due to boto3 implementation details.""" + key = s3model.FakeKey('name', b'data!') + pickled = pickle.dumps(key) + loaded = pickle.loads(pickled) + + assert loaded.value == key.value + + @mock_s3 def test_my_model_save(): # Create Bucket so that test can run From 1998d59cfcbff4cf6240a16889c2c98cd7e5a416 Mon Sep 17 00:00:00 2001 From: Diego Argueta Date: Thu, 20 Dec 2018 11:40:13 -0800 Subject: [PATCH 6/6] Add more tests to please Coveralls --- tests/test_s3/test_s3.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/tests/test_s3/test_s3.py b/tests/test_s3/test_s3.py index 3af214c39..10fd68f48 100644 --- a/tests/test_s3/test_s3.py +++ b/tests/test_s3/test_s3.py @@ -70,12 +70,46 @@ class MyModel(object): def test_keys_are_pickleable(): """Keys must be pickleable due to boto3 implementation details.""" key = s3model.FakeKey('name', b'data!') + assert key.value == b'data!' + pickled = pickle.dumps(key) loaded = pickle.loads(pickled) - assert loaded.value == key.value +@mock_s3 +def test_append_to_value__basic(): + key = s3model.FakeKey('name', b'data!') + assert key.value == b'data!' + assert key.size == 5 + + key.append_to_value(b' And even more data') + assert key.value == b'data! And even more data' + assert key.size == 24 + + +@mock_s3 +def test_append_to_value__nothing_added(): + key = s3model.FakeKey('name', b'data!') + assert key.value == b'data!' + assert key.size == 5 + + key.append_to_value(b'') + assert key.value == b'data!' + assert key.size == 5 + + +@mock_s3 +def test_append_to_value__empty_key(): + key = s3model.FakeKey('name', b'') + assert key.value == b'' + assert key.size == 0 + + key.append_to_value(b'stuff') + assert key.value == b'stuff' + assert key.size == 5 + + @mock_s3 def test_my_model_save(): # Create Bucket so that test can run