Merge pull request #1999 from dargueta/s3-spooling

Don't store S3 entirely in memory
This commit is contained in:
Steve Pulec 2018-12-28 21:29:19 -05:00 committed by GitHub
commit 97d9d46770
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 110 additions and 23 deletions

View File

@ -1,3 +1,4 @@
dist: xenial
language: python
sudo: false
services:
@ -5,22 +6,10 @@ services:
python:
- 2.7
- 3.6
- 3.7
env:
- TEST_SERVER_MODE=false
- TEST_SERVER_MODE=true
# Due to incomplete Python 3.7 support on Travis CI (
# https://github.com/travis-ci/travis-ci/issues/9815),
# using a matrix is necessary
matrix:
include:
- python: 3.7
env: TEST_SERVER_MODE=false
dist: xenial
sudo: true
- python: 3.7
env: TEST_SERVER_MODE=true
dist: xenial
sudo: true
before_install:
- export BOTO_CONFIG=/dev/null
install:

View File

@ -8,6 +8,8 @@ import itertools
import codecs
import random
import string
import tempfile
import sys
import six
@ -23,6 +25,8 @@ MIN_BUCKET_NAME_LENGTH = 3
UPLOAD_ID_BYTES = 43
UPLOAD_PART_MIN_SIZE = 5242880
STORAGE_CLASS = ["STANDARD", "REDUCED_REDUNDANCY", "STANDARD_IA", "ONEZONE_IA"]
DEFAULT_KEY_BUFFER_SIZE = 16 * 1024 * 1024
DEFAULT_TEXT_ENCODING = sys.getdefaultencoding()
class FakeDeleteMarker(BaseModel):
@ -44,9 +48,9 @@ class FakeDeleteMarker(BaseModel):
class FakeKey(BaseModel):
def __init__(self, name, value, storage="STANDARD", etag=None, is_versioned=False, version_id=0):
def __init__(self, name, value, storage="STANDARD", etag=None, is_versioned=False, version_id=0,
max_buffer_size=DEFAULT_KEY_BUFFER_SIZE):
self.name = name
self.value = value
self.last_modified = datetime.datetime.utcnow()
self.acl = get_canned_acl('private')
self.website_redirect_location = None
@ -58,10 +62,30 @@ class FakeKey(BaseModel):
self._is_versioned = is_versioned
self._tagging = FakeTagging()
self._value_buffer = tempfile.SpooledTemporaryFile(max_size=max_buffer_size)
self._max_buffer_size = max_buffer_size
self.value = value
@property
def version_id(self):
    """Return the version identifier assigned to this key (0 by default)."""
    return self._version_id
@property
def value(self):
    """Return the entire key contents as bytes.

    Rewinds the spooled buffer and reads it back in full.
    """
    buf = self._value_buffer
    buf.seek(0)
    return buf.read()

@value.setter
def value(self, new_value):
    """Replace the key contents, discarding whatever was stored before."""
    buf = self._value_buffer
    buf.seek(0)
    buf.truncate()

    # Hack for working around moto's own unit tests; text input probably
    # won't actually get hit in normal use, but encode it just in case.
    if isinstance(new_value, six.text_type):
        new_value = new_value.encode(DEFAULT_TEXT_ENCODING)
    buf.write(new_value)
def copy(self, new_name=None):
r = copy.deepcopy(self)
if new_name is not None:
@ -85,7 +109,9 @@ class FakeKey(BaseModel):
self.acl = acl
def append_to_value(self, value):
self.value += value
self._value_buffer.seek(0, os.SEEK_END)
self._value_buffer.write(value)
self.last_modified = datetime.datetime.utcnow()
self._etag = None # must recalculate etag
if self._is_versioned:
@ -103,11 +129,13 @@ class FakeKey(BaseModel):
def etag(self):
if self._etag is None:
value_md5 = hashlib.md5()
if isinstance(self.value, six.text_type):
value = self.value.encode("utf-8")
else:
value = self.value
value_md5.update(value)
self._value_buffer.seek(0)
while True:
block = self._value_buffer.read(DEFAULT_KEY_BUFFER_SIZE)
if not block:
break
value_md5.update(block)
self._etag = value_md5.hexdigest()
return '"{0}"'.format(self._etag)
@ -134,7 +162,7 @@ class FakeKey(BaseModel):
res = {
'ETag': self.etag,
'last-modified': self.last_modified_RFC1123,
'content-length': str(len(self.value)),
'content-length': str(self.size),
}
if self._storage_class != 'STANDARD':
res['x-amz-storage-class'] = self._storage_class
@ -152,7 +180,8 @@ class FakeKey(BaseModel):
@property
def size(self):
return len(self.value)
self._value_buffer.seek(0, os.SEEK_END)
return self._value_buffer.tell()
@property
def storage_class(self):
@ -163,6 +192,26 @@ class FakeKey(BaseModel):
if self._expiry is not None:
return self._expiry.strftime("%a, %d %b %Y %H:%M:%S GMT")
# Keys need to be pickleable due to some implementation details of boto3.
# Since file objects aren't pickleable, we need to override the default
# behavior. The following is adapted from the Python docs:
# https://docs.python.org/3/library/pickle.html#handling-stateful-objects
def __getstate__(self):
    """Support pickling (boto3 pickles keys internally).

    SpooledTemporaryFile objects are not picklable, so the buffer is
    flattened to raw bytes under the 'value' key and dropped from the
    state dict.
    """
    state = dict(self.__dict__)
    state['value'] = self.value
    del state['_value_buffer']
    return state
def __setstate__(self, state):
    """Restore a pickled key, rebuilding the spooled buffer from bytes."""
    for attr, val in six.iteritems(state):
        if attr != 'value':
            self.__dict__[attr] = val

    # Recreate the unpicklable buffer, then refill it via the value setter.
    self._value_buffer = tempfile.SpooledTemporaryFile(
        max_size=self._max_buffer_size)
    self.value = state['value']
class FakeMultipart(BaseModel):

View File

@ -8,6 +8,7 @@ from functools import wraps
from gzip import GzipFile
from io import BytesIO
import zlib
import pickle
import json
import boto
@ -65,6 +66,50 @@ class MyModel(object):
s3.put_object(Bucket='mybucket', Key=self.name, Body=self.value)
@mock_s3
def test_keys_are_pickleable():
    """Keys must be pickleable due to boto3 implementation details."""
    original = s3model.FakeKey('name', b'data!')
    assert original.value == b'data!'

    restored = pickle.loads(pickle.dumps(original))
    assert restored.value == original.value
@mock_s3
def test_append_to_value__basic():
    """Appending data grows both the stored value and the reported size."""
    key = s3model.FakeKey('name', b'data!')
    assert (key.value, key.size) == (b'data!', 5)

    key.append_to_value(b' And even more data')
    assert (key.value, key.size) == (b'data! And even more data', 24)
@mock_s3
def test_append_to_value__nothing_added():
    """Appending an empty byte string leaves the key untouched."""
    key = s3model.FakeKey('name', b'data!')
    assert (key.value, key.size) == (b'data!', 5)

    key.append_to_value(b'')
    assert (key.value, key.size) == (b'data!', 5)
@mock_s3
def test_append_to_value__empty_key():
    """Appending to an initially empty key behaves like plain assignment."""
    key = s3model.FakeKey('name', b'')
    assert (key.value, key.size) == (b'', 0)

    key.append_to_value(b'stuff')
    assert (key.value, key.size) == (b'stuff', 5)
@mock_s3
def test_my_model_save():
# Create Bucket so that test can run

View File

@ -2,6 +2,10 @@
envlist = py27, py36
[testenv]
setenv =
BOTO_CONFIG=/dev/null
AWS_SECRET_ACCESS_KEY=foobar_secret
AWS_ACCESS_KEY_ID=foobar_key
deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/requirements-dev.txt