Merge pull request #1999 from dargueta/s3-spooling

Don't store S3 entirely in memory
This commit is contained in:
Steve Pulec 2018-12-28 21:29:19 -05:00 committed by GitHub
commit 97d9d46770
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 110 additions and 23 deletions

View File

@ -1,3 +1,4 @@
dist: xenial
language: python
sudo: false
services:
@ -5,22 +6,10 @@ services:
python:
- 2.7
- 3.6
- 3.7
env:
- TEST_SERVER_MODE=false
- TEST_SERVER_MODE=true
# Due to incomplete Python 3.7 support on Travis CI (
# https://github.com/travis-ci/travis-ci/issues/9815),
# using a matrix is necessary
matrix:
include:
- python: 3.7
env: TEST_SERVER_MODE=false
dist: xenial
sudo: true
- python: 3.7
env: TEST_SERVER_MODE=true
dist: xenial
sudo: true
before_install:
- export BOTO_CONFIG=/dev/null
install:

View File

@ -8,6 +8,8 @@ import itertools
import codecs
import random
import string
import tempfile
import sys
import six
@ -23,6 +25,8 @@ MIN_BUCKET_NAME_LENGTH = 3
UPLOAD_ID_BYTES = 43
UPLOAD_PART_MIN_SIZE = 5242880
STORAGE_CLASS = ["STANDARD", "REDUCED_REDUNDANCY", "STANDARD_IA", "ONEZONE_IA"]
DEFAULT_KEY_BUFFER_SIZE = 16 * 1024 * 1024
DEFAULT_TEXT_ENCODING = sys.getdefaultencoding()
class FakeDeleteMarker(BaseModel):
@ -44,9 +48,9 @@ class FakeDeleteMarker(BaseModel):
class FakeKey(BaseModel):
def __init__(self, name, value, storage="STANDARD", etag=None, is_versioned=False, version_id=0): def __init__(self, name, value, storage="STANDARD", etag=None, is_versioned=False, version_id=0,
max_buffer_size=DEFAULT_KEY_BUFFER_SIZE):
self.name = name
self.value = value
self.last_modified = datetime.datetime.utcnow()
self.acl = get_canned_acl('private')
self.website_redirect_location = None
@ -58,10 +62,30 @@ class FakeKey(BaseModel):
self._is_versioned = is_versioned
self._tagging = FakeTagging()
self._value_buffer = tempfile.SpooledTemporaryFile(max_size=max_buffer_size)
self._max_buffer_size = max_buffer_size
self.value = value
@property
def version_id(self):
return self._version_id
@property
def value(self):
self._value_buffer.seek(0)
return self._value_buffer.read()
@value.setter
def value(self, new_value):
self._value_buffer.seek(0)
self._value_buffer.truncate()
# Hack for working around moto's own unit tests; this probably won't
# actually get hit in normal use.
if isinstance(new_value, six.text_type):
new_value = new_value.encode(DEFAULT_TEXT_ENCODING)
self._value_buffer.write(new_value)
def copy(self, new_name=None):
r = copy.deepcopy(self)
if new_name is not None:
@ -85,7 +109,9 @@ class FakeKey(BaseModel):
self.acl = acl
def append_to_value(self, value):
self.value += value self._value_buffer.seek(0, os.SEEK_END)
self._value_buffer.write(value)
self.last_modified = datetime.datetime.utcnow()
self._etag = None # must recalculate etag
if self._is_versioned:
@ -103,11 +129,13 @@ class FakeKey(BaseModel):
def etag(self):
if self._etag is None:
value_md5 = hashlib.md5()
if isinstance(self.value, six.text_type): self._value_buffer.seek(0)
value = self.value.encode("utf-8") while True:
else: block = self._value_buffer.read(DEFAULT_KEY_BUFFER_SIZE)
value = self.value if not block:
value_md5.update(value) break
value_md5.update(block)
self._etag = value_md5.hexdigest()
return '"{0}"'.format(self._etag)
@ -134,7 +162,7 @@ class FakeKey(BaseModel):
res = {
'ETag': self.etag,
'last-modified': self.last_modified_RFC1123,
'content-length': str(len(self.value)), 'content-length': str(self.size),
}
if self._storage_class != 'STANDARD':
res['x-amz-storage-class'] = self._storage_class
@ -152,7 +180,8 @@ class FakeKey(BaseModel):
@property
def size(self):
return len(self.value) self._value_buffer.seek(0, os.SEEK_END)
return self._value_buffer.tell()
@property
def storage_class(self):
@ -163,6 +192,26 @@ class FakeKey(BaseModel):
if self._expiry is not None:
return self._expiry.strftime("%a, %d %b %Y %H:%M:%S GMT")
# Keys need to be pickleable due to some implementation details of boto3.
# Since file objects aren't pickleable, we need to override the default
# behavior. The following is adapted from the Python docs:
# https://docs.python.org/3/library/pickle.html#handling-stateful-objects
def __getstate__(self):
state = self.__dict__.copy()
state['value'] = self.value
del state['_value_buffer']
return state
def __setstate__(self, state):
self.__dict__.update({
k: v for k, v in six.iteritems(state)
if k != 'value'
})
self._value_buffer = \
tempfile.SpooledTemporaryFile(max_size=self._max_buffer_size)
self.value = state['value']
class FakeMultipart(BaseModel):

View File

@ -8,6 +8,7 @@ from functools import wraps
from gzip import GzipFile
from io import BytesIO
import zlib
import pickle
import json
import boto
@ -65,6 +66,50 @@ class MyModel(object):
s3.put_object(Bucket='mybucket', Key=self.name, Body=self.value)
@mock_s3
def test_keys_are_pickleable():
"""Keys must be pickleable due to boto3 implementation details."""
key = s3model.FakeKey('name', b'data!')
assert key.value == b'data!'
pickled = pickle.dumps(key)
loaded = pickle.loads(pickled)
assert loaded.value == key.value
@mock_s3
def test_append_to_value__basic():
key = s3model.FakeKey('name', b'data!')
assert key.value == b'data!'
assert key.size == 5
key.append_to_value(b' And even more data')
assert key.value == b'data! And even more data'
assert key.size == 24
@mock_s3
def test_append_to_value__nothing_added():
key = s3model.FakeKey('name', b'data!')
assert key.value == b'data!'
assert key.size == 5
key.append_to_value(b'')
assert key.value == b'data!'
assert key.size == 5
@mock_s3
def test_append_to_value__empty_key():
key = s3model.FakeKey('name', b'')
assert key.value == b''
assert key.size == 0
key.append_to_value(b'stuff')
assert key.value == b'stuff'
assert key.size == 5
@mock_s3
def test_my_model_save():
# Create Bucket so that test can run

View File

@ -2,6 +2,10 @@
envlist = py27, py36
[testenv]
setenv =
BOTO_CONFIG=/dev/null
AWS_SECRET_ACCESS_KEY=foobar_secret
AWS_ACCESS_KEY_ID=foobar_key
deps =
-r{toxinidir}/requirements.txt
-r{toxinidir}/requirements-dev.txt