refactor etag calculation to support a correct multipart key etag

This commit is contained in:
Konstantinos Koukopoulos 2014-04-02 14:40:04 +03:00
parent f764e9e0f3
commit 84ec801a52
2 changed files with 50 additions and 10 deletions

View File

@ -14,13 +14,14 @@ UPLOAD_PART_MIN_SIZE = 5242880
class FakeKey(object):
def __init__(self, name, value, storage="STANDARD"):
def __init__(self, name, value, storage="STANDARD", etag=None):
self.name = name
self.value = value
self.last_modified = datetime.datetime.now()
self._storage_class = storage
self._metadata = {}
self._expiry = None
self._etag = etag
def copy(self, new_name=None):
r = copy.deepcopy(self)
@ -40,15 +41,18 @@ class FakeKey(object):
def append_to_value(self, value):
self.value += value
self.last_modified = datetime.datetime.now()
self._etag = None # must recalculate etag
def restore(self, days):
self._expiry = datetime.datetime.now() + datetime.timedelta(days)
@property
def etag(self):
value_md5 = hashlib.md5()
value_md5.update(bytes(self.value))
return '"{0}"'.format(value_md5.hexdigest())
if self._etag is None:
value_md5 = hashlib.md5()
value_md5.update(bytes(self.value))
self._etag = value_md5.hexdigest()
return '"{0}"'.format(self._etag)
@property
def last_modified_ISO8601(self):
@ -99,14 +103,18 @@ class FakeMultipart(object):
def complete(self):
total = bytearray()
md5s = bytearray()
last_part_name = len(self.list_parts())
for part in self.list_parts():
if part.name != last_part_name and len(part.value) < UPLOAD_PART_MIN_SIZE:
return
return None, None
md5s.extend(part.etag.replace('"', '').decode('hex'))
total.extend(part.value)
return total
etag = hashlib.md5()
etag.update(bytes(md5s))
return total, "{}-{}".format(etag.hexdigest(), last_part_name)
def set_part(self, part_id, value):
if part_id < 1:
@ -163,12 +171,12 @@ class S3Backend(BaseBackend):
return self.buckets.pop(bucket_name)
return None
def set_key(self, bucket_name, key_name, value, storage=None):
def set_key(self, bucket_name, key_name, value, storage=None, etag=None):
key_name = clean_key_name(key_name)
bucket = self.buckets[bucket_name]
new_key = FakeKey(name=key_name, value=value,
storage=storage)
storage=storage, etag=etag)
bucket.keys[key_name] = new_key
return new_key
@ -196,12 +204,12 @@ class S3Backend(BaseBackend):
def complete_multipart(self, bucket_name, multipart_id):
bucket = self.buckets[bucket_name]
multipart = bucket.multiparts[multipart_id]
value = multipart.complete()
value, etag = multipart.complete()
if value is None:
return
del bucket.multiparts[multipart_id]
return self.set_key(bucket_name, multipart.key_name, value)
return self.set_key(bucket_name, multipart.key_name, value, etag=etag)
def cancel_multipart(self, bucket_name, multipart_id):
bucket = self.buckets[bucket_name]

View File

@ -38,6 +38,20 @@ def test_my_model_save():
conn.get_bucket('mybucket').get_key('steve').get_contents_as_string().should.equal('is awesome')
@mock_s3
def test_key_etag():
# Create Bucket so that test can run
conn = boto.connect_s3('the_key', 'the_secret')
conn.create_bucket('mybucket')
####################################
model_instance = MyModel('steve', 'is awesome')
model_instance.save()
conn.get_bucket('mybucket').get_key('steve').etag.should.equal(
'"d32bda93738f7e03adb22e66c90fbc04"')
@mock_s3
def test_multipart_upload_too_small():
conn = boto.connect_s3('the_key', 'the_secret')
@ -95,6 +109,24 @@ def test_multipart_upload_cancel():
# have the ability to list multipart uploads for a bucket.
@mock_s3
def test_multipart_etag():
# Create Bucket so that test can run
conn = boto.connect_s3('the_key', 'the_secret')
bucket = conn.create_bucket('mybucket')
multipart = bucket.initiate_multipart_upload("the-key")
part1 = '0' * 5242880
multipart.upload_part_from_file(BytesIO(part1), 1)
# last part, can be less than 5 MB
part2 = '1'
multipart.upload_part_from_file(BytesIO(part2), 2)
multipart.complete_upload()
# the key's etag should be the md5 of the concatenated part md5 digests, followed by "-<number of parts>"
bucket.get_key("the-key").etag.should.equal(
'"140f92a6df9f9e415f74a1463bcee9bb-2"')
@mock_s3
def test_missing_key():
conn = boto.connect_s3('the_key', 'the_secret')