From 84ec801a522e60d69ca0566ac33dfae318d6a708 Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Wed, 2 Apr 2014 14:40:04 +0300 Subject: [PATCH 1/2] refactor etag calculation to support a correct multipart key etag --- moto/s3/models.py | 28 ++++++++++++++++++---------- tests/test_s3/test_s3.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/moto/s3/models.py b/moto/s3/models.py index 03a7d4873..7f3ad50f8 100644 --- a/moto/s3/models.py +++ b/moto/s3/models.py @@ -14,13 +14,14 @@ UPLOAD_PART_MIN_SIZE = 5242880 class FakeKey(object): - def __init__(self, name, value, storage="STANDARD"): + def __init__(self, name, value, storage="STANDARD", etag=None): self.name = name self.value = value self.last_modified = datetime.datetime.now() self._storage_class = storage self._metadata = {} self._expiry = None + self._etag = etag def copy(self, new_name=None): r = copy.deepcopy(self) @@ -40,15 +41,18 @@ class FakeKey(object): def append_to_value(self, value): self.value += value self.last_modified = datetime.datetime.now() + self._etag = None # must recalculate etag def restore(self, days): self._expiry = datetime.datetime.now() + datetime.timedelta(days) @property def etag(self): - value_md5 = hashlib.md5() - value_md5.update(bytes(self.value)) - return '"{0}"'.format(value_md5.hexdigest()) + if self._etag is None: + value_md5 = hashlib.md5() + value_md5.update(bytes(self.value)) + self._etag = value_md5.hexdigest() + return '"{0}"'.format(self._etag) @property def last_modified_ISO8601(self): @@ -99,14 +103,18 @@ class FakeMultipart(object): def complete(self): total = bytearray() + md5s = bytearray() last_part_name = len(self.list_parts()) for part in self.list_parts(): if part.name != last_part_name and len(part.value) < UPLOAD_PART_MIN_SIZE: - return + return None, None + md5s.extend(part.etag.replace('"', '').decode('hex')) total.extend(part.value) - return total + etag = hashlib.md5() + etag.update(bytes(md5s)) + return total, "{}-{}".format(etag.hexdigest(), last_part_name) def set_part(self, part_id, value): if part_id < 1: @@ -163,12 +171,12 @@ class S3Backend(BaseBackend): return self.buckets.pop(bucket_name) return None - def set_key(self, bucket_name, key_name, value, storage=None): + def set_key(self, bucket_name, key_name, value, storage=None, etag=None): key_name = clean_key_name(key_name) bucket = self.buckets[bucket_name] new_key = FakeKey(name=key_name, value=value, - storage=storage) + storage=storage, etag=etag) bucket.keys[key_name] = new_key return new_key @@ -196,12 +204,12 @@ class S3Backend(BaseBackend): def complete_multipart(self, bucket_name, multipart_id): bucket = self.buckets[bucket_name] multipart = bucket.multiparts[multipart_id] - value = multipart.complete() + value, etag = multipart.complete() if value is None: return del bucket.multiparts[multipart_id] - return self.set_key(bucket_name, multipart.key_name, value) + return self.set_key(bucket_name, multipart.key_name, value, etag=etag) def cancel_multipart(self, bucket_name, multipart_id): bucket = self.buckets[bucket_name] diff --git a/tests/test_s3/test_s3.py b/tests/test_s3/test_s3.py index 3e8a53917..9d9a5f063 100644 --- a/tests/test_s3/test_s3.py +++ b/tests/test_s3/test_s3.py @@ -38,6 +38,20 @@ def test_my_model_save(): conn.get_bucket('mybucket').get_key('steve').get_contents_as_string().should.equal('is awesome') +@mock_s3 +def test_key_etag(): + # Create Bucket so that test can run + conn = boto.connect_s3('the_key', 'the_secret') + conn.create_bucket('mybucket') + #################################### + + model_instance = MyModel('steve', 'is awesome') + model_instance.save() + + conn.get_bucket('mybucket').get_key('steve').etag.should.equal( + '"d32bda93738f7e03adb22e66c90fbc04"') + + @mock_s3 def test_multipart_upload_too_small(): conn = boto.connect_s3('the_key', 'the_secret') @@ -95,6 +109,24 @@ def test_multipart_upload_cancel(): # have the ability to list mulipart uploads for a bucket. +@mock_s3 +def test_multipart_etag(): + # Create Bucket so that test can run + conn = boto.connect_s3('the_key', 'the_secret') + bucket = conn.create_bucket('mybucket') + + multipart = bucket.initiate_multipart_upload("the-key") + part1 = '0' * 5242880 + multipart.upload_part_from_file(BytesIO(part1), 1) + # last part, can be less than 5 MB + part2 = '1' + multipart.upload_part_from_file(BytesIO(part2), 2) + multipart.complete_upload() + # we should get both parts as the key contents + bucket.get_key("the-key").etag.should.equal( + '"140f92a6df9f9e415f74a1463bcee9bb-2"') + + @mock_s3 def test_missing_key(): conn = boto.connect_s3('the_key', 'the_secret') From 9954612035c1018a295a1fe9690bbdb2f6658ceb Mon Sep 17 00:00:00 2001 From: Konstantinos Koukopoulos Date: Wed, 2 Apr 2014 16:34:29 +0300 Subject: [PATCH 2/2] add format field numbers for python 2.6 support --- moto/s3/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moto/s3/models.py b/moto/s3/models.py index 7f3ad50f8..76da68657 100644 --- a/moto/s3/models.py +++ b/moto/s3/models.py @@ -114,7 +114,7 @@ class FakeMultipart(object): etag = hashlib.md5() etag.update(bytes(md5s)) - return total, "{}-{}".format(etag.hexdigest(), last_part_name) + return total, "{0}-{1}".format(etag.hexdigest(), last_part_name) def set_part(self, part_id, value): if part_id < 1: