From d916fd636fc0024002bca662ee9bc24500455c05 Mon Sep 17 00:00:00 2001 From: Bert Blommers Date: Thu, 14 Oct 2021 18:13:40 +0000 Subject: [PATCH] S3:list_object_versions() - Implement Delimiter + KeyMarker (#4413) --- moto/s3/models.py | 67 ++++++++++++++--------- moto/s3/responses.py | 40 +++++++------- tests/test_s3/test_s3.py | 113 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 175 insertions(+), 45 deletions(-) diff --git a/moto/s3/models.py b/moto/s3/models.py index a9a2c300e..97205af59 100644 --- a/moto/s3/models.py +++ b/moto/s3/models.py @@ -19,7 +19,11 @@ from bisect import insort import pytz from moto.core import ACCOUNT_ID, BaseBackend, BaseModel, CloudFormationModel -from moto.core.utils import iso_8601_datetime_without_milliseconds_s3, rfc_1123_datetime +from moto.core.utils import ( + iso_8601_datetime_without_milliseconds_s3, + rfc_1123_datetime, + unix_time_millis, +) from moto.cloudwatch.models import MetricDatum from moto.utilities.tagging_service import TaggingService from .exceptions import ( @@ -1405,23 +1409,6 @@ class S3Backend(BaseBackend): def get_bucket_encryption(self, bucket_name): return self.get_bucket(bucket_name).encryption - def get_bucket_latest_versions(self, bucket_name): - versions = self.list_object_versions(bucket_name) - latest_modified_per_key = {} - latest_versions = {} - - for version in versions: - name = version.name - last_modified = version.last_modified - version_id = version.version_id - latest_modified_per_key[name] = max( - last_modified, latest_modified_per_key.get(name, datetime.datetime.min) - ) - if last_modified == latest_modified_per_key[name]: - latest_versions[name] = version_id - - return latest_versions - def list_object_versions( self, bucket_name, @@ -1434,14 +1421,44 @@ class S3Backend(BaseBackend): ): bucket = self.get_bucket(bucket_name) - if any((delimiter, key_marker, version_id_marker)): - raise NotImplementedError( - "Called get_bucket_versions with some of delimiter, encoding_type, 
key_marker, version_id_marker" - ) - - return itertools.chain( - *(l for key, l in bucket.keys.iterlists() if key.startswith(prefix)) + common_prefixes = [] + requested_versions = [] + delete_markers = [] + all_versions = itertools.chain( + *(copy.deepcopy(l) for key, l in bucket.keys.iterlists()) ) + all_versions = list(all_versions) + # sort by name, revert last-modified-date + all_versions.sort(key=lambda r: (r.name, -unix_time_millis(r.last_modified))) + last_name = None + for version in all_versions: + name = version.name + # guaranteed to be sorted - so the first key with this name will be the latest + version.is_latest = name != last_name + if version.is_latest: + last_name = name + # Differentiate between FakeKey and FakeDeleteMarkers + if not isinstance(version, FakeKey): + delete_markers.append(version) + continue + # skip all keys that alphabetically come before keymarker + if key_marker and name < key_marker: + continue + # Filter for keys that start with prefix + if not name.startswith(prefix): + continue + # separate out all keys that contain delimiter + if delimiter and delimiter in name: + index = name.index(delimiter) + len(delimiter) + prefix_including_delimiter = name[0:index] + common_prefixes.append(prefix_including_delimiter) + continue + + requested_versions.append(version) + + common_prefixes = sorted(set(common_prefixes)) + + return requested_versions, common_prefixes, delete_markers def get_bucket_policy(self, bucket_name): return self.get_bucket(bucket_name).policy diff --git a/moto/s3/responses.py b/moto/s3/responses.py index 2039c0cb9..7904253dc 100644 --- a/moto/s3/responses.py +++ b/moto/s3/responses.py @@ -11,7 +11,6 @@ from moto.core.utils import ( amzn_request_id, str_to_rfc_1123_datetime, py2_strip_unicode_keys, - unix_time_millis, ) from urllib.parse import ( parse_qs, @@ -474,7 +473,11 @@ class ResponseObject(_TemplateEnvironmentMixin, ActionAuthenticatorMixin): version_id_marker = querystring.get("version-id-marker", [None])[0] 
bucket = self.backend.get_bucket(bucket_name) - versions = self.backend.list_object_versions( + ( + versions, + common_prefixes, + delete_markers, + ) = self.backend.list_object_versions( bucket_name, delimiter=delimiter, encoding_type=encoding_type, @@ -483,30 +486,21 @@ class ResponseObject(_TemplateEnvironmentMixin, ActionAuthenticatorMixin): version_id_marker=version_id_marker, prefix=prefix, ) - latest_versions = self.backend.get_bucket_latest_versions( - bucket_name=bucket_name - ) - key_list = [] - delete_marker_list = [] - for version in versions: - if isinstance(version, FakeKey): - key_list.append(version) - else: - delete_marker_list.append(version) + key_list = versions template = self.response_template(S3_BUCKET_GET_VERSIONS) - key_list.sort(key=lambda r: (r.name, -unix_time_millis(r.last_modified))) return ( 200, {}, template.render( + common_prefixes=common_prefixes, key_list=key_list, - delete_marker_list=delete_marker_list, - latest_versions=latest_versions, + delete_marker_list=delete_markers, bucket=bucket, prefix=prefix, max_keys=1000, - delimiter="", + delimiter=delimiter, + key_marker=key_marker, is_truncated="false", ), ) @@ -2243,14 +2237,22 @@ S3_BUCKET_GET_VERSIONS = """ {% if prefix != None %} {{ prefix }} {% endif %} - {{ key_marker }} + {% if common_prefixes %} + {% for prefix in common_prefixes %} + + {{ prefix }} + + {% endfor %} + {% endif %} + {{ delimiter }} + {{ key_marker or "" }} {{ max_keys }} {{ is_truncated }} {% for key in key_list %} {{ key.name }} {% if key.version_id is none %}null{% else %}{{ key.version_id }}{% endif %} - {% if latest_versions[key.name] == key.version_id %}true{% else %}false{% endif %} + {{ 'true' if key.is_latest else 'false' }} {{ key.last_modified_ISO8601 }} {{ key.etag }} {{ key.size }} @@ -2265,7 +2267,7 @@ S3_BUCKET_GET_VERSIONS = """ {{ marker.name }} {{ marker.version_id }} - {% if latest_versions[marker.name] == marker.version_id %}true{% else %}false{% endif %} + {{ 'true' if marker.is_latest 
else 'false' }} {{ marker.last_modified_ISO8601 }} 75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a diff --git a/tests/test_s3/test_s3.py b/tests/test_s3/test_s3.py index 32301b103..31ad13c57 100644 --- a/tests/test_s3/test_s3.py +++ b/tests/test_s3/test_s3.py @@ -36,6 +36,7 @@ from moto import settings, mock_s3, mock_s3_deprecated, mock_config import moto.s3.models as s3model from moto.core.exceptions import InvalidNextTokenException from moto.settings import get_s3_default_key_buffer_size, S3_UPLOAD_PART_MIN_SIZE +from uuid import uuid4 if settings.TEST_SERVER_MODE: REDUCED_PART_SIZE = S3_UPLOAD_PART_MIN_SIZE @@ -4879,7 +4880,7 @@ def test_boto3_get_object_tagging(): @mock_s3 def test_boto3_list_object_versions(): s3 = boto3.client("s3", region_name=DEFAULT_REGION_NAME) - bucket_name = "mybucket" + bucket_name = "000" + str(uuid4()) key = "key-with-versions" s3.create_bucket(Bucket=bucket_name) s3.put_bucket_versioning( @@ -4902,6 +4903,116 @@ def test_boto3_list_object_versions(): response["Body"].read().should.equal(items[-1]) +@mock_s3 +def test_boto3_list_object_versions_with_delimiter(): + s3 = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + bucket_name = "000" + str(uuid4()) + s3.create_bucket(Bucket=bucket_name) + s3.put_bucket_versioning( + Bucket=bucket_name, VersioningConfiguration={"Status": "Enabled"} + ) + for key_index in list(range(1, 5)) + list(range(10, 14)): + for version_index in range(1, 4): + body = f"data-{version_index}".encode("UTF-8") + s3.put_object( + Bucket=bucket_name, Key=f"key{key_index}-with-data", Body=body + ) + s3.put_object( + Bucket=bucket_name, Key=f"key{key_index}-without-data", Body=b"" + ) + response = s3.list_object_versions(Bucket=bucket_name) + # All object versions should be returned + len(response["Versions"]).should.equal( + 48 + ) # 8 keys * 2 (one with, one without) * 3 versions per key + + # Use start of key as delimiter + response = s3.list_object_versions(Bucket=bucket_name, 
Delimiter="key1") + response.should.have.key("CommonPrefixes").equal([{"Prefix": "key1"}]) + response.should.have.key("Delimiter").equal("key1") + # 3 keys that do not contain the phrase 'key1' (key2, key3, key4) * 2 * 3 + response.should.have.key("Versions").length_of(18) + + # Use in-between key as delimiter + response = s3.list_object_versions(Bucket=bucket_name, Delimiter="-with-") + response.should.have.key("CommonPrefixes").equal( + [ + {"Prefix": "key1-with-"}, + {"Prefix": "key10-with-"}, + {"Prefix": "key11-with-"}, + {"Prefix": "key12-with-"}, + {"Prefix": "key13-with-"}, + {"Prefix": "key2-with-"}, + {"Prefix": "key3-with-"}, + {"Prefix": "key4-with-"}, + ] + ) + response.should.have.key("Delimiter").equal("-with-") + # key(1/10/11/12/13)-without, key(2/3/4)-without + response.should.have.key("Versions").length_of(8 * 1 * 3) + + # Use in-between key as delimiter + response = s3.list_object_versions(Bucket=bucket_name, Delimiter="1-with-") + response.should.have.key("CommonPrefixes").equal( + [{"Prefix": "key1-with-"}, {"Prefix": "key11-with-"}] + ) + response.should.have.key("Delimiter").equal("1-with-") + response.should.have.key("Versions").length_of(42) + all_keys = set([v["Key"] for v in response["Versions"]]) + all_keys.should.contain("key1-without-data") + all_keys.shouldnt.contain("key1-with-data") + all_keys.should.contain("key4-with-data") + all_keys.should.contain("key4-without-data") + + # Use in-between key as delimiter + prefix + response = s3.list_object_versions( + Bucket=bucket_name, Prefix="key1", Delimiter="with-" + ) + response.should.have.key("CommonPrefixes").equal( + [ + {"Prefix": "key1-with-"}, + {"Prefix": "key10-with-"}, + {"Prefix": "key11-with-"}, + {"Prefix": "key12-with-"}, + {"Prefix": "key13-with-"}, + ] + ) + response.should.have.key("Delimiter").equal("with-") + response.should.have.key("KeyMarker").equal("") + response.shouldnt.have.key("NextKeyMarker") + response.should.have.key("Versions").length_of(15) + all_keys = 
set([v["Key"] for v in response["Versions"]]) + all_keys.should.equal( + { + "key1-without-data", + "key10-without-data", + "key11-without-data", + "key13-without-data", + "key12-without-data", + } + ) + + # Start at KeyMarker, and filter using Prefix+Delimiter for all subsequent keys + response = s3.list_object_versions( + Bucket=bucket_name, Prefix="key1", Delimiter="with-", KeyMarker="key11" + ) + response.should.have.key("CommonPrefixes").equal( + [ + {"Prefix": "key11-with-"}, + {"Prefix": "key12-with-"}, + {"Prefix": "key13-with-"}, + ] + ) + response.should.have.key("Delimiter").equal("with-") + response.should.have.key("KeyMarker").equal("key11") + response.shouldnt.have.key("NextKeyMarker") + response.should.have.key("Versions").length_of(9) + all_keys = set([v["Key"] for v in response["Versions"]]) + all_keys.should.equal( + {"key11-without-data", "key12-without-data", "key13-without-data"} + ) + + @mock_s3 def test_boto3_list_object_versions_with_versioning_disabled(): s3 = boto3.client("s3", region_name=DEFAULT_REGION_NAME)