From 2522bc495d71603fb6d38c41a34e6e9b122f27a1 Mon Sep 17 00:00:00 2001 From: Sven Ulland Date: Mon, 11 Dec 2023 21:03:03 +0100 Subject: [PATCH] S3: Add paging to list_object_versions() (#6896) --- moto/s3/models.py | 71 +++++++++--- moto/s3/responses.py | 34 +++++- tests/test_s3/test_s3.py | 239 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 325 insertions(+), 19 deletions(-) diff --git a/moto/s3/models.py b/moto/s3/models.py index a1ece84df..c3fb764d8 100644 --- a/moto/s3/models.py +++ b/moto/s3/models.py @@ -1820,13 +1820,19 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider): bucket_name: str, delimiter: Optional[str] = None, key_marker: Optional[str] = None, + max_keys: Optional[int] = 1000, prefix: str = "", - ) -> Tuple[List[FakeKey], List[str], List[FakeDeleteMarker]]: + version_id_marker: Optional[str] = None, + ) -> Tuple[ + List[FakeKey], List[str], List[FakeDeleteMarker], Optional[str], Optional[str] + ]: bucket = self.get_bucket(bucket_name) - common_prefixes: List[str] = [] + common_prefixes: Set[str] = set() requested_versions: List[FakeKey] = [] delete_markers: List[FakeDeleteMarker] = [] + next_key_marker: Optional[str] = None + next_version_id_marker: Optional[str] = None all_versions = list( itertools.chain( *( @@ -1838,37 +1844,72 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider): # sort by name, revert last-modified-date all_versions.sort(key=lambda r: (r.name, -unix_time_millis(r.last_modified))) last_name = None - for version in all_versions: + + def key_or_version_match(ver: Union[FakeKey, FakeDeleteMarker]) -> bool: + if key_marker is None: + return True + + if version_id_marker is None or version_id_marker == "": + return ver.name >= key_marker + + return ver.name == key_marker and ver.version_id == version_id_marker + + for version in itertools.dropwhile( + lambda ver: not key_or_version_match(ver), + all_versions, + ): name = version.name # guaranteed to be sorted - so the first key with this name will be the latest version.is_latest = name != last_name if version.is_latest: last_name = name - # skip all keys that alphabetically come before keymarker - if key_marker and name < key_marker: - continue + # Filter for keys that start with prefix if not name.startswith(prefix): continue # separate keys that contain the same string between the prefix and the first occurrence of the delimiter + is_common_prefix = False if delimiter and delimiter in name[len(prefix) :]: end_of_delimiter = ( len(prefix) + name[len(prefix) :].index(delimiter) + len(delimiter) ) prefix_including_delimiter = name[0:end_of_delimiter] - common_prefixes.append(prefix_including_delimiter) - continue - # Differentiate between FakeKey and FakeDeleteMarkers - if not isinstance(version, FakeKey): - delete_markers.append(version) - continue + # Skip already-processed common prefix. + if prefix_including_delimiter == key_marker: + continue - requested_versions.append(version) + common_prefixes.add(prefix_including_delimiter) + name = prefix_including_delimiter + is_common_prefix = True - common_prefixes = sorted(set(common_prefixes)) + # Only return max_keys items. + if ( + max_keys is not None + and len(requested_versions) + len(delete_markers) + len(common_prefixes) + >= max_keys + ): + next_key_marker = name + next_version_id_marker = ( + version.version_id if not is_common_prefix else None + ) + break - return requested_versions, common_prefixes, delete_markers + if not is_common_prefix: + # Differentiate between FakeKey and FakeDeleteMarkers + if not isinstance(version, FakeKey): + delete_markers.append(version) + continue + + requested_versions.append(version) + + return ( + requested_versions, + sorted(common_prefixes), + delete_markers, + next_key_marker, + next_version_id_marker, + ) def get_bucket_policy(self, bucket_name: str) -> Optional[bytes]: return self.get_bucket(bucket_name).policy diff --git a/moto/s3/responses.py b/moto/s3/responses.py index 186ee4d71..efde8dea1 100644 --- a/moto/s3/responses.py +++ b/moto/s3/responses.py @@ -615,17 +615,31 @@ class S3Response(BaseResponse): elif "versions" in querystring: delimiter = querystring.get("delimiter", [None])[0] key_marker = querystring.get("key-marker", [None])[0] + max_keys = int(querystring.get("max-keys", [1000])[0]) prefix = querystring.get("prefix", [""])[0] + version_id_marker = querystring.get("version-id-marker", [None])[0] bucket = self.backend.get_bucket(bucket_name) ( versions, common_prefixes, delete_markers, + next_key_marker, + next_version_id_marker, ) = self.backend.list_object_versions( - bucket_name, delimiter=delimiter, key_marker=key_marker, prefix=prefix + bucket_name, + delimiter=delimiter, + key_marker=key_marker, + max_keys=max_keys, + prefix=prefix, + version_id_marker=version_id_marker, ) key_list = versions + + is_truncated = False + if next_key_marker is not None: + is_truncated = True + template = self.response_template(S3_BUCKET_GET_VERSIONS) return ( @@ -637,10 +651,13 @@ class S3Response(BaseResponse): delete_marker_list=delete_markers, bucket=bucket, prefix=prefix, - max_keys=1000, + max_keys=max_keys, delimiter=delimiter, key_marker=key_marker, - is_truncated="false", + version_id_marker=version_id_marker, + is_truncated=is_truncated, + next_key_marker=next_key_marker, + next_version_id_marker=next_version_id_marker, ), ) elif "encryption" in querystring: @@ -2603,8 +2620,17 @@ S3_BUCKET_GET_VERSIONS = """ {% endif %} {{ delimiter }} {{ key_marker or "" }} + {{ version_id_marker or "" }} {{ max_keys }} - {{ is_truncated }} + {% if is_truncated %} + true + {{ next_key_marker }} + {% if next_version_id_marker %} + {{ next_version_id_marker }} + {% endif %} + {% else %} + false + {% endif %} {% for key in key_list %} {{ key.name }} diff --git a/tests/test_s3/test_s3.py b/tests/test_s3/test_s3.py index db4aff3fe..941e79ec3 100644 --- a/tests/test_s3/test_s3.py +++ b/tests/test_s3/test_s3.py @@ -2626,6 +2626,245 @@ def test_list_object_versions_with_versioning_enabled_late(): assert response["Body"].read() == items[-1] +@mock_s3 +def test_list_object_versions_with_paging(): + s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + bucket_name = "000" + str(uuid4()) + s3_client.create_bucket(Bucket=bucket_name) + s3_client.put_bucket_versioning( + Bucket=bucket_name, VersioningConfiguration={"Status": "Enabled"} + ) + + obj1ver1 = s3_client.put_object(Bucket=bucket_name, Key="obj1", Body=b"ver1") + obj1ver2 = s3_client.put_object(Bucket=bucket_name, Key="obj1", Body=b"ver2") + obj1ver3 = s3_client.put_object(Bucket=bucket_name, Key="obj1", Body=b"ver3") + + page1 = s3_client.list_object_versions( + Bucket=bucket_name, + MaxKeys=2, + ) + + # Page should have two versions only, and should be truncated. + assert len(page1["Versions"]) == 2 + assert "DeleteMarkers" not in page1 + assert page1["IsTruncated"] is True + + # This should be obj1 ver3 (latest). + assert page1["Versions"][0]["VersionId"] == obj1ver3["VersionId"] + assert page1["Versions"][0]["IsLatest"] is True + + # This should be obj1 ver2. + assert page1["Versions"][1]["VersionId"] == obj1ver2["VersionId"] + + # The next key/version markers should point to obj1 ver1. + assert "NextKeyMarker" in page1 + assert page1["NextKeyMarker"] == "obj1" + assert "NextVersionIdMarker" in page1 + assert page1["NextVersionIdMarker"] == obj1ver1["VersionId"] + + # Second page should be the last page and have the oldest version. + page2 = s3_client.list_object_versions( + Bucket=bucket_name, + MaxKeys=2, + KeyMarker=page1["NextKeyMarker"], + VersionIdMarker=page1["NextVersionIdMarker"], + ) + + # Page should have one version only, and not be truncated. + assert len(page2["Versions"]) == 1 + assert "DeleteMarkers" not in page2 + assert page2["IsTruncated"] is False + assert "NextKeyMarker" not in page2 + assert "NextVersionIdMarker" not in page2 + + # This should be obj1 ver1. + assert page2["Versions"][0]["VersionId"] == obj1ver1["VersionId"] + + +@mock_s3 +def test_list_object_versions_with_paging_and_delete_markers(): + s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + bucket_name = "000" + str(uuid4()) + s3_client.create_bucket(Bucket=bucket_name) + s3_client.put_bucket_versioning( + Bucket=bucket_name, VersioningConfiguration={"Status": "Enabled"} + ) + + # A mix of versions and delete markers. + obj1ver1 = s3_client.put_object(Bucket=bucket_name, Key="obj1", Body=b"ver1") + obj2ver1 = s3_client.put_object(Bucket=bucket_name, Key="obj2", Body=b"ver1") + obj1dmk1 = s3_client.delete_object(Bucket=bucket_name, Key="obj1") + obj1dmk2 = s3_client.delete_object(Bucket=bucket_name, Key="obj1") + obj1dmk3 = s3_client.delete_object(Bucket=bucket_name, Key="obj1") + obj2dmk1 = s3_client.delete_object(Bucket=bucket_name, Key="obj2") + + page1 = s3_client.list_object_versions( + Bucket=bucket_name, + MaxKeys=2, + ) + + # Page should have one version and one delete marker, and be truncated. + assert "Versions" not in page1 + assert len(page1["DeleteMarkers"]) == 2 + assert page1["IsTruncated"] is True + + # This should be obj1 dmk3 (latest). + assert page1["DeleteMarkers"][0]["VersionId"] == obj1dmk3["VersionId"] + assert page1["DeleteMarkers"][0]["IsLatest"] is True + + # This should be obj1 dmk2. + assert page1["DeleteMarkers"][1]["VersionId"] == obj1dmk2["VersionId"] + + # The next key/version markers should point to obj1 dmk1. + assert "NextKeyMarker" in page1 + assert page1["NextKeyMarker"] == "obj1" + assert "NextVersionIdMarker" in page1 + assert page1["NextVersionIdMarker"] == obj1dmk1["VersionId"] + + page2 = s3_client.list_object_versions( + Bucket=bucket_name, + MaxKeys=2, + KeyMarker=page1["NextKeyMarker"], + VersionIdMarker=page1["NextVersionIdMarker"], + ) + + # Page should have one version and one delete marker, and be truncated. + assert len(page2["Versions"]) == 1 + assert len(page2["DeleteMarkers"]) == 1 + assert page2["IsTruncated"] is True + + # This should be obj1 ver1. + assert page2["Versions"][0]["VersionId"] == obj1ver1["VersionId"] + + # This should be obj1 dmk1. + assert page2["DeleteMarkers"][0]["VersionId"] == obj1dmk1["VersionId"] + + # The next key/version markers should point to obj2 dmk1. + assert "NextKeyMarker" in page2 + assert page2["NextKeyMarker"] == "obj2" + assert "NextVersionIdMarker" in page2 + assert page2["NextVersionIdMarker"] == obj2dmk1["VersionId"] + + page3 = s3_client.list_object_versions( + Bucket=bucket_name, + MaxKeys=2, + KeyMarker=page2["NextKeyMarker"], + VersionIdMarker=page2["NextVersionIdMarker"], + ) + + # Page should have one version and one delete marker, and not be truncated. + assert len(page3["Versions"]) == 1 + assert len(page3["DeleteMarkers"]) == 1 + assert page3["IsTruncated"] is False + + # This should be obj2 ver1. + assert page3["Versions"][0]["VersionId"] == obj2ver1["VersionId"] + + # This should be obj2 dmk1 (latest). + assert page3["DeleteMarkers"][0]["VersionId"] == obj2dmk1["VersionId"] + assert page3["DeleteMarkers"][0]["IsLatest"] + + # There should not be any next key/version marker. + assert "NextKeyMarker" not in page3 + assert "NextVersionIdMarker" not in page3 + + +@mock_s3 +def test_list_object_versions_with_paging_and_delimiter(): + s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + bucket_name = "000" + str(uuid4()) + s3_client.create_bucket(Bucket=bucket_name) + s3_client.put_bucket_versioning( + Bucket=bucket_name, VersioningConfiguration={"Status": "Enabled"} + ) + + # Copied from test_list_object_versions_with_delimiter. + for key_index in list(range(1, 5)) + list(range(10, 14)): + for version_index in range(1, 4): + body = f"data-{version_index}".encode("UTF-8") + s3_client.put_object( + Bucket=bucket_name, Key=f"key{key_index}-with-data", Body=body + ) + s3_client.put_object( + Bucket=bucket_name, Key=f"key{key_index}-without-data", Body=b"" + ) + + page1 = s3_client.list_object_versions( + Bucket=bucket_name, + Delimiter="with-", + Prefix="key1", + KeyMarker="key11", + MaxKeys=5, + ) + + assert "Versions" in page1 + assert len(page1["Versions"]) == 3 + assert [v["Key"] for v in page1["Versions"]] == [ + "key11-without-data", + "key11-without-data", + "key11-without-data", + ] + + assert "CommonPrefixes" in page1 + assert [p["Prefix"] for p in page1["CommonPrefixes"]] == [ + "key11-with-", + "key12-with-", + ] + + assert page1["IsTruncated"] is True + assert page1["NextKeyMarker"] == "key12-with-" + assert "NextVersionIdMarker" not in page1 + + page2 = s3_client.list_object_versions( + Bucket=bucket_name, + Delimiter="with-", + Prefix="key1", + KeyMarker="key12-with-", + VersionIdMarker="", + MaxKeys=5, + ) + + assert "Versions" in page2 + assert len(page2["Versions"]) == 4 + assert [v["Key"] for v in page2["Versions"]] == [ + "key12-without-data", + "key12-without-data", + "key12-without-data", + "key13-without-data", + ] + + assert "CommonPrefixes" in page2 + assert [p["Prefix"] for p in page2["CommonPrefixes"]] == [ + "key13-with-", + ] + + assert page2["IsTruncated"] is True + assert page2["NextKeyMarker"] == "key13-without-data" + assert "NextVersionIdMarker" in page2 # FIXME check VersionId? + + page3 = s3_client.list_object_versions( + Bucket=bucket_name, + Delimiter="with-", + Prefix="key1", + KeyMarker="key13-without-data", + VersionIdMarker=page2["NextVersionIdMarker"], + MaxKeys=5, + ) + + assert "Versions" in page3 + assert len(page3["Versions"]) == 2 + assert [v["Key"] for v in page3["Versions"]] == [ + "key13-without-data", + "key13-without-data", + ] + + assert "CommonPrefixes" not in page3 + + assert page3["IsTruncated"] is False + assert "NextKeyMarker" not in page3 + assert "NextVersionIdMarker" not in page3 + + @mock_s3 def test_bad_prefix_list_object_versions(): s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)