S3: Add paging to list_object_versions() (#6896)

This commit is contained in:
Sven Ulland 2023-12-11 21:03:03 +01:00 committed by GitHub
parent 7295622837
commit 2522bc495d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 325 additions and 19 deletions

View File

@ -1820,13 +1820,19 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider):
bucket_name: str,
delimiter: Optional[str] = None,
key_marker: Optional[str] = None,
max_keys: Optional[int] = 1000,
prefix: str = "",
) -> Tuple[List[FakeKey], List[str], List[FakeDeleteMarker]]:
version_id_marker: Optional[str] = None,
) -> Tuple[
List[FakeKey], List[str], List[FakeDeleteMarker], Optional[str], Optional[str]
]:
bucket = self.get_bucket(bucket_name)
common_prefixes: List[str] = []
common_prefixes: Set[str] = set()
requested_versions: List[FakeKey] = []
delete_markers: List[FakeDeleteMarker] = []
next_key_marker: Optional[str] = None
next_version_id_marker: Optional[str] = None
all_versions = list(
itertools.chain(
*(
@ -1838,37 +1844,72 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider):
# sort by name, revert last-modified-date
all_versions.sort(key=lambda r: (r.name, -unix_time_millis(r.last_modified)))
last_name = None
for version in all_versions:
def key_or_version_match(ver: Union[FakeKey, FakeDeleteMarker]) -> bool:
if key_marker is None:
return True
if version_id_marker is None or version_id_marker == "":
return ver.name >= key_marker
return ver.name == key_marker and ver.version_id == version_id_marker
for version in itertools.dropwhile(
lambda ver: not key_or_version_match(ver),
all_versions,
):
name = version.name
# guaranteed to be sorted - so the first key with this name will be the latest
version.is_latest = name != last_name
if version.is_latest:
last_name = name
# skip all keys that alphabetically come before keymarker
if key_marker and name < key_marker:
continue
# Filter for keys that start with prefix
if not name.startswith(prefix):
continue
# separate keys that contain the same string between the prefix and the first occurrence of the delimiter
is_common_prefix = False
if delimiter and delimiter in name[len(prefix) :]:
end_of_delimiter = (
len(prefix) + name[len(prefix) :].index(delimiter) + len(delimiter)
)
prefix_including_delimiter = name[0:end_of_delimiter]
common_prefixes.append(prefix_including_delimiter)
continue
# Differentiate between FakeKey and FakeDeleteMarkers
if not isinstance(version, FakeKey):
delete_markers.append(version)
continue
# Skip already-processed common prefix.
if prefix_including_delimiter == key_marker:
continue
requested_versions.append(version)
common_prefixes.add(prefix_including_delimiter)
name = prefix_including_delimiter
is_common_prefix = True
common_prefixes = sorted(set(common_prefixes))
# Only return max_keys items.
if (
max_keys is not None
and len(requested_versions) + len(delete_markers) + len(common_prefixes)
>= max_keys
):
next_key_marker = name
next_version_id_marker = (
version.version_id if not is_common_prefix else None
)
break
return requested_versions, common_prefixes, delete_markers
if not is_common_prefix:
# Differentiate between FakeKey and FakeDeleteMarkers
if not isinstance(version, FakeKey):
delete_markers.append(version)
continue
requested_versions.append(version)
return (
requested_versions,
sorted(common_prefixes),
delete_markers,
next_key_marker,
next_version_id_marker,
)
def get_bucket_policy(self, bucket_name: str) -> Optional[bytes]:
return self.get_bucket(bucket_name).policy

View File

@ -615,17 +615,31 @@ class S3Response(BaseResponse):
elif "versions" in querystring:
delimiter = querystring.get("delimiter", [None])[0]
key_marker = querystring.get("key-marker", [None])[0]
max_keys = int(querystring.get("max-keys", [1000])[0])
prefix = querystring.get("prefix", [""])[0]
version_id_marker = querystring.get("version-id-marker", [None])[0]
bucket = self.backend.get_bucket(bucket_name)
(
versions,
common_prefixes,
delete_markers,
next_key_marker,
next_version_id_marker,
) = self.backend.list_object_versions(
bucket_name, delimiter=delimiter, key_marker=key_marker, prefix=prefix
bucket_name,
delimiter=delimiter,
key_marker=key_marker,
max_keys=max_keys,
prefix=prefix,
version_id_marker=version_id_marker,
)
key_list = versions
is_truncated = False
if next_key_marker is not None:
is_truncated = True
template = self.response_template(S3_BUCKET_GET_VERSIONS)
return (
@ -637,10 +651,13 @@ class S3Response(BaseResponse):
delete_marker_list=delete_markers,
bucket=bucket,
prefix=prefix,
max_keys=1000,
max_keys=max_keys,
delimiter=delimiter,
key_marker=key_marker,
is_truncated="false",
version_id_marker=version_id_marker,
is_truncated=is_truncated,
next_key_marker=next_key_marker,
next_version_id_marker=next_version_id_marker,
),
)
elif "encryption" in querystring:
@ -2603,8 +2620,17 @@ S3_BUCKET_GET_VERSIONS = """<?xml version="1.0" encoding="UTF-8"?>
{% endif %}
<Delimiter>{{ delimiter }}</Delimiter>
<KeyMarker>{{ key_marker or "" }}</KeyMarker>
<VersionIdMarker>{{ version_id_marker or "" }}</VersionIdMarker>
<MaxKeys>{{ max_keys }}</MaxKeys>
<IsTruncated>{{ is_truncated }}</IsTruncated>
{% if is_truncated %}
<IsTruncated>true</IsTruncated>
<NextKeyMarker>{{ next_key_marker }}</NextKeyMarker>
{% if next_version_id_marker %}
<NextVersionIdMarker>{{ next_version_id_marker }}</NextVersionIdMarker>
{% endif %}
{% else %}
<IsTruncated>false</IsTruncated>
{% endif %}
{% for key in key_list %}
<Version>
<Key>{{ key.name }}</Key>

View File

@ -2626,6 +2626,245 @@ def test_list_object_versions_with_versioning_enabled_late():
assert response["Body"].read() == items[-1]
@mock_s3
def test_list_object_versions_with_paging():
s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
bucket_name = "000" + str(uuid4())
s3_client.create_bucket(Bucket=bucket_name)
s3_client.put_bucket_versioning(
Bucket=bucket_name, VersioningConfiguration={"Status": "Enabled"}
)
obj1ver1 = s3_client.put_object(Bucket=bucket_name, Key="obj1", Body=b"ver1")
obj1ver2 = s3_client.put_object(Bucket=bucket_name, Key="obj1", Body=b"ver2")
obj1ver3 = s3_client.put_object(Bucket=bucket_name, Key="obj1", Body=b"ver3")
page1 = s3_client.list_object_versions(
Bucket=bucket_name,
MaxKeys=2,
)
# Page should have two versions only, and should be truncated.
assert len(page1["Versions"]) == 2
assert "DeleteMarkers" not in page1
assert page1["IsTruncated"] is True
# This should be obj1 ver3 (latest).
assert page1["Versions"][0]["VersionId"] == obj1ver3["VersionId"]
assert page1["Versions"][0]["IsLatest"] is True
# This should be obj1 ver2.
assert page1["Versions"][1]["VersionId"] == obj1ver2["VersionId"]
# The next key/version markers should point to obj1 ver1.
assert "NextKeyMarker" in page1
assert page1["NextKeyMarker"] == "obj1"
assert "NextVersionIdMarker" in page1
assert page1["NextVersionIdMarker"] == obj1ver1["VersionId"]
# Second page should be the last page and have the oldest version.
page2 = s3_client.list_object_versions(
Bucket=bucket_name,
MaxKeys=2,
KeyMarker=page1["NextKeyMarker"],
VersionIdMarker=page1["NextVersionIdMarker"],
)
# Page should have one version only, and not be truncated.
assert len(page2["Versions"]) == 1
assert "DeleteMarkers" not in page2
assert page2["IsTruncated"] is False
assert "NextKeyMarker" not in page2
assert "NextVersionIdMarker" not in page2
# This should be obj1 ver1.
assert page2["Versions"][0]["VersionId"] == obj1ver1["VersionId"]
@mock_s3
def test_list_object_versions_with_paging_and_delete_markers():
s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
bucket_name = "000" + str(uuid4())
s3_client.create_bucket(Bucket=bucket_name)
s3_client.put_bucket_versioning(
Bucket=bucket_name, VersioningConfiguration={"Status": "Enabled"}
)
# A mix of versions and delete markers.
obj1ver1 = s3_client.put_object(Bucket=bucket_name, Key="obj1", Body=b"ver1")
obj2ver1 = s3_client.put_object(Bucket=bucket_name, Key="obj2", Body=b"ver1")
obj1dmk1 = s3_client.delete_object(Bucket=bucket_name, Key="obj1")
obj1dmk2 = s3_client.delete_object(Bucket=bucket_name, Key="obj1")
obj1dmk3 = s3_client.delete_object(Bucket=bucket_name, Key="obj1")
obj2dmk1 = s3_client.delete_object(Bucket=bucket_name, Key="obj2")
page1 = s3_client.list_object_versions(
Bucket=bucket_name,
MaxKeys=2,
)
# Page should have one version and one delete marker, and be truncated.
assert "Versions" not in page1
assert len(page1["DeleteMarkers"]) == 2
assert page1["IsTruncated"] is True
# This should be obj1 dmk3 (latest).
assert page1["DeleteMarkers"][0]["VersionId"] == obj1dmk3["VersionId"]
assert page1["DeleteMarkers"][0]["IsLatest"] is True
# This should be obj1 dmk2.
assert page1["DeleteMarkers"][1]["VersionId"] == obj1dmk2["VersionId"]
# The next key/version markers should point to obj1 dmk1.
assert "NextKeyMarker" in page1
assert page1["NextKeyMarker"] == "obj1"
assert "NextVersionIdMarker" in page1
assert page1["NextVersionIdMarker"] == obj1dmk1["VersionId"]
page2 = s3_client.list_object_versions(
Bucket=bucket_name,
MaxKeys=2,
KeyMarker=page1["NextKeyMarker"],
VersionIdMarker=page1["NextVersionIdMarker"],
)
# Page should have one version and one delete marker, and be truncated.
assert len(page2["Versions"]) == 1
assert len(page2["DeleteMarkers"]) == 1
assert page2["IsTruncated"] is True
# This should be obj1 ver1.
assert page2["Versions"][0]["VersionId"] == obj1ver1["VersionId"]
# This should be obj1 dmk1.
assert page2["DeleteMarkers"][0]["VersionId"] == obj1dmk1["VersionId"]
# The next key/version markers should point to obj2 dmk1.
assert "NextKeyMarker" in page2
assert page2["NextKeyMarker"] == "obj2"
assert "NextVersionIdMarker" in page2
assert page2["NextVersionIdMarker"] == obj2dmk1["VersionId"]
page3 = s3_client.list_object_versions(
Bucket=bucket_name,
MaxKeys=2,
KeyMarker=page2["NextKeyMarker"],
VersionIdMarker=page2["NextVersionIdMarker"],
)
# Page should have one version and one delete marker, and not be truncated.
assert len(page3["Versions"]) == 1
assert len(page3["DeleteMarkers"]) == 1
assert page3["IsTruncated"] is False
# This should be obj2 ver1.
assert page3["Versions"][0]["VersionId"] == obj2ver1["VersionId"]
# This should be obj2 dmk1 (latest).
assert page3["DeleteMarkers"][0]["VersionId"] == obj2dmk1["VersionId"]
assert page3["DeleteMarkers"][0]["IsLatest"]
# There should not be any next key/version marker.
assert "NextKeyMarker" not in page3
assert "NextVersionIdMarker" not in page3
@mock_s3
def test_list_object_versions_with_paging_and_delimiter():
s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
bucket_name = "000" + str(uuid4())
s3_client.create_bucket(Bucket=bucket_name)
s3_client.put_bucket_versioning(
Bucket=bucket_name, VersioningConfiguration={"Status": "Enabled"}
)
# Copied from test_list_object_versions_with_delimiter.
for key_index in list(range(1, 5)) + list(range(10, 14)):
for version_index in range(1, 4):
body = f"data-{version_index}".encode("UTF-8")
s3_client.put_object(
Bucket=bucket_name, Key=f"key{key_index}-with-data", Body=body
)
s3_client.put_object(
Bucket=bucket_name, Key=f"key{key_index}-without-data", Body=b""
)
page1 = s3_client.list_object_versions(
Bucket=bucket_name,
Delimiter="with-",
Prefix="key1",
KeyMarker="key11",
MaxKeys=5,
)
assert "Versions" in page1
assert len(page1["Versions"]) == 3
assert [v["Key"] for v in page1["Versions"]] == [
"key11-without-data",
"key11-without-data",
"key11-without-data",
]
assert "CommonPrefixes" in page1
assert [p["Prefix"] for p in page1["CommonPrefixes"]] == [
"key11-with-",
"key12-with-",
]
assert page1["IsTruncated"] is True
assert page1["NextKeyMarker"] == "key12-with-"
assert "NextVersionIdMarker" not in page1
page2 = s3_client.list_object_versions(
Bucket=bucket_name,
Delimiter="with-",
Prefix="key1",
KeyMarker="key12-with-",
VersionIdMarker="",
MaxKeys=5,
)
assert "Versions" in page2
assert len(page2["Versions"]) == 4
assert [v["Key"] for v in page2["Versions"]] == [
"key12-without-data",
"key12-without-data",
"key12-without-data",
"key13-without-data",
]
assert "CommonPrefixes" in page2
assert [p["Prefix"] for p in page2["CommonPrefixes"]] == [
"key13-with-",
]
assert page2["IsTruncated"] is True
assert page2["NextKeyMarker"] == "key13-without-data"
assert "NextVersionIdMarker" in page2 # FIXME check VersionId?
page3 = s3_client.list_object_versions(
Bucket=bucket_name,
Delimiter="with-",
Prefix="key1",
KeyMarker="key13-without-data",
VersionIdMarker=page2["NextVersionIdMarker"],
MaxKeys=5,
)
assert "Versions" in page3
assert len(page3["Versions"]) == 2
assert [v["Key"] for v in page3["Versions"]] == [
"key13-without-data",
"key13-without-data",
]
assert "CommonPrefixes" not in page3
assert page3["IsTruncated"] is False
assert "NextKeyMarker" not in page3
assert "NextVersionIdMarker" not in page3
@mock_s3
def test_bad_prefix_list_object_versions():
s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)