S3: list_objects_v2() should return more than 1000 files (#7283)

This commit is contained in:
Bert Blommers 2024-01-30 20:51:36 +00:00 committed by GitHub
parent cab030f4a0
commit 28811effdd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 37 additions and 5 deletions

View File

@ -2451,7 +2451,7 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider):
prefix: Optional[str], prefix: Optional[str],
delimiter: Optional[str], delimiter: Optional[str],
marker: Optional[str], marker: Optional[str],
max_keys: int, max_keys: Optional[int],
) -> Tuple[Set[FakeKey], Set[str], bool, Optional[str]]: ) -> Tuple[Set[FakeKey], Set[str], bool, Optional[str]]:
key_results = set() key_results = set()
folder_results = set() folder_results = set()
@ -2487,9 +2487,13 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider):
limit = self._pagination_tokens.get(marker) limit = self._pagination_tokens.get(marker)
key_results = self._get_results_from_token(key_results, limit) key_results = self._get_results_from_token(key_results, limit)
key_results, is_truncated, next_marker = self._truncate_result( if max_keys is not None:
key_results, max_keys key_results, is_truncated, next_marker = self._truncate_result(
) key_results, max_keys
)
else:
is_truncated = False
next_marker = None
return key_results, folder_results, is_truncated, next_marker return key_results, folder_results, is_truncated, next_marker
@ -2503,7 +2507,7 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider):
max_keys: int, max_keys: int,
) -> Tuple[Set[Union[FakeKey, str]], bool, Optional[str]]: ) -> Tuple[Set[Union[FakeKey, str]], bool, Optional[str]]:
result_keys, result_folders, _, _ = self.list_objects( result_keys, result_folders, _, _ = self.list_objects(
bucket, prefix, delimiter, marker=None, max_keys=1000 bucket, prefix, delimiter, marker=None, max_keys=None
) )
# sort the combination of folders and keys into lexicographical order # sort the combination of folders and keys into lexicographical order
all_keys = result_keys + result_folders # type: ignore all_keys = result_keys + result_folders # type: ignore

View File

@ -21,6 +21,7 @@ import moto.s3.models as s3model
from moto import mock_aws, moto_proxy, settings from moto import mock_aws, moto_proxy, settings
from moto.core.utils import utcnow from moto.core.utils import utcnow
from moto.moto_api import state_manager from moto.moto_api import state_manager
from moto.s3.models import s3_backends
from moto.s3.responses import DEFAULT_REGION_NAME from moto.s3.responses import DEFAULT_REGION_NAME
from tests import DEFAULT_ACCOUNT_ID from tests import DEFAULT_ACCOUNT_ID
@ -1453,6 +1454,33 @@ def test_list_objects_v2_truncate_combined_keys_and_folders():
assert resp["CommonPrefixes"][0]["Prefix"] == "3/" assert resp["CommonPrefixes"][0]["Prefix"] == "3/"
@mock_aws
def test_list_objects_v2__more_than_1000():
    """List 1100 objects without passing MaxKeys and check both pages.

    The first call should be truncated at 1000 keys. A second call that
    passes the last key of the first page as StartAfter should return the
    remaining 100 keys and report that the listing is complete.
    """
    # Skipped outside decorator mode because the objects are created by
    # talking to the backend object directly.
    if not settings.TEST_DECORATOR_MODE:
        raise SkipTest("Accessing backends directly")

    bucket = "mybucket"
    total_objects = 1100
    first_page_size = 1000
    remaining = total_objects - first_page_size

    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
    client.create_bucket(Bucket=bucket)

    # Uploading >1000 files through boto3 takes ages, so write them via the backend
    s3_backend = s3_backends[DEFAULT_ACCOUNT_ID]["global"]
    for idx in range(total_objects):
        s3_backend.put_object(bucket_name=bucket, key_name=f"{idx}", value=b"")

    # Page 1: no MaxKeys given, so the default page size applies
    first_page = client.list_objects_v2(Bucket=bucket, Delimiter="/")
    assert first_page["KeyCount"] == first_page_size
    assert len(first_page["Contents"]) == first_page_size
    assert first_page["IsTruncated"] is True

    # Page 2: continue right after the last key returned on page 1
    last_key = first_page["Contents"][-1]["Key"]
    second_page = client.list_objects_v2(
        Bucket=bucket, Delimiter="/", StartAfter=last_key
    )
    assert second_page["KeyCount"] == remaining
    assert len(second_page["Contents"]) == remaining
    assert second_page["IsTruncated"] is False
@mock_aws @mock_aws
def test_list_objects_v2_checksum_algo(): def test_list_objects_v2_checksum_algo():
s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)