From 5ea02ec1b60a95b44e7672516baee8a75540233a Mon Sep 17 00:00:00 2001 From: Bert Blommers Date: Tue, 29 Nov 2022 18:38:53 -0100 Subject: [PATCH] DynamoDB: batch_get_item() now only returns up to 16MB of data (#5718) --- moto/dynamodb/responses.py | 13 +- tests/test_dynamodb/test_dynamodb.py | 184 -------------- .../test_dynamodb_batch_get_item.py | 231 ++++++++++++++++++ 3 files changed, 242 insertions(+), 186 deletions(-) create mode 100644 tests/test_dynamodb/test_dynamodb_batch_get_item.py diff --git a/moto/dynamodb/responses.py b/moto/dynamodb/responses.py index 7d4309232..1b4fb83f8 100644 --- a/moto/dynamodb/responses.py +++ b/moto/dynamodb/responses.py @@ -531,6 +531,7 @@ class DynamoHandler(BaseResponse): "Too many items requested for the BatchGetItem call" ) + result_size: int = 0 for table_name, table_request in table_batches.items(): keys = table_request["Keys"] if self._contains_duplicates(keys): @@ -553,8 +554,16 @@ class DynamoHandler(BaseResponse): table_name, key, projection_expression ) if item: - item_describe = item.describe_attrs(attributes_to_get) - results["Responses"][table_name].append(item_describe["Item"]) + # A single operation can retrieve up to 16 MB of data [and] returns a partial result if the response size limit is exceeded + if result_size + item.size() > (16 * 1024 * 1024): + # Result is already getting too big - next results should be part of UnprocessedKeys + if table_name not in results["UnprocessedKeys"]: + results["UnprocessedKeys"][table_name] = {"Keys": []} + results["UnprocessedKeys"][table_name]["Keys"].append(key) + else: + item_describe = item.describe_attrs(attributes_to_get) + results["Responses"][table_name].append(item_describe["Item"]) + result_size += item.size() results["ConsumedCapacity"].append( {"CapacityUnits": len(keys), "TableName": table_name} diff --git a/tests/test_dynamodb/test_dynamodb.py b/tests/test_dynamodb/test_dynamodb.py index 1a78618b3..873c8738e 100644 --- a/tests/test_dynamodb/test_dynamodb.py 
+++ b/tests/test_dynamodb/test_dynamodb.py @@ -2459,170 +2459,6 @@ def test_query_by_non_exists_index(): ) -@mock_dynamodb -def test_batch_items_returns_all(): - dynamodb = _create_user_table() - returned_items = dynamodb.batch_get_item( - RequestItems={ - "users": { - "Keys": [ - {"username": {"S": "user0"}}, - {"username": {"S": "user1"}}, - {"username": {"S": "user2"}}, - {"username": {"S": "user3"}}, - ], - "ConsistentRead": True, - } - } - )["Responses"]["users"] - assert len(returned_items) == 3 - assert [item["username"]["S"] for item in returned_items] == [ - "user1", - "user2", - "user3", - ] - - -@mock_dynamodb -def test_batch_items_throws_exception_when_requesting_100_items_for_single_table(): - dynamodb = _create_user_table() - with pytest.raises(ClientError) as ex: - dynamodb.batch_get_item( - RequestItems={ - "users": { - "Keys": [ - {"username": {"S": "user" + str(i)}} for i in range(0, 104) - ], - "ConsistentRead": True, - } - } - ) - ex.value.response["Error"]["Code"].should.equal("ValidationException") - msg = ex.value.response["Error"]["Message"] - msg.should.contain("1 validation error detected: Value") - msg.should.contain( - "at 'requestItems.users.member.keys' failed to satisfy constraint: Member must have length less than or equal to 100" - ) - - -@mock_dynamodb -def test_batch_items_throws_exception_when_requesting_100_items_across_all_tables(): - dynamodb = _create_user_table() - with pytest.raises(ClientError) as ex: - dynamodb.batch_get_item( - RequestItems={ - "users": { - "Keys": [ - {"username": {"S": "user" + str(i)}} for i in range(0, 75) - ], - "ConsistentRead": True, - }, - "users2": { - "Keys": [ - {"username": {"S": "user" + str(i)}} for i in range(0, 75) - ], - "ConsistentRead": True, - }, - } - ) - ex.value.response["Error"]["Code"].should.equal("ValidationException") - ex.value.response["Error"]["Message"].should.equal( - "Too many items requested for the BatchGetItem call" - ) - - -@mock_dynamodb -def 
test_batch_items_with_basic_projection_expression(): - dynamodb = _create_user_table() - returned_items = dynamodb.batch_get_item( - RequestItems={ - "users": { - "Keys": [ - {"username": {"S": "user0"}}, - {"username": {"S": "user1"}}, - {"username": {"S": "user2"}}, - {"username": {"S": "user3"}}, - ], - "ConsistentRead": True, - "ProjectionExpression": "username", - } - } - )["Responses"]["users"] - - returned_items.should.have.length_of(3) - [item["username"]["S"] for item in returned_items].should.be.equal( - ["user1", "user2", "user3"] - ) - [item.get("foo") for item in returned_items].should.be.equal([None, None, None]) - - # The projection expression should not remove data from storage - returned_items = dynamodb.batch_get_item( - RequestItems={ - "users": { - "Keys": [ - {"username": {"S": "user0"}}, - {"username": {"S": "user1"}}, - {"username": {"S": "user2"}}, - {"username": {"S": "user3"}}, - ], - "ConsistentRead": True, - } - } - )["Responses"]["users"] - - [item["username"]["S"] for item in returned_items].should.be.equal( - ["user1", "user2", "user3"] - ) - [item["foo"]["S"] for item in returned_items].should.be.equal(["bar", "bar", "bar"]) - - -@mock_dynamodb -def test_batch_items_with_basic_projection_expression_and_attr_expression_names(): - dynamodb = _create_user_table() - returned_items = dynamodb.batch_get_item( - RequestItems={ - "users": { - "Keys": [ - {"username": {"S": "user0"}}, - {"username": {"S": "user1"}}, - {"username": {"S": "user2"}}, - {"username": {"S": "user3"}}, - ], - "ConsistentRead": True, - "ProjectionExpression": "#rl", - "ExpressionAttributeNames": {"#rl": "username"}, - } - } - )["Responses"]["users"] - - returned_items.should.have.length_of(3) - [item["username"]["S"] for item in returned_items].should.be.equal( - ["user1", "user2", "user3"] - ) - [item.get("foo") for item in returned_items].should.be.equal([None, None, None]) - - -@mock_dynamodb -def test_batch_items_should_throw_exception_for_duplicate_request(): - 
client = _create_user_table() - with pytest.raises(ClientError) as ex: - client.batch_get_item( - RequestItems={ - "users": { - "Keys": [ - {"username": {"S": "user0"}}, - {"username": {"S": "user0"}}, - ], - "ConsistentRead": True, - } - } - ) - ex.value.response["Error"]["Code"].should.equal("ValidationException") - ex.value.response["Error"]["Message"].should.equal( - "Provided list of item keys contains duplicates" - ) - - @mock_dynamodb def test_index_with_unknown_attributes_should_fail(): dynamodb = boto3.client("dynamodb", region_name="us-east-1") @@ -3443,26 +3279,6 @@ def test_update_supports_list_append_with_nested_if_not_exists_operation_and_pro ) -def _create_user_table(): - client = boto3.client("dynamodb", region_name="us-east-1") - client.create_table( - TableName="users", - KeySchema=[{"AttributeName": "username", "KeyType": "HASH"}], - AttributeDefinitions=[{"AttributeName": "username", "AttributeType": "S"}], - ProvisionedThroughput={"ReadCapacityUnits": 5, "WriteCapacityUnits": 5}, - ) - client.put_item( - TableName="users", Item={"username": {"S": "user1"}, "foo": {"S": "bar"}} - ) - client.put_item( - TableName="users", Item={"username": {"S": "user2"}, "foo": {"S": "bar"}} - ) - client.put_item( - TableName="users", Item={"username": {"S": "user3"}, "foo": {"S": "bar"}} - ) - return client - - @mock_dynamodb def test_update_item_if_original_value_is_none(): dynamo = boto3.resource("dynamodb", region_name="eu-central-1") diff --git a/tests/test_dynamodb/test_dynamodb_batch_get_item.py b/tests/test_dynamodb/test_dynamodb_batch_get_item.py new file mode 100644 index 000000000..b91e03175 --- /dev/null +++ b/tests/test_dynamodb/test_dynamodb_batch_get_item.py @@ -0,0 +1,231 @@ +import boto3 +import sure # noqa # pylint: disable=unused-import +import pytest + +from moto import mock_dynamodb +from botocore.exceptions import ClientError + + +def _create_user_table(): + client = boto3.client("dynamodb", region_name="us-east-1") + client.create_table( + 
TableName="users", + KeySchema=[{"AttributeName": "username", "KeyType": "HASH"}], + AttributeDefinitions=[{"AttributeName": "username", "AttributeType": "S"}], + ProvisionedThroughput={"ReadCapacityUnits": 5, "WriteCapacityUnits": 5}, + ) + client.put_item( + TableName="users", Item={"username": {"S": "user1"}, "foo": {"S": "bar"}} + ) + client.put_item( + TableName="users", Item={"username": {"S": "user2"}, "foo": {"S": "bar"}} + ) + client.put_item( + TableName="users", Item={"username": {"S": "user3"}, "foo": {"S": "bar"}} + ) + return client + + +@mock_dynamodb +def test_batch_items_returns_all(): + dynamodb = _create_user_table() + returned_items = dynamodb.batch_get_item( + RequestItems={ + "users": { + "Keys": [ + {"username": {"S": "user0"}}, + {"username": {"S": "user1"}}, + {"username": {"S": "user2"}}, + {"username": {"S": "user3"}}, + ], + "ConsistentRead": True, + } + } + )["Responses"]["users"] + assert len(returned_items) == 3 + assert [item["username"]["S"] for item in returned_items] == [ + "user1", + "user2", + "user3", + ] + + +@mock_dynamodb +def test_batch_items_throws_exception_when_requesting_100_items_for_single_table(): + dynamodb = _create_user_table() + with pytest.raises(ClientError) as ex: + dynamodb.batch_get_item( + RequestItems={ + "users": { + "Keys": [ + {"username": {"S": "user" + str(i)}} for i in range(0, 104) + ], + "ConsistentRead": True, + } + } + ) + ex.value.response["Error"]["Code"].should.equal("ValidationException") + msg = ex.value.response["Error"]["Message"] + msg.should.contain("1 validation error detected: Value") + msg.should.contain( + "at 'requestItems.users.member.keys' failed to satisfy constraint: Member must have length less than or equal to 100" + ) + + +@mock_dynamodb +def test_batch_items_throws_exception_when_requesting_100_items_across_all_tables(): + dynamodb = _create_user_table() + with pytest.raises(ClientError) as ex: + dynamodb.batch_get_item( + RequestItems={ + "users": { + "Keys": [ + 
{"username": {"S": "user" + str(i)}} for i in range(0, 75) + ], + "ConsistentRead": True, + }, + "users2": { + "Keys": [ + {"username": {"S": "user" + str(i)}} for i in range(0, 75) + ], + "ConsistentRead": True, + }, + } + ) + ex.value.response["Error"]["Code"].should.equal("ValidationException") + ex.value.response["Error"]["Message"].should.equal( + "Too many items requested for the BatchGetItem call" + ) + + +@mock_dynamodb +def test_batch_items_with_basic_projection_expression(): + dynamodb = _create_user_table() + returned_items = dynamodb.batch_get_item( + RequestItems={ + "users": { + "Keys": [ + {"username": {"S": "user0"}}, + {"username": {"S": "user1"}}, + {"username": {"S": "user2"}}, + {"username": {"S": "user3"}}, + ], + "ConsistentRead": True, + "ProjectionExpression": "username", + } + } + )["Responses"]["users"] + + returned_items.should.have.length_of(3) + [item["username"]["S"] for item in returned_items].should.be.equal( + ["user1", "user2", "user3"] + ) + [item.get("foo") for item in returned_items].should.be.equal([None, None, None]) + + # The projection expression should not remove data from storage + returned_items = dynamodb.batch_get_item( + RequestItems={ + "users": { + "Keys": [ + {"username": {"S": "user0"}}, + {"username": {"S": "user1"}}, + {"username": {"S": "user2"}}, + {"username": {"S": "user3"}}, + ], + "ConsistentRead": True, + } + } + )["Responses"]["users"] + + [item["username"]["S"] for item in returned_items].should.be.equal( + ["user1", "user2", "user3"] + ) + [item["foo"]["S"] for item in returned_items].should.be.equal(["bar", "bar", "bar"]) + + +@mock_dynamodb +def test_batch_items_with_basic_projection_expression_and_attr_expression_names(): + dynamodb = _create_user_table() + returned_items = dynamodb.batch_get_item( + RequestItems={ + "users": { + "Keys": [ + {"username": {"S": "user0"}}, + {"username": {"S": "user1"}}, + {"username": {"S": "user2"}}, + {"username": {"S": "user3"}}, + ], + "ConsistentRead": True, + 
"ProjectionExpression": "#rl", + "ExpressionAttributeNames": {"#rl": "username"}, + } + } + )["Responses"]["users"] + + returned_items.should.have.length_of(3) + [item["username"]["S"] for item in returned_items].should.be.equal( + ["user1", "user2", "user3"] + ) + [item.get("foo") for item in returned_items].should.be.equal([None, None, None]) + + +@mock_dynamodb +def test_batch_items_should_throw_exception_for_duplicate_request(): + client = _create_user_table() + with pytest.raises(ClientError) as ex: + client.batch_get_item( + RequestItems={ + "users": { + "Keys": [ + {"username": {"S": "user0"}}, + {"username": {"S": "user0"}}, + ], + "ConsistentRead": True, + } + } + ) + ex.value.response["Error"]["Code"].should.equal("ValidationException") + ex.value.response["Error"]["Message"].should.equal( + "Provided list of item keys contains duplicates" + ) + + +@mock_dynamodb +def test_batch_items_should_return_16mb_max(): + """ + A single operation can retrieve up to 16 MB of data [...]. BatchGetItem returns a partial result if the response size limit is exceeded [..]. + + For example, if you ask to retrieve 100 items, but each individual item is 300 KB in size, + the system returns 52 items (so as not to exceed the 16 MB limit). + + It also returns an appropriate UnprocessedKeys value so you can get the next page of results. + If desired, your application can include its own logic to assemble the pages of results into one dataset. 
+ """ + client = _create_user_table() + # Fill table with all the data + for i in range(100): + client.put_item( + TableName="users", + Item={"username": {"S": f"largedata{i}"}, "foo": {"S": "x" * 300000}}, + ) + + resp = client.batch_get_item( + RequestItems={ + "users": { + "Keys": [{"username": {"S": f"largedata{i}"}} for i in range(75)], + "ConsistentRead": True, + } + } + ) + + resp["Responses"]["users"].should.have.length_of(55) + unprocessed_keys = resp["UnprocessedKeys"]["users"]["Keys"] + # 75 requested, 55 returned --> 20 unprocessed + unprocessed_keys.should.have.length_of(20) + + # Keys 55-74 are unprocessed + unprocessed_keys.should.contain({"username": {"S": "largedata55"}}) + unprocessed_keys.should.contain({"username": {"S": "largedata65"}}) + + # Keys 0-54 are processed in the regular response, so they shouldn't show up here + unprocessed_keys.shouldnt.contain({"username": {"S": "largedata45"}})