DynamoDB: Fix ScannedCount calculation for Limit-ed calls (#7085)
This commit is contained in:
parent
ad316dd837
commit
bb23df4244
@ -20,6 +20,8 @@ from moto.dynamodb.models.dynamo_type import DynamoType, Item
|
|||||||
from moto.dynamodb.models.utilities import dynamo_json_dump
|
from moto.dynamodb.models.utilities import dynamo_json_dump
|
||||||
from moto.moto_api._internal import mock_random
|
from moto.moto_api._internal import mock_random
|
||||||
|
|
||||||
|
RESULT_SIZE_LIMIT = 1000000 # DynamoDB has a 1MB size limit
|
||||||
|
|
||||||
|
|
||||||
class SecondaryIndex(BaseModel):
|
class SecondaryIndex(BaseModel):
|
||||||
def __init__(
|
def __init__(
|
||||||
@ -813,7 +815,8 @@ class Table(CloudFormationModel):
|
|||||||
index_name: Optional[str] = None,
|
index_name: Optional[str] = None,
|
||||||
projection_expression: Optional[List[List[str]]] = None,
|
projection_expression: Optional[List[List[str]]] = None,
|
||||||
) -> Tuple[List[Item], int, Optional[Dict[str, Any]]]:
|
) -> Tuple[List[Item], int, Optional[Dict[str, Any]]]:
|
||||||
results = []
|
results: List[Item] = []
|
||||||
|
result_size = 0
|
||||||
scanned_count = 0
|
scanned_count = 0
|
||||||
|
|
||||||
if index_name:
|
if index_name:
|
||||||
@ -822,16 +825,32 @@ class Table(CloudFormationModel):
|
|||||||
else:
|
else:
|
||||||
items = self.all_items()
|
items = self.all_items()
|
||||||
|
|
||||||
|
last_evaluated_key = None
|
||||||
|
processing_previous_page = exclusive_start_key is not None
|
||||||
for item in items:
|
for item in items:
|
||||||
scanned_count += 1
|
# Cycle through the previous page of results
|
||||||
|
# When we encounter our start key, we know we've reached the end of the previous page
|
||||||
|
if processing_previous_page:
|
||||||
|
if self._item_equals_dct(item, exclusive_start_key):
|
||||||
|
processing_previous_page = False
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Check wether we've reached the limit of our result set
|
||||||
|
# That can be either in number, or in size
|
||||||
|
reached_length_limit = len(results) == limit
|
||||||
|
reached_size_limit = (result_size + item.size()) > RESULT_SIZE_LIMIT
|
||||||
|
if reached_length_limit or reached_size_limit:
|
||||||
|
last_evaluated_key = self._get_last_evaluated_key(
|
||||||
|
results[-1], index_name
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
passes_all_conditions = True
|
passes_all_conditions = True
|
||||||
for (
|
for attribute_name in filters:
|
||||||
attribute_name,
|
|
||||||
(comparison_operator, comparison_objs),
|
|
||||||
) in filters.items():
|
|
||||||
attribute = item.attrs.get(attribute_name)
|
attribute = item.attrs.get(attribute_name)
|
||||||
|
|
||||||
if attribute:
|
if attribute:
|
||||||
|
(comparison_operator, comparison_objs) = filters[attribute_name]
|
||||||
# Attribute found
|
# Attribute found
|
||||||
if not attribute.compare(comparison_operator, comparison_objs):
|
if not attribute.compare(comparison_operator, comparison_objs):
|
||||||
passes_all_conditions = False
|
passes_all_conditions = False
|
||||||
@ -846,17 +865,17 @@ class Table(CloudFormationModel):
|
|||||||
break
|
break
|
||||||
|
|
||||||
if passes_all_conditions:
|
if passes_all_conditions:
|
||||||
results.append(item)
|
|
||||||
|
|
||||||
results = copy.deepcopy(results)
|
|
||||||
if index_name:
|
if index_name:
|
||||||
index = self.get_index(index_name)
|
index = self.get_index(index_name)
|
||||||
results = [index.project(r) for r in results]
|
results.append(index.project(copy.deepcopy(item)))
|
||||||
|
else:
|
||||||
|
results.append(copy.deepcopy(item))
|
||||||
|
result_size += item.size()
|
||||||
|
|
||||||
results, last_evaluated_key = self._trim_results(
|
scanned_count += 1
|
||||||
results, limit, exclusive_start_key, scanned_index=index_name
|
|
||||||
)
|
|
||||||
|
|
||||||
|
# https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.html#Query.FilterExpression
|
||||||
|
# the filter expression should be evaluated after the query.
|
||||||
if filter_expression is not None:
|
if filter_expression is not None:
|
||||||
results = [item for item in results if filter_expression.expr(item)]
|
results = [item for item in results if filter_expression.expr(item)]
|
||||||
|
|
||||||
@ -865,6 +884,13 @@ class Table(CloudFormationModel):
|
|||||||
|
|
||||||
return results, scanned_count, last_evaluated_key
|
return results, scanned_count, last_evaluated_key
|
||||||
|
|
||||||
|
def _item_equals_dct(self, item: Item, dct: Dict[str, Any]) -> bool:
|
||||||
|
hash_key = DynamoType(dct.get(self.hash_key_attr)) # type: ignore[arg-type]
|
||||||
|
range_key = dct.get(self.range_key_attr) if self.range_key_attr else None
|
||||||
|
if range_key is not None:
|
||||||
|
range_key = DynamoType(range_key)
|
||||||
|
return item.hash_key == hash_key and item.range_key == range_key
|
||||||
|
|
||||||
def _trim_results(
|
def _trim_results(
|
||||||
self,
|
self,
|
||||||
results: List[Item],
|
results: List[Item],
|
||||||
@ -873,45 +899,40 @@ class Table(CloudFormationModel):
|
|||||||
scanned_index: Optional[str] = None,
|
scanned_index: Optional[str] = None,
|
||||||
) -> Tuple[List[Item], Optional[Dict[str, Any]]]:
|
) -> Tuple[List[Item], Optional[Dict[str, Any]]]:
|
||||||
if exclusive_start_key is not None:
|
if exclusive_start_key is not None:
|
||||||
hash_key = DynamoType(exclusive_start_key.get(self.hash_key_attr)) # type: ignore[arg-type]
|
|
||||||
range_key = (
|
|
||||||
exclusive_start_key.get(self.range_key_attr)
|
|
||||||
if self.range_key_attr
|
|
||||||
else None
|
|
||||||
)
|
|
||||||
if range_key is not None:
|
|
||||||
range_key = DynamoType(range_key)
|
|
||||||
for i in range(len(results)):
|
for i in range(len(results)):
|
||||||
if (
|
if self._item_equals_dct(results[i], exclusive_start_key):
|
||||||
results[i].hash_key == hash_key
|
|
||||||
and results[i].range_key == range_key
|
|
||||||
):
|
|
||||||
results = results[i + 1 :]
|
results = results[i + 1 :]
|
||||||
break
|
break
|
||||||
|
|
||||||
last_evaluated_key = None
|
last_evaluated_key = None
|
||||||
size_limit = 1000000 # DynamoDB has a 1MB size limit
|
|
||||||
item_size = sum(res.size() for res in results)
|
item_size = sum(res.size() for res in results)
|
||||||
if item_size > size_limit:
|
if item_size > RESULT_SIZE_LIMIT:
|
||||||
item_size = idx = 0
|
item_size = idx = 0
|
||||||
while item_size + results[idx].size() < size_limit:
|
while item_size + results[idx].size() < RESULT_SIZE_LIMIT:
|
||||||
item_size += results[idx].size()
|
item_size += results[idx].size()
|
||||||
idx += 1
|
idx += 1
|
||||||
limit = min(limit, idx) if limit else idx
|
limit = min(limit, idx) if limit else idx
|
||||||
if limit and len(results) > limit:
|
if limit and len(results) > limit:
|
||||||
results = results[:limit]
|
results = results[:limit]
|
||||||
last_evaluated_key = {self.hash_key_attr: results[-1].hash_key}
|
last_evaluated_key = self._get_last_evaluated_key(
|
||||||
if self.range_key_attr is not None and results[-1].range_key is not None:
|
last_result=results[-1], index_name=scanned_index
|
||||||
last_evaluated_key[self.range_key_attr] = results[-1].range_key
|
)
|
||||||
|
|
||||||
if scanned_index:
|
|
||||||
index = self.get_index(scanned_index)
|
|
||||||
idx_col_list = [i["AttributeName"] for i in index.schema]
|
|
||||||
for col in idx_col_list:
|
|
||||||
last_evaluated_key[col] = results[-1].attrs[col]
|
|
||||||
|
|
||||||
return results, last_evaluated_key
|
return results, last_evaluated_key
|
||||||
|
|
||||||
|
def _get_last_evaluated_key(
|
||||||
|
self, last_result: Item, index_name: Optional[str]
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
last_evaluated_key = {self.hash_key_attr: last_result.hash_key}
|
||||||
|
if self.range_key_attr is not None and last_result.range_key is not None:
|
||||||
|
last_evaluated_key[self.range_key_attr] = last_result.range_key
|
||||||
|
if index_name:
|
||||||
|
index = self.get_index(index_name)
|
||||||
|
idx_col_list = [i["AttributeName"] for i in index.schema]
|
||||||
|
for col in idx_col_list:
|
||||||
|
last_evaluated_key[col] = last_result.attrs[col]
|
||||||
|
return last_evaluated_key
|
||||||
|
|
||||||
def delete(self, account_id: str, region_name: str) -> None:
|
def delete(self, account_id: str, region_name: str) -> None:
|
||||||
from moto.dynamodb.models import dynamodb_backends
|
from moto.dynamodb.models import dynamodb_backends
|
||||||
|
|
||||||
|
@ -1170,24 +1170,42 @@ def test_scan_by_index():
|
|||||||
assert res["Count"] == 3
|
assert res["Count"] == 3
|
||||||
assert len(res["Items"]) == 3
|
assert len(res["Items"]) == 3
|
||||||
|
|
||||||
|
res = dynamodb.scan(TableName="test", Limit=1)
|
||||||
|
assert res["Count"] == 1
|
||||||
|
assert res["ScannedCount"] == 1
|
||||||
|
|
||||||
|
res = dynamodb.scan(TableName="test", ExclusiveStartKey=res["LastEvaluatedKey"])
|
||||||
|
assert res["Count"] == 2
|
||||||
|
assert res["ScannedCount"] == 2
|
||||||
|
|
||||||
res = dynamodb.scan(TableName="test", IndexName="test_gsi")
|
res = dynamodb.scan(TableName="test", IndexName="test_gsi")
|
||||||
assert res["Count"] == 2
|
assert res["Count"] == 2
|
||||||
|
assert res["ScannedCount"] == 2
|
||||||
assert len(res["Items"]) == 2
|
assert len(res["Items"]) == 2
|
||||||
|
|
||||||
res = dynamodb.scan(TableName="test", IndexName="test_gsi", Limit=1)
|
res = dynamodb.scan(TableName="test", IndexName="test_gsi", Limit=1)
|
||||||
assert res["Count"] == 1
|
assert res["Count"] == 1
|
||||||
|
assert res["ScannedCount"] == 1
|
||||||
assert len(res["Items"]) == 1
|
assert len(res["Items"]) == 1
|
||||||
last_eval_key = res["LastEvaluatedKey"]
|
last_eval_key = res["LastEvaluatedKey"]
|
||||||
assert last_eval_key["id"]["S"] == "1"
|
assert last_eval_key["id"]["S"] == "1"
|
||||||
assert last_eval_key["gsi_col"]["S"] == "1"
|
assert last_eval_key["gsi_col"]["S"] == "1"
|
||||||
assert last_eval_key["gsi_range_key"]["S"] == "1"
|
assert last_eval_key["gsi_range_key"]["S"] == "1"
|
||||||
|
|
||||||
|
res = dynamodb.scan(
|
||||||
|
TableName="test", IndexName="test_gsi", ExclusiveStartKey=last_eval_key
|
||||||
|
)
|
||||||
|
assert res["Count"] == 1
|
||||||
|
assert res["ScannedCount"] == 1
|
||||||
|
|
||||||
res = dynamodb.scan(TableName="test", IndexName="test_lsi")
|
res = dynamodb.scan(TableName="test", IndexName="test_lsi")
|
||||||
assert res["Count"] == 2
|
assert res["Count"] == 2
|
||||||
|
assert res["ScannedCount"] == 2
|
||||||
assert len(res["Items"]) == 2
|
assert len(res["Items"]) == 2
|
||||||
|
|
||||||
res = dynamodb.scan(TableName="test", IndexName="test_lsi", Limit=1)
|
res = dynamodb.scan(TableName="test", IndexName="test_lsi", Limit=1)
|
||||||
assert res["Count"] == 1
|
assert res["Count"] == 1
|
||||||
|
assert res["ScannedCount"] == 1
|
||||||
assert len(res["Items"]) == 1
|
assert len(res["Items"]) == 1
|
||||||
last_eval_key = res["LastEvaluatedKey"]
|
last_eval_key = res["LastEvaluatedKey"]
|
||||||
assert last_eval_key["id"]["S"] == "1"
|
assert last_eval_key["id"]["S"] == "1"
|
||||||
|
@ -523,11 +523,15 @@ def test_scan_pagination():
|
|||||||
page1 = table.scan(Limit=6)
|
page1 = table.scan(Limit=6)
|
||||||
assert page1["Count"] == 6
|
assert page1["Count"] == 6
|
||||||
assert len(page1["Items"]) == 6
|
assert len(page1["Items"]) == 6
|
||||||
|
page1_results = set([r["username"] for r in page1["Items"]])
|
||||||
|
assert page1_results == {"user0", "user3", "user1", "user2", "user5", "user4"}
|
||||||
|
|
||||||
page2 = table.scan(Limit=6, ExclusiveStartKey=page1["LastEvaluatedKey"])
|
page2 = table.scan(Limit=6, ExclusiveStartKey=page1["LastEvaluatedKey"])
|
||||||
assert page2["Count"] == 4
|
assert page2["Count"] == 4
|
||||||
assert len(page2["Items"]) == 4
|
assert len(page2["Items"]) == 4
|
||||||
assert "LastEvaluatedKey" not in page2
|
assert "LastEvaluatedKey" not in page2
|
||||||
|
page2_results = set([r["username"] for r in page2["Items"]])
|
||||||
|
assert page2_results == {"user6", "user7", "user8", "user9"}
|
||||||
|
|
||||||
results = page1["Items"] + page2["Items"]
|
results = page1["Items"] + page2["Items"]
|
||||||
usernames = set([r["username"] for r in results])
|
usernames = set([r["username"] for r in results])
|
||||||
|
Loading…
Reference in New Issue
Block a user