Add support for the Scan method using an LSI or GSI
This commit is contained in:
cm-iwata 2019-05-22 01:45:30 +09:00 committed by Terry Cain
parent b8ba7980a0
commit 9bf8fd3417
6 changed files with 282 additions and 10 deletions

View File

@ -0,0 +1,2 @@
class InvalidIndexNameError(ValueError):
    """Signal that a requested index name is not defined on the table.

    Subclasses ``ValueError``, so handlers that need to tell the two apart
    must catch this type before a generic ``ValueError`` clause.
    """

View File

@ -13,6 +13,7 @@ from moto.core import BaseBackend, BaseModel
from moto.core.utils import unix_time
from moto.core.exceptions import JsonRESTError
from .comparisons import get_comparison_func, get_filter_expression, Op
from .exceptions import InvalidIndexNameError
class DynamoJsonEncoder(json.JSONEncoder):
@ -572,7 +573,7 @@ class Table(BaseModel):
results = []
if index_name:
all_indexes = (self.global_indexes or []) + (self.indexes or [])
all_indexes = self.all_indexes()
indexes_by_name = dict((i['IndexName'], i) for i in all_indexes)
if index_name not in indexes_by_name:
raise ValueError('Invalid index: %s for table: %s. Available indexes are: %s' % (
@ -672,11 +673,39 @@ class Table(BaseModel):
else:
yield hash_set
def scan(self, filters, limit, exclusive_start_key, filter_expression=None):
def all_indexes(self):
    """Return one new list holding every index definition of the table.

    The entries of ``self.global_indexes`` come first, followed by the
    entries of ``self.indexes``.  A falsy attribute (for example ``None``)
    is treated as an empty list.
    """
    global_part = self.global_indexes
    if not global_part:
        global_part = []

    other_part = self.indexes
    if not other_part:
        other_part = []

    # Concatenation builds a fresh list, so neither attribute is mutated.
    return global_part + other_part
def has_idx_items(self, index_name):
    """Yield the stored items that carry every key attribute of an index.

    The index is looked up by ``IndexName`` among ``self.all_indexes()``.
    An item is yielded only when the keys of its ``attrs`` include all
    ``AttributeName`` entries of that index's ``KeySchema``.

    This is a generator: nothing runs until iteration starts, so the
    ``KeyError`` for an unknown ``index_name`` is raised on the first
    ``next()`` rather than at call time.
    """
    by_name = {index['IndexName']: index for index in self.all_indexes()}
    key_schema = by_name[index_name]['KeySchema']
    required = {key['AttributeName'] for key in key_schema}

    for bucket in self.items.values():
        # With a range key each bucket maps range values to items;
        # without one the bucket is the item itself.
        if self.range_key_attr:
            candidates = bucket.values()
        else:
            candidates = (bucket,)

        for candidate in candidates:
            if required <= set(candidate.attrs):
                yield candidate
def scan(self, filters, limit, exclusive_start_key, filter_expression=None, index_name=None):
results = []
scanned_count = 0
all_indexes = self.all_indexes()
indexes_by_name = dict((i['IndexName'], i) for i in all_indexes)
for item in self.all_items():
if index_name:
if index_name not in indexes_by_name:
raise InvalidIndexNameError('The table does not have the specified index: %s' % index_name)
items = self.has_idx_items(index_name)
else:
items = self.all_items()
for item in items:
scanned_count += 1
passes_all_conditions = True
for attribute_name, (comparison_operator, comparison_objs) in filters.items():
@ -703,10 +732,10 @@ class Table(BaseModel):
results.append(item)
results, last_evaluated_key = self._trim_results(results, limit,
exclusive_start_key)
exclusive_start_key, index_name)
return results, scanned_count, last_evaluated_key
def _trim_results(self, results, limit, exclusive_start_key):
def _trim_results(self, results, limit, exclusive_start_key, scaned_index=None):
if exclusive_start_key is not None:
hash_key = DynamoType(exclusive_start_key.get(self.hash_key_attr))
range_key = exclusive_start_key.get(self.range_key_attr)
@ -726,6 +755,14 @@ class Table(BaseModel):
if results[-1].range_key is not None:
last_evaluated_key[self.range_key_attr] = results[-1].range_key
if scaned_index:
all_indexes = self.all_indexes()
indexes_by_name = dict((i['IndexName'], i) for i in all_indexes)
idx = indexes_by_name[scaned_index]
idx_col_list = [i['AttributeName'] for i in idx['KeySchema']]
for col in idx_col_list:
last_evaluated_key[col] = results[-1].attrs[col]
return results, last_evaluated_key
def lookup(self, *args, **kwargs):
@ -893,7 +930,7 @@ class DynamoDBBackend(BaseBackend):
return table.query(hash_key, range_comparison, range_values, limit,
exclusive_start_key, scan_index_forward, projection_expression, index_name, filter_expression, **filter_kwargs)
def scan(self, table_name, filters, limit, exclusive_start_key, filter_expression, expr_names, expr_values):
def scan(self, table_name, filters, limit, exclusive_start_key, filter_expression, expr_names, expr_values, index_name):
table = self.tables.get(table_name)
if not table:
return None, None, None
@ -908,7 +945,7 @@ class DynamoDBBackend(BaseBackend):
else:
filter_expression = Op(None, None) # Will always eval to true
return table.scan(scan_filters, limit, exclusive_start_key, filter_expression)
return table.scan(scan_filters, limit, exclusive_start_key, filter_expression, index_name)
def update_item(self, table_name, key, update_expression, attribute_updates, expression_attribute_names,
expression_attribute_values, expected=None):

View File

@ -5,6 +5,7 @@ import re
from moto.core.responses import BaseResponse
from moto.core.utils import camelcase_to_underscores, amzn_request_id
from .exceptions import InvalidIndexNameError
from .models import dynamodb_backends, dynamo_json_dump
@ -560,6 +561,7 @@ class DynamoHandler(BaseResponse):
exclusive_start_key = self.body.get('ExclusiveStartKey')
limit = self.body.get("Limit")
index_name = self.body.get('IndexName')
try:
items, scanned_count, last_evaluated_key = self.dynamodb_backend.scan(name, filters,
@ -567,7 +569,11 @@ class DynamoHandler(BaseResponse):
exclusive_start_key,
filter_expression,
expression_attribute_names,
expression_attribute_values)
expression_attribute_values,
index_name)
except InvalidIndexNameError as err:
er = 'com.amazonaws.dynamodb.v20111205#ValidationException'
return self.error(er, str(err))
except ValueError as err:
er = 'com.amazonaws.dynamodb.v20111205#ValidationError'
return self.error(er, 'Bad Filter Expression: {0}'.format(err))

View File

@ -1531,6 +1531,7 @@ def test_dynamodb_streams_2():
}
assert 'LatestStreamLabel' in resp['TableDescription']
assert 'LatestStreamArn' in resp['TableDescription']
@mock_dynamodb2
def test_condition_expressions():
@ -1696,8 +1697,8 @@ def test_query_gsi_with_range_key():
res = dynamodb.query(TableName='test', IndexName='test_gsi',
KeyConditionExpression='gsi_hash_key = :gsi_hash_key AND gsi_range_key = :gsi_range_key',
ExpressionAttributeValues={
':gsi_hash_key': {'S': 'key1'},
':gsi_range_key': {'S': 'range1'}
':gsi_hash_key': {'S': 'key1'},
':gsi_range_key': {'S': 'range1'}
})
res.should.have.key("Count").equal(1)
res.should.have.key("Items")
@ -1706,3 +1707,45 @@ def test_query_gsi_with_range_key():
'gsi_hash_key': {'S': 'key1'},
'gsi_range_key': {'S': 'range1'},
})
@mock_dynamodb2
def test_scan_by_non_exists_index():
    """Scanning with an IndexName the table lacks yields a 400 ValidationException."""
    client = boto3.client('dynamodb', region_name='us-east-1')

    # The table defines exactly one GSI, named 'test_gsi'.
    gsi_definition = {
        'IndexName': 'test_gsi',
        'KeySchema': [
            {'AttributeName': 'gsi_col', 'KeyType': 'HASH'},
        ],
        'Projection': {'ProjectionType': 'ALL'},
        'ProvisionedThroughput': {
            'ReadCapacityUnits': 1,
            'WriteCapacityUnits': 1,
        },
    }
    client.create_table(
        TableName='test',
        KeySchema=[{'AttributeName': 'id', 'KeyType': 'HASH'}],
        AttributeDefinitions=[
            {'AttributeName': 'id', 'AttributeType': 'S'},
            {'AttributeName': 'gsi_col', 'AttributeType': 'S'},
        ],
        ProvisionedThroughput={'ReadCapacityUnits': 1, 'WriteCapacityUnits': 1},
        GlobalSecondaryIndexes=[gsi_definition],
    )

    with assert_raises(ClientError) as raised:
        client.scan(TableName='test', IndexName='non_exists_index')

    # Inspect the captured error only after the context manager has exited.
    error_response = raised.exception.response
    error_response['Error']['Code'].should.equal('ValidationException')
    error_response['ResponseMetadata']['HTTPStatusCode'].should.equal(400)
    error_response['Error']['Message'].should.equal(
        'The table does not have the specified index: non_exists_index'
    )

View File

@ -1961,3 +1961,113 @@ def test_query_pagination():
results = page1['Items'] + page2['Items']
subjects = set([int(r['subject']) for r in results])
subjects.should.equal(set(range(10)))
@mock_dynamodb2
def test_scan_by_index():
    """Scan a hash+range table directly, through its GSI and through its LSI.

    Three items are stored; only two carry the index key attributes, so an
    index scan is expected to return two items while a plain scan returns
    three.  With ``Limit=1`` the ``LastEvaluatedKey`` must hold both the
    table keys and the keys of the scanned index.
    """
    client = boto3.client('dynamodb', region_name='us-east-1')

    string_attributes = ['id', 'range_key', 'gsi_col', 'gsi_range_key', 'lsi_range_key']
    gsi_definition = {
        'IndexName': 'test_gsi',
        'KeySchema': [
            {'AttributeName': 'gsi_col', 'KeyType': 'HASH'},
            {'AttributeName': 'gsi_range_key', 'KeyType': 'RANGE'},
        ],
        'Projection': {'ProjectionType': 'ALL'},
        'ProvisionedThroughput': {
            'ReadCapacityUnits': 1,
            'WriteCapacityUnits': 1,
        },
    }
    lsi_definition = {
        'IndexName': 'test_lsi',
        'KeySchema': [
            {'AttributeName': 'id', 'KeyType': 'HASH'},
            {'AttributeName': 'lsi_range_key', 'KeyType': 'RANGE'},
        ],
        'Projection': {'ProjectionType': 'ALL'},
    }
    client.create_table(
        TableName='test',
        KeySchema=[
            {'AttributeName': 'id', 'KeyType': 'HASH'},
            {'AttributeName': 'range_key', 'KeyType': 'RANGE'},
        ],
        AttributeDefinitions=[
            {'AttributeName': attribute, 'AttributeType': 'S'}
            for attribute in string_attributes
        ],
        ProvisionedThroughput={'ReadCapacityUnits': 1, 'WriteCapacityUnits': 1},
        GlobalSecondaryIndexes=[gsi_definition],
        LocalSecondaryIndexes=[lsi_definition],
    )

    # The third row deliberately omits every index key attribute.
    rows = [
        {
            'id': {'S': '1'},
            'range_key': {'S': '1'},
            'col1': {'S': 'val1'},
            'gsi_col': {'S': '1'},
            'gsi_range_key': {'S': '1'},
            'lsi_range_key': {'S': '1'},
        },
        {
            'id': {'S': '1'},
            'range_key': {'S': '2'},
            'col1': {'S': 'val2'},
            'gsi_col': {'S': '1'},
            'gsi_range_key': {'S': '2'},
            'lsi_range_key': {'S': '2'},
        },
        {
            'id': {'S': '3'},
            'range_key': {'S': '1'},
            'col1': {'S': 'val3'},
        },
    ]
    for row in rows:
        client.put_item(TableName='test', Item=row)

    table_scan = client.scan(TableName='test')
    assert table_scan['Count'] == 3
    assert len(table_scan['Items']) == 3

    gsi_scan = client.scan(TableName='test', IndexName='test_gsi')
    assert gsi_scan['Count'] == 2
    assert len(gsi_scan['Items']) == 2

    gsi_page = client.scan(TableName='test', IndexName='test_gsi', Limit=1)
    assert gsi_page['Count'] == 1
    assert len(gsi_page['Items']) == 1
    gsi_last_key = gsi_page['LastEvaluatedKey']
    assert gsi_last_key['id']['S'] == '1'
    assert gsi_last_key['gsi_col']['S'] == '1'
    assert gsi_last_key['gsi_range_key']['S'] == '1'

    lsi_scan = client.scan(TableName='test', IndexName='test_lsi')
    assert lsi_scan['Count'] == 2
    assert len(lsi_scan['Items']) == 2

    lsi_page = client.scan(TableName='test', IndexName='test_lsi', Limit=1)
    assert lsi_page['Count'] == 1
    assert len(lsi_page['Items']) == 1
    lsi_last_key = lsi_page['LastEvaluatedKey']
    assert lsi_last_key['id']['S'] == '1'
    assert lsi_last_key['range_key']['S'] == '1'
    assert lsi_last_key['lsi_range_key']['S'] == '1'

View File

@ -829,3 +829,77 @@ def test_scan_pagination():
results = page1['Items'] + page2['Items']
usernames = set([r['username'] for r in results])
usernames.should.equal(set(expected_usernames))
@mock_dynamodb2
def test_scan_by_index():
    """Scan a hash-only table directly and through its single-key GSI.

    Two of the three stored items carry ``gsi_col``; the index scan is
    expected to return just those two, and a ``Limit=1`` index scan must
    report both ``id`` and ``gsi_col`` in ``LastEvaluatedKey``.
    """
    client = boto3.client('dynamodb', region_name='us-east-1')

    gsi_definition = {
        'IndexName': 'test_gsi',
        'KeySchema': [
            {'AttributeName': 'gsi_col', 'KeyType': 'HASH'},
        ],
        'Projection': {'ProjectionType': 'ALL'},
        'ProvisionedThroughput': {
            'ReadCapacityUnits': 1,
            'WriteCapacityUnits': 1,
        },
    }
    client.create_table(
        TableName='test',
        KeySchema=[{'AttributeName': 'id', 'KeyType': 'HASH'}],
        AttributeDefinitions=[
            {'AttributeName': 'id', 'AttributeType': 'S'},
            {'AttributeName': 'gsi_col', 'AttributeType': 'S'},
        ],
        ProvisionedThroughput={'ReadCapacityUnits': 1, 'WriteCapacityUnits': 1},
        GlobalSecondaryIndexes=[gsi_definition],
    )

    # The last row has no 'gsi_col', so it must be absent from index scans.
    rows = [
        {
            'id': {'S': '1'},
            'col1': {'S': 'val1'},
            'gsi_col': {'S': 'gsi_val1'},
        },
        {
            'id': {'S': '2'},
            'col1': {'S': 'val2'},
            'gsi_col': {'S': 'gsi_val2'},
        },
        {
            'id': {'S': '3'},
            'col1': {'S': 'val3'},
        },
    ]
    for row in rows:
        client.put_item(TableName='test', Item=row)

    table_scan = client.scan(TableName='test')
    assert table_scan['Count'] == 3
    assert len(table_scan['Items']) == 3

    index_scan = client.scan(TableName='test', IndexName='test_gsi')
    assert index_scan['Count'] == 2
    assert len(index_scan['Items']) == 2

    first_page = client.scan(TableName='test', IndexName='test_gsi', Limit=1)
    assert first_page['Count'] == 1
    assert len(first_page['Items']) == 1
    last_key = first_page['LastEvaluatedKey']
    assert last_key['id']['S'] == '1'
    assert last_key['gsi_col']['S'] == 'gsi_val1'