Added FilterExpression to dynamodb scan

This commit is contained in:
Terry Cain 2017-10-07 21:57:14 +01:00
parent c23c5057f2
commit c86bece382
No known key found for this signature in database
GPG Key ID: 14D90844E4E9B9F3
4 changed files with 513 additions and 8 deletions

View File

@ -1,4 +1,6 @@
from __future__ import unicode_literals
import re
import six
# TODO add tests for all of these
EQ_FUNCTION = lambda item_value, test_value: item_value == test_value # flake8: noqa
@ -39,3 +41,422 @@ COMPARISON_FUNCS = {
def get_comparison_func(range_comparison):
return COMPARISON_FUNCS.get(range_comparison)
#
def get_filter_expression(expr, names, values):
# Examples
# expr = 'Id > 5 AND attribute_exists(test) AND Id BETWEEN 5 AND 6 OR length < 6 AND contains(test, 1) AND 5 IN (4,5, 6) OR (Id < 5 AND 5 > Id)'
# expr = 'Id > 5 AND Subs < 7'
# Need to do some dodgyness for NOT i think.
if 'NOT' in expr:
raise NotImplementedError('NOT not supported yet')
if names is None:
names = {}
if values is None:
values = {}
# Do substitutions
for key, value in names.items():
expr = expr.replace(key, value)
for key, value in values.items():
if 'N' in value:
expr.replace(key, float(value['N']))
else:
expr = expr.replace(key, value['S'])
# Remove all spaces, tbf we could just skip them in the next step.
# The number of known options is really small so we can do a fair bit of cheating
expr = list(re.sub('\s', '', expr)) # 'Id>5ANDattribute_exists(test)ORNOTlength<6'
# DodgyTokenisation stage 1
def is_value(val):
return val not in ('<', '>', '=', '(', ')')
def contains_keyword(val):
for kw in ('BETWEEN', 'IN', 'AND', 'OR', 'NOT'):
if kw in val:
return kw
return None
def is_function(val):
return val in ('attribute_exists', 'attribute_not_exists', 'attribute_type', 'begins_with', 'contains', 'size')
# Does the main part of splitting between sections of characters
tokens = []
stack = ''
while len(expr) > 0:
current_char = expr.pop(0)
if current_char == ',': # Split params ,
if len(stack) > 0:
tokens.append(stack)
stack = ''
elif is_value(current_char):
stack += current_char
kw = contains_keyword(stack)
if kw is not None:
# We have a kw in the stack, could be AND or something like 5AND
tmp = stack.replace(kw, '')
if len(tmp) > 0:
tokens.append(tmp)
tokens.append(kw)
stack = ''
else:
if len(stack) > 0:
tokens.append(stack)
tokens.append(current_char)
stack = ''
if len(stack) > 0:
tokens.append(stack)
# DodgyTokenisation stage 2, it groups together some elements to make RPN'ing it later easier.
tokens2 = []
token_iterator = iter(tokens)
for token in token_iterator:
if token == '(':
tuple_list = []
next_token = six.next(token_iterator)
while next_token != ')':
tuple_list.append(next_token)
next_token = six.next(token_iterator)
tokens2.append(tuple(tuple_list))
elif token == 'BETWEEN':
op1 = six.next(token_iterator)
and_op = six.next(token_iterator)
assert and_op == 'AND'
op2 = six.next(token_iterator)
tokens2.append('BETWEEN')
tokens2.append((op1, op2))
elif is_function(token):
function_list = [token]
lbracket = six.next(token_iterator)
assert lbracket == '('
next_token = six.next(token_iterator)
while next_token != ')':
function_list.append(next_token)
next_token = six.next(token_iterator)
tokens2.append(function_list)
else:
try:
token = int(token)
except ValueError:
try:
token = float(token)
except ValueError:
pass
tokens2.append(token)
# Start of the Shunting-Yard algorigth. <-- Proper beast algorithm!
def is_number(val):
return val not in ('<', '>', '=', '>=', '<=', '<>', 'BETWEEN', 'IN', 'AND', 'OR', 'NOT')
def is_op(val):
return val in ('<', '>', '=', '>=', '<=', '<>', 'BETWEEN', 'IN', 'AND', 'OR', 'NOT')
OPS = {'<': 5, '>': 5, '=': 5, '>=': 5, '<=': 5, '<>': 5, 'IN': 8, 'AND': 11, 'OR': 12, 'NOT': 10, 'BETWEEN': 9, '(': 1, ')': 1}
output = []
op_stack = []
# Basically takes in an infix notation calculation, converts it to a reverse polish notation where there is no
# ambiguaty on which order operators are applied.
while len(tokens2) > 0:
token = tokens2.pop(0)
if token == '(':
op_stack.append(token)
elif token == ')':
while len(op_stack) > 0 and op_stack[-1] != '(':
output.append(op_stack.pop())
if len(op_stack) == 0:
# No left paren on the stack, error
raise Exception('Missing left paren')
# Pop off the left paren
op_stack.pop()
elif is_number(token):
output.append(token)
else:
# Must be operator kw
while len(op_stack) > 0 and OPS[op_stack[-1]] <= OPS[token]:
output.append(op_stack.pop())
op_stack.append(token)
while len(op_stack) > 0:
output.append(op_stack.pop())
# Hacky funcition to convert dynamo functions (which are represented as lists) to their Class equivelent
def to_func(val):
if isinstance(val, list):
func_name = val.pop(0)
# Expand rest of the list to arguments
val = FUNC_CLASS[func_name](*val)
return val
# Simple reverse polish notation execution. Builts up a nested filter object.
# The filter object then takes a dynamo item and returns true/false
stack = []
for token in output:
if is_op(token):
op2 = stack.pop()
op1 = stack.pop()
op_cls = OP_CLASS[token]
stack.append(op_cls(op1, op2))
else:
stack.append(to_func(token))
return stack[0]
class Op(object):
"""
Base class for a FilterExpression operator
"""
OP = ''
def __init__(self, lhs, rhs):
self.lhs = lhs
self.rhs = rhs
def _lhs(self, item):
"""
:type item: moto.dynamodb2.models.Item
"""
lhs = self.lhs
if isinstance(self.lhs, (Op, Func)):
lhs = self.lhs.expr(item)
elif isinstance(self.lhs, str):
try:
lhs = item.attrs[self.lhs].cast_value
except Exception:
pass
return lhs
def _rhs(self, item):
rhs = self.rhs
if isinstance(self.rhs, (Op, Func)):
rhs = self.rhs.expr(item)
elif isinstance(self.lhs, str):
try:
rhs = item.attrs[self.rhs].cast_value
except Exception:
pass
return rhs
def expr(self, item):
return True
def __repr__(self):
return '({0} {1} {2})'.format(self.lhs, self.OP, self.rhs)
class Func(object):
"""
Base class for a FilterExpression function
"""
FUNC = 'Unknown'
def expr(self, item):
return True
def __repr__(self):
return 'Func(...)'.format(self.FUNC)
class OpAnd(Op):
OP = 'AND'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs and rhs
class OpLessThan(Op):
OP = '<'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs < rhs
class OpGreaterThan(Op):
OP = '>'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs > rhs
class OpEqual(Op):
OP = '='
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs == rhs
class OpNotEqual(Op):
OP = '<>'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs == rhs
class OpLessThanOrEqual(Op):
OP = '<='
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs <= rhs
class OpGreaterThanOrEqual(Op):
OP = '>='
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs >= rhs
class OpOr(Op):
OP = 'OR'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs or rhs
class OpIn(Op):
OP = 'IN'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs in rhs
class OpBetween(Op):
OP = 'BETWEEN'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return rhs[0] <= lhs <= rhs[1]
class FuncAttrExists(Func):
FUNC = 'attribute_exists'
def __init__(self, attribute):
self.attr = attribute
def expr(self, item):
return self.attr in item.attrs
class FuncAttrNotExists(Func):
FUNC = 'attribute_not_exists'
def __init__(self, attribute):
self.attr = attribute
def expr(self, item):
return self.attr not in item.attrs
class FuncAttrType(Func):
FUNC = 'attribute_type'
def __init__(self, attribute, _type):
self.attr = attribute
self.type = _type
def expr(self, item):
return self.attr in item.attrs and item.attrs[self.attr].type == self.type
class FuncBeginsWith(Func):
FUNC = 'begins_with'
def __init__(self, attribute, substr):
self.attr = attribute
self.substr = substr
def expr(self, item):
return self.attr in item.attrs and item.attrs[self.attr].type == 'S' and item.attrs[self.attr].value.startswith(self.substr)
class FuncContains(Func):
FUNC = 'contains'
def __init__(self, attribute, operand):
self.attr = attribute
self.operand = operand
def expr(self, item):
if self.attr not in item.attrs:
return False
if item.attrs[self.attr].type in ('S', 'SS', 'NS', 'BS', 'L', 'M'):
return self.operand in item.attrs[self.attr].value
return False
class FuncSize(Func):
FUNC = 'contains'
def __init__(self, attribute):
self.attr = attribute
def expr(self, item):
if self.attr not in item.attrs:
raise ValueError('Invalid option')
if item.attrs[self.attr].type in ('S', 'SS', 'NS', 'B', 'BS', 'L', 'M'):
return len(item.attrs[self.attr].value)
raise ValueError('Invalid option')
OP_CLASS = {
'AND': OpAnd,
'OR': OpOr,
'IN': OpIn,
'BETWEEN': OpBetween,
'<': OpLessThan,
'>': OpGreaterThan,
'<=': OpLessThanOrEqual,
'>=': OpGreaterThanOrEqual,
'=': OpEqual,
'<>': OpNotEqual
}
FUNC_CLASS = {
'attribute_exists': FuncAttrExists,
'attribute_not_exists': FuncAttrNotExists,
'attribute_type': FuncAttrType,
'begins_with': FuncBeginsWith,
'contains': FuncContains,
'size': FuncSize
}

View File

@ -8,7 +8,7 @@ import re
from moto.compat import OrderedDict
from moto.core import BaseBackend, BaseModel
from moto.core.utils import unix_time
from .comparisons import get_comparison_func
from .comparisons import get_comparison_func, get_filter_expression, Op
class DynamoJsonEncoder(json.JSONEncoder):
@ -508,15 +508,15 @@ class Table(BaseModel):
else:
yield hash_set
def scan(self, filters, limit, exclusive_start_key):
def scan(self, filters, limit, exclusive_start_key, filter_expression=None):
results = []
scanned_count = 0
for result in self.all_items():
for item in self.all_items():
scanned_count += 1
passes_all_conditions = True
for attribute_name, (comparison_operator, comparison_objs) in filters.items():
attribute = result.attrs.get(attribute_name)
attribute = item.attrs.get(attribute_name)
if attribute:
# Attribute found
@ -532,8 +532,11 @@ class Table(BaseModel):
passes_all_conditions = False
break
if filter_expression is not None:
passes_all_conditions &= filter_expression.expr(item)
if passes_all_conditions:
results.append(result)
results.append(item)
results, last_evaluated_key = self._trim_results(results, limit,
exclusive_start_key)
@ -698,7 +701,7 @@ class DynamoDBBackend(BaseBackend):
return table.query(hash_key, range_comparison, range_values, limit,
exclusive_start_key, scan_index_forward, projection_expression, index_name, **filter_kwargs)
def scan(self, table_name, filters, limit, exclusive_start_key):
def scan(self, table_name, filters, limit, exclusive_start_key, filter_expression, expr_names, expr_values):
table = self.tables.get(table_name)
if not table:
return None, None, None
@ -708,7 +711,12 @@ class DynamoDBBackend(BaseBackend):
dynamo_types = [DynamoType(value) for value in comparison_values]
scan_filters[key] = (comparison_operator, dynamo_types)
return table.scan(scan_filters, limit, exclusive_start_key)
if filter_expression is not None:
filter_expression = get_filter_expression(filter_expression, expr_names, expr_values)
else:
filter_expression = Op(None, None) # Will always eval to true
return table.scan(scan_filters, limit, exclusive_start_key, filter_expression)
def update_item(self, table_name, key, update_expression, attribute_updates, expression_attribute_names,
expression_attribute_values, expected=None):

View File

@ -432,12 +432,19 @@ class DynamoHandler(BaseResponse):
comparison_values = scan_filter.get("AttributeValueList", [])
filters[attribute_name] = (comparison_operator, comparison_values)
filter_expression = self.body.get('FilterExpression')
expression_attribute_values = self.body.get('ExpressionAttributeValues', {})
expression_attribute_names = self.body.get('ExpressionAttributeNames', {})
exclusive_start_key = self.body.get('ExclusiveStartKey')
limit = self.body.get("Limit")
items, scanned_count, last_evaluated_key = dynamodb_backend2.scan(name, filters,
limit,
exclusive_start_key)
exclusive_start_key,
filter_expression,
expression_attribute_names,
expression_attribute_values)
if items is None:
er = 'com.amazonaws.dynamodb.v20111205#ResourceNotFoundException'

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals, print_function
import six
import boto
import boto3
from boto3.dynamodb.conditions import Attr
import sure # noqa
import requests
from moto import mock_dynamodb2, mock_dynamodb2_deprecated
@ -12,6 +13,10 @@ from botocore.exceptions import ClientError
from boto3.dynamodb.conditions import Key
from tests.helpers import requires_boto_gte
import tests.backport_assert_raises
import moto.dynamodb2.comparisons
import moto.dynamodb2.models
from nose.tools import assert_raises
try:
import boto.dynamodb2
@ -230,6 +235,7 @@ def test_scan_returns_consumed_capacity():
assert 'CapacityUnits' in response['ConsumedCapacity']
assert response['ConsumedCapacity']['TableName'] == name
@requires_boto_gte("2.9")
@mock_dynamodb2
def test_query_returns_consumed_capacity():
@ -280,6 +286,7 @@ def test_query_returns_consumed_capacity():
assert 'CapacityUnits' in results['ConsumedCapacity']
assert results['ConsumedCapacity']['CapacityUnits'] == 1
@mock_dynamodb2
def test_basic_projection_expressions():
dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
@ -353,6 +360,7 @@ def test_basic_projection_expressions():
assert 'body' in results['Items'][1]
assert results['Items'][1]['body'] == 'yet another test message'
@mock_dynamodb2
def test_basic_projection_expressions_with_attr_expression_names():
dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
@ -419,6 +427,7 @@ def test_basic_projection_expressions_with_attr_expression_names():
assert 'attachment' in results['Items'][0]
assert results['Items'][0]['attachment'] == 'something'
@mock_dynamodb2
def test_put_item_returns_consumed_capacity():
dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
@ -461,6 +470,7 @@ def test_put_item_returns_consumed_capacity():
assert 'ConsumedCapacity' in response
@mock_dynamodb2
def test_update_item_returns_consumed_capacity():
dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
@ -514,6 +524,7 @@ def test_update_item_returns_consumed_capacity():
assert 'CapacityUnits' in response['ConsumedCapacity']
assert 'TableName' in response['ConsumedCapacity']
@mock_dynamodb2
def test_get_item_returns_consumed_capacity():
dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
@ -562,3 +573,61 @@ def test_get_item_returns_consumed_capacity():
assert 'ConsumedCapacity' in response
assert 'CapacityUnits' in response['ConsumedCapacity']
assert 'TableName' in response['ConsumedCapacity']
def test_filter_expression():
row1 = moto.dynamodb2.models.Item(None, None, None, None, {'Id': {'N': '8'}, 'Subs': {'N': '5'}, 'Desc': {'S': 'Some description'}, 'KV': {'SS': ['test1', 'test2']}})
row2 = moto.dynamodb2.models.Item(None, None, None, None, {'Id': {'N': '8'}, 'Subs': {'N': '10'}, 'Desc': {'S': 'A description'}, 'KV': {'SS': ['test3', 'test4']}})
filter1 = moto.dynamodb2.comparisons.get_filter_expression('Id > 5 AND Subs < 7', {}, {})
filter1.expr(row1).should.be(True)
filter1.expr(row2).should.be(False)
filter2 = moto.dynamodb2.comparisons.get_filter_expression('attribute_exists(Id) AND attribute_not_exists(User)', {}, {})
filter2.expr(row1).should.be(True)
filter3 = moto.dynamodb2.comparisons.get_filter_expression('attribute_type(Id, N)', {}, {})
filter3.expr(row1).should.be(True)
filter4 = moto.dynamodb2.comparisons.get_filter_expression('begins_with(Desc, Some)', {}, {})
filter4.expr(row1).should.be(True)
filter4.expr(row2).should.be(False)
filter5 = moto.dynamodb2.comparisons.get_filter_expression('contains(KV, test1)', {}, {})
filter5.expr(row1).should.be(True)
filter5.expr(row2).should.be(False)
filter6 = moto.dynamodb2.comparisons.get_filter_expression('size(Desc) > size(KV)', {}, {})
filter6.expr(row1).should.be(True)
@mock_dynamodb2
def test_scan_filter():
client = boto3.client('dynamodb', region_name='us-east-1')
dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
# Create the DynamoDB table.
client.create_table(
TableName='test1',
AttributeDefinitions=[{'AttributeName': 'client', 'AttributeType': 'S'}, {'AttributeName': 'app', 'AttributeType': 'S'}],
KeySchema=[{'AttributeName': 'client', 'KeyType': 'HASH'}, {'AttributeName': 'app', 'KeyType': 'RANGE'}],
ProvisionedThroughput={'ReadCapacityUnits': 123, 'WriteCapacityUnits': 123}
)
client.put_item(
TableName='test1',
Item={
'client': {'S': 'client1'},
'app': {'S': 'app1'}
}
)
table = dynamodb.Table('test1')
response = table.scan(
FilterExpression=Attr('app').eq('app2')
)
assert response['Count'] == 0
response = table.scan(
FilterExpression=Attr('app').eq('app1')
)
assert response['Count'] == 1