moto/moto/dynamodb2/comparisons.py
sawandas 09ac77d979
Issue #1539 : Fixing dynamodb filtering (contains, begins with)
Currently contains and begins with are not respecting the given filter value
2018-04-09 12:10:44 +05:30

533 lines
15 KiB
Python

from __future__ import unicode_literals
import re
import six
# TODO add tests for all of these
EQ_FUNCTION = lambda item_value, test_value: item_value == test_value # flake8: noqa
NE_FUNCTION = lambda item_value, test_value: item_value != test_value # flake8: noqa
LE_FUNCTION = lambda item_value, test_value: item_value <= test_value # flake8: noqa
LT_FUNCTION = lambda item_value, test_value: item_value < test_value # flake8: noqa
GE_FUNCTION = lambda item_value, test_value: item_value >= test_value # flake8: noqa
GT_FUNCTION = lambda item_value, test_value: item_value > test_value # flake8: noqa
COMPARISON_FUNCS = {
'EQ': EQ_FUNCTION,
'=': EQ_FUNCTION,
'NE': NE_FUNCTION,
'!=': NE_FUNCTION,
'LE': LE_FUNCTION,
'<=': LE_FUNCTION,
'LT': LT_FUNCTION,
'<': LT_FUNCTION,
'GE': GE_FUNCTION,
'>=': GE_FUNCTION,
'GT': GT_FUNCTION,
'>': GT_FUNCTION,
'NULL': lambda item_value: item_value is None,
'NOT_NULL': lambda item_value: item_value is not None,
'CONTAINS': lambda item_value, test_value: test_value in item_value,
'NOT_CONTAINS': lambda item_value, test_value: test_value not in item_value,
'BEGINS_WITH': lambda item_value, test_value: item_value.startswith(test_value),
'IN': lambda item_value, *test_values: item_value in test_values,
'BETWEEN': lambda item_value, lower_test_value, upper_test_value: lower_test_value <= item_value <= upper_test_value,
}
def get_comparison_func(range_comparison):
return COMPARISON_FUNCS.get(range_comparison)
class RecursionStopIteration(StopIteration):
pass
def get_filter_expression(expr, names, values):
# Examples
# expr = 'Id > 5 AND attribute_exists(test) AND Id BETWEEN 5 AND 6 OR length < 6 AND contains(test, 1) AND 5 IN (4,5, 6) OR (Id < 5 AND 5 > Id)'
# expr = 'Id > 5 AND Subs < 7'
if names is None:
names = {}
if values is None:
values = {}
# Do substitutions
for key, value in names.items():
expr = expr.replace(key, value)
# Store correct types of values for use later
values_map = {}
for key, value in values.items():
if 'N' in value:
values_map[key] = float(value['N'])
elif 'BOOL' in value:
values_map[key] = value['BOOL']
elif 'S' in value:
values_map[key] = value['S']
elif 'NS' in value:
values_map[key] = tuple(value['NS'])
elif 'SS' in value:
values_map[key] = tuple(value['SS'])
elif 'L' in value:
values_map[key] = tuple(value['L'])
else:
raise NotImplementedError()
# Remove all spaces, tbf we could just skip them in the next step.
# The number of known options is really small so we can do a fair bit of cheating
expr = list(expr.strip())
# DodgyTokenisation stage 1
def is_value(val):
return val not in ('<', '>', '=', '(', ')')
def contains_keyword(val):
for kw in ('BETWEEN', 'IN', 'AND', 'OR', 'NOT'):
if kw in val:
return kw
return None
def is_function(val):
return val in ('attribute_exists', 'attribute_not_exists', 'attribute_type', 'begins_with', 'contains', 'size')
# Does the main part of splitting between sections of characters
tokens = []
stack = ''
while len(expr) > 0:
current_char = expr.pop(0)
if current_char == ' ':
if len(stack) > 0:
tokens.append(stack)
stack = ''
elif current_char == ',': # Split params ,
if len(stack) > 0:
tokens.append(stack)
stack = ''
elif is_value(current_char):
stack += current_char
kw = contains_keyword(stack)
if kw is not None:
# We have a kw in the stack, could be AND or something like 5AND
tmp = stack.replace(kw, '')
if len(tmp) > 0:
tokens.append(tmp)
tokens.append(kw)
stack = ''
else:
if len(stack) > 0:
tokens.append(stack)
tokens.append(current_char)
stack = ''
if len(stack) > 0:
tokens.append(stack)
def is_op(val):
return val in ('<', '>', '=', '>=', '<=', '<>', 'BETWEEN', 'IN', 'AND', 'OR', 'NOT')
# DodgyTokenisation stage 2, it groups together some elements to make RPN'ing it later easier.
def handle_token(token, tokens2, token_iterator):
# ok so this essentially groups up some tokens to make later parsing easier,
# when it encounters brackets it will recurse and then unrecurse when RecursionStopIteration is raised.
if token == ')':
raise RecursionStopIteration() # Should be recursive so this should work
elif token == '(':
temp_list = []
try:
while True:
next_token = six.next(token_iterator)
handle_token(next_token, temp_list, token_iterator)
except RecursionStopIteration:
pass # Continue
except StopIteration:
ValueError('Malformed filter expression, type1')
# Sigh, we only want to group a tuple if it doesnt contain operators
if any([is_op(item) for item in temp_list]):
# Its an expression
tokens2.append('(')
tokens2.extend(temp_list)
tokens2.append(')')
else:
tokens2.append(tuple(temp_list))
elif token == 'BETWEEN':
field = tokens2.pop()
# if values map contains a number, it would be a float
# so we need to int() it anyway
op1 = six.next(token_iterator)
op1 = int(values_map.get(op1, op1))
and_op = six.next(token_iterator)
assert and_op == 'AND'
op2 = six.next(token_iterator)
op2 = int(values_map.get(op2, op2))
tokens2.append(['between', field, op1, op2])
elif is_function(token):
function_list = [token]
lbracket = six.next(token_iterator)
assert lbracket == '('
next_token = six.next(token_iterator)
while next_token != ')':
if next_token in values_map:
next_token = values_map[next_token]
function_list.append(next_token)
next_token = six.next(token_iterator)
tokens2.append(function_list)
else:
# Convert tokens back to real types
if token in values_map:
token = values_map[token]
# Need to join >= <= <>
if len(tokens2) > 0 and ((tokens2[-1] == '>' and token == '=') or (tokens2[-1] == '<' and token == '=') or (tokens2[-1] == '<' and token == '>')):
tokens2.append(tokens2.pop() + token)
else:
tokens2.append(token)
tokens2 = []
token_iterator = iter(tokens)
for token in token_iterator:
handle_token(token, tokens2, token_iterator)
# Start of the Shunting-Yard algorithm. <-- Proper beast algorithm!
def is_number(val):
return val not in ('<', '>', '=', '>=', '<=', '<>', 'BETWEEN', 'IN', 'AND', 'OR', 'NOT')
OPS = {'<': 5, '>': 5, '=': 5, '>=': 5, '<=': 5, '<>': 5, 'IN': 8, 'AND': 11, 'OR': 12, 'NOT': 10, 'BETWEEN': 9, '(': 100, ')': 100}
def shunting_yard(token_list):
output = []
op_stack = []
# Basically takes in an infix notation calculation, converts it to a reverse polish notation where there is no
# ambiguity on which order operators are applied.
while len(token_list) > 0:
token = token_list.pop(0)
if token == '(':
op_stack.append(token)
elif token == ')':
while len(op_stack) > 0 and op_stack[-1] != '(':
output.append(op_stack.pop())
lbracket = op_stack.pop()
assert lbracket == '('
elif is_number(token):
output.append(token)
else:
# Must be operator kw
# Cheat, NOT is our only RIGHT associative operator, should really have dict of operator associativity
while len(op_stack) > 0 and OPS[op_stack[-1]] <= OPS[token] and op_stack[-1] != 'NOT':
output.append(op_stack.pop())
op_stack.append(token)
while len(op_stack) > 0:
output.append(op_stack.pop())
return output
output = shunting_yard(tokens2)
# Hacky function to convert dynamo functions (which are represented as lists) to their Class equivalent
def to_func(val):
if isinstance(val, list):
func_name = val.pop(0)
# Expand rest of the list to arguments
val = FUNC_CLASS[func_name](*val)
return val
# Simple reverse polish notation execution. Builts up a nested filter object.
# The filter object then takes a dynamo item and returns true/false
stack = []
for token in output:
if is_op(token):
op_cls = OP_CLASS[token]
if token == 'NOT':
op1 = stack.pop()
op2 = True
else:
op2 = stack.pop()
op1 = stack.pop()
stack.append(op_cls(op1, op2))
else:
stack.append(to_func(token))
result = stack.pop(0)
if len(stack) > 0:
raise ValueError('Malformed filter expression, type2')
return result
class Op(object):
"""
Base class for a FilterExpression operator
"""
OP = ''
def __init__(self, lhs, rhs):
self.lhs = lhs
self.rhs = rhs
def _lhs(self, item):
"""
:type item: moto.dynamodb2.models.Item
"""
lhs = self.lhs
if isinstance(self.lhs, (Op, Func)):
lhs = self.lhs.expr(item)
elif isinstance(self.lhs, six.string_types):
try:
lhs = item.attrs[self.lhs].cast_value
except Exception:
pass
return lhs
def _rhs(self, item):
rhs = self.rhs
if isinstance(self.rhs, (Op, Func)):
rhs = self.rhs.expr(item)
elif isinstance(self.rhs, six.string_types):
try:
rhs = item.attrs[self.rhs].cast_value
except Exception:
pass
return rhs
def expr(self, item):
return True
def __repr__(self):
return '({0} {1} {2})'.format(self.lhs, self.OP, self.rhs)
class Func(object):
"""
Base class for a FilterExpression function
"""
FUNC = 'Unknown'
def expr(self, item):
return True
def __repr__(self):
return 'Func(...)'.format(self.FUNC)
class OpNot(Op):
OP = 'NOT'
def expr(self, item):
lhs = self._lhs(item)
return not lhs
def __str__(self):
return '({0} {1})'.format(self.OP, self.lhs)
class OpAnd(Op):
OP = 'AND'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs and rhs
class OpLessThan(Op):
OP = '<'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs < rhs
class OpGreaterThan(Op):
OP = '>'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs > rhs
class OpEqual(Op):
OP = '='
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs == rhs
class OpNotEqual(Op):
OP = '<>'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs == rhs
class OpLessThanOrEqual(Op):
OP = '<='
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs <= rhs
class OpGreaterThanOrEqual(Op):
OP = '>='
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs >= rhs
class OpOr(Op):
OP = 'OR'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs or rhs
class OpIn(Op):
OP = 'IN'
def expr(self, item):
lhs = self._lhs(item)
rhs = self._rhs(item)
return lhs in rhs
class FuncAttrExists(Func):
FUNC = 'attribute_exists'
def __init__(self, attribute):
self.attr = attribute
def expr(self, item):
return self.attr in item.attrs
class FuncAttrNotExists(Func):
FUNC = 'attribute_not_exists'
def __init__(self, attribute):
self.attr = attribute
def expr(self, item):
return self.attr not in item.attrs
class FuncAttrType(Func):
FUNC = 'attribute_type'
def __init__(self, attribute, _type):
self.attr = attribute
self.type = _type
def expr(self, item):
return self.attr in item.attrs and item.attrs[self.attr].type == self.type
class FuncBeginsWith(Func):
FUNC = 'begins_with'
def __init__(self, attribute, substr):
self.attr = attribute
self.substr = substr
def expr(self, item):
return self.attr in item.attrs and item.attrs[self.attr].type == 'S' and item.attrs[self.attr].value.startswith(self.substr)
class FuncContains(Func):
FUNC = 'contains'
def __init__(self, attribute, operand):
self.attr = attribute
self.operand = operand
def expr(self, item):
if self.attr not in item.attrs:
return False
if item.attrs[self.attr].type in ('S', 'SS', 'NS', 'BS', 'L', 'M'):
return self.operand in item.attrs[self.attr].value
return False
class FuncSize(Func):
FUNC = 'contains'
def __init__(self, attribute):
self.attr = attribute
def expr(self, item):
if self.attr not in item.attrs:
raise ValueError('Invalid attribute name {0}'.format(self.attr))
if item.attrs[self.attr].type in ('S', 'SS', 'NS', 'B', 'BS', 'L', 'M'):
return len(item.attrs[self.attr].value)
raise ValueError('Invalid filter expression')
class FuncBetween(Func):
FUNC = 'between'
def __init__(self, attribute, start, end):
self.attr = attribute
self.start = start
self.end = end
def expr(self, item):
if self.attr not in item.attrs:
raise ValueError('Invalid attribute name {0}'.format(self.attr))
return self.start <= item.attrs[self.attr].cast_value <= self.end
OP_CLASS = {
'NOT': OpNot,
'AND': OpAnd,
'OR': OpOr,
'IN': OpIn,
'<': OpLessThan,
'>': OpGreaterThan,
'<=': OpLessThanOrEqual,
'>=': OpGreaterThanOrEqual,
'=': OpEqual,
'<>': OpNotEqual
}
FUNC_CLASS = {
'attribute_exists': FuncAttrExists,
'attribute_not_exists': FuncAttrNotExists,
'attribute_type': FuncAttrType,
'begins_with': FuncBeginsWith,
'contains': FuncContains,
'size': FuncSize,
'between': FuncBetween
}