diff --git a/moto/dynamodb2/models/__init__.py b/moto/dynamodb2/models/__init__.py index 29713d211..1f448f288 100644 --- a/moto/dynamodb2/models/__init__.py +++ b/moto/dynamodb2/models/__init__.py @@ -14,10 +14,11 @@ from moto.core import BaseBackend, BaseModel from moto.core.utils import unix_time from moto.core.exceptions import JsonRESTError from moto.dynamodb2.comparisons import get_filter_expression -from moto.dynamodb2.comparisons import get_expected -from moto.dynamodb2.exceptions import InvalidIndexNameError, ItemSizeTooLarge +from moto.dynamodb2.comparisons import get_expected, get_comparison_func +from moto.dynamodb2.exceptions import InvalidIndexNameError, ItemSizeTooLarge, InvalidUpdateExpression from moto.dynamodb2.models.utilities import bytesize, attribute_is_list from moto.dynamodb2.models.dynamo_type import DynamoType +from moto.dynamodb2.parsing.expressions import UpdateExpressionParser class DynamoJsonEncoder(json.JSONEncoder): @@ -1197,6 +1198,13 @@ class DynamoDBBackend(BaseBackend): ): table = self.get_table(table_name) + # Support spaces between operators in an update expression + # E.g. `a = b + c` -> `a=b+c` + if update_expression: + # Parse expression to get validation errors + UpdateExpressionParser.make(update_expression) + update_expression = re.sub(r"\s*([=\+-])\s*", "\\1", update_expression) + if all([table.hash_key_attr in key, table.range_key_attr in key]): # Covers cases where table has hash and range keys, ``key`` param # will be a dict diff --git a/moto/dynamodb2/parsing/ast_nodes.py b/moto/dynamodb2/parsing/ast_nodes.py new file mode 100644 index 000000000..78c7b6b2b --- /dev/null +++ b/moto/dynamodb2/parsing/ast_nodes.py @@ -0,0 +1,205 @@ +import abc +import six + + +@six.add_metaclass(abc.ABCMeta) +class Node: + def __init__(self, children=None): + self.type = self.__class__.__name__ + assert children is None or isinstance(children, list) + self.children = children + self.parent = None + + if isinstance(children, list): + for child in children: + if isinstance(child, Node): + child.set_parent(self) + + def set_parent(self, parent_node): + self.parent = parent_node + + +class LeafNode(Node): + """A LeafNode is a Node where none of the children are Nodes themselves.""" + + def __init__(self, children=None): + super(LeafNode, self).__init__(children) + + +@six.add_metaclass(abc.ABCMeta) +class Expression(Node): + """ + Abstract Syntax Tree representing the expression + + For the Grammar start here and jump down into the classes at the righ-hand side to look further. Nodes marked with + a star are abstract and won't appear in the final AST. + + Expression* => UpdateExpression + Expression* => ConditionExpression + """ + + +class UpdateExpression(Expression): + """ + UpdateExpression => UpdateExpressionClause* + UpdateExpression => UpdateExpressionClause* UpdateExpression + """ + + +@six.add_metaclass(abc.ABCMeta) +class UpdateExpressionClause(UpdateExpression): + """ + UpdateExpressionClause* => UpdateExpressionSetClause + UpdateExpressionClause* => UpdateExpressionRemoveClause + UpdateExpressionClause* => UpdateExpressionAddClause + UpdateExpressionClause* => UpdateExpressionDeleteClause + """ + + +class UpdateExpressionSetClause(UpdateExpressionClause): + """ + UpdateExpressionSetClause => SET SetActions + """ + + +class UpdateExpressionSetActions(UpdateExpressionClause): + """ + UpdateExpressionSetClause => SET SetActions + + SetActions => SetAction + SetActions => SetAction , SetActions + + """ + + +class UpdateExpressionSetAction(UpdateExpressionClause): + """ + SetAction => Path = Value + """ + + +class UpdateExpressionRemoveActions(UpdateExpressionClause): + """ + UpdateExpressionSetClause => REMOVE RemoveActions + + RemoveActions => RemoveAction + RemoveActions => RemoveAction , RemoveActions + """ + + +class UpdateExpressionRemoveAction(UpdateExpressionClause): + """ + RemoveAction => Path + """ + + +class UpdateExpressionAddActions(UpdateExpressionClause): + """ + UpdateExpressionAddClause => ADD RemoveActions + + AddActions => AddAction + AddActions => AddAction , AddActions + """ + + +class UpdateExpressionAddAction(UpdateExpressionClause): + """ + AddAction => Path Value + """ + + +class UpdateExpressionDeleteActions(UpdateExpressionClause): + """ + UpdateExpressionDeleteClause => DELETE RemoveActions + + DeleteActions => DeleteAction + DeleteActions => DeleteAction , DeleteActions + """ + + +class UpdateExpressionDeleteAction(UpdateExpressionClause): + """ + DeleteAction => Path Value + """ + + +class UpdateExpressionPath(UpdateExpressionClause): + pass + + +class UpdateExpressionValue(UpdateExpressionClause): + """ + Value => Operand + Value => Operand + Value + Value => Operand - Value + """ + + +class UpdateExpressionGroupedValue(UpdateExpressionClause): + """ + GroupedValue => ( Value ) + """ + + +class UpdateExpressionRemoveClause(UpdateExpressionClause): + """ + UpdateExpressionRemoveClause => REMOVE RemoveActions + """ + + +class UpdateExpressionAddClause(UpdateExpressionClause): + """ + UpdateExpressionAddClause => ADD AddActions + """ + + +class UpdateExpressionDeleteClause(UpdateExpressionClause): + """ + UpdateExpressionDeleteClause => DELETE DeleteActions + """ + + +class ExpressionPathDescender(Node): + """Node identifying descender into nested structure (.) in expression""" + + +class ExpressionSelector(LeafNode): + """Node identifying selector [selection_index] in expresion""" + + def __init__(self, selection_index): + super(ExpressionSelector, self).__init__(children=[selection_index]) + + +class ExpressionAttribute(LeafNode): + """An attribute identifier as used in the DDB item""" + + def __init__(self, attribute): + super(ExpressionAttribute, self).__init__(children=[attribute]) + + +class ExpressionAttributeName(LeafNode): + """An ExpressionAttributeName is an alias for an attribute identifier""" + + def __init__(self, attribute_name): + super(ExpressionAttributeName, self).__init__(children=[attribute_name]) + + +class ExpressionAttributeValue(LeafNode): + """An ExpressionAttributeValue is an alias for an value""" + + def __init__(self, value): + super(ExpressionAttributeValue, self).__init__(children=[value]) + + +class ExpressionValueOperator(LeafNode): + """An ExpressionValueOperator is an operation that works on 2 values""" + + def __init__(self, value): + super(ExpressionValueOperator, self).__init__(children=[value]) + + +class UpdateExpressionFunction(Node): + """ + A Node representing a function of an Update Expression. The first child is the function name the others are the + arguments. + """ diff --git a/moto/dynamodb2/parsing/expressions.py b/moto/dynamodb2/parsing/expressions.py new file mode 100644 index 000000000..e418bb47e --- /dev/null +++ b/moto/dynamodb2/parsing/expressions.py @@ -0,0 +1,1010 @@ +import logging +from abc import abstractmethod +import abc +import six +from collections import deque + +from moto.dynamodb2.parsing.ast_nodes import ( + UpdateExpression, + UpdateExpressionSetClause, + UpdateExpressionSetActions, + UpdateExpressionSetAction, + UpdateExpressionRemoveActions, + UpdateExpressionRemoveAction, + UpdateExpressionPath, + UpdateExpressionValue, + UpdateExpressionGroupedValue, + UpdateExpressionRemoveClause, + ExpressionPathDescender, + ExpressionSelector, + ExpressionAttribute, + ExpressionAttributeName, + ExpressionAttributeValue, + ExpressionValueOperator, + UpdateExpressionFunction, + UpdateExpressionAddClause, + UpdateExpressionAddActions, + UpdateExpressionAddAction, + UpdateExpressionDeleteAction, + UpdateExpressionDeleteActions, + UpdateExpressionDeleteClause, +) +from moto.dynamodb2.exceptions import InvalidTokenException +from moto.dynamodb2.parsing.tokens import Token, ExpressionTokenizer + + +class NestableExpressionParserMixin(object): + """ + For nodes that can be nested in themselves (recursive). Take for example UpdateExpression's grammar: + + UpdateExpression => UpdateExpressionClause* + UpdateExpression => UpdateExpressionClause* UpdateExpression + + If we consider it of structure + NestableExpression => TargetClause* + NestableExpression => TargetClause* NestableExpression + + This pattern comes back multiple times. This Mixin adds re-usability for that type of pattern. + + This approach is taken since it allows to remain the ordering of the Nodes as how the corresponding tokens where + in the originating expression. + """ + + def __init__(self, *args, **kwargs): + self.target_clauses = deque() + + def _parse_target_clause(self, factory_class): + """ + + Args: + factory_class: The factory for the target clause e.g. UpdateExpressionSetClauseParser + + Returns: + + """ + logging.debug( + "Move token pos {pos} to continue parsing with specific factory class {fc}".format( + pos=self.token_pos, fc=factory_class.__class__.__name__ + ) + ) + # noinspection PyProtectedMember + ast, token_pos = factory_class(**self._initializer_args())._parse_with_pos() + self.target_clauses.append(ast) + logging.debug( + "Continue where previous parsing ended {token_pos}".format( + token_pos=token_pos + ) + ) + self.token_pos = token_pos + + @abstractmethod + def _initializer_args(self): + """ + Get the arguments of the initializer. This is implemented by the calling class. See ExpressionParser for an + example. + + Returns: + dict: A dictionary of the initializer arguments + """ + + @classmethod + @abstractmethod + def _nestable_class(cls): + """ + Get the class of the Node that will be created that would be nested. For the example in the docstring this would + be UpdateExpression + + Returns: + class: The class of the Nodes that will be created. + """ + + def _create_node(self): + """ + target_clauses has the nodes in order of encountering. Go through them backwards and build the tree bottom up. + + This way left-deep-descending traversal will process nodes in order. + + Continuing the example of an UpdateExpression: + For example SET a=3 REMOVE b + UpdateExpression + / \ + SET a=3 UpdateExpression + | + REMOVE b + + self.target_clauses looks like: ( SET a=3 >> REMOVE b ) + Returns: + moto.dynamodb2.ast_nodes.Node: Node of an AST representing the Expression as produced by the factory. + """ + assert len(self.target_clauses) > 0, "No nodes for {cn}".format( + cn=self.__class__.__name__ + ) + target_node = self._nestable_class()(children=[self.target_clauses.pop()]) + while len(self.target_clauses) > 0: + target_node = self._nestable_class()( + children=[self.target_clauses.pop(), target_node] + ) + return target_node + + +@six.add_metaclass(abc.ABCMeta) +class ExpressionParser: + """Abstract class""" + + def __init__(self, expression_token_list, token_pos=0): + """ + + Args: + expression_token_list: + token_pos(int): Location where parsing is + """ + self.token_list = expression_token_list + self.token_pos = token_pos + + def _initializer_args(self): + return {"expression_token_list": self.token_list, "token_pos": self.token_pos} + + @abstractmethod + def _parse(self): + """ + Start parsing the token_list from token_pos for the factory type. + + Returns: + moto.dynamodb2.ast_nodes.Node: AST which is root node of resulting abstract syntax tree + """ + + @classmethod + def is_possible_start(cls, token): + return token is not None and cls._is_possible_start(token) + + @classmethod + @abstractmethod + def _is_possible_start(cls, token): + """ + + Args: + token(moto.dynamodb2.tokens.Token): + + Returns: + bool: True if token is a possible start for entries processed by `cls` + """ + + def _parse_with_pos(self): + """ + Start parsing the token_list from token_pos for the factory type and also return the resulting token_pos. + + Returns: + (ast, token_pos): tuple of AST which is root node of resulting abstract syntax tree and token_pos is the + position in the tokenlist. + """ + return self._parse(), self.token_pos + + def parse(self): + return self._parse() + + def get_next_token_type(self): + """ + Get the type of the next token to be processed + + Returns: + str: Token type or None if no more next token + """ + try: + return self.get_next_token().type + except AttributeError: + return None + + def get_next_token(self): + """ + Get the next token to be processed + + Returns: + moto.dynamodb2.tokens.Token: or None if no more next token + """ + try: + return self.token_list[self.token_pos] + except IndexError: + return None + + def get_next_token_value(self): + """ + Get the value of the next token to be processed + + Returns: + str: value or None if no more next token + """ + try: + return self.get_next_token().value + except AttributeError: + return None + + def is_at_end(self): + """Return boolean indicating whether we are at end of the parsing""" + return self.token_pos == len(self.token_list) + + def is_at_start(self): + """Return boolean indicating whether we are at start of the parsing""" + return self.token_pos == 0 + + def get_last_token_value(self): + """Get the last token that was correctly parsed or return empty string""" + if self.token_pos > 0: + return self.token_list[self.token_pos - 1].value + else: + return "" + + def get_last_token_type(self): + """Get the last token type that was correctly parsed or return None""" + if self.token_pos > 0: + return self.token_list[self.token_pos - 1].type + else: + return None + + def get_2nd_last_token_value_if_last_was_whitespace(self): + """Get the 2nd last token that was correctly parsed if last one was whitespace or return empty string""" + if self.token_pos > 1 and self.get_last_token_type() == Token.WHITESPACE: + return self.token_list[self.token_pos - 2].value + else: + return "" + + def get_following_token_value(self): + """Get the token value after the one that is being parsed or empty string if non existent.""" + try: + return self.token_list[self.token_pos + 1].value + except IndexError: + return "" + + def get_following_token_type(self): + """Get the token type after the one that is being parsed or None if non existent.""" + try: + return self.token_list[self.token_pos + 1].type + except IndexError: + return None + + def get_2nd_following_token_value_if_following_was_whitespace(self): + """Get the 2nd following token that was correctly parsed if 1st one was whitespace or return empty string""" + if self.get_following_token_type() == Token.WHITESPACE: + try: + return self.token_list[self.token_pos + 2].value + except IndexError: + return "" + else: + return "" + + def skip_white_space(self): + try: + while self.get_next_token_type() == Token.WHITESPACE: + self.token_pos += 1 + except IndexError: + assert self.token_pos > 0, "We should always have positive indexes" + logging.debug("We are out of range so end is reached") + + def process_token_of_type(self, token_type): + """ + Maker sure the next token is of type `token_type` if not raise unexpected token + Args: + token_type: A token type + + Returns: + str: The value if the token is of type `token_type` + """ + if self.get_next_token_type() == token_type: + token_value = self.get_next_token_value() + self.goto_next_significant_token() + return token_value + else: + self.raise_unexpected_token() + + def goto_next_significant_token(self): + """Continue past current token and skip all whitespaces""" + self.token_pos += 1 + self.skip_white_space() + + def raise_unexpected_token(self): + if self.is_at_end(): + problematic_token = "" + problematic_token_in_near = "" + else: + problematic_token_in_near = problematic_token = self.get_next_token_value() + + near = "".join( + [ + self.get_2nd_last_token_value_if_last_was_whitespace(), + self.get_last_token_value(), + problematic_token_in_near, + self.get_following_token_value(), + self.get_2nd_following_token_value_if_following_was_whitespace(), + ] + ) + + raise InvalidTokenException(problematic_token, near) + + +class NestableBinExpressionParser(ExpressionParser): + """ + For nodes that can be nested in themselves (recursive) but with an operation. Take for example + UpdateExpressionValue's grammar: + + Value => Operand* + Value => Operand* + Value + Value => Operand* - Value + + If we consider it of structure + NestableBinExpression => TargetClause* + NestableBinExpression => TargetClause* BinOp NestableBinExpression + + This pattern comes back multiple times. This Mixin adds re-usability for that type of pattern. + + This approach is taken since it allows to remain the ordering of the Nodes as how the corresponding tokens where + in the originating expression. + """ + + def __init__(self, *args, **kwargs): + super(NestableBinExpressionParser, self).__init__(*args, **kwargs) + self.target_nodes = deque() + + def _parse_target_clause(self, factory_class): + """ + + Args: + factory_class: The factory for the target clause e.g. UpdateExpressionSetClauseParser + + Returns: + + """ + # noinspection PyProtectedMember + ast, self.token_pos = factory_class( + **self._initializer_args() + )._parse_with_pos() + self.target_nodes.append(ast) + logging.debug( + "Continue where previous parsing ended {token_pos}".format( + token_pos=self.token_pos + ) + ) + + def _parse(self): + self._parse_target_clause(self._operand_factory_class()) + while self._binop_factory_class().is_possible_start(self.get_next_token()): + self._parse_target_clause(self._binop_factory_class()) + if self._operand_factory_class().is_possible_start(self.get_next_token()): + self._parse_target_clause(self._operand_factory_class()) + else: + self.raise_unexpected_token() + + @abstractmethod + def _operand_factory_class(self): + """ + Get the Parser class of the Operands for the Binary operations/actions. + + Returns: + class: + """ + + @abstractmethod + def _binop_factory_class(self): + """ + Get a factory that gets the possible binary operation. + + Returns: + class: A class extending ExpressionParser + """ + + def _create_node(self): + """ + target_clauses has the nodes in order of encountering. Go through them forward and build the tree bottom up. + For simplicity docstring will use Operand Node rather than the specific node + + This way left-deep-descending traversal will process nodes in order. + + Continuing the example of an UpdateExpressionValue: + For example value => a + :val - :val2 + UpdateExpressionValue + / | \ + UpdateExpressionValue BinOp Operand + / | | | | + UpdateExpressionValue BinOp Operand - :val2 + / | | + Operand + :val + | + a + + self.target_nodes looks like: ( a >> + >> :val >> - >> :val2 ) + Returns: + moto.dynamodb2.ast_nodes.Node: Node of an AST representing the Expression as produced by the factory. + """ + if len(self.target_nodes) == 1: + return UpdateExpressionValue(children=[self.target_nodes.popleft()]) + else: + target_node = UpdateExpressionValue( + children=[ + self.target_nodes.popleft(), + self.target_nodes.popleft(), + self.target_nodes.popleft(), + ] + ) + while len(self.target_nodes) >= 2: + target_node = UpdateExpressionValue( + children=[ + target_node, + self.target_nodes.popleft(), + self.target_nodes.popleft(), + ] + ) + assert len(self.target_nodes) == 0 + return target_node + + +class UpdateExpressionParser(ExpressionParser, NestableExpressionParserMixin): + """ + Parser to create update expressions + """ + + @classmethod + def _sub_factories(cls): + return [ + UpdateExpressionSetClauseParser, + UpdateExpressionAddClauseParser, + UpdateExpressionDeleteClauseParser, + UpdateExpressionRemoveClauseParser, + ] + + @classmethod + def _is_possible_start(cls, token): + pass + + def __init__(self, *args, **kwargs): + super(UpdateExpressionParser, self).__init__(*args, **kwargs) + NestableExpressionParserMixin.__init__(self) + + @classmethod + def _nestable_class(cls): + return UpdateExpression + + def _parse_expression_clause(self, factory_class): + return self._parse_target_clause(factory_class) + + def _parse_by_a_subfactory(self): + for sub_factory in self._sub_factories(): + if sub_factory.is_possible_start(self.get_next_token()): + self._parse_expression_clause(sub_factory) + return True + return False + + def _parse(self): + """ + Update Expression is the top-most node therefore it is expected to end up at the end of the expression. + """ + while True: + self.skip_white_space() + if self.is_at_end(): + logging.debug("End reached") + break + elif self._parse_by_a_subfactory(): + continue + else: + self.raise_unexpected_token() + + return self._create_node(), self.token_pos + + @classmethod + def make(cls, expression_str): + token_list = ExpressionTokenizer.make_list(expression_str) + return cls(token_list).parse() + + +class UpdateExpressionSetClauseParser(ExpressionParser): + """ + UpdateExpressionSetClause => SET SetActions + """ + + @classmethod + def _is_possible_start(cls, token): + return token.type == Token.ATTRIBUTE and token.value.upper() == "SET" + + def _parse(self): + assert self.is_possible_start(self.get_next_token()) + self.goto_next_significant_token() + ast, self.token_pos = UpdateExpressionSetActionsParser( + **self._initializer_args() + )._parse_with_pos() + # noinspection PyProtectedMember + return UpdateExpressionSetClause(children=[ast]) + + +class UpdateExpressionActionsParser(ExpressionParser, NestableExpressionParserMixin): + """ + UpdateExpressionSetActions + """ + + def __init__(self, *args, **kwargs): + super(UpdateExpressionActionsParser, self).__init__(*args, **kwargs) + NestableExpressionParserMixin.__init__(self) + + @classmethod + def _is_possible_start(cls, token): + raise RuntimeError( + "{class_name} cannot be identified by the next token.".format( + class_name=cls._nestable_class().__name__ + ) + ) + + @classmethod + @abstractmethod + def _nestable_class(cls): + return UpdateExpressionSetActions + + @classmethod + @abstractmethod + def _nested_expression_parser_class(cls): + """Returns the parser for the query part that creates the nested nodes""" + + def _parse(self): + """ + UpdateExpressionSetActions is inside the expression so it can be followed by others. Process SetActions one by + one until no more SetAction. + """ + self.skip_white_space() + + while self._nested_expression_parser_class().is_possible_start( + self.get_next_token() + ): + self._parse_target_clause(self._nested_expression_parser_class()) + self.skip_white_space() + if self.get_next_token_type() == Token.COMMA: + self.goto_next_significant_token() + else: + break + + if len(self.target_clauses) == 0: + logging.debug( + "Didn't encounter a single {nc} in {nepc}.".format( + nc=self._nestable_class().__name__, + nepc=self._nested_expression_parser_class().__name__, + ) + ) + self.raise_unexpected_token() + + return self._create_node() + + +class UpdateExpressionSetActionsParser(UpdateExpressionActionsParser): + """ + UpdateExpressionSetActions + """ + + @classmethod + def _nested_expression_parser_class(cls): + return UpdateExpressionSetActionParser + + @classmethod + def _nestable_class(cls): + return UpdateExpressionSetActions + + +class UpdateExpressionSetActionParser(ExpressionParser): + """ + SetAction => Path = Value + + So we create an UpdateExpressionSetAction Node that has 2 children. Left child Path and right child Value. + """ + + @classmethod + def _is_possible_start(cls, token): + return UpdateExpressionPathParser.is_possible_start(token) + + def _parse(self): + """ + UpdateExpressionSetActionParser only gets called when expecting a SetAction. So we should be aggressive on + raising invalid Tokens. We can thus do the following: + 1) Process path + 2) skip whitespace if there are any + 3) Process equal-sign token + 4) skip whitespace if there are any + 3) Process value + + """ + path, self.token_pos = UpdateExpressionPathParser( + **self._initializer_args() + )._parse_with_pos() + self.skip_white_space() + self.process_token_of_type(Token.EQUAL_SIGN) + self.skip_white_space() + value, self.token_pos = UpdateExpressionValueParser( + **self._initializer_args() + )._parse_with_pos() + return UpdateExpressionSetAction(children=[path, value]) + + +class UpdateExpressionPathParser(ExpressionParser): + """ + Paths are selectors within items to specify a part within an Item. DynamoDB does not impose much restrictions on the + data it stores but it does store more strict restrictions on how they are represented in UpdateExpression's. + + """ + + def __init__(self, *args, **kwargs): + super(UpdateExpressionPathParser, self).__init__(*args, **kwargs) + self.path_nodes = [] + + @classmethod + def _is_possible_start(cls, token): + """ + Args: + token(Token): the token to be checked + + Returns: + bool: Whether the token could be the start of an UpdateExpressionPath + """ + if token.type == Token.ATTRIBUTE_NAME: + return True + elif token.type == Token.ATTRIBUTE and token.value.upper() != "REMOVE": + """We have to make sure remove is not passed""" + return True + return False + + def _parse(self): + return self.process_path() + + def process_path(self): + self.parse_path() + return UpdateExpressionPath(children=self.path_nodes) + + def parse_path(self): + """ + A path is comprised of: + - Attribute: the name of an attribute as how it is stored which has no special characters + - ATTRIBUTE_NAME: A placeholder that has no special characters except leading # to refer to attributes that + have a name that is not allowed in an UpdateExpression) + - DOT's: These are used to decent in a nested structure. When a DOT is in a path expression it is never part + of an attribute name but always means to descent into a MAP. We will call each descend a patch + chain + - SELECTORs: E.g.: [1] These are used to select an element in ordered datatypes like a list. + + Whitespaces can be between all these elements that build a path. For SELECTORs it is also allowed to have + whitespaces between brackets and numbers but the number cannot be split up with spaces + + Attributes and attribute_names must be separated with DOT's. + Returns: + UpdateExpressionPath: + """ + self.parse_path_chain() + while self.is_next_token_start_of_patch_chain(): + self.process_dot() + self.parse_path_chain() + + def is_next_token_start_of_patch_chain(self): + return self.get_next_token_type() == Token.DOT + + def process_dot(self): + self.path_nodes.append(ExpressionPathDescender()) + self.goto_next_significant_token() + + def parse_path_chain(self): + self.process_attribute_identifying_token() + self.skip_white_space() + while self.is_next_token_start_of_selector(): + self.process_selector() + self.skip_white_space() + + def process_attribute_identifying_token(self): + if self.get_next_token_type() == Token.ATTRIBUTE: + self.path_nodes.append(ExpressionAttribute(self.get_next_token_value())) + elif self.get_next_token_type() == Token.ATTRIBUTE_NAME: + self.path_nodes.append(ExpressionAttributeName(self.get_next_token_value())) + else: + self.raise_unexpected_token() + + self.goto_next_significant_token() + + def is_next_token_start_of_selector(self): + return self.get_next_token_type() == Token.OPEN_SQUARE_BRACKET + + def process_selector(self): + """ + Process the selector is only called when a selector must be processed. So do the following actions: + - skip opening bracket + - skip optional spaces + - read numeric literal + - skip optional spaces + - pass closing bracket + """ + self.process_token_of_type(Token.OPEN_SQUARE_BRACKET) + selector_value = self.process_token_of_type(Token.NUMBER) + self.process_token_of_type(Token.CLOSE_SQUARE_BRACKET) + self.path_nodes.append(ExpressionSelector(selector_value)) + + +class UpdateExpressionValueParser(NestableBinExpressionParser): + @classmethod + def _is_possible_start(cls, token): + return UpdateExpressionOperandParser.is_possible_start(token) + + def _operand_factory_class(self): + return UpdateExpressionOperandParser + + def _binop_factory_class(self): + return UpdateExpressionValueOperatorParser + + +class UpdateExpressionGroupedValueParser(ExpressionParser): + """ + A grouped value is an Update Expression value clause that is surrounded by round brackets. Each Operand can be + a grouped value by itself. + """ + + def _parse(self): + self.process_token_of_type(Token.OPEN_ROUND_BRACKET) + value, self.token_pos = UpdateExpressionValueParser( + **self._initializer_args() + )._parse_with_pos() + self.process_token_of_type(Token.CLOSE_ROUND_BRACKET) + return UpdateExpressionGroupedValue(children=value) + + @classmethod + def _is_possible_start(cls, token): + return token.type == Token.OPEN_ROUND_BRACKET + + +class UpdateExpressionValueOperatorParser(ExpressionParser): + OPERATION_TOKENS = [Token.PLUS_SIGN, Token.MINUS_SIGN] + + @classmethod + def _is_possible_start(cls, token): + return token.type in cls.OPERATION_TOKENS + + def _parse(self): + operation_value = self.get_next_token_value() + assert operation_value in self.OPERATION_TOKENS + self.goto_next_significant_token() + return ExpressionValueOperator(operation_value) + + +class UpdateExpressionOperandParser(ExpressionParser): + """ + Grammar + Operand* => AttributeValue + Operand* => UpdateExpressionFunction + Operand* => Path + Operand* => GroupedValue + """ + + @classmethod + def _sub_factories(cls): + return [ + UpdateExpressionAttributeValueParser, + UpdateExpressionFunctionParser, + UpdateExpressionPathParser, + UpdateExpressionGroupedValueParser, + ] + + @classmethod + def _is_possible_start(cls, token): + return any(parser.is_possible_start(token) for parser in cls._sub_factories()) + + def _parse(self): + for factory in self._sub_factories(): + if factory.is_possible_start(self.get_next_token()): + node, self.token_pos = factory( + **self._initializer_args() + )._parse_with_pos() + return node + self.raise_unexpected_token() + + +class UpdateExpressionAttributeValueParser(ExpressionParser): + def _parse(self): + attr_value = ExpressionAttributeValue( + self.process_token_of_type(Token.ATTRIBUTE_VALUE) + ) + return attr_value + + @classmethod + def _is_possible_start(cls, token): + return token.type == Token.ATTRIBUTE_VALUE + + +class UpdateExpressionFunctionParser(ExpressionParser): + """ + A helper to process a function of an Update Expression + """ + + # TODO(pbbouwel): Function names are supposedly case sensitive according to doc add tests + # Map function to the factories for its elements + FUNCTIONS = { + "if_not_exists": [UpdateExpressionPathParser, UpdateExpressionValueParser], + "list_append": [UpdateExpressionOperandParser, UpdateExpressionOperandParser], + } + + @classmethod + def _is_possible_start(cls, token): + """ + Check whether a token is supposed to be a function + Args: + token(Token): the token to check + + Returns: + bool: True if token is the start of a function. + """ + if token.type == Token.ATTRIBUTE: + return token.value in cls.FUNCTIONS.keys() + else: + return False + + def _parse(self): + function_name = self.get_next_token_value() + self.goto_next_significant_token() + self.process_token_of_type(Token.OPEN_ROUND_BRACKET) + function_elements = [function_name] + function_arguments = self.FUNCTIONS[function_name] + for i, func_elem_factory in enumerate(function_arguments): + func_elem, self.token_pos = func_elem_factory( + **self._initializer_args() + )._parse_with_pos() + function_elements.append(func_elem) + if i + 1 < len(function_arguments): + self.skip_white_space() + self.process_token_of_type(Token.COMMA) + self.process_token_of_type(Token.CLOSE_ROUND_BRACKET) + return UpdateExpressionFunction(children=function_elements) + + +class UpdateExpressionRemoveClauseParser(ExpressionParser): + """ + UpdateExpressionRemoveClause => REMOVE RemoveActions + """ + + def _parse(self): + assert self.is_possible_start(self.get_next_token()) + self.goto_next_significant_token() + ast, self.token_pos = UpdateExpressionRemoveActionsParser( + **self._initializer_args() + )._parse_with_pos() + # noinspection PyProtectedMember + return UpdateExpressionRemoveClause(children=[ast]) + + @classmethod + def _is_possible_start(cls, token): + """REMOVE is not a keyword""" + return token.type == Token.ATTRIBUTE and token.value.upper() == "REMOVE" + + +class UpdateExpressionRemoveActionsParser(UpdateExpressionActionsParser): + """ + UpdateExpressionSetActions + """ + + @classmethod + def _nested_expression_parser_class(cls): + return UpdateExpressionRemoveActionParser + + @classmethod + def _nestable_class(cls): + return UpdateExpressionRemoveActions + + +class UpdateExpressionRemoveActionParser(ExpressionParser): + """ + RemoveAction => Path = Value + + So we create an UpdateExpressionSetAction Node that has 2 children. Left child Path and right child Value. + """ + + @classmethod + def _is_possible_start(cls, token): + return UpdateExpressionPathParser.is_possible_start(token) + + def _parse(self): + """ + UpdateExpressionRemoveActionParser only gets called when expecting a RemoveAction. So we should be aggressive on + raising invalid Tokens. We can thus do the following: + 1) Process path + 2) skip whitespace if there are any + + """ + path, self.token_pos = UpdateExpressionPathParser( + **self._initializer_args() + )._parse_with_pos() + self.skip_white_space() + return UpdateExpressionRemoveAction(children=[path]) + + +class UpdateExpressionAddClauseParser(ExpressionParser): + def _parse(self): + assert self.is_possible_start(self.get_next_token()) + self.goto_next_significant_token() + ast, self.token_pos = UpdateExpressionAddActionsParser( + **self._initializer_args() + )._parse_with_pos() + # noinspection PyProtectedMember + return UpdateExpressionAddClause(children=[ast]) + + @classmethod + def _is_possible_start(cls, token): + return token.type == Token.ATTRIBUTE and token.value.upper() == "ADD" + + +class UpdateExpressionAddActionsParser(UpdateExpressionActionsParser): + """ + UpdateExpressionSetActions + """ + + @classmethod + def _nested_expression_parser_class(cls): + return UpdateExpressionAddActionParser + + @classmethod + def _nestable_class(cls): + return UpdateExpressionAddActions + + +@six.add_metaclass(abc.ABCMeta) +class UpdateExpressionPathValueParser(ExpressionParser): + def _parse_path_and_value(self): + """ + UpdateExpressionAddActionParser only gets called when expecting an AddAction. So we should be aggressive on + raising invalid Tokens. We can thus do the following: + 1) Process path + 2) skip whitespace if there are any + 3) Process a value + 4) skip whitespace if there are any + + Returns: + [path, value]: A list containing the Path node and the AttributeValue nodes + """ + path, self.token_pos = UpdateExpressionPathParser( + **self._initializer_args() + )._parse_with_pos() + self.skip_white_space() + value, self.token_pos = UpdateExpressionAttributeValueParser( + **self._initializer_args() + )._parse_with_pos() + self.skip_white_space() + return [path, value] + + +class UpdateExpressionAddActionParser(UpdateExpressionPathValueParser): + @classmethod + def _is_possible_start(cls, token): + return UpdateExpressionPathParser.is_possible_start(token) + + def _parse(self): + return UpdateExpressionAddAction(children=self._parse_path_and_value()) + + +class UpdateExpressionDeleteClauseParser(ExpressionParser): + def _parse(self): + assert self.is_possible_start(self.get_next_token()) + self.goto_next_significant_token() + ast, self.token_pos = UpdateExpressionDeleteActionsParser( + **self._initializer_args() + )._parse_with_pos() + # noinspection PyProtectedMember + return UpdateExpressionDeleteClause(children=[ast]) + + @classmethod + def _is_possible_start(cls, token): + return token.type == Token.ATTRIBUTE and token.value.upper() == "DELETE" + + +class UpdateExpressionDeleteActionsParser(UpdateExpressionActionsParser): + """ + UpdateExpressionSetActions + """ + + @classmethod + def _nested_expression_parser_class(cls): + return UpdateExpressionDeleteActionParser + + @classmethod + def _nestable_class(cls): + return UpdateExpressionDeleteActions + + +class UpdateExpressionDeleteActionParser(UpdateExpressionPathValueParser): + @classmethod + def _is_possible_start(cls, token): + return UpdateExpressionPathParser.is_possible_start(token) + + def _parse(self): + return UpdateExpressionDeleteAction(children=self._parse_path_and_value()) diff --git a/moto/dynamodb2/parsing/reserved_keywords.py b/moto/dynamodb2/parsing/reserved_keywords.py new file mode 100644 index 000000000..d82b16e98 --- /dev/null +++ b/moto/dynamodb2/parsing/reserved_keywords.py @@ -0,0 +1,29 @@ +class ReservedKeywords(list): + """ + DynamoDB has an extensive list of keywords. Keywords are considered when validating the expression Tree. + Not earlier since an update expression like "SET path = VALUE 1" fails with: + 'Invalid UpdateExpression: Syntax error; token: "1", near: "VALUE 1"' + """ + + KEYWORDS = None + + @classmethod + def get_reserved_keywords(cls): + if cls.KEYWORDS is None: + cls.KEYWORDS = cls._get_reserved_keywords() + return cls.KEYWORDS + + @classmethod + def _get_reserved_keywords(cls): + """ + Get a list of reserved keywords of DynamoDB + """ + try: + import importlib.resources as pkg_resources + except ImportError: + import importlib_resources as pkg_resources + + reserved_keywords = pkg_resources.read_text( + "moto.dynamodb2.parsing", "reserved_keywords.txt" + ) + return reserved_keywords.split() diff --git a/moto/dynamodb2/parsing/reserved_keywords.txt b/moto/dynamodb2/parsing/reserved_keywords.txt new file mode 100644 index 000000000..7c0106127 --- /dev/null +++ b/moto/dynamodb2/parsing/reserved_keywords.txt @@ -0,0 +1,573 @@ +ABORT +ABSOLUTE +ACTION +ADD +AFTER +AGENT +AGGREGATE +ALL +ALLOCATE +ALTER +ANALYZE +AND +ANY +ARCHIVE +ARE +ARRAY +AS +ASC +ASCII +ASENSITIVE +ASSERTION +ASYMMETRIC +AT +ATOMIC +ATTACH +ATTRIBUTE +AUTH +AUTHORIZATION +AUTHORIZE +AUTO +AVG +BACK +BACKUP +BASE +BATCH +BEFORE +BEGIN +BETWEEN +BIGINT +BINARY +BIT +BLOB +BLOCK +BOOLEAN +BOTH +BREADTH +BUCKET +BULK +BY +BYTE +CALL +CALLED +CALLING +CAPACITY +CASCADE +CASCADED +CASE +CAST +CATALOG +CHAR +CHARACTER +CHECK +CLASS +CLOB +CLOSE +CLUSTER +CLUSTERED +CLUSTERING +CLUSTERS +COALESCE +COLLATE +COLLATION +COLLECTION +COLUMN +COLUMNS +COMBINE +COMMENT +COMMIT +COMPACT +COMPILE +COMPRESS +CONDITION +CONFLICT +CONNECT +CONNECTION +CONSISTENCY +CONSISTENT +CONSTRAINT +CONSTRAINTS +CONSTRUCTOR +CONSUMED +CONTINUE +CONVERT +COPY +CORRESPONDING +COUNT +COUNTER +CREATE +CROSS +CUBE +CURRENT +CURSOR +CYCLE +DATA +DATABASE +DATE +DATETIME +DAY +DEALLOCATE +DEC +DECIMAL +DECLARE +DEFAULT +DEFERRABLE +DEFERRED +DEFINE +DEFINED +DEFINITION +DELETE +DELIMITED +DEPTH +DEREF +DESC +DESCRIBE +DESCRIPTOR +DETACH +DETERMINISTIC +DIAGNOSTICS +DIRECTORIES +DISABLE +DISCONNECT +DISTINCT +DISTRIBUTE +DO +DOMAIN +DOUBLE +DROP +DUMP +DURATION +DYNAMIC +EACH +ELEMENT +ELSE +ELSEIF +EMPTY +ENABLE +END +EQUAL +EQUALS +ERROR +ESCAPE +ESCAPED +EVAL +EVALUATE +EXCEEDED +EXCEPT +EXCEPTION +EXCEPTIONS +EXCLUSIVE +EXEC +EXECUTE +EXISTS +EXIT +EXPLAIN +EXPLODE +EXPORT +EXPRESSION +EXTENDED +EXTERNAL +EXTRACT +FAIL +FALSE +FAMILY +FETCH +FIELDS +FILE +FILTER +FILTERING +FINAL +FINISH +FIRST +FIXED +FLATTERN +FLOAT +FOR +FORCE +FOREIGN +FORMAT +FORWARD +FOUND +FREE +FROM +FULL +FUNCTION +FUNCTIONS +GENERAL +GENERATE +GET +GLOB +GLOBAL +GO +GOTO +GRANT +GREATER +GROUP +GROUPING +HANDLER +HASH +HAVE +HAVING +HEAP +HIDDEN +HOLD +HOUR +IDENTIFIED +IDENTITY +IF +IGNORE +IMMEDIATE +IMPORT +IN +INCLUDING +INCLUSIVE +INCREMENT +INCREMENTAL +INDEX +INDEXED +INDEXES +INDICATOR +INFINITE +INITIALLY +INLINE +INNER +INNTER +INOUT +INPUT +INSENSITIVE +INSERT +INSTEAD +INT +INTEGER +INTERSECT +INTERVAL +INTO +INVALIDATE +IS +ISOLATION +ITEM +ITEMS +ITERATE +JOIN +KEY +KEYS +LAG +LANGUAGE +LARGE +LAST +LATERAL +LEAD +LEADING +LEAVE +LEFT +LENGTH +LESS +LEVEL +LIKE +LIMIT +LIMITED +LINES +LIST +LOAD +LOCAL +LOCALTIME +LOCALTIMESTAMP +LOCATION +LOCATOR +LOCK +LOCKS +LOG +LOGED +LONG +LOOP +LOWER +MAP +MATCH +MATERIALIZED +MAX +MAXLEN +MEMBER +MERGE +METHOD +METRICS +MIN +MINUS +MINUTE +MISSING +MOD +MODE +MODIFIES +MODIFY +MODULE +MONTH +MULTI +MULTISET +NAME +NAMES +NATIONAL +NATURAL +NCHAR +NCLOB +NEW +NEXT +NO +NONE +NOT +NULL +NULLIF +NUMBER +NUMERIC +OBJECT +OF +OFFLINE +OFFSET +OLD +ON +ONLINE +ONLY +OPAQUE +OPEN +OPERATOR +OPTION +OR +ORDER +ORDINALITY +OTHER +OTHERS +OUT +OUTER +OUTPUT +OVER +OVERLAPS +OVERRIDE +OWNER +PAD +PARALLEL +PARAMETER +PARAMETERS +PARTIAL +PARTITION +PARTITIONED +PARTITIONS +PATH +PERCENT +PERCENTILE +PERMISSION +PERMISSIONS +PIPE +PIPELINED +PLAN +POOL +POSITION +PRECISION +PREPARE +PRESERVE +PRIMARY +PRIOR +PRIVATE +PRIVILEGES +PROCEDURE +PROCESSED +PROJECT +PROJECTION +PROPERTY +PROVISIONING +PUBLIC +PUT +QUERY +QUIT +QUORUM +RAISE +RANDOM +RANGE +RANK +RAW +READ +READS +REAL +REBUILD +RECORD +RECURSIVE +REDUCE +REF +REFERENCE +REFERENCES +REFERENCING +REGEXP +REGION +REINDEX +RELATIVE +RELEASE +REMAINDER +RENAME +REPEAT +REPLACE +REQUEST +RESET +RESIGNAL +RESOURCE +RESPONSE +RESTORE +RESTRICT +RESULT +RETURN +RETURNING +RETURNS +REVERSE +REVOKE +RIGHT +ROLE +ROLES +ROLLBACK +ROLLUP +ROUTINE +ROW +ROWS +RULE +RULES +SAMPLE +SATISFIES +SAVE +SAVEPOINT +SCAN +SCHEMA +SCOPE +SCROLL +SEARCH +SECOND +SECTION +SEGMENT +SEGMENTS +SELECT +SELF +SEMI +SENSITIVE +SEPARATE +SEQUENCE +SERIALIZABLE +SESSION +SET +SETS +SHARD +SHARE +SHARED +SHORT +SHOW +SIGNAL +SIMILAR +SIZE +SKEWED +SMALLINT +SNAPSHOT +SOME +SOURCE +SPACE +SPACES +SPARSE +SPECIFIC +SPECIFICTYPE +SPLIT +SQL +SQLCODE +SQLERROR +SQLEXCEPTION +SQLSTATE +SQLWARNING +START +STATE +STATIC +STATUS +STORAGE +STORE +STORED +STREAM +STRING +STRUCT +STYLE +SUB +SUBMULTISET +SUBPARTITION +SUBSTRING +SUBTYPE +SUM +SUPER +SYMMETRIC +SYNONYM +SYSTEM +TABLE +TABLESAMPLE +TEMP +TEMPORARY +TERMINATED +TEXT +THAN +THEN +THROUGHPUT +TIME +TIMESTAMP +TIMEZONE +TINYINT +TO +TOKEN +TOTAL +TOUCH +TRAILING +TRANSACTION +TRANSFORM +TRANSLATE +TRANSLATION +TREAT +TRIGGER +TRIM +TRUE +TRUNCATE +TTL +TUPLE +TYPE +UNDER +UNDO +UNION +UNIQUE +UNIT +UNKNOWN +UNLOGGED +UNNEST +UNPROCESSED +UNSIGNED +UNTIL +UPDATE +UPPER +URL +USAGE +USE +USER +USERS +USING +UUID +VACUUM +VALUE +VALUED +VALUES +VARCHAR +VARIABLE +VARIANCE +VARINT +VARYING +VIEW +VIEWS +VIRTUAL +VOID +WAIT +WHEN +WHENEVER +WHERE +WHILE +WINDOW +WITH +WITHIN +WITHOUT +WORK +WRAPPED +WRITE +YEAR +ZONE diff --git a/moto/dynamodb2/parsing/tokens.py b/moto/dynamodb2/parsing/tokens.py index 07d65ae64..4fbb7883a 100644 --- a/moto/dynamodb2/parsing/tokens.py +++ b/moto/dynamodb2/parsing/tokens.py @@ -1,4 +1,5 @@ import re +import sys from moto.dynamodb2.exceptions import ( InvalidTokenException, @@ -147,9 +148,17 @@ class ExpressionTokenizer(object): self.token_list = [] self.staged_characters = "" + @classmethod + def is_py2(cls): + return sys.version_info[0] == 2 + @classmethod def make_list(cls, input_expression_str): - assert isinstance(input_expression_str, str) + if cls.is_py2(): + pass + else: + assert isinstance(input_expression_str, str) + return ExpressionTokenizer(input_expression_str)._make_list() def add_token(self, token_type, token_value): @@ -159,6 +168,10 @@ class ExpressionTokenizer(object): self.add_token(token_type, self.staged_characters) self.staged_characters = "" + @classmethod + def is_numeric(cls, input_str): + return re.compile("[0-9]+").match(input_str) is not None + def process_staged_characters(self): if len(self.staged_characters) == 0: return @@ -167,7 +180,7 @@ class ExpressionTokenizer(object): self.add_token_from_stage(Token.ATTRIBUTE_NAME) else: raise InvalidExpressionAttributeNameKey(self.staged_characters) - elif self.staged_characters.isnumeric(): + elif self.is_numeric(self.staged_characters): self.add_token_from_stage(Token.NUMBER) elif self.is_expression_attribute(self.staged_characters): self.add_token_from_stage(Token.ATTRIBUTE) diff --git a/moto/dynamodb2/responses.py b/moto/dynamodb2/responses.py index d21d1d756..a5aeeac70 100644 --- a/moto/dynamodb2/responses.py +++ b/moto/dynamodb2/responses.py @@ -748,11 +748,6 @@ class DynamoHandler(BaseResponse): expression_attribute_names = self.body.get("ExpressionAttributeNames", {}) expression_attribute_values = self.body.get("ExpressionAttributeValues", {}) - # Support spaces between operators in an update expression - # E.g. `a = b + c` -> `a=b+c` - if update_expression: - update_expression = re.sub(r"\s*([=\+-])\s*", "\\1", update_expression) - try: item = self.dynamodb_backend.update_item( name, diff --git a/tests/test_dynamodb2/test_dynamodb.py b/tests/test_dynamodb2/test_dynamodb.py index bec24c966..09401d562 100644 --- a/tests/test_dynamodb2/test_dynamodb.py +++ b/tests/test_dynamodb2/test_dynamodb.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals, print_function +import re from decimal import Decimal import six @@ -4177,3 +4178,70 @@ def test_gsi_verify_negative_number_order(): [float(item["gsiK1SortKey"]) for item in resp["Items"]].should.equal( [-0.7, -0.6, 0.7] ) + + +def assert_raise_syntax_error(client_error, token, near): + """ + Assert whether a client_error is as expected Syntax error. Syntax error looks like: `syntax_error_template` + + Args: + client_error(ClientError): The ClientError exception that was raised + token(str): The token that ws unexpected + near(str): The part in the expression that shows where the error occurs it generally has the preceding token the + optional separation and the problematic token. + """ + syntax_error_template = ( + 'Invalid UpdateExpression: Syntax error; token: "{token}", near: "{near}"' + ) + expected_syntax_error = syntax_error_template.format(token=token, near=near) + assert client_error.response["Error"]["Code"] == "ValidationException" + assert expected_syntax_error == client_error.response["Error"]["Message"] + + +@mock_dynamodb2 +def test_update_expression_with_numeric_literal_instead_of_value(): + """ + DynamoDB requires literals to be passed in as values. If they are put literally in the expression a token error will + be raised + """ + dynamodb = boto3.client("dynamodb", region_name="eu-west-1") + + dynamodb.create_table( + TableName="moto-test", + KeySchema=[{"AttributeName": "id", "KeyType": "HASH"}], + AttributeDefinitions=[{"AttributeName": "id", "AttributeType": "S"}], + ) + + try: + dynamodb.update_item( + TableName="moto-test", + Key={"id": {"S": "1"}}, + UpdateExpression="SET MyStr = myNum + 1", + ) + assert False, "Validation exception not thrown" + except dynamodb.exceptions.ClientError as e: + assert_raise_syntax_error(e, "1", "+ 1") + + +@mock_dynamodb2 +def test_update_expression_with_multiple_set_clauses_must_be_comma_separated(): + """ + An UpdateExpression can have multiple set clauses but if they are passed in without the separating comma. + """ + dynamodb = boto3.client("dynamodb", region_name="eu-west-1") + + dynamodb.create_table( + TableName="moto-test", + KeySchema=[{"AttributeName": "id", "KeyType": "HASH"}], + AttributeDefinitions=[{"AttributeName": "id", "AttributeType": "S"}], + ) + + try: + dynamodb.update_item( + TableName="moto-test", + Key={"id": {"S": "1"}}, + UpdateExpression="SET MyStr = myNum Mystr2 myNum2", + ) + assert False, "Validation exception not thrown" + except dynamodb.exceptions.ClientError as e: + assert_raise_syntax_error(e, "Mystr2", "myNum Mystr2 myNum2") diff --git a/tests/test_dynamodb2/test_dynamodb_expressions.py b/tests/test_dynamodb2/test_dynamodb_expressions.py new file mode 100644 index 000000000..1066231af --- /dev/null +++ b/tests/test_dynamodb2/test_dynamodb_expressions.py @@ -0,0 +1,395 @@ +from moto.dynamodb2.exceptions import InvalidTokenException +from moto.dynamodb2.parsing.expressions import UpdateExpressionParser +from moto.dynamodb2.parsing.reserved_keywords import ReservedKeywords + + +def test_get_reserved_keywords(): + reserved_keywords = ReservedKeywords.get_reserved_keywords() + assert "SET" in reserved_keywords + assert "DELETE" in reserved_keywords + assert "ADD" in reserved_keywords + # REMOVE is not part of the list of reserved keywords. + assert "REMOVE" not in reserved_keywords + + +def test_update_expression_numeric_literal_in_expression(): + set_action = "SET attrName = 3" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "3" + assert te.near == "= 3" + + +def test_expression_tokenizer_multi_number_numeric_literal_in_expression(): + set_action = "SET attrName = 34" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "34" + assert te.near == "= 34" + + +def test_expression_tokenizer_numeric_literal_unclosed_square_bracket(): + set_action = "SET MyStr[ 3" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "" + assert te.near == "3" + + +def test_expression_tokenizer_wrong_closing_bracket_with_space(): + set_action = "SET MyStr[3 )" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == ")" + assert te.near == "3 )" + + +def test_expression_tokenizer_wrong_closing_bracket(): + set_action = "SET MyStr[3)" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == ")" + assert te.near == "3)" + + +def test_expression_tokenizer_only_numeric_literal_for_set(): + set_action = "SET 2" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "2" + assert te.near == "SET 2" + + +def test_expression_tokenizer_only_numeric_literal(): + set_action = "2" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "2" + assert te.near == "2" + + +def test_expression_tokenizer_set_closing_round_bracket(): + set_action = "SET )" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == ")" + assert te.near == "SET )" + + +def test_expression_tokenizer_set_closing_followed_by_numeric_literal(): + set_action = "SET ) 3" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == ")" + assert te.near == "SET ) 3" + + +def test_expression_tokenizer_numeric_literal_unclosed_square_bracket_trailing_space(): + set_action = "SET MyStr[ 3 " + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "" + assert te.near == "3 " + + +def test_expression_tokenizer_unbalanced_round_brackets_only_opening(): + set_action = "SET MyStr = (:_val" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "" + assert te.near == ":_val" + + +def test_expression_tokenizer_unbalanced_round_brackets_only_opening_trailing_space(): + set_action = "SET MyStr = (:_val " + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "" + assert te.near == ":_val " + + +def test_expression_tokenizer_unbalanced_square_brackets_only_opening(): + set_action = "SET MyStr = [:_val" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "[" + assert te.near == "= [:_val" + + +def test_expression_tokenizer_unbalanced_square_brackets_only_opening_trailing_spaces(): + set_action = "SET MyStr = [:_val " + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "[" + assert te.near == "= [:_val" + + +def test_expression_tokenizer_unbalanced_round_brackets_multiple_opening(): + set_action = "SET MyStr = (:_val + (:val2" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "" + assert te.near == ":val2" + + +def test_expression_tokenizer_unbalanced_round_brackets_only_closing(): + set_action = "SET MyStr = ):_val" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == ")" + assert te.near == "= ):_val" + + +def test_expression_tokenizer_unbalanced_square_brackets_only_closing(): + set_action = "SET MyStr = ]:_val" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "]" + assert te.near == "= ]:_val" + + +def test_expression_tokenizer_unbalanced_round_brackets_only_closing_followed_by_other_parts(): + set_action = "SET MyStr = ):_val + :val2" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == ")" + assert te.near == "= ):_val" + + +def test_update_expression_starts_with_keyword_reset_followed_by_identifier(): + update_expression = "RESET NonExistent" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "RESET" + assert te.near == "RESET NonExistent" + + +def test_update_expression_starts_with_keyword_reset_followed_by_identifier_and_value(): + update_expression = "RESET NonExistent value" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "RESET" + assert te.near == "RESET NonExistent" + + +def test_update_expression_starts_with_leading_spaces_and_keyword_reset_followed_by_identifier_and_value(): + update_expression = " RESET NonExistent value" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "RESET" + assert te.near == " RESET NonExistent" + + +def test_update_expression_with_only_keyword_reset(): + update_expression = "RESET" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "RESET" + assert te.near == "RESET" + + +def test_update_nested_expression_with_selector_just_should_fail_parsing_at_numeric_literal_value(): + update_expression = "SET a[0].b = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "5" + assert te.near == "= 5" + + +def test_update_nested_expression_with_selector_and_spaces_should_only_fail_parsing_at_numeric_literal_value(): + update_expression = "SET a [ 2 ]. b = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "5" + assert te.near == "= 5" + + +def test_update_nested_expression_with_double_selector_and_spaces_should_only_fail_parsing_at_numeric_literal_value(): + update_expression = "SET a [2][ 3 ]. b = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "5" + assert te.near == "= 5" + + +def test_update_nested_expression_should_only_fail_parsing_at_numeric_literal_value(): + update_expression = "SET a . b = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "5" + assert te.near == "= 5" + + +def test_nested_selectors_in_update_expression_should_fail_at_nesting(): + update_expression = "SET a [ [2] ]. b = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "[" + assert te.near == "[ [2" + + +def test_update_expression_number_in_selector_cannot_be_splite(): + update_expression = "SET a [2 1]. b = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "1" + assert te.near == "2 1]" + + +def test_update_expression_cannot_have_successive_attributes(): + update_expression = "SET #a a = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "a" + assert te.near == "#a a =" + + +def test_update_expression_path_with_both_attribute_and_attribute_name_should_only_fail_at_numeric_value(): + update_expression = "SET #a.a = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "5" + assert te.near == "= 5" + + +def test_expression_tokenizer_2_same_operators_back_to_back(): + set_action = "SET MyStr = NoExist + + :_val " + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "+" + assert te.near == "+ + :_val" + + +def test_expression_tokenizer_2_different_operators_back_to_back(): + set_action = "SET MyStr = NoExist + - :_val " + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "-" + assert te.near == "+ - :_val" + + +def test_update_expression_remove_does_not_allow_operations(): + remove_action = "REMOVE NoExist + " + try: + UpdateExpressionParser.make(remove_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "+" + assert te.near == "NoExist + " + + +def test_update_expression_add_does_not_allow_attribute_after_path(): + """value here is not really a value since a value starts with a colon (:)""" + add_expr = "ADD attr val foobar" + try: + UpdateExpressionParser.make(add_expr) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "val" + assert te.near == "attr val foobar" + + +def test_update_expression_add_does_not_allow_attribute_foobar_after_value(): + add_expr = "ADD attr :val foobar" + try: + UpdateExpressionParser.make(add_expr) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "foobar" + assert te.near == ":val foobar" + + +def test_update_expression_delete_does_not_allow_attribute_after_path(): + """value here is not really a value since a value starts with a colon (:)""" + delete_expr = "DELETE attr val" + try: + UpdateExpressionParser.make(delete_expr) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "val" + assert te.near == "attr val" + + +def test_update_expression_delete_does_not_allow_attribute_foobar_after_value(): + delete_expr = "DELETE attr :val foobar" + try: + UpdateExpressionParser.make(delete_expr) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "foobar" + assert te.near == ":val foobar" + + +def test_update_expression_parsing_is_not_keyword_aware(): + """path and VALUE are keywords. Yet a token error will be thrown for the numeric literal 1.""" + delete_expr = "SET path = VALUE 1" + try: + UpdateExpressionParser.make(delete_expr) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "1" + assert te.near == "VALUE 1" diff --git a/tests/test_dynamodb2/test_dynamodb_table_with_range_key.py b/tests/test_dynamodb2/test_dynamodb_table_with_range_key.py index c433a3a31..1aa2175c1 100644 --- a/tests/test_dynamodb2/test_dynamodb_table_with_range_key.py +++ b/tests/test_dynamodb2/test_dynamodb_table_with_range_key.py @@ -1254,14 +1254,22 @@ def test_update_item_with_expression(): item_key = {"forum_name": "the-key", "subject": "123"} - table.update_item(Key=item_key, UpdateExpression="SET field=2") + table.update_item( + Key=item_key, + UpdateExpression="SET field = :field_value", + ExpressionAttributeValues={":field_value": 2}, + ) dict(table.get_item(Key=item_key)["Item"]).should.equal( - {"field": "2", "forum_name": "the-key", "subject": "123"} + {"field": Decimal("2"), "forum_name": "the-key", "subject": "123"} ) - table.update_item(Key=item_key, UpdateExpression="SET field = 3") + table.update_item( + Key=item_key, + UpdateExpression="SET field = :field_value", + ExpressionAttributeValues={":field_value": 3}, + ) dict(table.get_item(Key=item_key)["Item"]).should.equal( - {"field": "3", "forum_name": "the-key", "subject": "123"} + {"field": Decimal("3"), "forum_name": "the-key", "subject": "123"} )