From 9ed613e197e1d6e85f9631f7a15a2b8ce2f19b2e Mon Sep 17 00:00:00 2001 From: pvbouwel Date: Sat, 11 Apr 2020 21:17:16 +0100 Subject: [PATCH] Better DDB expressions support2: ExpressionTree Part of structured approach for UpdateExpressions: 1) Expression gets parsed into a tokenlist (tokenized) 2) Tokenlist gets transformed to an expression tree (AST) -> This commit 3) The AST gets validated (full semantic correctness) 4) AST gets processed to perform the update This commit uses the tokenlist to build an expression tree. This tree is not yet used. Still, it allows raising additional ValidationExceptions that were previously missed silently; therefore it allows tests to catch these types of ValidationException. For that reason DDB UpdateExpressions will be parsed already. It also makes sure we won't break existing tests. One of the existing tests had to be changed in order to still pass: - test_dynamodb_table_with_range_key.test_update_item_with_expression This test passed in a numeric literal, which is not supported by DynamoDB; with the current tokenization it now gets the same error as in AWS DynamoDB. 
--- moto/dynamodb2/models/__init__.py | 12 +- moto/dynamodb2/parsing/ast_nodes.py | 205 ++++ moto/dynamodb2/parsing/expressions.py | 1010 +++++++++++++++++ moto/dynamodb2/parsing/reserved_keywords.py | 29 + moto/dynamodb2/parsing/reserved_keywords.txt | 573 ++++++++++ moto/dynamodb2/parsing/tokens.py | 17 +- moto/dynamodb2/responses.py | 5 - tests/test_dynamodb2/test_dynamodb.py | 68 ++ .../test_dynamodb_expressions.py | 395 +++++++ .../test_dynamodb_table_with_range_key.py | 16 +- 10 files changed, 2317 insertions(+), 13 deletions(-) create mode 100644 moto/dynamodb2/parsing/ast_nodes.py create mode 100644 moto/dynamodb2/parsing/expressions.py create mode 100644 moto/dynamodb2/parsing/reserved_keywords.py create mode 100644 moto/dynamodb2/parsing/reserved_keywords.txt create mode 100644 tests/test_dynamodb2/test_dynamodb_expressions.py diff --git a/moto/dynamodb2/models/__init__.py b/moto/dynamodb2/models/__init__.py index 29713d211..1f448f288 100644 --- a/moto/dynamodb2/models/__init__.py +++ b/moto/dynamodb2/models/__init__.py @@ -14,10 +14,11 @@ from moto.core import BaseBackend, BaseModel from moto.core.utils import unix_time from moto.core.exceptions import JsonRESTError from moto.dynamodb2.comparisons import get_filter_expression -from moto.dynamodb2.comparisons import get_expected -from moto.dynamodb2.exceptions import InvalidIndexNameError, ItemSizeTooLarge +from moto.dynamodb2.comparisons import get_expected, get_comparison_func +from moto.dynamodb2.exceptions import InvalidIndexNameError, ItemSizeTooLarge, InvalidUpdateExpression from moto.dynamodb2.models.utilities import bytesize, attribute_is_list from moto.dynamodb2.models.dynamo_type import DynamoType +from moto.dynamodb2.parsing.expressions import UpdateExpressionParser class DynamoJsonEncoder(json.JSONEncoder): @@ -1197,6 +1198,13 @@ class DynamoDBBackend(BaseBackend): ): table = self.get_table(table_name) + # Support spaces between operators in an update expression + # E.g. 
`a = b + c` -> `a=b+c` + if update_expression: + # Parse expression to get validation errors + UpdateExpressionParser.make(update_expression) + update_expression = re.sub(r"\s*([=\+-])\s*", "\\1", update_expression) + if all([table.hash_key_attr in key, table.range_key_attr in key]): # Covers cases where table has hash and range keys, ``key`` param # will be a dict diff --git a/moto/dynamodb2/parsing/ast_nodes.py b/moto/dynamodb2/parsing/ast_nodes.py new file mode 100644 index 000000000..78c7b6b2b --- /dev/null +++ b/moto/dynamodb2/parsing/ast_nodes.py @@ -0,0 +1,205 @@ +import abc +import six + + +@six.add_metaclass(abc.ABCMeta) +class Node: + def __init__(self, children=None): + self.type = self.__class__.__name__ + assert children is None or isinstance(children, list) + self.children = children + self.parent = None + + if isinstance(children, list): + for child in children: + if isinstance(child, Node): + child.set_parent(self) + + def set_parent(self, parent_node): + self.parent = parent_node + + +class LeafNode(Node): + """A LeafNode is a Node where none of the children are Nodes themselves.""" + + def __init__(self, children=None): + super(LeafNode, self).__init__(children) + + +@six.add_metaclass(abc.ABCMeta) +class Expression(Node): + """ + Abstract Syntax Tree representing the expression + + For the Grammar start here and jump down into the classes at the righ-hand side to look further. Nodes marked with + a star are abstract and won't appear in the final AST. 
+ + Expression* => UpdateExpression + Expression* => ConditionExpression + """ + + +class UpdateExpression(Expression): + """ + UpdateExpression => UpdateExpressionClause* + UpdateExpression => UpdateExpressionClause* UpdateExpression + """ + + +@six.add_metaclass(abc.ABCMeta) +class UpdateExpressionClause(UpdateExpression): + """ + UpdateExpressionClause* => UpdateExpressionSetClause + UpdateExpressionClause* => UpdateExpressionRemoveClause + UpdateExpressionClause* => UpdateExpressionAddClause + UpdateExpressionClause* => UpdateExpressionDeleteClause + """ + + +class UpdateExpressionSetClause(UpdateExpressionClause): + """ + UpdateExpressionSetClause => SET SetActions + """ + + +class UpdateExpressionSetActions(UpdateExpressionClause): + """ + UpdateExpressionSetClause => SET SetActions + + SetActions => SetAction + SetActions => SetAction , SetActions + + """ + + +class UpdateExpressionSetAction(UpdateExpressionClause): + """ + SetAction => Path = Value + """ + + +class UpdateExpressionRemoveActions(UpdateExpressionClause): + """ + UpdateExpressionSetClause => REMOVE RemoveActions + + RemoveActions => RemoveAction + RemoveActions => RemoveAction , RemoveActions + """ + + +class UpdateExpressionRemoveAction(UpdateExpressionClause): + """ + RemoveAction => Path + """ + + +class UpdateExpressionAddActions(UpdateExpressionClause): + """ + UpdateExpressionAddClause => ADD RemoveActions + + AddActions => AddAction + AddActions => AddAction , AddActions + """ + + +class UpdateExpressionAddAction(UpdateExpressionClause): + """ + AddAction => Path Value + """ + + +class UpdateExpressionDeleteActions(UpdateExpressionClause): + """ + UpdateExpressionDeleteClause => DELETE RemoveActions + + DeleteActions => DeleteAction + DeleteActions => DeleteAction , DeleteActions + """ + + +class UpdateExpressionDeleteAction(UpdateExpressionClause): + """ + DeleteAction => Path Value + """ + + +class UpdateExpressionPath(UpdateExpressionClause): + pass + + +class 
UpdateExpressionValue(UpdateExpressionClause): + """ + Value => Operand + Value => Operand + Value + Value => Operand - Value + """ + + +class UpdateExpressionGroupedValue(UpdateExpressionClause): + """ + GroupedValue => ( Value ) + """ + + +class UpdateExpressionRemoveClause(UpdateExpressionClause): + """ + UpdateExpressionRemoveClause => REMOVE RemoveActions + """ + + +class UpdateExpressionAddClause(UpdateExpressionClause): + """ + UpdateExpressionAddClause => ADD AddActions + """ + + +class UpdateExpressionDeleteClause(UpdateExpressionClause): + """ + UpdateExpressionDeleteClause => DELETE DeleteActions + """ + + +class ExpressionPathDescender(Node): + """Node identifying descender into nested structure (.) in expression""" + + +class ExpressionSelector(LeafNode): + """Node identifying selector [selection_index] in expresion""" + + def __init__(self, selection_index): + super(ExpressionSelector, self).__init__(children=[selection_index]) + + +class ExpressionAttribute(LeafNode): + """An attribute identifier as used in the DDB item""" + + def __init__(self, attribute): + super(ExpressionAttribute, self).__init__(children=[attribute]) + + +class ExpressionAttributeName(LeafNode): + """An ExpressionAttributeName is an alias for an attribute identifier""" + + def __init__(self, attribute_name): + super(ExpressionAttributeName, self).__init__(children=[attribute_name]) + + +class ExpressionAttributeValue(LeafNode): + """An ExpressionAttributeValue is an alias for an value""" + + def __init__(self, value): + super(ExpressionAttributeValue, self).__init__(children=[value]) + + +class ExpressionValueOperator(LeafNode): + """An ExpressionValueOperator is an operation that works on 2 values""" + + def __init__(self, value): + super(ExpressionValueOperator, self).__init__(children=[value]) + + +class UpdateExpressionFunction(Node): + """ + A Node representing a function of an Update Expression. The first child is the function name the others are the + arguments. 
+ """ diff --git a/moto/dynamodb2/parsing/expressions.py b/moto/dynamodb2/parsing/expressions.py new file mode 100644 index 000000000..e418bb47e --- /dev/null +++ b/moto/dynamodb2/parsing/expressions.py @@ -0,0 +1,1010 @@ +import logging +from abc import abstractmethod +import abc +import six +from collections import deque + +from moto.dynamodb2.parsing.ast_nodes import ( + UpdateExpression, + UpdateExpressionSetClause, + UpdateExpressionSetActions, + UpdateExpressionSetAction, + UpdateExpressionRemoveActions, + UpdateExpressionRemoveAction, + UpdateExpressionPath, + UpdateExpressionValue, + UpdateExpressionGroupedValue, + UpdateExpressionRemoveClause, + ExpressionPathDescender, + ExpressionSelector, + ExpressionAttribute, + ExpressionAttributeName, + ExpressionAttributeValue, + ExpressionValueOperator, + UpdateExpressionFunction, + UpdateExpressionAddClause, + UpdateExpressionAddActions, + UpdateExpressionAddAction, + UpdateExpressionDeleteAction, + UpdateExpressionDeleteActions, + UpdateExpressionDeleteClause, +) +from moto.dynamodb2.exceptions import InvalidTokenException +from moto.dynamodb2.parsing.tokens import Token, ExpressionTokenizer + + +class NestableExpressionParserMixin(object): + """ + For nodes that can be nested in themselves (recursive). Take for example UpdateExpression's grammar: + + UpdateExpression => UpdateExpressionClause* + UpdateExpression => UpdateExpressionClause* UpdateExpression + + If we consider it of structure + NestableExpression => TargetClause* + NestableExpression => TargetClause* NestableExpression + + This pattern comes back multiple times. This Mixin adds re-usability for that type of pattern. + + This approach is taken since it allows to remain the ordering of the Nodes as how the corresponding tokens where + in the originating expression. 
+ """ + + def __init__(self, *args, **kwargs): + self.target_clauses = deque() + + def _parse_target_clause(self, factory_class): + """ + + Args: + factory_class: The factory for the target clause e.g. UpdateExpressionSetClauseParser + + Returns: + + """ + logging.debug( + "Move token pos {pos} to continue parsing with specific factory class {fc}".format( + pos=self.token_pos, fc=factory_class.__class__.__name__ + ) + ) + # noinspection PyProtectedMember + ast, token_pos = factory_class(**self._initializer_args())._parse_with_pos() + self.target_clauses.append(ast) + logging.debug( + "Continue where previous parsing ended {token_pos}".format( + token_pos=token_pos + ) + ) + self.token_pos = token_pos + + @abstractmethod + def _initializer_args(self): + """ + Get the arguments of the initializer. This is implemented by the calling class. See ExpressionParser for an + example. + + Returns: + dict: A dictionary of the initializer arguments + """ + + @classmethod + @abstractmethod + def _nestable_class(cls): + """ + Get the class of the Node that will be created that would be nested. For the example in the docstring this would + be UpdateExpression + + Returns: + class: The class of the Nodes that will be created. + """ + + def _create_node(self): + """ + target_clauses has the nodes in order of encountering. Go through them backwards and build the tree bottom up. + + This way left-deep-descending traversal will process nodes in order. + + Continuing the example of an UpdateExpression: + For example SET a=3 REMOVE b + UpdateExpression + / \ + SET a=3 UpdateExpression + | + REMOVE b + + self.target_clauses looks like: ( SET a=3 >> REMOVE b ) + Returns: + moto.dynamodb2.ast_nodes.Node: Node of an AST representing the Expression as produced by the factory. 
+ """ + assert len(self.target_clauses) > 0, "No nodes for {cn}".format( + cn=self.__class__.__name__ + ) + target_node = self._nestable_class()(children=[self.target_clauses.pop()]) + while len(self.target_clauses) > 0: + target_node = self._nestable_class()( + children=[self.target_clauses.pop(), target_node] + ) + return target_node + + +@six.add_metaclass(abc.ABCMeta) +class ExpressionParser: + """Abstract class""" + + def __init__(self, expression_token_list, token_pos=0): + """ + + Args: + expression_token_list: + token_pos(int): Location where parsing is + """ + self.token_list = expression_token_list + self.token_pos = token_pos + + def _initializer_args(self): + return {"expression_token_list": self.token_list, "token_pos": self.token_pos} + + @abstractmethod + def _parse(self): + """ + Start parsing the token_list from token_pos for the factory type. + + Returns: + moto.dynamodb2.ast_nodes.Node: AST which is root node of resulting abstract syntax tree + """ + + @classmethod + def is_possible_start(cls, token): + return token is not None and cls._is_possible_start(token) + + @classmethod + @abstractmethod + def _is_possible_start(cls, token): + """ + + Args: + token(moto.dynamodb2.tokens.Token): + + Returns: + bool: True if token is a possible start for entries processed by `cls` + """ + + def _parse_with_pos(self): + """ + Start parsing the token_list from token_pos for the factory type and also return the resulting token_pos. + + Returns: + (ast, token_pos): tuple of AST which is root node of resulting abstract syntax tree and token_pos is the + position in the tokenlist. 
+ """ + return self._parse(), self.token_pos + + def parse(self): + return self._parse() + + def get_next_token_type(self): + """ + Get the type of the next token to be processed + + Returns: + str: Token type or None if no more next token + """ + try: + return self.get_next_token().type + except AttributeError: + return None + + def get_next_token(self): + """ + Get the next token to be processed + + Returns: + moto.dynamodb2.tokens.Token: or None if no more next token + """ + try: + return self.token_list[self.token_pos] + except IndexError: + return None + + def get_next_token_value(self): + """ + Get the value of the next token to be processed + + Returns: + str: value or None if no more next token + """ + try: + return self.get_next_token().value + except AttributeError: + return None + + def is_at_end(self): + """Return boolean indicating whether we are at end of the parsing""" + return self.token_pos == len(self.token_list) + + def is_at_start(self): + """Return boolean indicating whether we are at start of the parsing""" + return self.token_pos == 0 + + def get_last_token_value(self): + """Get the last token that was correctly parsed or return empty string""" + if self.token_pos > 0: + return self.token_list[self.token_pos - 1].value + else: + return "" + + def get_last_token_type(self): + """Get the last token type that was correctly parsed or return None""" + if self.token_pos > 0: + return self.token_list[self.token_pos - 1].type + else: + return None + + def get_2nd_last_token_value_if_last_was_whitespace(self): + """Get the 2nd last token that was correctly parsed if last one was whitespace or return empty string""" + if self.token_pos > 1 and self.get_last_token_type() == Token.WHITESPACE: + return self.token_list[self.token_pos - 2].value + else: + return "" + + def get_following_token_value(self): + """Get the token value after the one that is being parsed or empty string if non existent.""" + try: + return self.token_list[self.token_pos + 1].value 
+ except IndexError: + return "" + + def get_following_token_type(self): + """Get the token type after the one that is being parsed or None if non existent.""" + try: + return self.token_list[self.token_pos + 1].type + except IndexError: + return None + + def get_2nd_following_token_value_if_following_was_whitespace(self): + """Get the 2nd following token that was correctly parsed if 1st one was whitespace or return empty string""" + if self.get_following_token_type() == Token.WHITESPACE: + try: + return self.token_list[self.token_pos + 2].value + except IndexError: + return "" + else: + return "" + + def skip_white_space(self): + try: + while self.get_next_token_type() == Token.WHITESPACE: + self.token_pos += 1 + except IndexError: + assert self.token_pos > 0, "We should always have positive indexes" + logging.debug("We are out of range so end is reached") + + def process_token_of_type(self, token_type): + """ + Maker sure the next token is of type `token_type` if not raise unexpected token + Args: + token_type: A token type + + Returns: + str: The value if the token is of type `token_type` + """ + if self.get_next_token_type() == token_type: + token_value = self.get_next_token_value() + self.goto_next_significant_token() + return token_value + else: + self.raise_unexpected_token() + + def goto_next_significant_token(self): + """Continue past current token and skip all whitespaces""" + self.token_pos += 1 + self.skip_white_space() + + def raise_unexpected_token(self): + if self.is_at_end(): + problematic_token = "" + problematic_token_in_near = "" + else: + problematic_token_in_near = problematic_token = self.get_next_token_value() + + near = "".join( + [ + self.get_2nd_last_token_value_if_last_was_whitespace(), + self.get_last_token_value(), + problematic_token_in_near, + self.get_following_token_value(), + self.get_2nd_following_token_value_if_following_was_whitespace(), + ] + ) + + raise InvalidTokenException(problematic_token, near) + + +class 
NestableBinExpressionParser(ExpressionParser): + """ + For nodes that can be nested in themselves (recursive) but with an operation. Take for example + UpdateExpressionValue's grammar: + + Value => Operand* + Value => Operand* + Value + Value => Operand* - Value + + If we consider it of structure + NestableBinExpression => TargetClause* + NestableBinExpression => TargetClause* BinOp NestableBinExpression + + This pattern comes back multiple times. This Mixin adds re-usability for that type of pattern. + + This approach is taken since it allows to remain the ordering of the Nodes as how the corresponding tokens where + in the originating expression. + """ + + def __init__(self, *args, **kwargs): + super(NestableBinExpressionParser, self).__init__(*args, **kwargs) + self.target_nodes = deque() + + def _parse_target_clause(self, factory_class): + """ + + Args: + factory_class: The factory for the target clause e.g. UpdateExpressionSetClauseParser + + Returns: + + """ + # noinspection PyProtectedMember + ast, self.token_pos = factory_class( + **self._initializer_args() + )._parse_with_pos() + self.target_nodes.append(ast) + logging.debug( + "Continue where previous parsing ended {token_pos}".format( + token_pos=self.token_pos + ) + ) + + def _parse(self): + self._parse_target_clause(self._operand_factory_class()) + while self._binop_factory_class().is_possible_start(self.get_next_token()): + self._parse_target_clause(self._binop_factory_class()) + if self._operand_factory_class().is_possible_start(self.get_next_token()): + self._parse_target_clause(self._operand_factory_class()) + else: + self.raise_unexpected_token() + + @abstractmethod + def _operand_factory_class(self): + """ + Get the Parser class of the Operands for the Binary operations/actions. + + Returns: + class: + """ + + @abstractmethod + def _binop_factory_class(self): + """ + Get a factory that gets the possible binary operation. 
+ + Returns: + class: A class extending ExpressionParser + """ + + def _create_node(self): + """ + target_clauses has the nodes in order of encountering. Go through them forward and build the tree bottom up. + For simplicity docstring will use Operand Node rather than the specific node + + This way left-deep-descending traversal will process nodes in order. + + Continuing the example of an UpdateExpressionValue: + For example value => a + :val - :val2 + UpdateExpressionValue + / | \ + UpdateExpressionValue BinOp Operand + / | | | | + UpdateExpressionValue BinOp Operand - :val2 + / | | + Operand + :val + | + a + + self.target_nodes looks like: ( a >> + >> :val >> - >> :val2 ) + Returns: + moto.dynamodb2.ast_nodes.Node: Node of an AST representing the Expression as produced by the factory. + """ + if len(self.target_nodes) == 1: + return UpdateExpressionValue(children=[self.target_nodes.popleft()]) + else: + target_node = UpdateExpressionValue( + children=[ + self.target_nodes.popleft(), + self.target_nodes.popleft(), + self.target_nodes.popleft(), + ] + ) + while len(self.target_nodes) >= 2: + target_node = UpdateExpressionValue( + children=[ + target_node, + self.target_nodes.popleft(), + self.target_nodes.popleft(), + ] + ) + assert len(self.target_nodes) == 0 + return target_node + + +class UpdateExpressionParser(ExpressionParser, NestableExpressionParserMixin): + """ + Parser to create update expressions + """ + + @classmethod + def _sub_factories(cls): + return [ + UpdateExpressionSetClauseParser, + UpdateExpressionAddClauseParser, + UpdateExpressionDeleteClauseParser, + UpdateExpressionRemoveClauseParser, + ] + + @classmethod + def _is_possible_start(cls, token): + pass + + def __init__(self, *args, **kwargs): + super(UpdateExpressionParser, self).__init__(*args, **kwargs) + NestableExpressionParserMixin.__init__(self) + + @classmethod + def _nestable_class(cls): + return UpdateExpression + + def _parse_expression_clause(self, factory_class): + return 
self._parse_target_clause(factory_class) + + def _parse_by_a_subfactory(self): + for sub_factory in self._sub_factories(): + if sub_factory.is_possible_start(self.get_next_token()): + self._parse_expression_clause(sub_factory) + return True + return False + + def _parse(self): + """ + Update Expression is the top-most node therefore it is expected to end up at the end of the expression. + """ + while True: + self.skip_white_space() + if self.is_at_end(): + logging.debug("End reached") + break + elif self._parse_by_a_subfactory(): + continue + else: + self.raise_unexpected_token() + + return self._create_node(), self.token_pos + + @classmethod + def make(cls, expression_str): + token_list = ExpressionTokenizer.make_list(expression_str) + return cls(token_list).parse() + + +class UpdateExpressionSetClauseParser(ExpressionParser): + """ + UpdateExpressionSetClause => SET SetActions + """ + + @classmethod + def _is_possible_start(cls, token): + return token.type == Token.ATTRIBUTE and token.value.upper() == "SET" + + def _parse(self): + assert self.is_possible_start(self.get_next_token()) + self.goto_next_significant_token() + ast, self.token_pos = UpdateExpressionSetActionsParser( + **self._initializer_args() + )._parse_with_pos() + # noinspection PyProtectedMember + return UpdateExpressionSetClause(children=[ast]) + + +class UpdateExpressionActionsParser(ExpressionParser, NestableExpressionParserMixin): + """ + UpdateExpressionSetActions + """ + + def __init__(self, *args, **kwargs): + super(UpdateExpressionActionsParser, self).__init__(*args, **kwargs) + NestableExpressionParserMixin.__init__(self) + + @classmethod + def _is_possible_start(cls, token): + raise RuntimeError( + "{class_name} cannot be identified by the next token.".format( + class_name=cls._nestable_class().__name__ + ) + ) + + @classmethod + @abstractmethod + def _nestable_class(cls): + return UpdateExpressionSetActions + + @classmethod + @abstractmethod + def _nested_expression_parser_class(cls): + 
"""Returns the parser for the query part that creates the nested nodes""" + + def _parse(self): + """ + UpdateExpressionSetActions is inside the expression so it can be followed by others. Process SetActions one by + one until no more SetAction. + """ + self.skip_white_space() + + while self._nested_expression_parser_class().is_possible_start( + self.get_next_token() + ): + self._parse_target_clause(self._nested_expression_parser_class()) + self.skip_white_space() + if self.get_next_token_type() == Token.COMMA: + self.goto_next_significant_token() + else: + break + + if len(self.target_clauses) == 0: + logging.debug( + "Didn't encounter a single {nc} in {nepc}.".format( + nc=self._nestable_class().__name__, + nepc=self._nested_expression_parser_class().__name__, + ) + ) + self.raise_unexpected_token() + + return self._create_node() + + +class UpdateExpressionSetActionsParser(UpdateExpressionActionsParser): + """ + UpdateExpressionSetActions + """ + + @classmethod + def _nested_expression_parser_class(cls): + return UpdateExpressionSetActionParser + + @classmethod + def _nestable_class(cls): + return UpdateExpressionSetActions + + +class UpdateExpressionSetActionParser(ExpressionParser): + """ + SetAction => Path = Value + + So we create an UpdateExpressionSetAction Node that has 2 children. Left child Path and right child Value. + """ + + @classmethod + def _is_possible_start(cls, token): + return UpdateExpressionPathParser.is_possible_start(token) + + def _parse(self): + """ + UpdateExpressionSetActionParser only gets called when expecting a SetAction. So we should be aggressive on + raising invalid Tokens. 
We can thus do the following: + 1) Process path + 2) skip whitespace if there are any + 3) Process equal-sign token + 4) skip whitespace if there are any + 3) Process value + + """ + path, self.token_pos = UpdateExpressionPathParser( + **self._initializer_args() + )._parse_with_pos() + self.skip_white_space() + self.process_token_of_type(Token.EQUAL_SIGN) + self.skip_white_space() + value, self.token_pos = UpdateExpressionValueParser( + **self._initializer_args() + )._parse_with_pos() + return UpdateExpressionSetAction(children=[path, value]) + + +class UpdateExpressionPathParser(ExpressionParser): + """ + Paths are selectors within items to specify a part within an Item. DynamoDB does not impose much restrictions on the + data it stores but it does store more strict restrictions on how they are represented in UpdateExpression's. + + """ + + def __init__(self, *args, **kwargs): + super(UpdateExpressionPathParser, self).__init__(*args, **kwargs) + self.path_nodes = [] + + @classmethod + def _is_possible_start(cls, token): + """ + Args: + token(Token): the token to be checked + + Returns: + bool: Whether the token could be the start of an UpdateExpressionPath + """ + if token.type == Token.ATTRIBUTE_NAME: + return True + elif token.type == Token.ATTRIBUTE and token.value.upper() != "REMOVE": + """We have to make sure remove is not passed""" + return True + return False + + def _parse(self): + return self.process_path() + + def process_path(self): + self.parse_path() + return UpdateExpressionPath(children=self.path_nodes) + + def parse_path(self): + """ + A path is comprised of: + - Attribute: the name of an attribute as how it is stored which has no special characters + - ATTRIBUTE_NAME: A placeholder that has no special characters except leading # to refer to attributes that + have a name that is not allowed in an UpdateExpression) + - DOT's: These are used to decent in a nested structure. 
When a DOT is in a path expression it is never part + of an attribute name but always means to descent into a MAP. We will call each descend a patch + chain + - SELECTORs: E.g.: [1] These are used to select an element in ordered datatypes like a list. + + Whitespaces can be between all these elements that build a path. For SELECTORs it is also allowed to have + whitespaces between brackets and numbers but the number cannot be split up with spaces + + Attributes and attribute_names must be separated with DOT's. + Returns: + UpdateExpressionPath: + """ + self.parse_path_chain() + while self.is_next_token_start_of_patch_chain(): + self.process_dot() + self.parse_path_chain() + + def is_next_token_start_of_patch_chain(self): + return self.get_next_token_type() == Token.DOT + + def process_dot(self): + self.path_nodes.append(ExpressionPathDescender()) + self.goto_next_significant_token() + + def parse_path_chain(self): + self.process_attribute_identifying_token() + self.skip_white_space() + while self.is_next_token_start_of_selector(): + self.process_selector() + self.skip_white_space() + + def process_attribute_identifying_token(self): + if self.get_next_token_type() == Token.ATTRIBUTE: + self.path_nodes.append(ExpressionAttribute(self.get_next_token_value())) + elif self.get_next_token_type() == Token.ATTRIBUTE_NAME: + self.path_nodes.append(ExpressionAttributeName(self.get_next_token_value())) + else: + self.raise_unexpected_token() + + self.goto_next_significant_token() + + def is_next_token_start_of_selector(self): + return self.get_next_token_type() == Token.OPEN_SQUARE_BRACKET + + def process_selector(self): + """ + Process the selector is only called when a selector must be processed. 
So do the following actions: + - skip opening bracket + - skip optional spaces + - read numeric literal + - skip optional spaces + - pass closing bracket + """ + self.process_token_of_type(Token.OPEN_SQUARE_BRACKET) + selector_value = self.process_token_of_type(Token.NUMBER) + self.process_token_of_type(Token.CLOSE_SQUARE_BRACKET) + self.path_nodes.append(ExpressionSelector(selector_value)) + + +class UpdateExpressionValueParser(NestableBinExpressionParser): + @classmethod + def _is_possible_start(cls, token): + return UpdateExpressionOperandParser.is_possible_start(token) + + def _operand_factory_class(self): + return UpdateExpressionOperandParser + + def _binop_factory_class(self): + return UpdateExpressionValueOperatorParser + + +class UpdateExpressionGroupedValueParser(ExpressionParser): + """ + A grouped value is an Update Expression value clause that is surrounded by round brackets. Each Operand can be + a grouped value by itself. + """ + + def _parse(self): + self.process_token_of_type(Token.OPEN_ROUND_BRACKET) + value, self.token_pos = UpdateExpressionValueParser( + **self._initializer_args() + )._parse_with_pos() + self.process_token_of_type(Token.CLOSE_ROUND_BRACKET) + return UpdateExpressionGroupedValue(children=value) + + @classmethod + def _is_possible_start(cls, token): + return token.type == Token.OPEN_ROUND_BRACKET + + +class UpdateExpressionValueOperatorParser(ExpressionParser): + OPERATION_TOKENS = [Token.PLUS_SIGN, Token.MINUS_SIGN] + + @classmethod + def _is_possible_start(cls, token): + return token.type in cls.OPERATION_TOKENS + + def _parse(self): + operation_value = self.get_next_token_value() + assert operation_value in self.OPERATION_TOKENS + self.goto_next_significant_token() + return ExpressionValueOperator(operation_value) + + +class UpdateExpressionOperandParser(ExpressionParser): + """ + Grammar + Operand* => AttributeValue + Operand* => UpdateExpressionFunction + Operand* => Path + Operand* => GroupedValue + """ + + @classmethod + 
def _sub_factories(cls): + return [ + UpdateExpressionAttributeValueParser, + UpdateExpressionFunctionParser, + UpdateExpressionPathParser, + UpdateExpressionGroupedValueParser, + ] + + @classmethod + def _is_possible_start(cls, token): + return any(parser.is_possible_start(token) for parser in cls._sub_factories()) + + def _parse(self): + for factory in self._sub_factories(): + if factory.is_possible_start(self.get_next_token()): + node, self.token_pos = factory( + **self._initializer_args() + )._parse_with_pos() + return node + self.raise_unexpected_token() + + +class UpdateExpressionAttributeValueParser(ExpressionParser): + def _parse(self): + attr_value = ExpressionAttributeValue( + self.process_token_of_type(Token.ATTRIBUTE_VALUE) + ) + return attr_value + + @classmethod + def _is_possible_start(cls, token): + return token.type == Token.ATTRIBUTE_VALUE + + +class UpdateExpressionFunctionParser(ExpressionParser): + """ + A helper to process a function of an Update Expression + """ + + # TODO(pbbouwel): Function names are supposedly case sensitive according to doc add tests + # Map function to the factories for its elements + FUNCTIONS = { + "if_not_exists": [UpdateExpressionPathParser, UpdateExpressionValueParser], + "list_append": [UpdateExpressionOperandParser, UpdateExpressionOperandParser], + } + + @classmethod + def _is_possible_start(cls, token): + """ + Check whether a token is supposed to be a function + Args: + token(Token): the token to check + + Returns: + bool: True if token is the start of a function. 
+ """ + if token.type == Token.ATTRIBUTE: + return token.value in cls.FUNCTIONS.keys() + else: + return False + + def _parse(self): + function_name = self.get_next_token_value() + self.goto_next_significant_token() + self.process_token_of_type(Token.OPEN_ROUND_BRACKET) + function_elements = [function_name] + function_arguments = self.FUNCTIONS[function_name] + for i, func_elem_factory in enumerate(function_arguments): + func_elem, self.token_pos = func_elem_factory( + **self._initializer_args() + )._parse_with_pos() + function_elements.append(func_elem) + if i + 1 < len(function_arguments): + self.skip_white_space() + self.process_token_of_type(Token.COMMA) + self.process_token_of_type(Token.CLOSE_ROUND_BRACKET) + return UpdateExpressionFunction(children=function_elements) + + +class UpdateExpressionRemoveClauseParser(ExpressionParser): + """ + UpdateExpressionRemoveClause => REMOVE RemoveActions + """ + + def _parse(self): + assert self.is_possible_start(self.get_next_token()) + self.goto_next_significant_token() + ast, self.token_pos = UpdateExpressionRemoveActionsParser( + **self._initializer_args() + )._parse_with_pos() + # noinspection PyProtectedMember + return UpdateExpressionRemoveClause(children=[ast]) + + @classmethod + def _is_possible_start(cls, token): + """REMOVE is not a keyword""" + return token.type == Token.ATTRIBUTE and token.value.upper() == "REMOVE" + + +class UpdateExpressionRemoveActionsParser(UpdateExpressionActionsParser): + """ + UpdateExpressionSetActions + """ + + @classmethod + def _nested_expression_parser_class(cls): + return UpdateExpressionRemoveActionParser + + @classmethod + def _nestable_class(cls): + return UpdateExpressionRemoveActions + + +class UpdateExpressionRemoveActionParser(ExpressionParser): + """ + RemoveAction => Path = Value + + So we create an UpdateExpressionSetAction Node that has 2 children. Left child Path and right child Value. 
+ """ + + @classmethod + def _is_possible_start(cls, token): + return UpdateExpressionPathParser.is_possible_start(token) + + def _parse(self): + """ + UpdateExpressionRemoveActionParser only gets called when expecting a RemoveAction. So we should be aggressive on + raising invalid Tokens. We can thus do the following: + 1) Process path + 2) skip whitespace if there are any + + """ + path, self.token_pos = UpdateExpressionPathParser( + **self._initializer_args() + )._parse_with_pos() + self.skip_white_space() + return UpdateExpressionRemoveAction(children=[path]) + + +class UpdateExpressionAddClauseParser(ExpressionParser): + def _parse(self): + assert self.is_possible_start(self.get_next_token()) + self.goto_next_significant_token() + ast, self.token_pos = UpdateExpressionAddActionsParser( + **self._initializer_args() + )._parse_with_pos() + # noinspection PyProtectedMember + return UpdateExpressionAddClause(children=[ast]) + + @classmethod + def _is_possible_start(cls, token): + return token.type == Token.ATTRIBUTE and token.value.upper() == "ADD" + + +class UpdateExpressionAddActionsParser(UpdateExpressionActionsParser): + """ + UpdateExpressionSetActions + """ + + @classmethod + def _nested_expression_parser_class(cls): + return UpdateExpressionAddActionParser + + @classmethod + def _nestable_class(cls): + return UpdateExpressionAddActions + + +@six.add_metaclass(abc.ABCMeta) +class UpdateExpressionPathValueParser(ExpressionParser): + def _parse_path_and_value(self): + """ + UpdateExpressionAddActionParser only gets called when expecting an AddAction. So we should be aggressive on + raising invalid Tokens. 
We can thus do the following: + 1) Process path + 2) skip whitespace if there are any + 3) Process a value + 4) skip whitespace if there are any + + Returns: + [path, value]: A list containing the Path node and the AttributeValue nodes + """ + path, self.token_pos = UpdateExpressionPathParser( + **self._initializer_args() + )._parse_with_pos() + self.skip_white_space() + value, self.token_pos = UpdateExpressionAttributeValueParser( + **self._initializer_args() + )._parse_with_pos() + self.skip_white_space() + return [path, value] + + +class UpdateExpressionAddActionParser(UpdateExpressionPathValueParser): + @classmethod + def _is_possible_start(cls, token): + return UpdateExpressionPathParser.is_possible_start(token) + + def _parse(self): + return UpdateExpressionAddAction(children=self._parse_path_and_value()) + + +class UpdateExpressionDeleteClauseParser(ExpressionParser): + def _parse(self): + assert self.is_possible_start(self.get_next_token()) + self.goto_next_significant_token() + ast, self.token_pos = UpdateExpressionDeleteActionsParser( + **self._initializer_args() + )._parse_with_pos() + # noinspection PyProtectedMember + return UpdateExpressionDeleteClause(children=[ast]) + + @classmethod + def _is_possible_start(cls, token): + return token.type == Token.ATTRIBUTE and token.value.upper() == "DELETE" + + +class UpdateExpressionDeleteActionsParser(UpdateExpressionActionsParser): + """ + UpdateExpressionSetActions + """ + + @classmethod + def _nested_expression_parser_class(cls): + return UpdateExpressionDeleteActionParser + + @classmethod + def _nestable_class(cls): + return UpdateExpressionDeleteActions + + +class UpdateExpressionDeleteActionParser(UpdateExpressionPathValueParser): + @classmethod + def _is_possible_start(cls, token): + return UpdateExpressionPathParser.is_possible_start(token) + + def _parse(self): + return UpdateExpressionDeleteAction(children=self._parse_path_and_value()) diff --git a/moto/dynamodb2/parsing/reserved_keywords.py 
b/moto/dynamodb2/parsing/reserved_keywords.py new file mode 100644 index 000000000..d82b16e98 --- /dev/null +++ b/moto/dynamodb2/parsing/reserved_keywords.py @@ -0,0 +1,29 @@ +class ReservedKeywords(list): + """ + DynamoDB has an extensive list of keywords. Keywords are considered when validating the expression Tree. + Not earlier since an update expression like "SET path = VALUE 1" fails with: + 'Invalid UpdateExpression: Syntax error; token: "1", near: "VALUE 1"' + """ + + KEYWORDS = None + + @classmethod + def get_reserved_keywords(cls): + if cls.KEYWORDS is None: + cls.KEYWORDS = cls._get_reserved_keywords() + return cls.KEYWORDS + + @classmethod + def _get_reserved_keywords(cls): + """ + Get a list of reserved keywords of DynamoDB + """ + try: + import importlib.resources as pkg_resources + except ImportError: + import importlib_resources as pkg_resources + + reserved_keywords = pkg_resources.read_text( + "moto.dynamodb2.parsing", "reserved_keywords.txt" + ) + return reserved_keywords.split() diff --git a/moto/dynamodb2/parsing/reserved_keywords.txt b/moto/dynamodb2/parsing/reserved_keywords.txt new file mode 100644 index 000000000..7c0106127 --- /dev/null +++ b/moto/dynamodb2/parsing/reserved_keywords.txt @@ -0,0 +1,573 @@ +ABORT +ABSOLUTE +ACTION +ADD +AFTER +AGENT +AGGREGATE +ALL +ALLOCATE +ALTER +ANALYZE +AND +ANY +ARCHIVE +ARE +ARRAY +AS +ASC +ASCII +ASENSITIVE +ASSERTION +ASYMMETRIC +AT +ATOMIC +ATTACH +ATTRIBUTE +AUTH +AUTHORIZATION +AUTHORIZE +AUTO +AVG +BACK +BACKUP +BASE +BATCH +BEFORE +BEGIN +BETWEEN +BIGINT +BINARY +BIT +BLOB +BLOCK +BOOLEAN +BOTH +BREADTH +BUCKET +BULK +BY +BYTE +CALL +CALLED +CALLING +CAPACITY +CASCADE +CASCADED +CASE +CAST +CATALOG +CHAR +CHARACTER +CHECK +CLASS +CLOB +CLOSE +CLUSTER +CLUSTERED +CLUSTERING +CLUSTERS +COALESCE +COLLATE +COLLATION +COLLECTION +COLUMN +COLUMNS +COMBINE +COMMENT +COMMIT +COMPACT +COMPILE +COMPRESS +CONDITION +CONFLICT +CONNECT +CONNECTION +CONSISTENCY +CONSISTENT +CONSTRAINT +CONSTRAINTS 
+CONSTRUCTOR +CONSUMED +CONTINUE +CONVERT +COPY +CORRESPONDING +COUNT +COUNTER +CREATE +CROSS +CUBE +CURRENT +CURSOR +CYCLE +DATA +DATABASE +DATE +DATETIME +DAY +DEALLOCATE +DEC +DECIMAL +DECLARE +DEFAULT +DEFERRABLE +DEFERRED +DEFINE +DEFINED +DEFINITION +DELETE +DELIMITED +DEPTH +DEREF +DESC +DESCRIBE +DESCRIPTOR +DETACH +DETERMINISTIC +DIAGNOSTICS +DIRECTORIES +DISABLE +DISCONNECT +DISTINCT +DISTRIBUTE +DO +DOMAIN +DOUBLE +DROP +DUMP +DURATION +DYNAMIC +EACH +ELEMENT +ELSE +ELSEIF +EMPTY +ENABLE +END +EQUAL +EQUALS +ERROR +ESCAPE +ESCAPED +EVAL +EVALUATE +EXCEEDED +EXCEPT +EXCEPTION +EXCEPTIONS +EXCLUSIVE +EXEC +EXECUTE +EXISTS +EXIT +EXPLAIN +EXPLODE +EXPORT +EXPRESSION +EXTENDED +EXTERNAL +EXTRACT +FAIL +FALSE +FAMILY +FETCH +FIELDS +FILE +FILTER +FILTERING +FINAL +FINISH +FIRST +FIXED +FLATTERN +FLOAT +FOR +FORCE +FOREIGN +FORMAT +FORWARD +FOUND +FREE +FROM +FULL +FUNCTION +FUNCTIONS +GENERAL +GENERATE +GET +GLOB +GLOBAL +GO +GOTO +GRANT +GREATER +GROUP +GROUPING +HANDLER +HASH +HAVE +HAVING +HEAP +HIDDEN +HOLD +HOUR +IDENTIFIED +IDENTITY +IF +IGNORE +IMMEDIATE +IMPORT +IN +INCLUDING +INCLUSIVE +INCREMENT +INCREMENTAL +INDEX +INDEXED +INDEXES +INDICATOR +INFINITE +INITIALLY +INLINE +INNER +INNTER +INOUT +INPUT +INSENSITIVE +INSERT +INSTEAD +INT +INTEGER +INTERSECT +INTERVAL +INTO +INVALIDATE +IS +ISOLATION +ITEM +ITEMS +ITERATE +JOIN +KEY +KEYS +LAG +LANGUAGE +LARGE +LAST +LATERAL +LEAD +LEADING +LEAVE +LEFT +LENGTH +LESS +LEVEL +LIKE +LIMIT +LIMITED +LINES +LIST +LOAD +LOCAL +LOCALTIME +LOCALTIMESTAMP +LOCATION +LOCATOR +LOCK +LOCKS +LOG +LOGED +LONG +LOOP +LOWER +MAP +MATCH +MATERIALIZED +MAX +MAXLEN +MEMBER +MERGE +METHOD +METRICS +MIN +MINUS +MINUTE +MISSING +MOD +MODE +MODIFIES +MODIFY +MODULE +MONTH +MULTI +MULTISET +NAME +NAMES +NATIONAL +NATURAL +NCHAR +NCLOB +NEW +NEXT +NO +NONE +NOT +NULL +NULLIF +NUMBER +NUMERIC +OBJECT +OF +OFFLINE +OFFSET +OLD +ON +ONLINE +ONLY +OPAQUE +OPEN +OPERATOR +OPTION +OR +ORDER +ORDINALITY +OTHER +OTHERS +OUT +OUTER 
+OUTPUT +OVER +OVERLAPS +OVERRIDE +OWNER +PAD +PARALLEL +PARAMETER +PARAMETERS +PARTIAL +PARTITION +PARTITIONED +PARTITIONS +PATH +PERCENT +PERCENTILE +PERMISSION +PERMISSIONS +PIPE +PIPELINED +PLAN +POOL +POSITION +PRECISION +PREPARE +PRESERVE +PRIMARY +PRIOR +PRIVATE +PRIVILEGES +PROCEDURE +PROCESSED +PROJECT +PROJECTION +PROPERTY +PROVISIONING +PUBLIC +PUT +QUERY +QUIT +QUORUM +RAISE +RANDOM +RANGE +RANK +RAW +READ +READS +REAL +REBUILD +RECORD +RECURSIVE +REDUCE +REF +REFERENCE +REFERENCES +REFERENCING +REGEXP +REGION +REINDEX +RELATIVE +RELEASE +REMAINDER +RENAME +REPEAT +REPLACE +REQUEST +RESET +RESIGNAL +RESOURCE +RESPONSE +RESTORE +RESTRICT +RESULT +RETURN +RETURNING +RETURNS +REVERSE +REVOKE +RIGHT +ROLE +ROLES +ROLLBACK +ROLLUP +ROUTINE +ROW +ROWS +RULE +RULES +SAMPLE +SATISFIES +SAVE +SAVEPOINT +SCAN +SCHEMA +SCOPE +SCROLL +SEARCH +SECOND +SECTION +SEGMENT +SEGMENTS +SELECT +SELF +SEMI +SENSITIVE +SEPARATE +SEQUENCE +SERIALIZABLE +SESSION +SET +SETS +SHARD +SHARE +SHARED +SHORT +SHOW +SIGNAL +SIMILAR +SIZE +SKEWED +SMALLINT +SNAPSHOT +SOME +SOURCE +SPACE +SPACES +SPARSE +SPECIFIC +SPECIFICTYPE +SPLIT +SQL +SQLCODE +SQLERROR +SQLEXCEPTION +SQLSTATE +SQLWARNING +START +STATE +STATIC +STATUS +STORAGE +STORE +STORED +STREAM +STRING +STRUCT +STYLE +SUB +SUBMULTISET +SUBPARTITION +SUBSTRING +SUBTYPE +SUM +SUPER +SYMMETRIC +SYNONYM +SYSTEM +TABLE +TABLESAMPLE +TEMP +TEMPORARY +TERMINATED +TEXT +THAN +THEN +THROUGHPUT +TIME +TIMESTAMP +TIMEZONE +TINYINT +TO +TOKEN +TOTAL +TOUCH +TRAILING +TRANSACTION +TRANSFORM +TRANSLATE +TRANSLATION +TREAT +TRIGGER +TRIM +TRUE +TRUNCATE +TTL +TUPLE +TYPE +UNDER +UNDO +UNION +UNIQUE +UNIT +UNKNOWN +UNLOGGED +UNNEST +UNPROCESSED +UNSIGNED +UNTIL +UPDATE +UPPER +URL +USAGE +USE +USER +USERS +USING +UUID +VACUUM +VALUE +VALUED +VALUES +VARCHAR +VARIABLE +VARIANCE +VARINT +VARYING +VIEW +VIEWS +VIRTUAL +VOID +WAIT +WHEN +WHENEVER +WHERE +WHILE +WINDOW +WITH +WITHIN +WITHOUT +WORK +WRAPPED +WRITE +YEAR +ZONE diff --git 
a/moto/dynamodb2/parsing/tokens.py b/moto/dynamodb2/parsing/tokens.py index 07d65ae64..4fbb7883a 100644 --- a/moto/dynamodb2/parsing/tokens.py +++ b/moto/dynamodb2/parsing/tokens.py @@ -1,4 +1,5 @@ import re +import sys from moto.dynamodb2.exceptions import ( InvalidTokenException, @@ -147,9 +148,17 @@ class ExpressionTokenizer(object): self.token_list = [] self.staged_characters = "" + @classmethod + def is_py2(cls): + return sys.version_info[0] == 2 + @classmethod def make_list(cls, input_expression_str): - assert isinstance(input_expression_str, str) + if cls.is_py2(): + pass + else: + assert isinstance(input_expression_str, str) + return ExpressionTokenizer(input_expression_str)._make_list() def add_token(self, token_type, token_value): @@ -159,6 +168,10 @@ class ExpressionTokenizer(object): self.add_token(token_type, self.staged_characters) self.staged_characters = "" + @classmethod + def is_numeric(cls, input_str): + return re.compile("[0-9]+").match(input_str) is not None + def process_staged_characters(self): if len(self.staged_characters) == 0: return @@ -167,7 +180,7 @@ class ExpressionTokenizer(object): self.add_token_from_stage(Token.ATTRIBUTE_NAME) else: raise InvalidExpressionAttributeNameKey(self.staged_characters) - elif self.staged_characters.isnumeric(): + elif self.is_numeric(self.staged_characters): self.add_token_from_stage(Token.NUMBER) elif self.is_expression_attribute(self.staged_characters): self.add_token_from_stage(Token.ATTRIBUTE) diff --git a/moto/dynamodb2/responses.py b/moto/dynamodb2/responses.py index d21d1d756..a5aeeac70 100644 --- a/moto/dynamodb2/responses.py +++ b/moto/dynamodb2/responses.py @@ -748,11 +748,6 @@ class DynamoHandler(BaseResponse): expression_attribute_names = self.body.get("ExpressionAttributeNames", {}) expression_attribute_values = self.body.get("ExpressionAttributeValues", {}) - # Support spaces between operators in an update expression - # E.g. 
`a = b + c` -> `a=b+c` - if update_expression: - update_expression = re.sub(r"\s*([=\+-])\s*", "\\1", update_expression) - try: item = self.dynamodb_backend.update_item( name, diff --git a/tests/test_dynamodb2/test_dynamodb.py b/tests/test_dynamodb2/test_dynamodb.py index bec24c966..09401d562 100644 --- a/tests/test_dynamodb2/test_dynamodb.py +++ b/tests/test_dynamodb2/test_dynamodb.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals, print_function +import re from decimal import Decimal import six @@ -4177,3 +4178,70 @@ def test_gsi_verify_negative_number_order(): [float(item["gsiK1SortKey"]) for item in resp["Items"]].should.equal( [-0.7, -0.6, 0.7] ) + + +def assert_raise_syntax_error(client_error, token, near): + """ + Assert whether a client_error is as expected Syntax error. Syntax error looks like: `syntax_error_template` + + Args: + client_error(ClientError): The ClientError exception that was raised + token(str): The token that ws unexpected + near(str): The part in the expression that shows where the error occurs it generally has the preceding token the + optional separation and the problematic token. + """ + syntax_error_template = ( + 'Invalid UpdateExpression: Syntax error; token: "{token}", near: "{near}"' + ) + expected_syntax_error = syntax_error_template.format(token=token, near=near) + assert client_error.response["Error"]["Code"] == "ValidationException" + assert expected_syntax_error == client_error.response["Error"]["Message"] + + +@mock_dynamodb2 +def test_update_expression_with_numeric_literal_instead_of_value(): + """ + DynamoDB requires literals to be passed in as values. 
If they are put literally in the expression a token error will + be raised + """ + dynamodb = boto3.client("dynamodb", region_name="eu-west-1") + + dynamodb.create_table( + TableName="moto-test", + KeySchema=[{"AttributeName": "id", "KeyType": "HASH"}], + AttributeDefinitions=[{"AttributeName": "id", "AttributeType": "S"}], + ) + + try: + dynamodb.update_item( + TableName="moto-test", + Key={"id": {"S": "1"}}, + UpdateExpression="SET MyStr = myNum + 1", + ) + assert False, "Validation exception not thrown" + except dynamodb.exceptions.ClientError as e: + assert_raise_syntax_error(e, "1", "+ 1") + + +@mock_dynamodb2 +def test_update_expression_with_multiple_set_clauses_must_be_comma_separated(): + """ + An UpdateExpression can have multiple set clauses but if they are passed in without the separating comma. + """ + dynamodb = boto3.client("dynamodb", region_name="eu-west-1") + + dynamodb.create_table( + TableName="moto-test", + KeySchema=[{"AttributeName": "id", "KeyType": "HASH"}], + AttributeDefinitions=[{"AttributeName": "id", "AttributeType": "S"}], + ) + + try: + dynamodb.update_item( + TableName="moto-test", + Key={"id": {"S": "1"}}, + UpdateExpression="SET MyStr = myNum Mystr2 myNum2", + ) + assert False, "Validation exception not thrown" + except dynamodb.exceptions.ClientError as e: + assert_raise_syntax_error(e, "Mystr2", "myNum Mystr2 myNum2") diff --git a/tests/test_dynamodb2/test_dynamodb_expressions.py b/tests/test_dynamodb2/test_dynamodb_expressions.py new file mode 100644 index 000000000..1066231af --- /dev/null +++ b/tests/test_dynamodb2/test_dynamodb_expressions.py @@ -0,0 +1,395 @@ +from moto.dynamodb2.exceptions import InvalidTokenException +from moto.dynamodb2.parsing.expressions import UpdateExpressionParser +from moto.dynamodb2.parsing.reserved_keywords import ReservedKeywords + + +def test_get_reserved_keywords(): + reserved_keywords = ReservedKeywords.get_reserved_keywords() + assert "SET" in reserved_keywords + assert "DELETE" in 
reserved_keywords + assert "ADD" in reserved_keywords + # REMOVE is not part of the list of reserved keywords. + assert "REMOVE" not in reserved_keywords + + +def test_update_expression_numeric_literal_in_expression(): + set_action = "SET attrName = 3" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "3" + assert te.near == "= 3" + + +def test_expression_tokenizer_multi_number_numeric_literal_in_expression(): + set_action = "SET attrName = 34" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "34" + assert te.near == "= 34" + + +def test_expression_tokenizer_numeric_literal_unclosed_square_bracket(): + set_action = "SET MyStr[ 3" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "" + assert te.near == "3" + + +def test_expression_tokenizer_wrong_closing_bracket_with_space(): + set_action = "SET MyStr[3 )" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == ")" + assert te.near == "3 )" + + +def test_expression_tokenizer_wrong_closing_bracket(): + set_action = "SET MyStr[3)" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == ")" + assert te.near == "3)" + + +def test_expression_tokenizer_only_numeric_literal_for_set(): + set_action = "SET 2" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "2" + assert te.near == "SET 2" + + +def test_expression_tokenizer_only_numeric_literal(): + set_action = "2" + try: + 
UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "2" + assert te.near == "2" + + +def test_expression_tokenizer_set_closing_round_bracket(): + set_action = "SET )" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == ")" + assert te.near == "SET )" + + +def test_expression_tokenizer_set_closing_followed_by_numeric_literal(): + set_action = "SET ) 3" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == ")" + assert te.near == "SET ) 3" + + +def test_expression_tokenizer_numeric_literal_unclosed_square_bracket_trailing_space(): + set_action = "SET MyStr[ 3 " + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "" + assert te.near == "3 " + + +def test_expression_tokenizer_unbalanced_round_brackets_only_opening(): + set_action = "SET MyStr = (:_val" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "" + assert te.near == ":_val" + + +def test_expression_tokenizer_unbalanced_round_brackets_only_opening_trailing_space(): + set_action = "SET MyStr = (:_val " + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "" + assert te.near == ":_val " + + +def test_expression_tokenizer_unbalanced_square_brackets_only_opening(): + set_action = "SET MyStr = [:_val" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "[" + assert te.near == "= [:_val" + + +def 
test_expression_tokenizer_unbalanced_square_brackets_only_opening_trailing_spaces(): + set_action = "SET MyStr = [:_val " + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "[" + assert te.near == "= [:_val" + + +def test_expression_tokenizer_unbalanced_round_brackets_multiple_opening(): + set_action = "SET MyStr = (:_val + (:val2" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "" + assert te.near == ":val2" + + +def test_expression_tokenizer_unbalanced_round_brackets_only_closing(): + set_action = "SET MyStr = ):_val" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == ")" + assert te.near == "= ):_val" + + +def test_expression_tokenizer_unbalanced_square_brackets_only_closing(): + set_action = "SET MyStr = ]:_val" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "]" + assert te.near == "= ]:_val" + + +def test_expression_tokenizer_unbalanced_round_brackets_only_closing_followed_by_other_parts(): + set_action = "SET MyStr = ):_val + :val2" + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == ")" + assert te.near == "= ):_val" + + +def test_update_expression_starts_with_keyword_reset_followed_by_identifier(): + update_expression = "RESET NonExistent" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "RESET" + assert te.near == "RESET NonExistent" + + +def 
test_update_expression_starts_with_keyword_reset_followed_by_identifier_and_value(): + update_expression = "RESET NonExistent value" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "RESET" + assert te.near == "RESET NonExistent" + + +def test_update_expression_starts_with_leading_spaces_and_keyword_reset_followed_by_identifier_and_value(): + update_expression = " RESET NonExistent value" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "RESET" + assert te.near == " RESET NonExistent" + + +def test_update_expression_with_only_keyword_reset(): + update_expression = "RESET" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "RESET" + assert te.near == "RESET" + + +def test_update_nested_expression_with_selector_just_should_fail_parsing_at_numeric_literal_value(): + update_expression = "SET a[0].b = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "5" + assert te.near == "= 5" + + +def test_update_nested_expression_with_selector_and_spaces_should_only_fail_parsing_at_numeric_literal_value(): + update_expression = "SET a [ 2 ]. b = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "5" + assert te.near == "= 5" + + +def test_update_nested_expression_with_double_selector_and_spaces_should_only_fail_parsing_at_numeric_literal_value(): + update_expression = "SET a [2][ 3 ]. 
b = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "5" + assert te.near == "= 5" + + +def test_update_nested_expression_should_only_fail_parsing_at_numeric_literal_value(): + update_expression = "SET a . b = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "5" + assert te.near == "= 5" + + +def test_nested_selectors_in_update_expression_should_fail_at_nesting(): + update_expression = "SET a [ [2] ]. b = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "[" + assert te.near == "[ [2" + + +def test_update_expression_number_in_selector_cannot_be_splite(): + update_expression = "SET a [2 1]. b = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "1" + assert te.near == "2 1]" + + +def test_update_expression_cannot_have_successive_attributes(): + update_expression = "SET #a a = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "a" + assert te.near == "#a a =" + + +def test_update_expression_path_with_both_attribute_and_attribute_name_should_only_fail_at_numeric_value(): + update_expression = "SET #a.a = 5" + try: + UpdateExpressionParser.make(update_expression) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "5" + assert te.near == "= 5" + + +def test_expression_tokenizer_2_same_operators_back_to_back(): + set_action = "SET MyStr = NoExist + + :_val " + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not 
raised correctly" + except InvalidTokenException as te: + assert te.token == "+" + assert te.near == "+ + :_val" + + +def test_expression_tokenizer_2_different_operators_back_to_back(): + set_action = "SET MyStr = NoExist + - :_val " + try: + UpdateExpressionParser.make(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "-" + assert te.near == "+ - :_val" + + +def test_update_expression_remove_does_not_allow_operations(): + remove_action = "REMOVE NoExist + " + try: + UpdateExpressionParser.make(remove_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "+" + assert te.near == "NoExist + " + + +def test_update_expression_add_does_not_allow_attribute_after_path(): + """value here is not really a value since a value starts with a colon (:)""" + add_expr = "ADD attr val foobar" + try: + UpdateExpressionParser.make(add_expr) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "val" + assert te.near == "attr val foobar" + + +def test_update_expression_add_does_not_allow_attribute_foobar_after_value(): + add_expr = "ADD attr :val foobar" + try: + UpdateExpressionParser.make(add_expr) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "foobar" + assert te.near == ":val foobar" + + +def test_update_expression_delete_does_not_allow_attribute_after_path(): + """value here is not really a value since a value starts with a colon (:)""" + delete_expr = "DELETE attr val" + try: + UpdateExpressionParser.make(delete_expr) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "val" + assert te.near == "attr val" + + +def test_update_expression_delete_does_not_allow_attribute_foobar_after_value(): + delete_expr = "DELETE attr :val foobar" + try: + UpdateExpressionParser.make(delete_expr) 
+ assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "foobar" + assert te.near == ":val foobar" + + +def test_update_expression_parsing_is_not_keyword_aware(): + """path and VALUE are keywords. Yet a token error will be thrown for the numeric literal 1.""" + delete_expr = "SET path = VALUE 1" + try: + UpdateExpressionParser.make(delete_expr) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "1" + assert te.near == "VALUE 1" diff --git a/tests/test_dynamodb2/test_dynamodb_table_with_range_key.py b/tests/test_dynamodb2/test_dynamodb_table_with_range_key.py index c433a3a31..1aa2175c1 100644 --- a/tests/test_dynamodb2/test_dynamodb_table_with_range_key.py +++ b/tests/test_dynamodb2/test_dynamodb_table_with_range_key.py @@ -1254,14 +1254,22 @@ def test_update_item_with_expression(): item_key = {"forum_name": "the-key", "subject": "123"} - table.update_item(Key=item_key, UpdateExpression="SET field=2") + table.update_item( + Key=item_key, + UpdateExpression="SET field = :field_value", + ExpressionAttributeValues={":field_value": 2}, + ) dict(table.get_item(Key=item_key)["Item"]).should.equal( - {"field": "2", "forum_name": "the-key", "subject": "123"} + {"field": Decimal("2"), "forum_name": "the-key", "subject": "123"} ) - table.update_item(Key=item_key, UpdateExpression="SET field = 3") + table.update_item( + Key=item_key, + UpdateExpression="SET field = :field_value", + ExpressionAttributeValues={":field_value": 3}, + ) dict(table.get_item(Key=item_key)["Item"]).should.equal( - {"field": "3", "forum_name": "the-key", "subject": "123"} + {"field": Decimal("3"), "forum_name": "the-key", "subject": "123"} )