diff --git a/moto/dynamodb2/exceptions.py b/moto/dynamodb2/exceptions.py
index 1f3b5f974..4c5dfd447 100644
--- a/moto/dynamodb2/exceptions.py
+++ b/moto/dynamodb2/exceptions.py
@@ -2,9 +2,59 @@ class InvalidIndexNameError(ValueError):
     pass
 
 
-class InvalidUpdateExpression(ValueError):
-    pass
+class MockValidationException(ValueError):
+    def __init__(self, message):
+        self.exception_msg = message
 
 
-class ItemSizeTooLarge(Exception):
-    message = "Item size has exceeded the maximum allowed size"
+class InvalidUpdateExpression(MockValidationException):
+    invalid_update_expression_msg = (
+        "The document path provided in the update expression is invalid for update"
+    )
+
+    def __init__(self):
+        super(InvalidUpdateExpression, self).__init__(
+            self.invalid_update_expression_msg
+        )
+
+
+class UpdateExprSyntaxError(MockValidationException):
+    update_expr_syntax_error_msg = (
+        "Invalid UpdateExpression: Syntax error; {error_detail}"
+    )
+
+    def __init__(self, error_detail):
+        self.error_detail = error_detail
+        super(UpdateExprSyntaxError, self).__init__(
+            self.update_expr_syntax_error_msg.format(error_detail=error_detail)
+        )
+
+
+class InvalidTokenException(UpdateExprSyntaxError):
+    token_detail_msg = 'token: "{token}", near: "{near}"'
+
+    def __init__(self, token, near):
+        self.token = token
+        self.near = near
+        super(InvalidTokenException, self).__init__(
+            self.token_detail_msg.format(token=token, near=near)
+        )
+
+
+class InvalidExpressionAttributeNameKey(MockValidationException):
+    invalid_expr_attr_name_msg = (
+        'ExpressionAttributeNames contains invalid key: Syntax error; key: "{key}"'
+    )
+
+    def __init__(self, key):
+        self.key = key
+        super(InvalidExpressionAttributeNameKey, self).__init__(
+            self.invalid_expr_attr_name_msg.format(key=key)
+        )
+
+
+class ItemSizeTooLarge(MockValidationException):
+    item_size_too_large_msg = "Item size has exceeded the maximum allowed size"
+
+    def __init__(self):
+        super(ItemSizeTooLarge, self).__init__(self.item_size_too_large_msg)
diff --git a/moto/dynamodb2/parsing/__init__.py b/moto/dynamodb2/parsing/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/moto/dynamodb2/parsing/tokens.py b/moto/dynamodb2/parsing/tokens.py
new file mode 100644
index 000000000..07d65ae64
--- /dev/null
+++ b/moto/dynamodb2/parsing/tokens.py
@@ -0,0 +1,210 @@
+import re
+
+from moto.dynamodb2.exceptions import (
+    InvalidTokenException,
+    InvalidExpressionAttributeNameKey,
+)
+
+
+class Token(object):
+    _TOKEN_INSTANCE = None
+    MINUS_SIGN = "-"
+    PLUS_SIGN = "+"
+    SPACE_SIGN = " "
+    EQUAL_SIGN = "="
+    OPEN_ROUND_BRACKET = "("
+    CLOSE_ROUND_BRACKET = ")"
+    COMMA = ","
+    SPACE = " "
+    DOT = "."
+    OPEN_SQUARE_BRACKET = "["
+    CLOSE_SQUARE_BRACKET = "]"
+
+    SPECIAL_CHARACTERS = [
+        MINUS_SIGN,
+        PLUS_SIGN,
+        SPACE_SIGN,
+        EQUAL_SIGN,
+        OPEN_ROUND_BRACKET,
+        CLOSE_ROUND_BRACKET,
+        COMMA,
+        SPACE,
+        DOT,
+        OPEN_SQUARE_BRACKET,
+        CLOSE_SQUARE_BRACKET,
+    ]
+
+    # Attribute: an identifier that is an attribute
+    ATTRIBUTE = 0
+    # Placeholder for an attribute name
+    ATTRIBUTE_NAME = 1
+    # Placeholder for an attribute value; starts with :
+    ATTRIBUTE_VALUE = 2
+    # Whitespace shall be grouped together
+    WHITESPACE = 3
+    # Placeholder for a number
+    NUMBER = 4
+
+    PLACEHOLDER_NAMES = {
+        ATTRIBUTE: "Attribute",
+        ATTRIBUTE_NAME: "AttributeName",
+        ATTRIBUTE_VALUE: "AttributeValue",
+        WHITESPACE: "Whitespace",
+        NUMBER: "Number",
+    }
+
+    def __init__(self, token_type, value):
+        assert (
+            token_type in self.SPECIAL_CHARACTERS
+            or token_type in self.PLACEHOLDER_NAMES
+        )
+        self.type = token_type
+        self.value = value
+
+    def __repr__(self):
+        if isinstance(self.type, int):
+            return 'Token("{tt}", "{tv}")'.format(
+                tt=self.PLACEHOLDER_NAMES[self.type], tv=self.value
+            )
+        else:
+            return 'Token("{tt}", "{tv}")'.format(tt=self.type, tv=self.value)
+
+    def __eq__(self, other):
+        return self.type == other.type and self.value == other.value
+
+
+class ExpressionTokenizer(object):
+    """
+    Takes a string and returns a list of tokens. While attribute names in DynamoDB must be between 1 and 255 characters
+    long, there are no other restrictions on attribute names. For expressions, however, there are additional rules. If
+    an attribute name does not adhere to those rules it must be passed via an ExpressionAttributeName. This tokenizer
+    is aware of the rules for expression attributes.
+
+    We consider a Token to be a tuple which has the tokenType.
+
+    From https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Expressions.ExpressionAttributeNames.html
+    1) If an attribute name begins with a number or contains a space, a special character, or a reserved word, you
+       must use an expression attribute name to replace that attribute's name in the expression.
+       => So spaces, +, - and other special characters do identify tokens in update expressions.
+
+    2) When using a dot (.) in an attribute name you must use expression attribute names. A dot in an expression
+       is interpreted as a separator in a document path.
+
+    3) For a nested structure, if you want to use expression attribute names you must specify one per part of the
+       path, since for members of expression_attribute_names the . is part of the name.
+    """
+
+    @classmethod
+    def is_simple_token_character(cls, character):
+        return character.isalnum() or character in ("_", ":", "#")
+
+    @classmethod
+    def is_possible_token_boundary(cls, character):
+        return (
+            character in Token.SPECIAL_CHARACTERS
+            or not cls.is_simple_token_character(character)
+        )
+
+    @classmethod
+    def is_expression_attribute(cls, input_string):
+        return re.compile("^[a-zA-Z][a-zA-Z0-9_]*$").match(input_string) is not None
+
+    @classmethod
+    def is_expression_attribute_name(cls, input_string):
+        """
+        https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Expressions.ExpressionAttributeNames.html
+        An expression attribute name must begin with a pound sign (#), and be followed by one or more alphanumeric
+        characters.
+ """ + return input_string.startswith("#") and cls.is_expression_attribute( + input_string[1:] + ) + + @classmethod + def is_expression_attribute_value(cls, input_string): + return re.compile("^:[a-zA-Z0-9_]*$").match(input_string) is not None + + def raise_unexpected_token(self): + """If during parsing an unexpected token is encountered""" + if len(self.token_list) == 0: + near = "" + else: + if len(self.token_list) == 1: + near = self.token_list[-1].value + else: + if self.token_list[-1].type == Token.WHITESPACE: + # Last token was whitespace take 2nd last token value as well to help User orientate + near = self.token_list[-2].value + self.token_list[-1].value + else: + near = self.token_list[-1].value + + problematic_token = self.staged_characters[0] + raise InvalidTokenException(problematic_token, near + self.staged_characters) + + def __init__(self, input_expression_str): + self.input_expression_str = input_expression_str + self.token_list = [] + self.staged_characters = "" + + @classmethod + def make_list(cls, input_expression_str): + assert isinstance(input_expression_str, str) + return ExpressionTokenizer(input_expression_str)._make_list() + + def add_token(self, token_type, token_value): + self.token_list.append(Token(token_type, token_value)) + + def add_token_from_stage(self, token_type): + self.add_token(token_type, self.staged_characters) + self.staged_characters = "" + + def process_staged_characters(self): + if len(self.staged_characters) == 0: + return + if self.staged_characters.startswith("#"): + if self.is_expression_attribute_name(self.staged_characters): + self.add_token_from_stage(Token.ATTRIBUTE_NAME) + else: + raise InvalidExpressionAttributeNameKey(self.staged_characters) + elif self.staged_characters.isnumeric(): + self.add_token_from_stage(Token.NUMBER) + elif self.is_expression_attribute(self.staged_characters): + self.add_token_from_stage(Token.ATTRIBUTE) + elif self.is_expression_attribute_value(self.staged_characters): + self.add_token_from_stage(Token.ATTRIBUTE_VALUE) + else: + self.raise_unexpected_token() + + def _make_list(self): + """ + Just go through characters if a character is not a token boundary stage it for adding it as a grouped token + later if it is a tokenboundary process staged characters and then process the token boundary as well. 
+ """ + for character in self.input_expression_str: + if not self.is_possible_token_boundary(character): + self.staged_characters += character + else: + self.process_staged_characters() + + if character == Token.SPACE: + if ( + len(self.token_list) > 0 + and self.token_list[-1].type == Token.WHITESPACE + ): + self.token_list[-1].value = ( + self.token_list[-1].value + character + ) + else: + self.add_token(Token.WHITESPACE, character) + elif character in Token.SPECIAL_CHARACTERS: + self.add_token(character, character) + elif not self.is_simple_token_character(character): + self.staged_characters += character + self.raise_unexpected_token() + else: + raise NotImplementedError( + "Encountered character which was not implemented : " + character + ) + self.process_staged_characters() + return self.token_list diff --git a/moto/dynamodb2/responses.py b/moto/dynamodb2/responses.py index 65484aa08..d21d1d756 100644 --- a/moto/dynamodb2/responses.py +++ b/moto/dynamodb2/responses.py @@ -9,7 +9,7 @@ import six from moto.core.responses import BaseResponse from moto.core.utils import camelcase_to_underscores, amzn_request_id -from .exceptions import InvalidIndexNameError, InvalidUpdateExpression, ItemSizeTooLarge +from .exceptions import InvalidIndexNameError, InvalidUpdateExpression, ItemSizeTooLarge, MockValidationException from moto.dynamodb2.models import dynamodb_backends, dynamo_json_dump @@ -298,7 +298,7 @@ class DynamoHandler(BaseResponse): ) except ItemSizeTooLarge: er = "com.amazonaws.dynamodb.v20111205#ValidationException" - return self.error(er, ItemSizeTooLarge.message) + return self.error(er, ItemSizeTooLarge.item_size_too_large_msg) except KeyError as ke: er = "com.amazonaws.dynamodb.v20111205#ValidationException" return self.error(er, ke.args[0]) @@ -764,15 +764,9 @@ class DynamoHandler(BaseResponse): expected, condition_expression, ) - except InvalidUpdateExpression: + except MockValidationException as mve: er = "com.amazonaws.dynamodb.v20111205#ValidationException" - return self.error( - er, - "The document path provided in the update expression is invalid for update", - ) - except ItemSizeTooLarge: - er = "com.amazonaws.dynamodb.v20111205#ValidationException" - return self.error(er, ItemSizeTooLarge.message) + return self.error(er, mve.exception_msg) except ValueError: er = "com.amazonaws.dynamodb.v20111205#ConditionalCheckFailedException" return self.error( diff --git a/tests/test_dynamodb2/test_dynamodb_expression_tokenizer.py b/tests/test_dynamodb2/test_dynamodb_expression_tokenizer.py new file mode 100644 index 000000000..3330d431e --- /dev/null +++ b/tests/test_dynamodb2/test_dynamodb_expression_tokenizer.py @@ -0,0 +1,259 @@ +from moto.dynamodb2.exceptions import ( + InvalidTokenException, + InvalidExpressionAttributeNameKey, +) +from moto.dynamodb2.parsing.tokens import ExpressionTokenizer, Token + + +def test_expression_tokenizer_single_set_action(): + set_action = "SET attrName = :attrValue" + token_list = ExpressionTokenizer.make_list(set_action) + assert token_list == [ + Token(Token.ATTRIBUTE, "SET"), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE, "attrName"), + Token(Token.WHITESPACE, " "), + Token(Token.EQUAL_SIGN, "="), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE_VALUE, ":attrValue"), + ] + + +def test_expression_tokenizer_single_set_action_leading_space(): + set_action = "Set attrName = :attrValue" + token_list = ExpressionTokenizer.make_list(set_action) + assert token_list == [ + Token(Token.ATTRIBUTE, "Set"), + Token(Token.WHITESPACE, " "), + 
Token(Token.ATTRIBUTE, "attrName"), + Token(Token.WHITESPACE, " "), + Token(Token.EQUAL_SIGN, "="), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE_VALUE, ":attrValue"), + ] + + +def test_expression_tokenizer_single_set_action_attribute_name_leading_space(): + set_action = "SET #a = :attrValue" + token_list = ExpressionTokenizer.make_list(set_action) + assert token_list == [ + Token(Token.ATTRIBUTE, "SET"), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE_NAME, "#a"), + Token(Token.WHITESPACE, " "), + Token(Token.EQUAL_SIGN, "="), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE_VALUE, ":attrValue"), + ] + + +def test_expression_tokenizer_single_set_action_trailing_space(): + set_action = "SET attrName = :attrValue " + token_list = ExpressionTokenizer.make_list(set_action) + assert token_list == [ + Token(Token.ATTRIBUTE, "SET"), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE, "attrName"), + Token(Token.WHITESPACE, " "), + Token(Token.EQUAL_SIGN, "="), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE_VALUE, ":attrValue"), + Token(Token.WHITESPACE, " "), + ] + + +def test_expression_tokenizer_single_set_action_multi_spaces(): + set_action = "SET attrName = :attrValue " + token_list = ExpressionTokenizer.make_list(set_action) + assert token_list == [ + Token(Token.ATTRIBUTE, "SET"), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE, "attrName"), + Token(Token.WHITESPACE, " "), + Token(Token.EQUAL_SIGN, "="), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE_VALUE, ":attrValue"), + Token(Token.WHITESPACE, " "), + ] + + +def test_expression_tokenizer_single_set_action_with_numbers_in_identifiers(): + set_action = "SET attrName3 = :attr3Value" + token_list = ExpressionTokenizer.make_list(set_action) + assert token_list == [ + Token(Token.ATTRIBUTE, "SET"), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE, "attrName3"), + Token(Token.WHITESPACE, " "), + Token(Token.EQUAL_SIGN, "="), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE_VALUE, ":attr3Value"), + ] + + +def test_expression_tokenizer_single_set_action_with_underscore_in_identifier(): + set_action = "SET attr_Name = :attr_Value" + token_list = ExpressionTokenizer.make_list(set_action) + assert token_list == [ + Token(Token.ATTRIBUTE, "SET"), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE, "attr_Name"), + Token(Token.WHITESPACE, " "), + Token(Token.EQUAL_SIGN, "="), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE_VALUE, ":attr_Value"), + ] + + +def test_expression_tokenizer_leading_underscore_in_attribute_name_expression(): + """Leading underscore is not allowed for an attribute name""" + set_action = "SET attrName = _idid" + try: + ExpressionTokenizer.make_list(set_action) + assert False, "Exception not raised correctly" + except InvalidTokenException as te: + assert te.token == "_" + assert te.near == "= _idid" + + +def test_expression_tokenizer_leading_underscore_in_attribute_value_expression(): + """Leading underscore is allowed in an attribute value""" + set_action = "SET attrName = :_attrValue" + token_list = ExpressionTokenizer.make_list(set_action) + assert token_list == [ + Token(Token.ATTRIBUTE, "SET"), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE, "attrName"), + Token(Token.WHITESPACE, " "), + Token(Token.EQUAL_SIGN, "="), + Token(Token.WHITESPACE, " "), + Token(Token.ATTRIBUTE_VALUE, ":_attrValue"), + ] + + +def test_expression_tokenizer_single_set_action_nested_attribute(): + set_action = "SET attrName.elem = :attrValue" + token_list = 
+    assert token_list == [
+        Token(Token.ATTRIBUTE, "SET"),
+        Token(Token.WHITESPACE, " "),
+        Token(Token.ATTRIBUTE, "attrName"),
+        Token(Token.DOT, "."),
+        Token(Token.ATTRIBUTE, "elem"),
+        Token(Token.WHITESPACE, " "),
+        Token(Token.EQUAL_SIGN, "="),
+        Token(Token.WHITESPACE, " "),
+        Token(Token.ATTRIBUTE_VALUE, ":attrValue"),
+    ]
+
+
+def test_expression_tokenizer_list_index_with_sub_attribute():
+    set_action = "SET itemmap.itemlist[1].foos=:Item"
+    token_list = ExpressionTokenizer.make_list(set_action)
+    assert token_list == [
+        Token(Token.ATTRIBUTE, "SET"),
+        Token(Token.WHITESPACE, " "),
+        Token(Token.ATTRIBUTE, "itemmap"),
+        Token(Token.DOT, "."),
+        Token(Token.ATTRIBUTE, "itemlist"),
+        Token(Token.OPEN_SQUARE_BRACKET, "["),
+        Token(Token.NUMBER, "1"),
+        Token(Token.CLOSE_SQUARE_BRACKET, "]"),
+        Token(Token.DOT, "."),
+        Token(Token.ATTRIBUTE, "foos"),
+        Token(Token.EQUAL_SIGN, "="),
+        Token(Token.ATTRIBUTE_VALUE, ":Item"),
+    ]
+
+
+def test_expression_tokenizer_list_index_surrounded_with_whitespace():
+    set_action = "SET itemlist[ 1 ]=:Item"
+    token_list = ExpressionTokenizer.make_list(set_action)
+    assert token_list == [
+        Token(Token.ATTRIBUTE, "SET"),
+        Token(Token.WHITESPACE, " "),
+        Token(Token.ATTRIBUTE, "itemlist"),
+        Token(Token.OPEN_SQUARE_BRACKET, "["),
+        Token(Token.WHITESPACE, " "),
+        Token(Token.NUMBER, "1"),
+        Token(Token.WHITESPACE, " "),
+        Token(Token.CLOSE_SQUARE_BRACKET, "]"),
+        Token(Token.EQUAL_SIGN, "="),
+        Token(Token.ATTRIBUTE_VALUE, ":Item"),
+    ]
+
+
+def test_expression_tokenizer_single_set_action_attribute_name_invalid_key():
+    """
+    ExpressionAttributeNames contains invalid key: Syntax error; key: "#va#l2"
+    """
+    set_action = "SET #va#l2 = 3"
+    try:
+        ExpressionTokenizer.make_list(set_action)
+        assert False, "Exception not raised correctly"
+    except InvalidExpressionAttributeNameKey as e:
+        assert e.key == "#va#l2"
+
+
+def test_expression_tokenizer_single_set_action_attribute_name_invalid_key_double_hash():
+    """
+    ExpressionAttributeNames contains invalid key: Syntax error; key: "#va#l"
+    """
+    set_action = "SET #va#l = 3"
+    try:
+        ExpressionTokenizer.make_list(set_action)
+        assert False, "Exception not raised correctly"
+    except InvalidExpressionAttributeNameKey as e:
+        assert e.key == "#va#l"
+
+
+def test_expression_tokenizer_single_set_action_attribute_name_valid_key():
+    set_action = "SET attr=#val2"
+    token_list = ExpressionTokenizer.make_list(set_action)
+    assert token_list == [
+        Token(Token.ATTRIBUTE, "SET"),
+        Token(Token.WHITESPACE, " "),
+        Token(Token.ATTRIBUTE, "attr"),
+        Token(Token.EQUAL_SIGN, "="),
+        Token(Token.ATTRIBUTE_NAME, "#val2"),
+    ]
+
+
+def test_expression_tokenizer_just_a_pipe():
+    set_action = "|"
+    try:
+        ExpressionTokenizer.make_list(set_action)
+        assert False, "Exception not raised correctly"
+    except InvalidTokenException as te:
+        assert te.token == "|"
+        assert te.near == "|"
+
+
+def test_expression_tokenizer_just_a_pipe_with_leading_white_spaces():
+    set_action = " |"
+    try:
+        ExpressionTokenizer.make_list(set_action)
+        assert False, "Exception not raised correctly"
+    except InvalidTokenException as te:
+        assert te.token == "|"
+        assert te.near == " |"
+
+
+def test_expression_tokenizer_just_a_pipe_for_set_expression():
+    set_action = "SET|"
+    try:
+        ExpressionTokenizer.make_list(set_action)
+        assert False, "Exception not raised correctly"
+    except InvalidTokenException as te:
+        assert te.token == "|"
+        assert te.near == "SET|"
+
+
+def test_expression_tokenizer_just_an_attribute_and_a_pipe_for_set_expression():
+    set_action = "SET a|"
+    try:
+        ExpressionTokenizer.make_list(set_action)
+        assert False, "Exception not raised correctly"
+    except InvalidTokenException as te:
+        assert te.token == "|"
+        assert te.near == "a|"
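
As a quick illustration of how the pieces in this patch fit together (a minimal sketch, not part of the diff above; it only assumes the modules added here are importable as moto.dynamodb2.parsing.tokens and moto.dynamodb2.exceptions):

    # sketch.py - exercises the new tokenizer and exception hierarchy
    from moto.dynamodb2.exceptions import MockValidationException
    from moto.dynamodb2.parsing.tokens import ExpressionTokenizer, Token

    # A well-formed update expression is split into attribute, whitespace and
    # special-character tokens.
    tokens = ExpressionTokenizer.make_list("SET #name = :value")
    print([(Token.PLACEHOLDER_NAMES.get(t.type, t.type), t.value) for t in tokens])

    # An invalid token now raises a subclass of MockValidationException, so the
    # response handler can surface the AWS-style message via exception_msg.
    try:
        ExpressionTokenizer.make_list("SET a|")
    except MockValidationException as e:
        print(e.exception_msg)
        # Invalid UpdateExpression: Syntax error; token: "|", near: "a|"

Because every new exception derives from MockValidationException and carries its message in exception_msg, responses.py can catch the base class once and return the message verbatim, which is what the @@ -764,15 +764,9 @@ hunk does.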