From 53cc3dd67a9733d734bb1f654dd4653f7bac789f Mon Sep 17 00:00:00 2001 From: Artem Date: Tue, 27 Oct 2020 14:13:47 +0200 Subject: [PATCH] Fix SQS md5 attribute hashing. (#3403) * Fix sqs md5 attribute hashing. * Fix test name. * Fix format. --- moto/sqs/models.py | 116 +++++++++++++++++++------------------ tests/test_sqs/test_sqs.py | 76 ++++++++++++++++++++---- 2 files changed, 124 insertions(+), 68 deletions(-) diff --git a/moto/sqs/models.py b/moto/sqs/models.py index 34e81be8a..1ab98e94c 100644 --- a/moto/sqs/models.py +++ b/moto/sqs/models.py @@ -54,6 +54,15 @@ TRANSPORT_TYPE_ENCODINGS = { "String.custom": b"\x01", } +STRING_TYPE_FIELD_INDEX = 1 +BINARY_TYPE_FIELD_INDEX = 2 +STRING_LIST_TYPE_FIELD_INDEX = 3 +BINARY_LIST_TYPE_FIELD_INDEX = 4 + +# Valid attribute name rules can be found at +# https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-message-metadata.html +ATTRIBUTE_NAME_PATTERN = re.compile("^([a-z]|[A-Z]|[0-9]|[_.\\-])+$") + class Message(BaseModel): def __init__(self, message_id, body): @@ -78,70 +87,63 @@ class Message(BaseModel): @property def attribute_md5(self): - """ - The MD5 of all attributes is calculated by first generating a - utf-8 string from each attribute and MD5-ing the concatenation - of them all. Each attribute is encoded with some bytes that - describe the length of each part and the type of attribute. 
- - Not yet implemented: - List types (https://github.com/aws/aws-sdk-java/blob/7844c64cf248aed889811bf2e871ad6b276a89ca/aws-java-sdk-sqs/src/main/java/com/amazonaws/services/sqs/MessageMD5ChecksumHandler.java#L58k) - """ - - def utf8(str): - if isinstance(str, six.string_types): - return str.encode("utf-8") - return str md5 = hashlib.md5() - struct_format = "!I".encode("ascii") # ensure it's a bytestring - for name in sorted(self.message_attributes.keys()): - attr = self.message_attributes[name] - whole_data_type = attr.get("data_type") - if TRANSPORT_TYPE_ENCODINGS.get(whole_data_type): - data_type = whole_data_type - else: - data_type_parts = attr["data_type"].split(".") - data_type = data_type_parts[0] - if data_type not in ["String", "Binary", "Number", "String.custom"]: - raise MessageAttributesInvalid( - "The message attribute '{0}' has an invalid message attribute type, the set of supported type prefixes is Binary, Number, and String.".format( - name[0] - ) + for attrName in sorted(self.message_attributes.keys()): + self.validate_attribute_name(attrName) + attrValue = self.message_attributes[attrName] + # Encode name + self.update_binary_length_and_value(md5, self.utf8(attrName)) + # Encode type + self.update_binary_length_and_value(md5, self.utf8(attrValue["data_type"])) + + if attrValue.get("string_value"): + md5.update(bytearray([STRING_TYPE_FIELD_INDEX])) + self.update_binary_length_and_value( + md5, self.utf8(attrValue.get("string_value")) ) + elif attrValue.get("binary_value"): + md5.update(bytearray([BINARY_TYPE_FIELD_INDEX])) + decoded_binary_value = base64.b64decode(attrValue.get("binary_value")) + self.update_binary_length_and_value(md5, decoded_binary_value) + # string_list_value type is not implemented, reserved for the future use. 
+ # See https://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_MessageAttributeValue.html + elif len(attrValue["string_list_value"]) > 0: + md5.update(bytearray([STRING_LIST_TYPE_FIELD_INDEX])) + for strListMember in attrValue["string_list_value"]: + self.update_binary_length_and_value(md5, self.utf8(strListMember)) + # binary_list_value type is not implemented, reserved for the future use. + # See https://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_MessageAttributeValue.html + elif len(attrValue["binary_list_value"]) > 0: + md5.update(bytearray([BINARY_LIST_TYPE_FIELD_INDEX])) + for strListMember in attrValue["binary_list_value"]: + decoded_binary_value = base64.b64decode(strListMember) + self.update_binary_length_and_value(md5, decoded_binary_value) - encoded = utf8("") - # Each part of each attribute is encoded right after it's - # own length is packed into a 4-byte integer - # 'timestamp' -> b'\x00\x00\x00\t' - encoded += struct.pack(struct_format, len(utf8(name))) + utf8(name) - # The datatype is additionally given a final byte - # representing which type it is - encoded += struct.pack(struct_format, len(data_type)) + utf8(data_type) - encoded += TRANSPORT_TYPE_ENCODINGS[data_type] - - if data_type in ["String", "Number", "String.custom"]: - value = attr["string_value"] - elif data_type == "Binary": - value = base64.b64decode(attr["binary_value"]) - else: - print( - "Moto hasn't implemented MD5 hashing for {} attributes".format( - data_type - ) - ) - # The following should be enough of a clue to users that - # they are not, in fact, looking at a correct MD5 while - # also following the character and length constraints of - # MD5 so as not to break client softwre - return "deadbeefdeadbeefdeadbeefdeadbeef" - - encoded += struct.pack(struct_format, len(utf8(value))) + utf8(value) - - md5.update(encoded) return md5.hexdigest() + @staticmethod + def update_binary_length_and_value(md5, value): + length_bytes = 
struct.pack("!I".encode("ascii"), len(value)) + md5.update(length_bytes) + md5.update(value) + + @staticmethod + def validate_attribute_name(name): + if not ATTRIBUTE_NAME_PATTERN.match(name): + raise MessageAttributesInvalid( + "The message attribute name '{0}' is invalid. " + "Attribute name can contain A-Z, a-z, 0-9, " + "underscore (_), hyphen (-), and period (.) characters.".format(name) + ) + + @staticmethod + def utf8(string): + if isinstance(string, six.string_types): + return string.encode("utf-8") + return string + @property def body(self): return escape(self._body) diff --git a/tests/test_sqs/test_sqs.py b/tests/test_sqs/test_sqs.py index 6305a163a..b974e04f6 100644 --- a/tests/test_sqs/test_sqs.py +++ b/tests/test_sqs/test_sqs.py @@ -256,11 +256,14 @@ def test_message_send_with_attributes(): msg = queue.send_message( MessageBody="derp", MessageAttributes={ - "timestamp": {"StringValue": "1493147359900", "DataType": "Number"} + "SOME_Valid.attribute-Name": { + "StringValue": "1493147359900", + "DataType": "Number", + } }, ) msg.get("MD5OfMessageBody").should.equal("58fd9edd83341c29f1aebba81c31e257") - msg.get("MD5OfMessageAttributes").should.equal("235c5c510d26fb653d073faed50ae77c") + msg.get("MD5OfMessageAttributes").should.equal("36655e7e9d7c0e8479fa3f3f42247ae7") msg.get("MessageId").should_not.contain(" \n") messages = queue.receive_messages() @@ -268,20 +271,71 @@ def test_message_send_with_attributes(): @mock_sqs -def test_message_with_complex_attributes(): +def test_message_with_invalid_attributes(): + sqs = boto3.resource("sqs", region_name="us-east-1") + queue = sqs.create_queue(QueueName="blah") + with assert_raises(ClientError) as e: + queue.send_message( + MessageBody="derp", + MessageAttributes={ + "öther_encodings": {"DataType": "String", "StringValue": "str"}, + }, + ) + ex = e.exception + ex.response["Error"]["Code"].should.equal("MessageAttributesInvalid") + ex.response["Error"]["Message"].should.equal( + "The message attribute name 
'öther_encodings' is invalid. " + "Attribute name can contain A-Z, a-z, 0-9, underscore (_), hyphen (-), and period (.) characters." + ) + + +@mock_sqs +def test_message_with_string_attributes(): sqs = boto3.resource("sqs", region_name="us-east-1") queue = sqs.create_queue(QueueName="blah") msg = queue.send_message( MessageBody="derp", MessageAttributes={ - "ccc": {"StringValue": "testjunk", "DataType": "String"}, - "aaa": {"BinaryValue": b"\x02\x03\x04", "DataType": "Binary"}, - "zzz": {"DataType": "Number", "StringValue": "0230.01"}, - "öther_encodings": {"DataType": "String", "StringValue": "T\xFCst"}, + "id": { + "StringValue": "2018fc74-4f77-1a5a-1be0-c2d037d5052b", + "DataType": "String", + }, + "contentType": {"StringValue": "application/json", "DataType": "String"}, + "timestamp": { + "StringValue": "1602845432024", + "DataType": "Number.java.lang.Long", + }, }, ) msg.get("MD5OfMessageBody").should.equal("58fd9edd83341c29f1aebba81c31e257") - msg.get("MD5OfMessageAttributes").should.equal("8ae21a7957029ef04146b42aeaa18a22") + msg.get("MD5OfMessageAttributes").should.equal("b12289320bb6e494b18b645ef562b4a9") + msg.get("MessageId").should_not.contain(" \n") + + messages = queue.receive_messages() + messages.should.have.length_of(1) + + +@mock_sqs +def test_message_with_binary_attribute(): + sqs = boto3.resource("sqs", region_name="us-east-1") + queue = sqs.create_queue(QueueName="blah") + msg = queue.send_message( + MessageBody="derp", + MessageAttributes={ + "id": { + "StringValue": "453ae55e-f03b-21a6-a4b1-70c2e2e8fe71", + "DataType": "String", + }, + "mybin": {"BinaryValue": "kekchebukek", "DataType": "Binary"}, + "timestamp": { + "StringValue": "1603134247654", + "DataType": "Number.java.lang.Long", + }, + "contentType": {"StringValue": "application/json", "DataType": "String"}, + }, + ) + msg.get("MD5OfMessageBody").should.equal("58fd9edd83341c29f1aebba81c31e257") + msg.get("MD5OfMessageAttributes").should.equal("049075255ebc53fb95f7f9f3cedf3c50") 
msg.get("MessageId").should_not.contain(" \n") messages = queue.receive_messages() @@ -302,7 +356,7 @@ def test_message_with_attributes_have_labels(): }, ) msg.get("MD5OfMessageBody").should.equal("58fd9edd83341c29f1aebba81c31e257") - msg.get("MD5OfMessageAttributes").should.equal("235c5c510d26fb653d073faed50ae77c") + msg.get("MD5OfMessageAttributes").should.equal("2e2e4876d8e0bd6b8c2c8f556831c349") msg.get("MessageId").should_not.contain(" \n") messages = queue.receive_messages() @@ -657,10 +711,10 @@ def test_send_receive_message_with_attributes_with_labels(): message2.get("Body").should.equal(body_two) message1.get("MD5OfMessageAttributes").should.equal( - "235c5c510d26fb653d073faed50ae77c" + "2e2e4876d8e0bd6b8c2c8f556831c349" ) message2.get("MD5OfMessageAttributes").should.equal( - "994258b45346a2cc3f9cbb611aa7af30" + "cfa7c73063c6e2dbf9be34232a1978cf" ) response = queue.send_message(