From 5e88b5d1b49bc853f67969b2b7cc0b9bc346d9f9 Mon Sep 17 00:00:00 2001 From: Jack Danger Canty Date: Wed, 26 Apr 2017 23:40:28 -0700 Subject: [PATCH] MD5 calculation of SQS message attributes This implements the same MD5 hashing pattern as implemented in the Ruby and Java AWS SDKs Doesn't yet handle list types but if you're reading this you might be surprised how easy that is to add. Give it a shot and if you get stuck reach out to me for help. --- moto/sqs/models.py | 55 +++++++++++++++++++++++++++++++++++--- moto/sqs/responses.py | 18 +++++-------- tests/test_sqs/test_sqs.py | 38 ++++++++++++++++++++++++-- 3 files changed, 93 insertions(+), 18 deletions(-) diff --git a/moto/sqs/models.py b/moto/sqs/models.py index cedf03199..f8b7d91b1 100644 --- a/moto/sqs/models.py +++ b/moto/sqs/models.py @@ -1,7 +1,9 @@ from __future__ import unicode_literals +import base64 import hashlib import re +import struct from xml.sax.saxutils import escape import boto.sqs @@ -17,6 +19,8 @@ from .exceptions import ( DEFAULT_ACCOUNT_ID = 123456789012 DEFAULT_SENDER_ID = "AIDAIT2UOQQY3AUEKVGXU" +TRANSPORT_TYPE_ENCODINGS = {'String': b'\x01', 'Binary': b'\x02', 'Number': b'\x01'} + class Message(BaseModel): @@ -33,10 +37,53 @@ class Message(BaseModel): self.delayed_until = 0 @property - def md5(self): - body_md5 = hashlib.md5() - body_md5.update(self._body.encode('utf-8')) - return body_md5.hexdigest() + def body_md5(self): + md5 = hashlib.md5() + md5.update(self._body.encode('utf-8')) + return md5.hexdigest() + + @property + def attribute_md5(self): + """ + The MD5 of all attributes is calculated by first generating a + utf-8 string from each attribute and MD5-ing the concatenation + of them all. Each attribute is encoded with some bytes that + describe the length of each part and the type of attribute. + + Not yet implemented: + List types (https://github.com/aws/aws-sdk-java/blob/7844c64cf248aed889811bf2e871ad6b276a89ca/aws-java-sdk-sqs/src/main/java/com/amazonaws/services/sqs/MessageMD5ChecksumHandler.java#L58k) + """ + md5 = hashlib.md5() + for name in sorted(self.message_attributes.keys()): + attr = self.message_attributes[name] + data_type = attr['data_type'] + + encoded = ''.encode('utf-8') + # Each part of each attribute is encoded right after it's + # own length is packed into a 4-byte integer + # 'timestamp' -> b'\x00\x00\x00\t' + encoded += struct.pack("!I", len(name.encode('utf-8'))) + name.encode('utf-8') + # The datatype is additionally given a final byte + # representing which type it is + encoded += struct.pack("!I", len(data_type)).encode('utf-8') + data_type.encode('utf-8') + encoded += TRANSPORT_TYPE_ENCODINGS[data_type] + + if data_type == 'String' or data_type == 'Number': + value = attr['string_value'] + elif data_type == 'Binary': + value = base64.b64decode(attr['binary_value']) + else: + print("Moto hasn't implemented MD5 hashing for {} attributes".format(data_type)) + # The following should be enough of a clue to users that + # they are not, in fact, looking at a correct MD5 while + # also following the character and length constraints of + # MD5 so as not to break client softwre + return('deadbeefdeadbeefdeadbeefdeadbeef') + + encoded += struct.pack("!I", len(value.encode('utf-8'))) + value.encode('utf-8') + + md5.update(encoded) + return md5.hexdigest() @property def body(self): diff --git a/moto/sqs/responses.py b/moto/sqs/responses.py index 75602b1b7..53bbac6ef 100644 --- a/moto/sqs/responses.py +++ b/moto/sqs/responses.py @@ -337,11 +337,9 @@ SET_QUEUE_ATTRIBUTE_RESPONSE = """ SEND_MESSAGE_RESPONSE = """ - {{- message.md5 -}} + {{- message.body_md5 -}} - {% if message.message_attributes.items()|count > 0 %} - 324758f82d026ac6ec5b31a3b192d1e3 - {% endif %} + {{- message.attribute_md5 -}} {{- message.id -}} @@ -357,7 +355,7 @@ RECEIVE_MESSAGE_RESPONSE = """ {{ message.id }} {{ message.receipt_handle }} - {{ message.md5 }} + {{ message.body_md5 }} {{ message.body }} SenderId @@ -375,9 +373,7 @@ RECEIVE_MESSAGE_RESPONSE = """ ApproximateFirstReceiveTimestamp {{ message.approximate_first_receive_timestamp }} - {% if message.message_attributes.items()|count > 0 %} - 324758f82d026ac6ec5b31a3b192d1e3 - {% endif %} + {{- message.attribute_md5 -}} {% for name, value in message.message_attributes.items() %} {{ name }} @@ -405,10 +401,8 @@ SEND_MESSAGE_BATCH_RESPONSE = """ {{ message.user_id }} {{ message.id }} - {{ message.md5 }} - {% if message.message_attributes.items()|count > 0 %} - 324758f82d026ac6ec5b31a3b192d1e3 - {% endif %} + {{ message.body_md5 }} + {{- message.attribute_md5 -}} {% endfor %} diff --git a/tests/test_sqs/test_sqs.py b/tests/test_sqs/test_sqs.py index f179d9f85..987efa3d5 100644 --- a/tests/test_sqs/test_sqs.py +++ b/tests/test_sqs/test_sqs.py @@ -43,10 +43,44 @@ def test_get_inexistent_queue(): def test_message_send(): sqs = boto3.resource('sqs', region_name='us-east-1') queue = sqs.create_queue(QueueName="blah") - msg = queue.send_message(MessageBody="derp") - + msg = queue.send_message( + MessageBody="derp", + MessageAttributes={ + 'timestamp': { + 'StringValue': '1493147359900', + 'DataType': 'Number', + } + } + ) msg.get('MD5OfMessageBody').should.equal( '58fd9edd83341c29f1aebba81c31e257') + msg.get('MD5OfMessageAttributes').should.equal( + '235c5c510d26fb653d073faed50ae77c') + msg.get('ResponseMetadata', {}).get('RequestId').should.equal( + '27daac76-34dd-47df-bd01-1f6e873584a0') + msg.get('MessageId').should_not.contain(' \n') + + messages = queue.receive_messages() + messages.should.have.length_of(1) + + +@mock_sqs +def test_message_with_complex_attributes(): + sqs = boto3.resource('sqs', region_name='us-east-1') + queue = sqs.create_queue(QueueName="blah") + msg = queue.send_message( + MessageBody="derp", + MessageAttributes={ + 'ccc': {'StringValue': 'testjunk', 'DataType': 'String'}, + 'aaa': {'BinaryValue': b'\x02\x03\x04', 'DataType': 'Binary'}, + 'zzz': {'DataType': 'Number', 'StringValue': '0230.01'}, + 'öther_encodings': {'DataType': 'String', 'StringValue': 'T\xFCst'} + } + ) + msg.get('MD5OfMessageBody').should.equal( + '58fd9edd83341c29f1aebba81c31e257') + msg.get('MD5OfMessageAttributes').should.equal( + '8ae21a7957029ef04146b42aeaa18a22') msg.get('ResponseMetadata', {}).get('RequestId').should.equal( '27daac76-34dd-47df-bd01-1f6e873584a0') msg.get('MessageId').should_not.contain(' \n')