From 5e88b5d1b49bc853f67969b2b7cc0b9bc346d9f9 Mon Sep 17 00:00:00 2001 From: Jack Danger Canty Date: Wed, 26 Apr 2017 23:40:28 -0700 Subject: [PATCH 1/3] MD5 calculation of SQS message attributes This implements the same MD5 hashing pattern as implemented in the Ruby and Java AWS SDKs Doesn't yet handle list types but if you're reading this you might be surprised how easy that is to add. Give it a shot and if you get stuck reach out to me for help. --- moto/sqs/models.py | 55 +++++++++++++++++++++++++++++++++++--- moto/sqs/responses.py | 18 +++++-------- tests/test_sqs/test_sqs.py | 38 ++++++++++++++++++++++++-- 3 files changed, 93 insertions(+), 18 deletions(-) diff --git a/moto/sqs/models.py b/moto/sqs/models.py index cedf03199..f8b7d91b1 100644 --- a/moto/sqs/models.py +++ b/moto/sqs/models.py @@ -1,7 +1,9 @@ from __future__ import unicode_literals +import base64 import hashlib import re +import struct from xml.sax.saxutils import escape import boto.sqs @@ -17,6 +19,8 @@ from .exceptions import ( DEFAULT_ACCOUNT_ID = 123456789012 DEFAULT_SENDER_ID = "AIDAIT2UOQQY3AUEKVGXU" +TRANSPORT_TYPE_ENCODINGS = {'String': b'\x01', 'Binary': b'\x02', 'Number': b'\x01'} + class Message(BaseModel): @@ -33,10 +37,53 @@ class Message(BaseModel): self.delayed_until = 0 @property - def md5(self): - body_md5 = hashlib.md5() - body_md5.update(self._body.encode('utf-8')) - return body_md5.hexdigest() + def body_md5(self): + md5 = hashlib.md5() + md5.update(self._body.encode('utf-8')) + return md5.hexdigest() + + @property + def attribute_md5(self): + """ + The MD5 of all attributes is calculated by first generating a + utf-8 string from each attribute and MD5-ing the concatenation + of them all. Each attribute is encoded with some bytes that + describe the length of each part and the type of attribute. + + Not yet implemented: + List types (https://github.com/aws/aws-sdk-java/blob/7844c64cf248aed889811bf2e871ad6b276a89ca/aws-java-sdk-sqs/src/main/java/com/amazonaws/services/sqs/MessageMD5ChecksumHandler.java#L58k) + """ + md5 = hashlib.md5() + for name in sorted(self.message_attributes.keys()): + attr = self.message_attributes[name] + data_type = attr['data_type'] + + encoded = ''.encode('utf-8') + # Each part of each attribute is encoded right after it's + # own length is packed into a 4-byte integer + # 'timestamp' -> b'\x00\x00\x00\t' + encoded += struct.pack("!I", len(name.encode('utf-8'))) + name.encode('utf-8') + # The datatype is additionally given a final byte + # representing which type it is + encoded += struct.pack("!I", len(data_type)).encode('utf-8') + data_type.encode('utf-8') + encoded += TRANSPORT_TYPE_ENCODINGS[data_type] + + if data_type == 'String' or data_type == 'Number': + value = attr['string_value'] + elif data_type == 'Binary': + value = base64.b64decode(attr['binary_value']) + else: + print("Moto hasn't implemented MD5 hashing for {} attributes".format(data_type)) + # The following should be enough of a clue to users that + # they are not, in fact, looking at a correct MD5 while + # also following the character and length constraints of + # MD5 so as not to break client softwre + return('deadbeefdeadbeefdeadbeefdeadbeef') + + encoded += struct.pack("!I", len(value.encode('utf-8'))) + value.encode('utf-8') + + md5.update(encoded) + return md5.hexdigest() @property def body(self): diff --git a/moto/sqs/responses.py b/moto/sqs/responses.py index 75602b1b7..53bbac6ef 100644 --- a/moto/sqs/responses.py +++ b/moto/sqs/responses.py @@ -337,11 +337,9 @@ SET_QUEUE_ATTRIBUTE_RESPONSE = """ SEND_MESSAGE_RESPONSE = """ - {{- message.md5 -}} + {{- message.body_md5 -}} - {% if message.message_attributes.items()|count > 0 %} - 324758f82d026ac6ec5b31a3b192d1e3 - {% endif %} + {{- message.attribute_md5 -}} {{- message.id -}} @@ -357,7 +355,7 @@ RECEIVE_MESSAGE_RESPONSE = """ {{ message.id }} {{ message.receipt_handle }} - {{ message.md5 }} + {{ message.body_md5 }} {{ message.body }} SenderId @@ -375,9 +373,7 @@ RECEIVE_MESSAGE_RESPONSE = """ ApproximateFirstReceiveTimestamp {{ message.approximate_first_receive_timestamp }} - {% if message.message_attributes.items()|count > 0 %} - 324758f82d026ac6ec5b31a3b192d1e3 - {% endif %} + {{- message.attribute_md5 -}} {% for name, value in message.message_attributes.items() %} {{ name }} @@ -405,10 +401,8 @@ SEND_MESSAGE_BATCH_RESPONSE = """ {{ message.user_id }} {{ message.id }} - {{ message.md5 }} - {% if message.message_attributes.items()|count > 0 %} - 324758f82d026ac6ec5b31a3b192d1e3 - {% endif %} + {{ message.body_md5 }} + {{- message.attribute_md5 -}} {% endfor %} diff --git a/tests/test_sqs/test_sqs.py b/tests/test_sqs/test_sqs.py index f179d9f85..987efa3d5 100644 --- a/tests/test_sqs/test_sqs.py +++ b/tests/test_sqs/test_sqs.py @@ -43,10 +43,44 @@ def test_get_inexistent_queue(): def test_message_send(): sqs = boto3.resource('sqs', region_name='us-east-1') queue = sqs.create_queue(QueueName="blah") - msg = queue.send_message(MessageBody="derp") - + msg = queue.send_message( + MessageBody="derp", + MessageAttributes={ + 'timestamp': { + 'StringValue': '1493147359900', + 'DataType': 'Number', + } + } + ) msg.get('MD5OfMessageBody').should.equal( '58fd9edd83341c29f1aebba81c31e257') + msg.get('MD5OfMessageAttributes').should.equal( + '235c5c510d26fb653d073faed50ae77c') + msg.get('ResponseMetadata', {}).get('RequestId').should.equal( + '27daac76-34dd-47df-bd01-1f6e873584a0') + msg.get('MessageId').should_not.contain(' \n') + + messages = queue.receive_messages() + messages.should.have.length_of(1) + + +@mock_sqs +def test_message_with_complex_attributes(): + sqs = boto3.resource('sqs', region_name='us-east-1') + queue = sqs.create_queue(QueueName="blah") + msg = queue.send_message( + MessageBody="derp", + MessageAttributes={ + 'ccc': {'StringValue': 'testjunk', 'DataType': 'String'}, + 'aaa': {'BinaryValue': b'\x02\x03\x04', 'DataType': 'Binary'}, + 'zzz': {'DataType': 'Number', 'StringValue': '0230.01'}, + 'öther_encodings': {'DataType': 'String', 'StringValue': 'T\xFCst'} + } + ) + msg.get('MD5OfMessageBody').should.equal( + '58fd9edd83341c29f1aebba81c31e257') + msg.get('MD5OfMessageAttributes').should.equal( + '8ae21a7957029ef04146b42aeaa18a22') msg.get('ResponseMetadata', {}).get('RequestId').should.equal( '27daac76-34dd-47df-bd01-1f6e873584a0') msg.get('MessageId').should_not.contain(' \n') From daba69914767f0b48fbf379cea44d12d21f2e635 Mon Sep 17 00:00:00 2001 From: Jack Danger Canty Date: Thu, 11 May 2017 07:06:42 -0700 Subject: [PATCH 2/3] binary values are sent as base64-encoded strings --- tests/test_sqs/test_sqs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_sqs/test_sqs.py b/tests/test_sqs/test_sqs.py index 987efa3d5..0e1149200 100644 --- a/tests/test_sqs/test_sqs.py +++ b/tests/test_sqs/test_sqs.py @@ -7,6 +7,7 @@ import botocore.exceptions from boto.exception import SQSError from boto.sqs.message import RawMessage, Message +import base64 import requests import sure # noqa import time @@ -233,7 +234,7 @@ def test_send_message_with_attributes(): message = queue.new_message(body) message_attributes = { 'test.attribute_name': {'data_type': 'String', 'string_value': 'attribute value'}, - 'test.binary_attribute': {'data_type': 'Binary', 'binary_value': 'binary value'}, + 'test.binary_attribute': {'data_type': 'Binary', 'binary_value': base64.b64encode('binary value')}, 'test.number_attribute': {'data_type': 'Number', 'string_value': 'string value'} } message.message_attributes = message_attributes From 6679def702922d19eeea5e9e0016311a868b58de Mon Sep 17 00:00:00 2001 From: Jack Danger Canty Date: Thu, 11 May 2017 09:28:19 -0700 Subject: [PATCH 3/3] Python 2/3 compat for MD5 of SQS attributes --- moto/sqs/models.py | 14 ++++++++++---- tests/test_sqs/test_sqs.py | 3 ++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/moto/sqs/models.py b/moto/sqs/models.py index f8b7d91b1..d2c538ecb 100644 --- a/moto/sqs/models.py +++ b/moto/sqs/models.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import base64 import hashlib import re +import six import struct from xml.sax.saxutils import escape @@ -53,24 +54,29 @@ class Message(BaseModel): Not yet implemented: List types (https://github.com/aws/aws-sdk-java/blob/7844c64cf248aed889811bf2e871ad6b276a89ca/aws-java-sdk-sqs/src/main/java/com/amazonaws/services/sqs/MessageMD5ChecksumHandler.java#L58k) """ + def utf8(str): + if isinstance(str, six.string_types): + return str.encode('utf-8') + return str md5 = hashlib.md5() for name in sorted(self.message_attributes.keys()): attr = self.message_attributes[name] data_type = attr['data_type'] - encoded = ''.encode('utf-8') + encoded = utf8('') # Each part of each attribute is encoded right after it's # own length is packed into a 4-byte integer # 'timestamp' -> b'\x00\x00\x00\t' - encoded += struct.pack("!I", len(name.encode('utf-8'))) + name.encode('utf-8') + encoded += struct.pack("!I", len(utf8(name))) + utf8(name) # The datatype is additionally given a final byte # representing which type it is - encoded += struct.pack("!I", len(data_type)).encode('utf-8') + data_type.encode('utf-8') + encoded += struct.pack("!I", len(data_type)) + utf8(data_type) encoded += TRANSPORT_TYPE_ENCODINGS[data_type] if data_type == 'String' or data_type == 'Number': value = attr['string_value'] elif data_type == 'Binary': + print(data_type, attr['binary_value'], type(attr['binary_value'])) value = base64.b64decode(attr['binary_value']) else: print("Moto hasn't implemented MD5 hashing for {} attributes".format(data_type)) @@ -80,7 +86,7 @@ class Message(BaseModel): # MD5 so as not to break client softwre return('deadbeefdeadbeefdeadbeefdeadbeef') - encoded += struct.pack("!I", len(value.encode('utf-8'))) + value.encode('utf-8') + encoded += struct.pack("!I", len(utf8(value))) + utf8(value) md5.update(encoded) return md5.hexdigest() diff --git a/tests/test_sqs/test_sqs.py b/tests/test_sqs/test_sqs.py index 0e1149200..cad8ace76 100644 --- a/tests/test_sqs/test_sqs.py +++ b/tests/test_sqs/test_sqs.py @@ -232,9 +232,10 @@ def test_send_message_with_attributes(): body = 'this is a test message' message = queue.new_message(body) + BASE64_BINARY = base64.b64encode(b'binary value').decode('utf-8') message_attributes = { 'test.attribute_name': {'data_type': 'String', 'string_value': 'attribute value'}, - 'test.binary_attribute': {'data_type': 'Binary', 'binary_value': base64.b64encode('binary value')}, + 'test.binary_attribute': {'data_type': 'Binary', 'binary_value': BASE64_BINARY}, 'test.number_attribute': {'data_type': 'Number', 'string_value': 'string value'} } message.message_attributes = message_attributes