MD5 calculation of SQS message attributes

This implements the same MD5 hashing scheme that the Ruby and Java AWS
SDKs use.

Doesn't yet handle list types but if you're reading this you might be
surprised how easy that is to add. Give it a shot and if you get stuck
reach out to me for help.
This commit is contained in:
Jack Danger Canty 2017-04-26 23:40:28 -07:00
parent 584352aaf6
commit 5e88b5d1b4
3 changed files with 93 additions and 18 deletions

View File

@ -1,7 +1,9 @@
from __future__ import unicode_literals
import base64
import hashlib
import re
import struct
from xml.sax.saxutils import escape
import boto.sqs
@ -17,6 +19,8 @@ from .exceptions import (
# Default account and sender identifiers.
DEFAULT_ACCOUNT_ID = 123456789012
DEFAULT_SENDER_ID = "AIDAIT2UOQQY3AUEKVGXU"

# Single marker byte for each attribute data type. 'Number' shares the
# marker used for 'String'.
TRANSPORT_TYPE_ENCODINGS = {
    'String': b'\x01',
    'Number': b'\x01',
    'Binary': b'\x02',
}
class Message(BaseModel):
@ -33,10 +37,53 @@ class Message(BaseModel):
self.delayed_until = 0
@property
def body_md5(self):
    """
    Hex MD5 digest of the message body.

    The body (``self._body``) is UTF-8 encoded before it is hashed.
    Exposed as a property because it is read as a plain attribute
    (``message.body_md5``), never called.
    """
    return hashlib.md5(self._body.encode('utf-8')).hexdigest()

# Older name for the same digest, kept as an alias so that existing
# ``message.md5`` lookups keep working.
md5 = body_md5
@property
def attribute_md5(self):
    """
    Hex MD5 digest of all message attributes.

    Attributes are visited in sorted-name order. For each attribute
    the following bytes are fed to the digest:

      * the name, UTF-8 encoded and prefixed with its byte length
      * the data type, UTF-8 encoded and prefixed with its byte length
      * one marker byte looked up in TRANSPORT_TYPE_ENCODINGS
      * the value, prefixed with its byte length

    Every length prefix is a 4-byte big-endian unsigned integer.
    'String' and 'Number' values come from ``string_value``; 'Binary'
    values are the base64-decoded ``binary_value``. When there are no
    attributes the result is the digest of empty input.

    Not yet implemented:
        List types (https://github.com/aws/aws-sdk-java/blob/7844c64cf248aed889811bf2e871ad6b276a89ca/aws-java-sdk-sqs/src/main/java/com/amazonaws/services/sqs/MessageMD5ChecksumHandler.java#L58k)
        Any data type other than 'String', 'Number' or 'Binary' makes
        the property print a notice and return a placeholder digest.
    """
    def to_bytes(part):
        # base64.b64decode() already returns bytes, and bytes has no
        # .encode() on Python 3, so only text is encoded.
        return part if isinstance(part, bytes) else part.encode('utf-8')

    def length_prefixed(part):
        # Each part is written right after its own byte length packed
        # into a 4-byte integer:
        # 'timestamp' -> b'\x00\x00\x00\t' + b'timestamp'
        # The format is a byte string so that struct accepts it even
        # when unicode_literals is in effect.
        raw = to_bytes(part)
        return struct.pack(b'!I', len(raw)) + raw

    digest = hashlib.md5()
    for name in sorted(self.message_attributes):
        attr = self.message_attributes[name]
        data_type = attr['data_type']

        # Resolve the value before touching TRANSPORT_TYPE_ENCODINGS so
        # that an unsupported type reaches the fallback below instead
        # of failing with a KeyError on the lookup.
        if data_type in ('String', 'Number'):
            value = attr['string_value']
        elif data_type == 'Binary':
            value = base64.b64decode(attr['binary_value'])
        else:
            print("Moto hasn't implemented MD5 hashing for {} attributes".format(data_type))
            # The following should be enough of a clue to users that
            # they are not, in fact, looking at a correct MD5 while
            # also following the character and length constraints of
            # MD5 so as not to break client software
            return 'deadbeefdeadbeefdeadbeefdeadbeef'

        digest.update(b''.join([
            length_prefixed(name),
            # The data type is additionally given a final byte
            # representing which type it is
            length_prefixed(data_type),
            TRANSPORT_TYPE_ENCODINGS[data_type],
            length_prefixed(value),
        ]))
    return digest.hexdigest()
@property
def body(self):

View File

@ -337,11 +337,9 @@ SET_QUEUE_ATTRIBUTE_RESPONSE = """<SetQueueAttributesResponse>
SEND_MESSAGE_RESPONSE = """<SendMessageResponse>
<SendMessageResult>
<MD5OfMessageBody>
{{- message.md5 -}}
{{- message.body_md5 -}}
</MD5OfMessageBody>
{% if message.message_attributes.items()|count > 0 %}
<MD5OfMessageAttributes>324758f82d026ac6ec5b31a3b192d1e3</MD5OfMessageAttributes>
{% endif %}
<MD5OfMessageAttributes>{{- message.attribute_md5 -}}</MD5OfMessageAttributes>
<MessageId>
{{- message.id -}}
</MessageId>
@ -357,7 +355,7 @@ RECEIVE_MESSAGE_RESPONSE = """<ReceiveMessageResponse>
<Message>
<MessageId>{{ message.id }}</MessageId>
<ReceiptHandle>{{ message.receipt_handle }}</ReceiptHandle>
<MD5OfBody>{{ message.md5 }}</MD5OfBody>
<MD5OfBody>{{ message.body_md5 }}</MD5OfBody>
<Body>{{ message.body }}</Body>
<Attribute>
<Name>SenderId</Name>
@ -375,9 +373,7 @@ RECEIVE_MESSAGE_RESPONSE = """<ReceiveMessageResponse>
<Name>ApproximateFirstReceiveTimestamp</Name>
<Value>{{ message.approximate_first_receive_timestamp }}</Value>
</Attribute>
{% if message.message_attributes.items()|count > 0 %}
<MD5OfMessageAttributes>324758f82d026ac6ec5b31a3b192d1e3</MD5OfMessageAttributes>
{% endif %}
<MD5OfMessageAttributes>{{- message.attribute_md5 -}}</MD5OfMessageAttributes>
{% for name, value in message.message_attributes.items() %}
<MessageAttribute>
<Name>{{ name }}</Name>
@ -405,10 +401,8 @@ SEND_MESSAGE_BATCH_RESPONSE = """<SendMessageBatchResponse>
<SendMessageBatchResultEntry>
<Id>{{ message.user_id }}</Id>
<MessageId>{{ message.id }}</MessageId>
<MD5OfMessageBody>{{ message.md5 }}</MD5OfMessageBody>
{% if message.message_attributes.items()|count > 0 %}
<MD5OfMessageAttributes>324758f82d026ac6ec5b31a3b192d1e3</MD5OfMessageAttributes>
{% endif %}
<MD5OfMessageBody>{{ message.body_md5 }}</MD5OfMessageBody>
<MD5OfMessageAttributes>{{- message.attribute_md5 -}}</MD5OfMessageAttributes>
</SendMessageBatchResultEntry>
{% endfor %}
</SendMessageBatchResult>

View File

@ -43,10 +43,44 @@ def test_get_inexistent_queue():
def test_message_send():
    sqs = boto3.resource('sqs', region_name='us-east-1')
    queue = sqs.create_queue(QueueName="blah")
    # Send exactly one message. An extra attribute-less send here would
    # leave a second message on the queue and its response would be
    # discarded anyway.
    msg = queue.send_message(
        MessageBody="derp",
        MessageAttributes={
            'timestamp': {
                'StringValue': '1493147359900',
                'DataType': 'Number',
            }
        }
    )
    # Digests reported back for the body and for the single Number
    # attribute.
    msg.get('MD5OfMessageBody').should.equal(
        '58fd9edd83341c29f1aebba81c31e257')
    msg.get('MD5OfMessageAttributes').should.equal(
        '235c5c510d26fb653d073faed50ae77c')
    msg.get('ResponseMetadata', {}).get('RequestId').should.equal(
        '27daac76-34dd-47df-bd01-1f6e873584a0')
    msg.get('MessageId').should_not.contain(' \n')

    messages = queue.receive_messages()
    messages.should.have.length_of(1)
@mock_sqs
def test_message_with_complex_attributes():
    resource = boto3.resource('sqs', region_name='us-east-1')
    queue = resource.create_queue(QueueName="blah")

    # One attribute per supported data type, plus a String attribute
    # whose name and value both contain non-ASCII characters.
    attributes = {
        'ccc': {'StringValue': 'testjunk', 'DataType': 'String'},
        'aaa': {'BinaryValue': b'\x02\x03\x04', 'DataType': 'Binary'},
        'zzz': {'DataType': 'Number', 'StringValue': '0230.01'},
        'öther_encodings': {'DataType': 'String', 'StringValue': 'T\xFCst'}
    }
    msg = queue.send_message(MessageBody="derp", MessageAttributes=attributes)

    body_digest = msg.get('MD5OfMessageBody')
    body_digest.should.equal('58fd9edd83341c29f1aebba81c31e257')

    attribute_digest = msg.get('MD5OfMessageAttributes')
    attribute_digest.should.equal('8ae21a7957029ef04146b42aeaa18a22')

    request_id = msg.get('ResponseMetadata', {}).get('RequestId')
    request_id.should.equal('27daac76-34dd-47df-bd01-1f6e873584a0')

    msg.get('MessageId').should_not.contain(' \n')