Merge pull request #919 from JackDanger/md5-of-sqs-messages

MD5 calculation of SQS message attributes
This commit is contained in:
Jack Danger 2017-05-22 18:23:51 -07:00 committed by GitHub
commit bdecd26397
3 changed files with 102 additions and 19 deletions

View File

@ -1,7 +1,10 @@
from __future__ import unicode_literals
import base64
import hashlib
import re
import six
import struct
from xml.sax.saxutils import escape
import boto.sqs
@ -17,6 +20,8 @@ from .exceptions import (
DEFAULT_ACCOUNT_ID = 123456789012
DEFAULT_SENDER_ID = "AIDAIT2UOQQY3AUEKVGXU"
TRANSPORT_TYPE_ENCODINGS = {'String': b'\x01', 'Binary': b'\x02', 'Number': b'\x01'}
class Message(BaseModel):
@ -33,10 +38,58 @@ class Message(BaseModel):
self.delayed_until = 0
@property
def md5(self):
body_md5 = hashlib.md5()
body_md5.update(self._body.encode('utf-8'))
return body_md5.hexdigest()
def body_md5(self):
md5 = hashlib.md5()
md5.update(self._body.encode('utf-8'))
return md5.hexdigest()
@property
def attribute_md5(self):
"""
The MD5 of all attributes is calculated by first generating a
utf-8 string from each attribute and MD5-ing the concatenation
of them all. Each attribute is encoded with some bytes that
describe the length of each part and the type of attribute.
Not yet implemented:
List types (https://github.com/aws/aws-sdk-java/blob/7844c64cf248aed889811bf2e871ad6b276a89ca/aws-java-sdk-sqs/src/main/java/com/amazonaws/services/sqs/MessageMD5ChecksumHandler.java#L58k)
"""
def utf8(str):
if isinstance(str, six.string_types):
return str.encode('utf-8')
return str
md5 = hashlib.md5()
for name in sorted(self.message_attributes.keys()):
attr = self.message_attributes[name]
data_type = attr['data_type']
encoded = utf8('')
# Each part of each attribute is encoded right after it's
# own length is packed into a 4-byte integer
# 'timestamp' -> b'\x00\x00\x00\t'
encoded += struct.pack("!I", len(utf8(name))) + utf8(name)
# The datatype is additionally given a final byte
# representing which type it is
encoded += struct.pack("!I", len(data_type)) + utf8(data_type)
encoded += TRANSPORT_TYPE_ENCODINGS[data_type]
if data_type == 'String' or data_type == 'Number':
value = attr['string_value']
elif data_type == 'Binary':
print(data_type, attr['binary_value'], type(attr['binary_value']))
value = base64.b64decode(attr['binary_value'])
else:
print("Moto hasn't implemented MD5 hashing for {} attributes".format(data_type))
# The following should be enough of a clue to users that
# they are not, in fact, looking at a correct MD5 while
# also following the character and length constraints of
# MD5 so as not to break client softwre
return('deadbeefdeadbeefdeadbeefdeadbeef')
encoded += struct.pack("!I", len(utf8(value))) + utf8(value)
md5.update(encoded)
return md5.hexdigest()
@property
def body(self):

View File

@ -337,11 +337,9 @@ SET_QUEUE_ATTRIBUTE_RESPONSE = """<SetQueueAttributesResponse>
SEND_MESSAGE_RESPONSE = """<SendMessageResponse>
<SendMessageResult>
<MD5OfMessageBody>
{{- message.md5 -}}
{{- message.body_md5 -}}
</MD5OfMessageBody>
{% if message.message_attributes.items()|count > 0 %}
<MD5OfMessageAttributes>324758f82d026ac6ec5b31a3b192d1e3</MD5OfMessageAttributes>
{% endif %}
<MD5OfMessageAttributes>{{- message.attribute_md5 -}}</MD5OfMessageAttributes>
<MessageId>
{{- message.id -}}
</MessageId>
@ -357,7 +355,7 @@ RECEIVE_MESSAGE_RESPONSE = """<ReceiveMessageResponse>
<Message>
<MessageId>{{ message.id }}</MessageId>
<ReceiptHandle>{{ message.receipt_handle }}</ReceiptHandle>
<MD5OfBody>{{ message.md5 }}</MD5OfBody>
<MD5OfBody>{{ message.body_md5 }}</MD5OfBody>
<Body>{{ message.body }}</Body>
<Attribute>
<Name>SenderId</Name>
@ -375,9 +373,7 @@ RECEIVE_MESSAGE_RESPONSE = """<ReceiveMessageResponse>
<Name>ApproximateFirstReceiveTimestamp</Name>
<Value>{{ message.approximate_first_receive_timestamp }}</Value>
</Attribute>
{% if message.message_attributes.items()|count > 0 %}
<MD5OfMessageAttributes>324758f82d026ac6ec5b31a3b192d1e3</MD5OfMessageAttributes>
{% endif %}
<MD5OfMessageAttributes>{{- message.attribute_md5 -}}</MD5OfMessageAttributes>
{% for name, value in message.message_attributes.items() %}
<MessageAttribute>
<Name>{{ name }}</Name>
@ -405,10 +401,8 @@ SEND_MESSAGE_BATCH_RESPONSE = """<SendMessageBatchResponse>
<SendMessageBatchResultEntry>
<Id>{{ message.user_id }}</Id>
<MessageId>{{ message.id }}</MessageId>
<MD5OfMessageBody>{{ message.md5 }}</MD5OfMessageBody>
{% if message.message_attributes.items()|count > 0 %}
<MD5OfMessageAttributes>324758f82d026ac6ec5b31a3b192d1e3</MD5OfMessageAttributes>
{% endif %}
<MD5OfMessageBody>{{ message.body_md5 }}</MD5OfMessageBody>
<MD5OfMessageAttributes>{{- message.attribute_md5 -}}</MD5OfMessageAttributes>
</SendMessageBatchResultEntry>
{% endfor %}
</SendMessageBatchResult>

View File

@ -7,6 +7,7 @@ import botocore.exceptions
from boto.exception import SQSError
from boto.sqs.message import RawMessage, Message
import base64
import requests
import sure # noqa
import time
@ -43,10 +44,44 @@ def test_get_inexistent_queue():
def test_message_send():
sqs = boto3.resource('sqs', region_name='us-east-1')
queue = sqs.create_queue(QueueName="blah")
msg = queue.send_message(MessageBody="derp")
msg = queue.send_message(
MessageBody="derp",
MessageAttributes={
'timestamp': {
'StringValue': '1493147359900',
'DataType': 'Number',
}
}
)
msg.get('MD5OfMessageBody').should.equal(
'58fd9edd83341c29f1aebba81c31e257')
msg.get('MD5OfMessageAttributes').should.equal(
'235c5c510d26fb653d073faed50ae77c')
msg.get('ResponseMetadata', {}).get('RequestId').should.equal(
'27daac76-34dd-47df-bd01-1f6e873584a0')
msg.get('MessageId').should_not.contain(' \n')
messages = queue.receive_messages()
messages.should.have.length_of(1)
@mock_sqs
def test_message_with_complex_attributes():
sqs = boto3.resource('sqs', region_name='us-east-1')
queue = sqs.create_queue(QueueName="blah")
msg = queue.send_message(
MessageBody="derp",
MessageAttributes={
'ccc': {'StringValue': 'testjunk', 'DataType': 'String'},
'aaa': {'BinaryValue': b'\x02\x03\x04', 'DataType': 'Binary'},
'zzz': {'DataType': 'Number', 'StringValue': '0230.01'},
'öther_encodings': {'DataType': 'String', 'StringValue': 'T\xFCst'}
}
)
msg.get('MD5OfMessageBody').should.equal(
'58fd9edd83341c29f1aebba81c31e257')
msg.get('MD5OfMessageAttributes').should.equal(
'8ae21a7957029ef04146b42aeaa18a22')
msg.get('ResponseMetadata', {}).get('RequestId').should.equal(
'27daac76-34dd-47df-bd01-1f6e873584a0')
msg.get('MessageId').should_not.contain(' \n')
@ -197,9 +232,10 @@ def test_send_message_with_attributes():
body = 'this is a test message'
message = queue.new_message(body)
BASE64_BINARY = base64.b64encode(b'binary value').decode('utf-8')
message_attributes = {
'test.attribute_name': {'data_type': 'String', 'string_value': 'attribute value'},
'test.binary_attribute': {'data_type': 'Binary', 'binary_value': 'binary value'},
'test.binary_attribute': {'data_type': 'Binary', 'binary_value': BASE64_BINARY},
'test.number_attribute': {'data_type': 'Number', 'string_value': 'string value'}
}
message.message_attributes = message_attributes