From 760fa8fdc0a6812a92e426e77ebef3bd3a27283c Mon Sep 17 00:00:00 2001 From: Ben Simon Hartung <42031100+bentsku@users.noreply.github.com> Date: Fri, 12 May 2023 15:00:00 +0300 Subject: [PATCH] S3: fix checksum calculation (#6309) * fix S3 compute checksums * format * fix test * add copying of checksum between objects * fix unit tests for checksum --- moto/s3/models.py | 4 + moto/s3/utils.py | 6 +- tests/test_s3/test_s3_copyobject.py | 111 +++++++++++++++++++++++++++- tests/test_s3/test_s3_utils.py | 10 +-- 4 files changed, 121 insertions(+), 10 deletions(-) diff --git a/moto/s3/models.py b/moto/s3/models.py index 75ad72fac..c7f4e9527 100644 --- a/moto/s3/models.py +++ b/moto/s3/models.py @@ -2418,6 +2418,10 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider): # Object copied from Glacier object should not have expiry new_key.set_expiry(None) + if src_key.checksum_value: + new_key.checksum_value = src_key.checksum_value + new_key.checksum_algorithm = src_key.checksum_algorithm + # Send notifications that an object was copied notifications.send_event( self.account_id, notifications.S3_OBJECT_CREATE_COPY, bucket, new_key diff --git a/moto/s3/utils.py b/moto/s3/utils.py index 855ce4a4f..fafabdcd1 100644 --- a/moto/s3/utils.py +++ b/moto/s3/utils.py @@ -200,7 +200,7 @@ def compute_checksum(body: bytes, algorithm: str) -> bytes: if algorithm == "SHA1": hashed_body = _hash(hashlib.sha1, (body,)) elif algorithm == "CRC32" or algorithm == "CRC32C": - hashed_body = f"{binascii.crc32(body)}".encode("utf-8") + hashed_body = binascii.crc32(body).to_bytes(4, "big") else: hashed_body = _hash(hashlib.sha256, (body,)) return base64.b64encode(hashed_body) @@ -208,10 +208,10 @@ def compute_checksum(body: bytes, algorithm: str) -> bytes: def _hash(fn: Any, args: Any) -> bytes: try: - return fn(*args, usedforsecurity=False).hexdigest().encode("utf-8") + return fn(*args, usedforsecurity=False).digest() except TypeError: # The usedforsecurity-parameter is only available as of Python 3.9 - return fn(*args).hexdigest().encode("utf-8") + return fn(*args).digest() def cors_matches_origin(origin_header: str, allowed_origins: List[str]) -> bool: diff --git a/tests/test_s3/test_s3_copyobject.py b/tests/test_s3/test_s3_copyobject.py index 5a54a0537..701e8df58 100644 --- a/tests/test_s3/test_s3_copyobject.py +++ b/tests/test_s3/test_s3_copyobject.py @@ -46,7 +46,7 @@ def test_copy_key_boto3_with_sha256_checksum(): key_name = "key" new_key = "new_key" bucket = "foobar" - expected_hash = "YWIzZDA3ZjMxNjljY2JkMGVkNmM0YjQ1ZGUyMTUxOWY5ZjkzOGM3MmQyNDEyNDk5OGFhYjk0OWNlODNiYjUxYg==" + expected_hash = "qz0H8xacy9DtbEtF3iFRn5+TjHLSQSSZiquUnOg7tRs=" s3.create_bucket(Bucket=bucket) key = s3.Object("foobar", key_name) @@ -730,3 +730,112 @@ def test_copy_object_in_place_with_bucket_encryption(): Key=key, ) assert response["ServerSideEncryption"] == "AES256" + + +@mock_s3 +@pytest.mark.parametrize( + "algorithm", + ["CRC32", "SHA1", "SHA256"], +) +def test_copy_key_boto3_with_both_sha256_checksum(algorithm): + # This test will validate that moto S3 checksum calculations are correct + # We first create an object with a Checksum calculated by boto, by specifying ChecksumAlgorithm="SHA256" + # we then retrieve the right checksum from this request + # we copy the object while requesting moto to recalculate the checksum for that key + # we verify that both checksums are equal + client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + source_key = "source-key" + dest_key = "dest-key" + bucket = "foobar" + body = b"checksum-test" + client.create_bucket(Bucket=bucket) + + checksum_key = f"Checksum{algorithm}" + + resp = client.put_object( + Bucket=bucket, + Key=source_key, + Body=body, + ChecksumAlgorithm=algorithm, + ) + assert checksum_key in resp + checksum_by_boto = resp[checksum_key] + + resp = client.copy_object( + Bucket=bucket, + CopySource=f"{bucket}/{source_key}", + Key=dest_key, + ChecksumAlgorithm=algorithm, + ) + + assert checksum_key in resp["CopyObjectResult"] + assert resp["CopyObjectResult"][checksum_key] == checksum_by_boto + + +@mock_s3 +@pytest.mark.parametrize( + "algorithm, checksum", + [ + ("CRC32", "lVk/nw=="), + ("SHA1", "jbXkHAsXUrubtL3dqDQ4w+7WXc0="), + ("SHA256", "1YQo81vx2VFUl0q5ccWISq8AkSBQQ0WO80S82TmfdIQ="), + ], +) +def test_copy_object_calculates_checksum(algorithm, checksum): + client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + source_key = "source-key" + dest_key = "dest-key" + bucket = "foobar" + body = b"test-checksum" + client.create_bucket(Bucket=bucket) + + checksum_key = f"Checksum{algorithm}" + + resp = client.put_object( + Bucket=bucket, + Key=source_key, + Body=body, + ) + assert checksum_key not in resp + + resp = client.copy_object( + Bucket=bucket, + CopySource=f"{bucket}/{source_key}", + Key=dest_key, + ChecksumAlgorithm=algorithm, + ) + + assert checksum_key in resp["CopyObjectResult"] + assert resp["CopyObjectResult"][checksum_key] == checksum + + +@mock_s3 +def test_copy_object_keeps_checksum(): + client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + source_key = "source-key" + dest_key = "dest-key" + bucket = "foobar" + body = b"test-checksum" + expected_checksum = "1YQo81vx2VFUl0q5ccWISq8AkSBQQ0WO80S82TmfdIQ=" + client.create_bucket(Bucket=bucket) + + # put an object with a checksum + resp = client.put_object( + Bucket=bucket, + Key=source_key, + Body=body, + ChecksumAlgorithm="SHA256", + ) + assert "ChecksumSHA256" in resp + assert resp["ChecksumSHA256"] == expected_checksum + + # do not specify the checksum + resp = client.copy_object( + Bucket=bucket, + CopySource=f"{bucket}/{source_key}", + Key=dest_key, + ) + + # assert that it kept the checksum from the source key + assert "ChecksumSHA256" in resp["CopyObjectResult"] + assert resp["CopyObjectResult"]["ChecksumSHA256"] == expected_checksum diff --git a/tests/test_s3/test_s3_utils.py b/tests/test_s3/test_s3_utils.py index a5bdd7344..5edd5a24b 100644 --- a/tests/test_s3/test_s3_utils.py +++ b/tests/test_s3/test_s3_utils.py @@ -124,24 +124,22 @@ def test_undo_clean_key_name(key, expected): def test_checksum_sha256(): - checksum = b"ODdkMTQ5Y2I0MjRjMDM4NzY1NmYyMTFkMjU4OWZiNWIxZTE2MjI5OTIxMzA5ZTk4NTg4NDE5Y2NjYThhNzM2Mg==" + checksum = b"h9FJy0JMA4dlbyEdJYn7Wx4WIpkhMJ6YWIQZzMqKc2I=" compute_checksum(b"somedata", "SHA256").should.equal(checksum) # Unknown algorithms fallback to SHA256 for now compute_checksum(b"somedata", algorithm="unknown").should.equal(checksum) def test_checksum_sha1(): - compute_checksum(b"somedata", "SHA1").should.equal( - b"ZWZhYTMxMWFlNDQ4YTczNzRjMTIyMDYxYmZlZDk1MmQ5NDBlOWUzNw==" - ) + compute_checksum(b"somedata", "SHA1").should.equal(b"76oxGuRIpzdMEiBhv+2VLZQOnjc=") def test_checksum_crc32(): - compute_checksum(b"somedata", "CRC32").should.equal(b"MTM5MzM0Mzk1Mg==") + compute_checksum(b"somedata", "CRC32").should.equal(b"Uwy90A==") def test_checksum_crc32c(): - compute_checksum(b"somedata", "CRC32C").should.equal(b"MTM5MzM0Mzk1Mg==") + compute_checksum(b"somedata", "CRC32C").should.equal(b"Uwy90A==") def test_cors_utils():