From 94100c116ca9bb4621ca7e8dfe9e35d8dea64aef Mon Sep 17 00:00:00 2001 From: Bert Blommers Date: Thu, 16 Mar 2023 10:56:20 -0100 Subject: [PATCH] S3: get_object_attributes() (#6075) --- IMPLEMENTATION_COVERAGE.md | 4 +- docs/docs/services/s3.rst | 6 +- moto/s3/models.py | 30 +++++- moto/s3/responses.py | 36 ++++++- tests/test_s3/test_s3_object_attributes.py | 113 +++++++++++++++++++++ 5 files changed, 182 insertions(+), 7 deletions(-) create mode 100644 tests/test_s3/test_s3_object_attributes.py diff --git a/IMPLEMENTATION_COVERAGE.md b/IMPLEMENTATION_COVERAGE.md index 51718c2fe..6cc409f69 100644 --- a/IMPLEMENTATION_COVERAGE.md +++ b/IMPLEMENTATION_COVERAGE.md @@ -5563,7 +5563,7 @@ ## s3
-62% implemented +63% implemented - [X] abort_multipart_upload - [X] complete_multipart_upload @@ -5611,7 +5611,7 @@ - [ ] get_bucket_website - [X] get_object - [X] get_object_acl -- [ ] get_object_attributes +- [X] get_object_attributes - [X] get_object_legal_hold - [X] get_object_lock_configuration - [ ] get_object_retention diff --git a/docs/docs/services/s3.rst b/docs/docs/services/s3.rst index 02522f6f7..c05e20bb1 100644 --- a/docs/docs/services/s3.rst +++ b/docs/docs/services/s3.rst @@ -73,7 +73,11 @@ s3 - [ ] get_bucket_website - [X] get_object - [X] get_object_acl -- [ ] get_object_attributes +- [X] get_object_attributes + + The following attributes are not yet returned: DeleteMarker, RequestCharged, ObjectParts + + - [X] get_object_legal_hold - [X] get_object_lock_configuration - [ ] get_object_retention diff --git a/moto/s3/models.py b/moto/s3/models.py index 9c6bd0b9f..31efe31ad 100644 --- a/moto/s3/models.py +++ b/moto/s3/models.py @@ -12,7 +12,7 @@ import sys import urllib.parse from bisect import insort -from typing import Optional +from typing import Any, Dict, List, Optional from importlib import reload from moto.core import BaseBackend, BaseModel, BackendDict, CloudFormationModel from moto.core import CloudWatchMetricProvider @@ -99,6 +99,7 @@ class FakeKey(BaseModel, ManagedState): lock_mode=None, lock_legal_status=None, lock_until=None, + checksum_value=None, ): ManagedState.__init__( self, @@ -138,6 +139,7 @@ class FakeKey(BaseModel, ManagedState): self.lock_mode = lock_mode self.lock_legal_status = lock_legal_status self.lock_until = lock_until + self.checksum_value = checksum_value # Default metadata values self._metadata["Content-Type"] = "binary/octet-stream" @@ -1775,6 +1777,7 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider): lock_mode=None, lock_legal_status=None, lock_until=None, + checksum_value=None, ): key_name = clean_key_name(key_name) if storage is not None and storage not in STORAGE_CLASS: @@ -1813,6 +1816,7 @@ class 
def get_object_attributes(
    self,
    key: "FakeKey",
    attributes_to_get: List[str],
) -> Dict[str, Any]:
    """
    Collect the requested attributes of a stored object.

    The following attributes are not yet returned: DeleteMarker, RequestCharged, ObjectParts

    :param key: the object to describe. Reads ``etag``, ``size`` and
        ``storage_class``, plus ``checksum_algorithm``/``checksum_value``
        when a checksum is requested.
    :param attributes_to_get: attribute names requested by the caller
        (``ETag``, ``Checksum``, ``ObjectSize``, ``StorageClass``).
        Surrounding whitespace and empty entries are ignored, so a list
        produced by splitting a header such as ``"ETag, ObjectSize"`` on
        commas is handled. ``None`` is treated as an empty list.
    :return: dict with the keys ``etag``, ``checksum``, ``size`` and
        ``storage_class``. A value is ``None`` when the attribute was not
        requested, or - for ``checksum`` - when the object has no stored
        checksum value.
    """
    # Normalise once: tolerate optional whitespace around list members and
    # skip the empty string that "".split(",") produces.
    requested = {
        name.strip() for name in (attributes_to_get or []) if name and name.strip()
    }

    response_keys: Dict[str, Any] = {
        "etag": None,
        "checksum": None,
        "size": None,
        "storage_class": None,
    }
    if "ETag" in requested:
        # The stored etag is wrapped in double quotes; the response carries it bare.
        response_keys["etag"] = key.etag.replace('"', "")
    if "Checksum" in requested and key.checksum_value is not None:
        response_keys["checksum"] = {key.checksum_algorithm: key.checksum_value}
    if "ObjectSize" in requested:
        response_keys["size"] = key.size
    if "StorageClass" in requested:
        response_keys["storage_class"] = key.storage_class
    return response_keys
if "attributes" in query: + attributes_to_get = headers.get("x-amz-object-attributes", "").split(",") + response_keys = self.backend.get_object_attributes(key, attributes_to_get) + + if key.version_id == "null": + response_headers.pop("x-amz-version-id") + response_headers["Last-Modified"] = key.last_modified_ISO8601 + + template = self.response_template(S3_OBJECT_ATTRIBUTES_RESPONSE) + return 200, response_headers, template.render(**response_keys) response_headers.update(key.metadata) response_headers.update(key.response_dict) @@ -1420,12 +1430,14 @@ class S3Response(BaseResponse): checksum_value = request.headers.get(checksum_header) if not checksum_value and checksum_algorithm: # Extract the checksum-value from the body first - search = re.search(rb"x-amz-checksum-\w+:(\w+={1,2})", body) + search = re.search(rb"x-amz-checksum-\w+:(.+={1,2})", body) checksum_value = search.group(1) if search else None if checksum_value: # TODO: AWS computes the provided value and verifies it's the same # Afterwards, it should be returned in every subsequent call + if isinstance(checksum_value, bytes): + checksum_value = checksum_value.decode("utf-8") response_headers.update({checksum_header: checksum_value}) elif checksum_algorithm: # If the value is not provided, we compute it and only return it as part of this request @@ -1580,6 +1592,7 @@ class S3Response(BaseResponse): lock_mode=lock_mode, lock_legal_status=legal_hold, lock_until=lock_until, + checksum_value=checksum_value, ) metadata = metadata_from_headers(request.headers) @@ -2896,3 +2909,20 @@ S3_ERROR_BUCKET_ONWERSHIP_NOT_FOUND = """ l/tqqyk7HZbfvFFpdq3+CAzA9JXUiV4ZajKYhwolOIpnmlvZrsI88AKsDLsgQI6EvZ9MuGHhk7M= """ + + +S3_OBJECT_ATTRIBUTES_RESPONSE = """ + + {% if etag is not none %}{{ etag }}{% endif %} + {% if checksum is not none %} + + {% if "CRC32" in checksum %}{{ checksum["CRC32"] }}{% endif %} + {% if "CRC32C" in checksum %}{{ checksum["CRC32C"] }}{% endif %} + {% if "SHA1" in checksum %}{{ checksum["SHA1"] }}{% 
@mock_s3
class TestS3ObjectAttributes:
    """Exercise ``get_object_attributes`` on a fresh bucket holding one small object."""

    def setup_method(self, *args) -> None:  # pylint: disable=unused-argument
        # Every test gets its own uniquely named bucket with an 8-byte object "mykey".
        self.bucket_name = str(uuid4())
        self.s3 = boto3.resource("s3", region_name="us-east-1")
        self.client = boto3.client("s3", region_name="us-east-1")
        self.bucket = self.s3.Bucket(self.bucket_name)
        self.bucket.create()

        self.key = self.bucket.put_object(Key="mykey", Body=b"somedata")

    def _attributes_of(self, key_name, attribute_names):
        # Returns (HTTP headers, response without its ResponseMetadata entry).
        response = self.client.get_object_attributes(
            Bucket=self.bucket_name, Key=key_name, ObjectAttributes=attribute_names
        )
        http_headers = response.pop("ResponseMetadata")["HTTPHeaders"]
        return http_headers, response

    def test_get_etag(self):
        # The resource reports the etag wrapped in quotes; strip them for comparison.
        expected_etag = self.key.e_tag[1:-1]
        http_headers, body = self._attributes_of("mykey", ["ETag"])

        header_names = set(http_headers.keys())
        assert "x-amz-version-id" not in header_names
        assert "last-modified" in header_names

        assert set(body.keys()) == {"ETag", "LastModified"}
        assert body["ETag"] == expected_etag

    def test_get_attributes_storageclass(self):
        _, body = self._attributes_of("mykey", ["StorageClass"])

        assert set(body.keys()) == {"StorageClass", "LastModified"}
        assert body["StorageClass"] == "STANDARD"

    def test_get_attributes_size(self):
        _, body = self._attributes_of("mykey", ["ObjectSize"])

        assert set(body.keys()) == {"ObjectSize", "LastModified"}
        assert body["ObjectSize"] == 8

    @pytest.mark.parametrize(
        "algo_val",
        [
            ("CRC32", "6Le+Qw=="),
            ("SHA1", "hvfkN/qlp/zhXR3cuerq6jd2Z7g="),
            ("SHA256", "ypeBEsobvcr6wjGzmiPcTaeG7/gUfE5yuYB3ha/uSLs="),
        ],
    )
    def test_get_attributes_checksum(self, algo_val):
        algo, value = algo_val
        self.client.put_object(
            Bucket=self.bucket_name, Key="cs", Body="a", ChecksumAlgorithm=algo
        )

        # "mykey" was uploaded without a checksum, so none is reported for it.
        _, plain_body = self._attributes_of("mykey", ["Checksum"])
        assert set(plain_body.keys()) == {"LastModified"}

        # "cs" was uploaded with the parametrized algorithm and reports its checksum.
        _, checksum_body = self._attributes_of("cs", ["Checksum"])
        assert set(checksum_body.keys()) == {"Checksum", "LastModified"}
        assert checksum_body["Checksum"] == {f"Checksum{algo}": value}

    def test_get_attributes_multiple(self):
        http_headers, body = self._attributes_of(
            "mykey", ["ObjectSize", "StorageClass"]
        )

        assert "x-amz-version-id" not in set(http_headers.keys())

        assert set(body.keys()) == {"ObjectSize", "LastModified", "StorageClass"}
        assert body["ObjectSize"] == 8
        assert body["StorageClass"] == "STANDARD"

    def test_get_versioned_object(self):
        # Once versioning is enabled, a second upload of the same key gets a real version id.
        self.bucket.Versioning().enable()
        newer_key = self.bucket.put_object(Key="mykey", Body=b"moredata")

        http_headers, body = self._attributes_of("mykey", ["ETag"])

        assert "x-amz-version-id" in set(http_headers.keys())
        assert http_headers["x-amz-version-id"] == newer_key.version_id

        assert set(body.keys()) == {"ETag", "LastModified", "VersionId"}
        assert body["VersionId"] == newer_key.version_id