S3: get_object_attributes() (#6075)

This commit is contained in:
Bert Blommers 2023-03-16 10:56:20 -01:00 committed by GitHub
parent 30a3df58f7
commit 94100c116c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 182 additions and 7 deletions

View File

@ -5563,7 +5563,7 @@
## s3
<details>
<summary>62% implemented</summary>
<summary>63% implemented</summary>
- [X] abort_multipart_upload
- [X] complete_multipart_upload
@ -5611,7 +5611,7 @@
- [ ] get_bucket_website
- [X] get_object
- [X] get_object_acl
- [ ] get_object_attributes
- [X] get_object_attributes
- [X] get_object_legal_hold
- [X] get_object_lock_configuration
- [ ] get_object_retention

View File

@ -73,7 +73,11 @@ s3
- [ ] get_bucket_website
- [X] get_object
- [X] get_object_acl
- [ ] get_object_attributes
- [X] get_object_attributes
The following attributes are not yet returned: DeleteMarker, RequestCharged, ObjectParts
- [X] get_object_legal_hold
- [X] get_object_lock_configuration
- [ ] get_object_retention

View File

@ -12,7 +12,7 @@ import sys
import urllib.parse
from bisect import insort
from typing import Optional
from typing import Any, Dict, List, Optional
from importlib import reload
from moto.core import BaseBackend, BaseModel, BackendDict, CloudFormationModel
from moto.core import CloudWatchMetricProvider
@ -99,6 +99,7 @@ class FakeKey(BaseModel, ManagedState):
lock_mode=None,
lock_legal_status=None,
lock_until=None,
checksum_value=None,
):
ManagedState.__init__(
self,
@ -138,6 +139,7 @@ class FakeKey(BaseModel, ManagedState):
self.lock_mode = lock_mode
self.lock_legal_status = lock_legal_status
self.lock_until = lock_until
self.checksum_value = checksum_value
# Default metadata values
self._metadata["Content-Type"] = "binary/octet-stream"
@ -1775,6 +1777,7 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider):
lock_mode=None,
lock_legal_status=None,
lock_until=None,
checksum_value=None,
):
key_name = clean_key_name(key_name)
if storage is not None and storage not in STORAGE_CLASS:
@ -1813,6 +1816,7 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider):
lock_mode=lock_mode,
lock_legal_status=lock_legal_status,
lock_until=lock_until,
checksum_value=checksum_value,
)
existing_keys = bucket.keys.getlist(key_name, [])
@ -1847,6 +1851,30 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider):
key.lock_mode = retention[0]
key.lock_until = retention[1]
def get_object_attributes(
    self,
    key: "FakeKey",
    attributes_to_get: List[str],
) -> Dict[str, Any]:
    """
    The following attributes are not yet returned: DeleteMarker, RequestCharged, ObjectParts

    :param key: the stored object whose attributes are read
    :param attributes_to_get: requested attribute names (ETag, Checksum,
        ObjectSize, StorageClass); surrounding whitespace is ignored and
        unknown or empty names are skipped
    :return: dict with the keys etag, checksum, size and storage_class;
        a value stays None when the attribute was not requested (or, for
        checksum, when the object has no stored checksum value)
    """
    # The names come from splitting a comma-separated header value, so an
    # entry may arrive as " ObjectSize" when the client put a space after
    # the comma. Normalise once so the membership checks below still match.
    requested = {attribute.strip() for attribute in attributes_to_get}

    response_keys: Dict[str, Any] = {
        "etag": None,
        "checksum": None,
        "size": None,
        "storage_class": None,
    }
    if "ETag" in requested:
        # The stored etag is wrapped in double quotes; this response omits them
        response_keys["etag"] = key.etag.replace('"', "")
    if "Checksum" in requested and key.checksum_value is not None:
        # Keyed by algorithm name so the template can pick the matching element
        response_keys["checksum"] = {key.checksum_algorithm: key.checksum_value}
    if "ObjectSize" in requested:
        response_keys["size"] = key.size
    if "StorageClass" in requested:
        response_keys["storage_class"] = key.storage_class
    return response_keys
def get_object(
self,
bucket_name,

View File

@ -52,7 +52,7 @@ from .exceptions import (
LockNotEnabled,
AccessForbidden,
)
from .models import s3_backends
from .models import s3_backends, S3Backend
from .models import get_canned_acl, FakeGrantee, FakeGrant, FakeAcl, FakeKey
from .utils import (
bucket_name_from_url,
@ -154,7 +154,7 @@ class S3Response(BaseResponse):
super().__init__(service_name="s3")
@property
def backend(self):
def backend(self) -> S3Backend:
return s3_backends[self.current_account]["global"]
@property
@ -1349,6 +1349,16 @@ class S3Response(BaseResponse):
legal_hold = self.backend.get_object_legal_hold(key)
template = self.response_template(S3_OBJECT_LEGAL_HOLD)
return 200, response_headers, template.render(legal_hold=legal_hold)
if "attributes" in query:
attributes_to_get = headers.get("x-amz-object-attributes", "").split(",")
response_keys = self.backend.get_object_attributes(key, attributes_to_get)
if key.version_id == "null":
response_headers.pop("x-amz-version-id")
response_headers["Last-Modified"] = key.last_modified_ISO8601
template = self.response_template(S3_OBJECT_ATTRIBUTES_RESPONSE)
return 200, response_headers, template.render(**response_keys)
response_headers.update(key.metadata)
response_headers.update(key.response_dict)
@ -1420,12 +1430,14 @@ class S3Response(BaseResponse):
checksum_value = request.headers.get(checksum_header)
if not checksum_value and checksum_algorithm:
# Extract the checksum-value from the body first
search = re.search(rb"x-amz-checksum-\w+:(\w+={1,2})", body)
search = re.search(rb"x-amz-checksum-\w+:(.+={1,2})", body)
checksum_value = search.group(1) if search else None
if checksum_value:
# TODO: AWS computes the provided value and verifies it's the same
# Afterwards, it should be returned in every subsequent call
if isinstance(checksum_value, bytes):
checksum_value = checksum_value.decode("utf-8")
response_headers.update({checksum_header: checksum_value})
elif checksum_algorithm:
# If the value is not provided, we compute it and only return it as part of this request
@ -1580,6 +1592,7 @@ class S3Response(BaseResponse):
lock_mode=lock_mode,
lock_legal_status=legal_hold,
lock_until=lock_until,
checksum_value=checksum_value,
)
metadata = metadata_from_headers(request.headers)
@ -2896,3 +2909,20 @@ S3_ERROR_BUCKET_ONWERSHIP_NOT_FOUND = """
<HostId>l/tqqyk7HZbfvFFpdq3+CAzA9JXUiV4ZajKYhwolOIpnmlvZrsI88AKsDLsgQI6EvZ9MuGHhk7M=</HostId>
</Error>
"""
# Jinja template for the GetObjectAttributesOutput XML body.
# It is rendered with the variables `etag`, `checksum`, `size` and `storage_class`;
# each element is emitted only when its variable is not none. `checksum` is a
# mapping keyed by algorithm name (CRC32, CRC32C, SHA1, SHA256), and only the
# algorithms present in it get a child element inside <Checksum>.
S3_OBJECT_ATTRIBUTES_RESPONSE = """<?xml version="1.0" encoding="UTF-8"?>
<GetObjectAttributesOutput xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
{% if etag is not none %}<ETag>{{ etag }}</ETag>{% endif %}
{% if checksum is not none %}
<Checksum>
{% if "CRC32" in checksum %}<ChecksumCRC32>{{ checksum["CRC32"] }}</ChecksumCRC32>{% endif %}
{% if "CRC32C" in checksum %}<ChecksumCRC32C>{{ checksum["CRC32C"] }}</ChecksumCRC32C>{% endif %}
{% if "SHA1" in checksum %}<ChecksumSHA1>{{ checksum["SHA1"] }}</ChecksumSHA1>{% endif %}
{% if "SHA256" in checksum %}<ChecksumSHA256>{{ checksum["SHA256"] }}</ChecksumSHA256>{% endif %}
</Checksum>
{% endif %}
{% if size is not none %}<ObjectSize>{{ size }}</ObjectSize>{% endif %}
{% if storage_class is not none %}<StorageClass>{{ storage_class }}</StorageClass>{% endif %}
</GetObjectAttributesOutput>
"""

View File

@ -0,0 +1,113 @@
import boto3
import pytest
from moto import mock_s3
from uuid import uuid4
@mock_s3
class TestS3ObjectAttributes:
    """Tests for S3 ``get_object_attributes`` against a mocked bucket."""

    def setup_method(self, *args) -> None:  # pylint: disable=unused-argument
        # Every test gets its own uniquely named bucket holding one 8-byte object
        self.bucket_name = str(uuid4())
        self.s3 = boto3.resource("s3", region_name="us-east-1")
        self.client = boto3.client("s3", region_name="us-east-1")
        self.bucket = self.s3.Bucket(self.bucket_name)
        self.bucket.create()
        self.key = self.bucket.put_object(Key="mykey", Body=b"somedata")

    def _fetch(self, key_name, attributes):
        """Call get_object_attributes and return ``(http_headers, body)``.

        ``body`` is the response with ``ResponseMetadata`` removed.
        """
        response = self.client.get_object_attributes(
            Bucket=self.bucket_name, Key=key_name, ObjectAttributes=attributes
        )
        metadata = response.pop("ResponseMetadata")
        return metadata["HTTPHeaders"], response

    def test_get_etag(self):
        # The resource exposes the etag wrapped in quotes; strip them to compare
        expected_etag = self.key.e_tag[1:-1]

        headers, body = self._fetch("mykey", ["ETag"])

        assert "x-amz-version-id" not in headers
        assert "last-modified" in headers
        assert set(body) == {"ETag", "LastModified"}
        assert body["ETag"] == expected_etag

    def test_get_attributes_storageclass(self):
        _, body = self._fetch("mykey", ["StorageClass"])

        assert set(body) == {"StorageClass", "LastModified"}
        assert body["StorageClass"] == "STANDARD"

    def test_get_attributes_size(self):
        _, body = self._fetch("mykey", ["ObjectSize"])

        assert set(body) == {"ObjectSize", "LastModified"}
        assert body["ObjectSize"] == 8

    @pytest.mark.parametrize(
        "algo_val",
        [
            ("CRC32", "6Le+Qw=="),
            ("SHA1", "hvfkN/qlp/zhXR3cuerq6jd2Z7g="),
            ("SHA256", "ypeBEsobvcr6wjGzmiPcTaeG7/gUfE5yuYB3ha/uSLs="),
        ],
    )
    def test_get_attributes_checksum(self, algo_val):
        algorithm, expected_checksum = algo_val
        self.client.put_object(
            Bucket=self.bucket_name, Key="cs", Body="a", ChecksumAlgorithm=algorithm
        )

        # The object from setup was stored without a checksum, so none is returned
        _, without_checksum = self._fetch("mykey", ["Checksum"])
        assert set(without_checksum) == {"LastModified"}

        # The object stored with the parametrized algorithm reports its checksum
        _, with_checksum = self._fetch("cs", ["Checksum"])
        assert set(with_checksum) == {"Checksum", "LastModified"}
        assert with_checksum["Checksum"] == {f"Checksum{algorithm}": expected_checksum}

    def test_get_attributes_multiple(self):
        headers, body = self._fetch("mykey", ["ObjectSize", "StorageClass"])

        assert "x-amz-version-id" not in headers
        assert set(body) == {"ObjectSize", "LastModified", "StorageClass"}
        assert body["ObjectSize"] == 8
        assert body["StorageClass"] == "STANDARD"

    def test_get_versioned_object(self):
        self.bucket.Versioning().enable()
        newest = self.bucket.put_object(Key="mykey", Body=b"moredata")

        headers, body = self._fetch("mykey", ["ETag"])

        # With versioning on, the latest version id shows up in header and body
        assert "x-amz-version-id" in headers
        assert headers["x-amz-version-id"] == newest.version_id
        assert set(body) == {"ETag", "LastModified", "VersionId"}
        assert body["VersionId"] == newest.version_id