update CopyObject logic (#6264)

* S3 update CopyObject logic

* add CopyObject tests

* re-add missing type annotation

* add ACL test

* lint/format

* fix typing

* fix test

* fix test test_copy_object_does_not_copy_storage_class
Ben Simon Hartung, 2023-05-01 12:14:31 +03:00, committed by GitHub
parent 7d6afe4b67
commit 2c0636ae67
3 changed files with 328 additions and 35 deletions

moto/s3/models.py

@@ -119,7 +119,7 @@ class FakeKey(BaseModel, ManagedState):
         self.account_id = account_id
         self.last_modified = datetime.datetime.utcnow()
         self.acl: Optional[FakeAcl] = get_canned_acl("private")
-        self.website_redirect_location = None
+        self.website_redirect_location: Optional[str] = None
         self.checksum_algorithm = None
         self._storage_class: Optional[str] = storage if storage else "STANDARD"
         self._metadata = LowercaseDict()
@@ -2359,21 +2359,30 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider):
         dest_bucket_name: str,
         dest_key_name: str,
         storage: Optional[str] = None,
-        acl: Optional[FakeAcl] = None,
         encryption: Optional[str] = None,
         kms_key_id: Optional[str] = None,
-        bucket_key_enabled: bool = False,
+        bucket_key_enabled: Any = None,
         mdirective: Optional[str] = None,
+        metadata: Optional[Any] = None,
+        website_redirect_location: Optional[str] = None,
+        lock_mode: Optional[str] = None,
+        lock_legal_status: Optional[str] = None,
+        lock_until: Optional[str] = None,
     ) -> None:
-        if (
-            src_key.name == dest_key_name
-            and src_key.bucket_name == dest_bucket_name
-            and storage == src_key.storage_class
-            and acl == src_key.acl
-            and encryption == src_key.encryption
-            and kms_key_id == src_key.kms_key_id
-            and bucket_key_enabled == (src_key.bucket_key_enabled or False)
-            and mdirective != "REPLACE"
-        ):
-            raise CopyObjectMustChangeSomething
+        if src_key.name == dest_key_name and src_key.bucket_name == dest_bucket_name:
+            if src_key.encryption and src_key.encryption != "AES256" and not encryption:
+                # this is a special case: S3 now defaults to AES256 when encryption
+                # is not provided. If the source key had encryption and we did not
+                # specify it for the destination, S3 accepts a copy in place even
+                # without any other changed attributes
+                encryption = "AES256"
+
+            if not any(
+                (
+                    storage,
+                    encryption,
+                    mdirective == "REPLACE",
+                    website_redirect_location,
+                )
+            ):
+                raise CopyObjectMustChangeSomething
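The new check reads: a copy onto the same bucket and key must change at least one of storage class, encryption, metadata directive, or website redirect location. A minimal standalone sketch of that rule, runnable on its own (the function name is made up for illustration; only the parameter names and the REPLACE comparison come from the hunk above):

def copy_in_place_changes_something(storage, encryption, mdirective, website_redirect_location):
    # Mirrors the any(...) guard in copy_object: at least one attribute must change.
    return any((storage, encryption, mdirective == "REPLACE", website_redirect_location))

# A plain self-copy changes nothing and would raise CopyObjectMustChangeSomething:
assert not copy_in_place_changes_something(None, None, None, None)
# Restating the storage class counts as a change, matching
# test_copy_object_in_place_with_storage_class below:
assert copy_in_place_changes_something("STANDARD", None, None, None)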
@@ -2381,20 +2390,24 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider):
             bucket_name=dest_bucket_name,
             key_name=dest_key_name,
             value=src_key.value,
-            storage=storage or src_key.storage_class,
+            storage=storage,
             multipart=src_key.multipart,
-            encryption=encryption or src_key.encryption,
-            kms_key_id=kms_key_id or src_key.kms_key_id,
-            bucket_key_enabled=bucket_key_enabled or src_key.bucket_key_enabled,
-            lock_mode=src_key.lock_mode,
-            lock_legal_status=src_key.lock_legal_status,
-            lock_until=src_key.lock_until,
+            encryption=encryption,
+            kms_key_id=kms_key_id,  # TODO: use aws managed key if not provided
+            bucket_key_enabled=bucket_key_enabled,
+            lock_mode=lock_mode,
+            lock_legal_status=lock_legal_status,
+            lock_until=lock_until,
         )
         self.tagger.copy_tags(src_key.arn, new_key.arn)
-        new_key.set_metadata(src_key.metadata)
+        if mdirective != "REPLACE":
+            new_key.set_metadata(src_key.metadata)
+        else:
+            new_key.set_metadata(metadata)
+
+        if website_redirect_location:
+            new_key.website_redirect_location = website_redirect_location
 
-        if acl is not None:
-            new_key.set_acl(acl)
         if src_key.storage_class in ARCHIVE_STORAGE_CLASSES:
             # Object copied from Glacier object should not have expiry
             new_key.set_expiry(None)
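The net effect of these two models.py hunks: the destination key now receives only what the caller explicitly passes, instead of silently inheriting storage class, encryption, bucket-key setting, and lock attributes from the source. A hedged sketch of the new call contract; `backend` and `src` are hypothetical stand-ins for an S3Backend instance and an existing FakeKey (setup omitted), not code from the commit:

# In-place copy (same bucket and key): legal only because of mdirective="REPLACE".
backend.copy_object(
    src,
    dest_bucket_name=src.bucket_name,
    dest_key_name=src.name,
    mdirective="REPLACE",  # satisfies the copy-in-place check on its own
    metadata={},           # applied because mdirective == "REPLACE"
)
# storage, encryption, and the lock_* arguments default to None, so the copy
# comes out with backend defaults rather than the attributes of `src`.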

moto/s3/responses.py

@ -1586,37 +1586,43 @@ class S3Response(BaseResponse):
):
raise ObjectNotInActiveTierError(key_to_copy)
bucket_key_enabled = (
request.headers.get(
"x-amz-server-side-encryption-bucket-key-enabled", ""
).lower()
== "true"
website_redirect_location = request.headers.get(
"x-amz-website-redirect-location"
)
mdirective = request.headers.get("x-amz-metadata-directive")
metadata = metadata_from_headers(request.headers)
self.backend.copy_object(
key_to_copy,
bucket_name,
key_name,
storage=storage_class,
acl=acl,
storage=request.headers.get("x-amz-storage-class"),
kms_key_id=kms_key_id,
encryption=encryption,
bucket_key_enabled=bucket_key_enabled,
mdirective=mdirective,
metadata=metadata,
website_redirect_location=website_redirect_location,
lock_mode=lock_mode,
lock_legal_status=legal_hold,
lock_until=lock_until,
)
else:
raise MissingKey(key=src_key)
new_key: FakeKey = self.backend.get_object(bucket_name, key_name) # type: ignore
if mdirective is not None and mdirective == "REPLACE":
metadata = metadata_from_headers(request.headers)
new_key.set_metadata(metadata, replace=True)
if acl is not None:
new_key.set_acl(acl)
tdirective = request.headers.get("x-amz-tagging-directive")
if tdirective == "REPLACE":
tagging = self._tagging_from_headers(request.headers)
self.backend.set_key_tags(new_key, tagging)
if key_to_copy.version_id != "null":
response_headers[
"x-amz-copy-source-version-id"
] = key_to_copy.version_id
# checksum stuff, do we need to compute hash of the copied object
checksum_algorithm = request.headers.get("x-amz-checksum-algorithm")
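One detail worth spelling out: the response layer previously coerced the bucket-key header to a bool with False as its default, so "header absent" was indistinguishable from an explicit "false"; that is why the backend signature above changed to bucket_key_enabled: Any = None. A sketch of tri-state parsing along those lines (the helper name and the bare dict standing in for request.headers are illustrative, not code from the commit):

def parse_bucket_key_enabled(headers):
    raw = headers.get("x-amz-server-side-encryption-bucket-key-enabled")
    if raw is None:
        return None  # header absent: do not carry the setting into the copy
    return str(raw).lower() == "true"

assert parse_bucket_key_enabled({}) is None
assert parse_bucket_key_enabled(
    {"x-amz-server-side-encryption-bucket-key-enabled": "True"}
) is True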

tests/test_s3/test_s3_copyobject.py

@@ -1,3 +1,5 @@
+import datetime
+
 import boto3
 from botocore.client import ClientError
@@ -408,3 +410,275 @@ def test_copy_object_with_kms_encryption():
     result = client.head_object(Bucket="blah", Key="test2")
     assert result["SSEKMSKeyId"] == kms_key
     assert result["ServerSideEncryption"] == "aws:kms"
+
+
+@mock_s3
+@mock_kms
+def test_copy_object_in_place_with_encryption():
+    kms_client = boto3.client("kms", region_name=DEFAULT_REGION_NAME)
+    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
+    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    kms_key = kms_client.create_key()["KeyMetadata"]["KeyId"]
+    bucket = s3.Bucket("test_bucket")
+    bucket.create()
+    key = "source-key"
+
+    resp = client.put_object(
+        Bucket="test_bucket",
+        Key=key,
+        Body=b"somedata",
+        ServerSideEncryption="aws:kms",
+        BucketKeyEnabled=True,
+        SSEKMSKeyId=kms_key,
+    )
+    assert resp["BucketKeyEnabled"] is True
+
+    # assert that you can copy in place with the same encryption settings
+    client.copy_object(
+        Bucket="test_bucket",
+        CopySource=f"test_bucket/{key}",
+        Key=key,
+        ServerSideEncryption="aws:kms",
+        BucketKeyEnabled=True,
+        SSEKMSKeyId=kms_key,
+    )
+
+    # assert that the BucketKeyEnabled setting is not kept in the destination key
+    resp = client.copy_object(
+        Bucket="test_bucket",
+        CopySource=f"test_bucket/{key}",
+        Key=key,
+        ServerSideEncryption="aws:kms",
+        SSEKMSKeyId=kms_key,
+    )
+    assert "BucketKeyEnabled" not in resp
+
+    # edge case: if the source object's SSE was not AES256, AWS allows you to omit
+    # the encryption fields entirely, since the AES256 default already differs
+    # from the source key
+    resp = client.copy_object(
+        Bucket="test_bucket",
+        CopySource=f"test_bucket/{key}",
+        Key=key,
+    )
+    assert resp["ServerSideEncryption"] == "AES256"
+
+    # check that copying in place with the same ServerSideEncryption setting
+    # as the source is also allowed
+    resp = client.copy_object(
+        Bucket="test_bucket",
+        CopySource=f"test_bucket/{key}",
+        Key=key,
+        ServerSideEncryption="AES256",
+    )
+    assert resp["ServerSideEncryption"] == "AES256"
+
+
+@mock_s3
+def test_copy_object_in_place_with_storage_class():
+    # this test validates that setting StorageClass (even the same as the source)
+    # allows a copy in place
+    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
+    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    bucket_name = "test-bucket"
+    bucket = s3.Bucket(bucket_name)
+    bucket.create()
+    key = "source-key"
+    bucket.put_object(Key=key, Body=b"somedata", StorageClass="STANDARD")
+    client.copy_object(
+        Bucket=bucket_name,
+        CopySource=f"{bucket_name}/{key}",
+        Key=key,
+        StorageClass="STANDARD",
+    )
+    # verify that the copy worked
+    resp = client.get_object_attributes(
+        Bucket=bucket_name, Key=key, ObjectAttributes=["StorageClass"]
+    )
+    assert resp["StorageClass"] == "STANDARD"
+
+
+@mock_s3
+def test_copy_object_does_not_copy_storage_class():
+    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
+    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    bucket = s3.Bucket("test_bucket")
+    bucket.create()
+    source_key = "source-key"
+    dest_key = "dest-key"
+    bucket.put_object(Key=source_key, Body=b"somedata", StorageClass="STANDARD_IA")
+    client.copy_object(
+        Bucket="test_bucket",
+        CopySource=f"test_bucket/{source_key}",
+        Key=dest_key,
+    )
+    # Verify that the destination key does not have STANDARD_IA as StorageClass
+    keys = dict([(k.key, k) for k in bucket.objects.all()])
+    keys[source_key].storage_class.should.equal("STANDARD_IA")
+    keys[dest_key].storage_class.should.equal("STANDARD")
+
+
+@mock_s3
+def test_copy_object_does_not_copy_acl():
+    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
+    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    bucket_name = "testbucket"
+    bucket = s3.Bucket(bucket_name)
+    bucket.create()
+    source_key = "source-key"
+    dest_key = "dest-key"
+    control_key = "control-key"
+    # do not set an ACL for the control key, so it gets the default ACL
+    bucket.put_object(Key=control_key, Body=b"somedata")
+    # set an ACL for the source key to check whether it gets copied
+    bucket.put_object(Key=source_key, Body=b"somedata", ACL="public-read")
+    # copy the object without specifying an ACL, so it should get the default ACL
+    client.copy_object(
+        Bucket=bucket_name,
+        CopySource=f"{bucket_name}/{source_key}",
+        Key=dest_key,
+    )
+
+    # get the ACL from all three keys
+    source_acl = client.get_object_acl(Bucket=bucket_name, Key=source_key)
+    dest_acl = client.get_object_acl(Bucket=bucket_name, Key=dest_key)
+    default_acl = client.get_object_acl(Bucket=bucket_name, Key=control_key)
+
+    # assert that the source key ACL differs from the destination key ACL
+    assert source_acl["Grants"] != dest_acl["Grants"]
+    # assert that the copied key got the default ACL, like the control key
+    assert default_acl["Grants"] == dest_acl["Grants"]
+
+
+@mock_s3
+def test_copy_object_in_place_with_metadata():
+    s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
+    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    bucket_name = "testbucket"
+    bucket = s3.Bucket(bucket_name)
+    bucket.create()
+    key_name = "source-key"
+    bucket.put_object(Key=key_name, Body=b"somedata")
+
+    # test that providing metadata alone is not enough; MetadataDirective=REPLACE
+    # must be provided on top
+    with pytest.raises(ClientError) as e:
+        client.copy_object(
+            Bucket=bucket_name,
+            CopySource=f"{bucket_name}/{key_name}",
+            Key=key_name,
+            Metadata={"key": "value"},
+        )
+    e.value.response["Error"]["Message"].should.equal(
+        "This copy request is illegal because it is trying to copy an object to itself without changing the object's metadata, storage class, website redirect location or encryption attributes."
+    )
+
+    # providing only MetadataDirective=REPLACE is enough; the object is copied
+    # in place without any metadata
+    client.copy_object(
+        Bucket=bucket_name,
+        CopySource=f"{bucket_name}/{key_name}",
+        Key=key_name,
+        MetadataDirective="REPLACE",
+    )
+    result = client.head_object(Bucket=bucket_name, Key=key_name)
+    assert result["Metadata"] == {}
+
+
+@mock_s3
+def test_copy_object_legal_hold():
+    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    bucket_name = "testbucket"
+    source_key = "source-key"
+    dest_key = "dest-key"
+    client.create_bucket(Bucket=bucket_name, ObjectLockEnabledForBucket=True)
+    client.put_object(
+        Bucket=bucket_name,
+        Key=source_key,
+        Body=b"somedata",
+        ObjectLockLegalHoldStatus="ON",
+    )
+    head_object = client.head_object(Bucket=bucket_name, Key=source_key)
+    assert head_object["ObjectLockLegalHoldStatus"] == "ON"
+    assert "VersionId" in head_object
+    version_id = head_object["VersionId"]
+
+    resp = client.copy_object(
+        Bucket=bucket_name,
+        CopySource=f"{bucket_name}/{source_key}",
+        Key=dest_key,
+    )
+    assert resp["CopySourceVersionId"] == version_id
+    assert resp["VersionId"] != version_id
+
+    # the destination key did not keep the legal hold from the source key
+    head_object = client.head_object(Bucket=bucket_name, Key=dest_key)
+    assert "ObjectLockLegalHoldStatus" not in head_object
+
+
+@mock_s3
+def test_s3_copy_object_lock():
+    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    bucket_name = "testbucket"
+    source_key = "source-key"
+    dest_key = "dest-key"
+    client.create_bucket(Bucket=bucket_name, ObjectLockEnabledForBucket=True)
+
+    # trim the datetime object a little for an easier comparison
+    retain_until = datetime.datetime.now(tz=datetime.timezone.utc) + datetime.timedelta(
+        minutes=1
+    )
+    retain_until = retain_until.replace(microsecond=0)
+
+    client.put_object(
+        Bucket=bucket_name,
+        Key=source_key,
+        Body="test",
+        ObjectLockMode="GOVERNANCE",
+        ObjectLockRetainUntilDate=retain_until,
+    )
+    head_object = client.head_object(Bucket=bucket_name, Key=source_key)
+    assert head_object["ObjectLockMode"] == "GOVERNANCE"
+    assert head_object["ObjectLockRetainUntilDate"] == retain_until
+    assert "VersionId" in head_object
+    version_id = head_object["VersionId"]
+
+    resp = client.copy_object(
+        Bucket=bucket_name,
+        CopySource=f"{bucket_name}/{source_key}",
+        Key=dest_key,
+    )
+    assert resp["CopySourceVersionId"] == version_id
+    assert resp["VersionId"] != version_id
+
+    # the destination key kept neither the lock mode nor the retain-until date
+    # from the source key
+    head_object = client.head_object(Bucket=bucket_name, Key=dest_key)
+    assert "ObjectLockMode" not in head_object
+    assert "ObjectLockRetainUntilDate" not in head_object
+
+
+@mock_s3
+def test_copy_object_in_place_website_redirect_location():
+    client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
+    bucket_name = "testbucket"
+    key = "source-key"
+    client.create_bucket(Bucket=bucket_name)
+    # this test validates that setting WebsiteRedirectLocation (even the same as
+    # the source) allows a copy in place
+    client.put_object(
+        Bucket=bucket_name,
+        Key=key,
+        Body="test",
+        WebsiteRedirectLocation="/test/direct",
+    )
+    head_object = client.head_object(Bucket=bucket_name, Key=key)
+    assert head_object["WebsiteRedirectLocation"] == "/test/direct"
+
+    # copy the object with the same WebsiteRedirectLocation as the source object
+    client.copy_object(
+        Bucket=bucket_name,
+        CopySource=f"{bucket_name}/{key}",
+        Key=key,
+        WebsiteRedirectLocation="/test/direct",
+    )
+    head_object = client.head_object(Bucket=bucket_name, Key=key)
+    assert head_object["WebsiteRedirectLocation"] == "/test/direct"