From db0bec14188083ecde1559bde35a01af4f147ef4 Mon Sep 17 00:00:00 2001 From: Bert Blommers Date: Wed, 9 Aug 2023 10:49:01 +0000 Subject: [PATCH] S3: select_object_content() now takes RecordDelimiter into account (#6618) --- moto/s3/models.py | 3 +-- moto/s3/responses.py | 4 ++-- moto/s3/select_object_content.py | 7 +++++-- tests/test_s3/test_s3_select.py | 15 +++++++++++++++ 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/moto/s3/models.py b/moto/s3/models.py index 7b36f8abd..7b6237543 100644 --- a/moto/s3/models.py +++ b/moto/s3/models.py @@ -2522,7 +2522,6 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider): key_name: str, select_query: str, input_details: Dict[str, Any], - output_details: Dict[str, Any], # pylint: disable=unused-argument ) -> List[bytes]: """ Highly experimental. Please raise an issue if you find any inconsistencies/bugs. @@ -2531,7 +2530,7 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider): - Function aliases (count(*) as cnt) - Most functions (only count() is supported) - Result is always in JSON - - FieldDelimiters and RecordDelimiters are ignored + - FieldDelimiters are ignored """ self.get_bucket(bucket_name) key = self.get_object(bucket_name, key_name) diff --git a/moto/s3/responses.py b/moto/s3/responses.py index ec20f8e02..fdd3c3a2d 100644 --- a/moto/s3/responses.py +++ b/moto/s3/responses.py @@ -2288,9 +2288,9 @@ class S3Response(BaseResponse): input_details = request["InputSerialization"] output_details = request["OutputSerialization"] results = self.backend.select_object_content( - bucket_name, key_name, select_query, input_details, output_details + bucket_name, key_name, select_query, input_details ) - return 200, {}, serialize_select(results) + return 200, {}, serialize_select(results, output_details) else: raise NotImplementedError( diff --git a/moto/s3/select_object_content.py b/moto/s3/select_object_content.py index e718df235..e9d9985e8 100644 --- a/moto/s3/select_object_content.py +++ b/moto/s3/select_object_content.py @@ -49,8 +49,11 @@ def _create_end_message() -> bytes: return _create_message(content_type=None, event_type=b"End", payload=b"") -def serialize_select(data_list: List[bytes]) -> bytes: +def serialize_select(data_list: List[bytes], output_details: Dict[str, Any]) -> bytes: + delimiter = ( + (output_details.get("JSON") or {}).get("RecordDelimiter") or "\n" + ).encode("utf-8") response = b"" for data in data_list: - response += _create_data_message(data + b",") + response += _create_data_message(data + delimiter) return response + _create_stats_message() + _create_end_message() diff --git a/tests/test_s3/test_s3_select.py b/tests/test_s3/test_s3_select.py index c304b8144..d299365f8 100644 --- a/tests/test_s3/test_s3_select.py +++ b/tests/test_s3/test_s3_select.py @@ -133,6 +133,21 @@ class TestS3Select(TestCase): result = list(content["Payload"]) assert {"Records": {"Payload": b'{"_1":3},'}} in result + def test_default_record_delimiter(self): + content = self.client.select_object_content( + Bucket=self.bucket_name, + Key="simple_csv", + Expression="SELECT count(*) FROM S3Object", + ExpressionType="SQL", + InputSerialization={ + "CSV": {"FileHeaderInfo": "USE", "FieldDelimiter": ","} + }, + # RecordDelimiter is not specified - should default to new line (\n) + OutputSerialization={"JSON": {}}, + ) + result = list(content["Payload"]) + assert {"Records": {"Payload": b'{"_1":3}\n'}} in result + def test_extensive_json__select_list(self): content = self.client.select_object_content( Bucket=self.bucket_name,