S3: select_object_content() now takes RecordDelimiter into account (#6618)
This commit is contained in:
parent
db87597018
commit
db0bec1418
@ -2522,7 +2522,6 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider):
|
||||
key_name: str,
|
||||
select_query: str,
|
||||
input_details: Dict[str, Any],
|
||||
output_details: Dict[str, Any], # pylint: disable=unused-argument
|
||||
) -> List[bytes]:
|
||||
"""
|
||||
Highly experimental. Please raise an issue if you find any inconsistencies/bugs.
|
||||
@ -2531,7 +2530,7 @@ class S3Backend(BaseBackend, CloudWatchMetricProvider):
|
||||
- Function aliases (count(*) as cnt)
|
||||
- Most functions (only count() is supported)
|
||||
- Result is always in JSON
|
||||
- FieldDelimiters and RecordDelimiters are ignored
|
||||
- FieldDelimiters are ignored
|
||||
"""
|
||||
self.get_bucket(bucket_name)
|
||||
key = self.get_object(bucket_name, key_name)
|
||||
|
@ -2288,9 +2288,9 @@ class S3Response(BaseResponse):
|
||||
input_details = request["InputSerialization"]
|
||||
output_details = request["OutputSerialization"]
|
||||
results = self.backend.select_object_content(
|
||||
bucket_name, key_name, select_query, input_details, output_details
|
||||
bucket_name, key_name, select_query, input_details
|
||||
)
|
||||
return 200, {}, serialize_select(results)
|
||||
return 200, {}, serialize_select(results, output_details)
|
||||
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
|
@ -49,8 +49,11 @@ def _create_end_message() -> bytes:
|
||||
return _create_message(content_type=None, event_type=b"End", payload=b"")
|
||||
|
||||
|
||||
def serialize_select(data_list: List[bytes]) -> bytes:
|
||||
def serialize_select(data_list: List[bytes], output_details: Dict[str, Any]) -> bytes:
|
||||
delimiter = (
|
||||
(output_details.get("JSON") or {}).get("RecordDelimiter") or "\n"
|
||||
).encode("utf-8")
|
||||
response = b""
|
||||
for data in data_list:
|
||||
response += _create_data_message(data + b",")
|
||||
response += _create_data_message(data + delimiter)
|
||||
return response + _create_stats_message() + _create_end_message()
|
||||
|
@ -133,6 +133,21 @@ class TestS3Select(TestCase):
|
||||
result = list(content["Payload"])
|
||||
assert {"Records": {"Payload": b'{"_1":3},'}} in result
|
||||
|
||||
def test_default_record_delimiter(self):
|
||||
content = self.client.select_object_content(
|
||||
Bucket=self.bucket_name,
|
||||
Key="simple_csv",
|
||||
Expression="SELECT count(*) FROM S3Object",
|
||||
ExpressionType="SQL",
|
||||
InputSerialization={
|
||||
"CSV": {"FileHeaderInfo": "USE", "FieldDelimiter": ","}
|
||||
},
|
||||
# RecordDelimiter is not specified - should default to new line (\n)
|
||||
OutputSerialization={"JSON": {}},
|
||||
)
|
||||
result = list(content["Payload"])
|
||||
assert {"Records": {"Payload": b'{"_1":3}\n'}} in result
|
||||
|
||||
def test_extensive_json__select_list(self):
|
||||
content = self.client.select_object_content(
|
||||
Bucket=self.bucket_name,
|
||||
|
Loading…
Reference in New Issue
Block a user