moto/tests/test_s3/test_s3_select.py

import bz2
import gzip
import json

import boto3
import pytest
from botocore.exceptions import ClientError

from . import s3_aws_verified

# Flat document; "a3" is None so that it serializes to a JSON null.
SIMPLE_JSON = {
    "a1": "b1",
    "a2": "b2",
    "a3": None,
}

# Second flat document; it has no "a2" key.
SIMPLE_JSON2 = {
    "a1": "b2",
    "a3": "b3",
}

# Document mixing a nested object, lists and a boolean.
NESTED_JSON = {
    "a1": {"b1": "b2"},
    "a2": [True, False],
    "a3": True,
    "a4": [1, 5],
}

# Top-level list with a single object. Note that "kids" is a list for the
# first staff member and a plain object for the second.
EXTENSIVE_JSON = [
    {
        "staff": [
            {
                "name": "Janelyn M",
                "city": "Chicago",
                "kids": [
                    {"Name": "Josh"},
                    {"Name": "Jay"},
                ],
            },
            {
                "name": "Stacy P",
                "city": "Seattle",
                "kids": {"Name": "Josh"},
            },
        ],
        "country": "USA",
    },
]

# Both flat documents, stored as one JSON list.
SIMPLE_LIST = [SIMPLE_JSON, SIMPLE_JSON2]

# Four comma-separated lines; there is no newline after the last one.
SIMPLE_CSV = "\n".join(
    (
        "a,b,c",
        "e,r,f",
        "y,u,i",
        "q,w,y",
    )
)


def create_test_files(bucket_name):
    """Upload every fixture object used by the tests in this module.

    The JSON and CSV fixtures are stored as plain text, and additionally as
    gzip- and bzip2-compressed UTF-8 bytes.

    :param bucket_name: name of the bucket that receives the objects.
    """
    s3 = boto3.client("s3", "us-east-1")

    nested_bytes = json.dumps(NESTED_JSON).encode("utf-8")
    csv_bytes = SIMPLE_CSV.encode("utf-8")

    # Insertion order of the mapping is the upload order.
    fixtures = {
        "simple.json": json.dumps(SIMPLE_JSON),
        "list.json": json.dumps(SIMPLE_LIST),
        "simple_csv": SIMPLE_CSV,
        "extensive.json": json.dumps(EXTENSIVE_JSON),
        "nested.json": json.dumps(NESTED_JSON),
        "json.gzip": gzip.compress(nested_bytes),
        "json.bz2": bz2.compress(nested_bytes),
        "csv.gzip": gzip.compress(csv_bytes),
        "csv.bz2": bz2.compress(csv_bytes),
    }
    for object_key, body in fixtures.items():
        s3.put_object(Bucket=bucket_name, Key=object_key, Body=body)


@pytest.mark.aws_verified
@s3_aws_verified
def test_query_all(bucket_name=None):
    """Select every field of simple.json and inspect the returned event stream.

    Checks the exact Records payload, that the payload is valid JSON matching
    the uploaded document, and that Stats and End events are present.

    :param bucket_name: bucket to operate on; presumably supplied by the
        ``s3_aws_verified`` decorator (defined outside this module).
    """
    client = boto3.client("s3", "us-east-1")
    create_test_files(bucket_name)
    content = client.select_object_content(
        Bucket=bucket_name,
        Key="simple.json",
        Expression="SELECT * FROM S3Object",
        ExpressionType="SQL",
        InputSerialization={"JSON": {"Type": "DOCUMENT"}},
        OutputSerialization={"JSON": {"RecordDelimiter": ","}},
    )
    result = list(content["Payload"])
    assert {"Records": {"Payload": b'{"a1":"b1","a2":"b2","a3":null},'}} in result

    # Verify result is valid JSON.
    # The Records event is looked up by key rather than by position, so the
    # check does not depend on the order in which events are delivered.
    records = [res for res in result if "Records" in res][0]["Records"]["Payload"]
    # Strip the trailing record delimiter (",") before parsing
    assert json.loads(records[:-1].decode("utf-8")) == SIMPLE_JSON

    # Verify result contains metadata
    stats = [res for res in result if "Stats" in res][0]["Stats"]
    assert "BytesScanned" in stats["Details"]
    assert "BytesProcessed" in stats["Details"]
    assert "BytesReturned" in stats["Details"]
    assert {"End": {}} in result


@pytest.mark.aws_verified
@s3_aws_verified
def test_count_function(bucket_name=None):
    """count(*) over simple.json returns a single row keyed "_1" with value 1."""
    s3 = boto3.client("s3", "us-east-1")
    create_test_files(bucket_name)

    request = {
        "Bucket": bucket_name,
        "Key": "simple.json",
        "Expression": "SELECT count(*) FROM S3Object",
        "ExpressionType": "SQL",
        "InputSerialization": {"JSON": {"Type": "DOCUMENT"}},
        "OutputSerialization": {"JSON": {"RecordDelimiter": ","}},
    }
    events = list(s3.select_object_content(**request)["Payload"])

    expected_event = {"Records": {"Payload": b'{"_1":1},'}}
    assert expected_event in events


@pytest.mark.aws_verified
@s3_aws_verified
# xfail takes its explanation via `reason`; other keywords are not reported.
@pytest.mark.xfail(reason="Not yet implemented in our parser")
def test_count_as(bucket_name=None):
    """count(*) with an ``as cnt`` alias should name the output column "cnt".

    Marked as an expected failure.

    :param bucket_name: bucket to operate on; presumably supplied by the
        ``s3_aws_verified`` decorator (defined outside this module).
    """
    client = boto3.client("s3", "us-east-1")
    create_test_files(bucket_name)
    content = client.select_object_content(
        Bucket=bucket_name,
        Key="simple.json",
        Expression="SELECT count(*) as cnt FROM S3Object",
        ExpressionType="SQL",
        InputSerialization={"JSON": {"Type": "DOCUMENT"}},
        OutputSerialization={"JSON": {"RecordDelimiter": ","}},
    )
    result = list(content["Payload"])
    assert {"Records": {"Payload": b'{"cnt":1},'}} in result


@pytest.mark.aws_verified
@s3_aws_verified
# xfail takes its explanation via `reason`; other keywords are not reported.
@pytest.mark.xfail(reason="Not yet implemented in our parser")
def test_count_list_as(bucket_name=None):
    """Aliased count(*) over list.json should return ``{"cnt": 1}``.

    Marked as an expected failure.

    :param bucket_name: bucket to operate on; presumably supplied by the
        ``s3_aws_verified`` decorator (defined outside this module).
    """
    client = boto3.client("s3", "us-east-1")
    create_test_files(bucket_name)
    content = client.select_object_content(
        Bucket=bucket_name,
        Key="list.json",
        Expression="SELECT count(*) as cnt FROM S3Object",
        ExpressionType="SQL",
        InputSerialization={"JSON": {"Type": "DOCUMENT"}},
        OutputSerialization={"JSON": {"RecordDelimiter": ","}},
    )
    result = list(content["Payload"])
    assert {"Records": {"Payload": b'{"cnt":1},'}} in result


@pytest.mark.aws_verified
@s3_aws_verified
def test_count_csv(bucket_name=None):
    """count(*) over the plain CSV fixture, read with FileHeaderInfo=USE.

    The expected count is 3 for the four-line SIMPLE_CSV fixture.
    """
    s3 = boto3.client("s3", "us-east-1")
    create_test_files(bucket_name)

    csv_input = {"CSV": {"FileHeaderInfo": "USE", "FieldDelimiter": ","}}
    json_output = {"JSON": {"RecordDelimiter": ","}}
    response = s3.select_object_content(
        Bucket=bucket_name,
        Key="simple_csv",
        Expression="SELECT count(*) FROM S3Object",
        ExpressionType="SQL",
        InputSerialization=csv_input,
        OutputSerialization=json_output,
    )

    events = list(response["Payload"])
    assert {"Records": {"Payload": b'{"_1":3},'}} in events


@pytest.mark.aws_verified
@s3_aws_verified
def test_default_record_delimiter(bucket_name=None):
    """JSON output without an explicit RecordDelimiter ends records with "\\n"."""
    s3 = boto3.client("s3", "us-east-1")
    create_test_files(bucket_name)

    # No RecordDelimiter is given here - a new line (\n) is the expected default
    json_output = {"JSON": {}}
    response = s3.select_object_content(
        Bucket=bucket_name,
        Key="simple_csv",
        Expression="SELECT count(*) FROM S3Object",
        ExpressionType="SQL",
        InputSerialization={"CSV": {"FileHeaderInfo": "USE", "FieldDelimiter": ","}},
        OutputSerialization=json_output,
    )

    events = list(response["Payload"])
    expected_event = {"Records": {"Payload": b'{"_1":3}\n'}}
    assert expected_event in events


@pytest.mark.aws_verified
@s3_aws_verified
def test_extensive_json__select_list(bucket_name=None):
    """Query extensive.json through a ``[*].staff[*]`` from-clause.

    The test expects an empty record (``{}``) among the returned events.
    """
    s3 = boto3.client("s3", "us-east-1")
    create_test_files(bucket_name)

    request = {
        "Bucket": bucket_name,
        "Key": "extensive.json",
        "Expression": "select * from s3object[*].staff[*] s",
        "ExpressionType": "SQL",
        "InputSerialization": {"JSON": {"Type": "DOCUMENT"}},
        "OutputSerialization": {"JSON": {"RecordDelimiter": ","}},
    }
    response = s3.select_object_content(**request)

    events = list(response["Payload"])
    assert {"Records": {"Payload": b"{},"}} in events


@pytest.mark.aws_verified
@s3_aws_verified
def test_extensive_json__select_all(bucket_name=None):
    """Select everything from extensive.json, whose top-level value is a list.

    The whole list is expected back wrapped in an object under the key "_1".
    """
    s3 = boto3.client("s3", "us-east-1")
    create_test_files(bucket_name)
    response = s3.select_object_content(
        Bucket=bucket_name,
        Key="extensive.json",
        Expression="select * from s3object s",
        ExpressionType="SQL",
        InputSerialization={"JSON": {"Type": "DOCUMENT"}},
        OutputSerialization={"JSON": {"RecordDelimiter": ","}},
    )
    events = list(response["Payload"])

    # Take the payload of the first Records event and decode it to text
    record_events = [event for event in events if "Records" in event]
    payload = record_events[0]["Records"]["Payload"]
    text = payload.decode("utf-8")

    # The payload ends with the record delimiter, i.e. a trailing comma
    assert text[-1] == ","

    # The source document is a list, so it comes back wrapped under "_1"
    document = json.loads(text[:-1])
    assert document == {"_1": EXTENSIVE_JSON}


@pytest.mark.aws_verified
@s3_aws_verified
def test_nested_json__select_all(bucket_name=None):
    """Select everything from nested.json and expect NESTED_JSON back unchanged."""
    s3 = boto3.client("s3", "us-east-1")
    create_test_files(bucket_name)
    response = s3.select_object_content(
        Bucket=bucket_name,
        Key="nested.json",
        Expression="select * from s3object s",
        ExpressionType="SQL",
        InputSerialization={"JSON": {"Type": "DOCUMENT"}},
        OutputSerialization={"JSON": {"RecordDelimiter": ","}},
    )
    events = list(response["Payload"])

    # Take the payload of the first Records event and decode it to text
    record_events = [event for event in events if "Records" in event]
    payload = record_events[0]["Records"]["Payload"]
    text = payload.decode("utf-8")

    # The payload ends with the record delimiter, i.e. a trailing comma
    assert text[-1] == ","

    document = json.loads(text[:-1])
    assert document == NESTED_JSON


@pytest.mark.aws_verified
@s3_aws_verified
def test_gzipped_json(bucket_name=None):
    """count(*) over the gzip-compressed JSON fixture, read with CompressionType=GZIP."""
    s3 = boto3.client("s3", "us-east-1")
    create_test_files(bucket_name)

    gzip_json_input = {"JSON": {"Type": "DOCUMENT"}, "CompressionType": "GZIP"}
    response = s3.select_object_content(
        Bucket=bucket_name,
        Key="json.gzip",
        Expression="SELECT count(*) FROM S3Object",
        ExpressionType="SQL",
        InputSerialization=gzip_json_input,
        OutputSerialization={"JSON": {"RecordDelimiter": ","}},
    )

    events = list(response["Payload"])
    expected_event = {"Records": {"Payload": b'{"_1":1},'}}
    assert expected_event in events


@pytest.mark.aws_verified
@s3_aws_verified
def test_bzipped_json(bucket_name=None):
    """count(*) over the bzip2-compressed JSON fixture, read with CompressionType=BZIP2."""
    s3 = boto3.client("s3", "us-east-1")
    create_test_files(bucket_name)

    bzip_json_input = {"JSON": {"Type": "DOCUMENT"}, "CompressionType": "BZIP2"}
    response = s3.select_object_content(
        Bucket=bucket_name,
        Key="json.bz2",
        Expression="SELECT count(*) FROM S3Object",
        ExpressionType="SQL",
        InputSerialization=bzip_json_input,
        OutputSerialization={"JSON": {"RecordDelimiter": ","}},
    )

    events = list(response["Payload"])
    expected_event = {"Records": {"Payload": b'{"_1":1},'}}
    assert expected_event in events


@pytest.mark.aws_verified
@s3_aws_verified
def test_bzipped_csv_to_csv(bucket_name=None):
    """Query the bzip2-compressed CSV fixture and emit CSV output.

    Covers counting and mirroring the rows, each with custom and with
    default output delimiters.
    """
    s3 = boto3.client("s3", "us-east-1")
    create_test_files(bucket_name)

    def select_events(expression, output_serialization):
        # Run one query against csv.bz2 and collect the whole event stream
        response = s3.select_object_content(
            Bucket=bucket_name,
            Key="csv.bz2",
            Expression=expression,
            ExpressionType="SQL",
            InputSerialization={"CSV": {}, "CompressionType": "BZIP2"},
            OutputSerialization=output_serialization,
        )
        return list(response["Payload"])

    # Count records, with custom output delimiters
    events = select_events(
        "SELECT count(*) FROM S3Object",
        {"CSV": {"RecordDelimiter": "_", "FieldDelimiter": ":"}},
    )
    assert {"Records": {"Payload": b"4_"}} in events

    # Count records, with default output delimiters
    events = select_events("SELECT count(*) FROM S3Object", {"CSV": {}})
    assert {"Records": {"Payload": b"4\n"}} in events

    # Mirror records, with default output delimiters
    events = select_events("SELECT * FROM S3Object", {"CSV": {}})
    assert {"Records": {"Payload": b"a,b,c\ne,r,f\ny,u,i\nq,w,y\n"}} in events

    # Mirror records, with an explicit output format
    events = select_events(
        "SELECT * FROM S3Object",
        {"CSV": {"RecordDelimiter": "\n", "FieldDelimiter": ":"}},
    )
    assert {"Records": {"Payload": b"a:b:c\ne:r:f\ny:u:i\nq:w:y\n"}} in events


@pytest.mark.aws_verified
@s3_aws_verified
def test_select_unknown_key(bucket_name=None):
    """Selecting from a key that was never uploaded raises a NoSuchKey ClientError."""
    s3 = boto3.client("s3", "us-east-1")

    request = {
        "Bucket": bucket_name,
        "Key": "unknown",
        "Expression": "SELECT count(*) FROM S3Object",
        "ExpressionType": "SQL",
        "InputSerialization": {"CSV": {}, "CompressionType": "BZIP2"},
        "OutputSerialization": {
            "CSV": {"RecordDelimiter": "\n", "FieldDelimiter": ":"}
        },
    }
    with pytest.raises(ClientError) as raised:
        s3.select_object_content(**request)

    # The error names both the failure and the key that was requested
    error = raised.value.response["Error"]
    assert error["Code"] == "NoSuchKey"
    assert error["Message"] == "The specified key does not exist."
    assert error["Key"] == "unknown"
S3: select_object_content() now supports Compressed requests and CSV outputs (#7514) 2024-03-25 05:34:06 -01:00			`import bz2`
			`import gzip`
S3/DynamoDB: Improve PartiQL tests (#6897) 2023-10-10 15:44:00 +00:00			`import json`
Admin: sorting imports with ruff (#7075) 2023-11-30 07:55:51 -08:00
			`import boto3`
Techdebt: Replace sure with regular assertions in S3 (#6603) 2023-08-07 12:48:48 -04:00			`import pytest`
S3: select_object_content() now supports Compressed requests and CSV outputs (#7514) 2024-03-25 05:34:06 -01:00			`from botocore.exceptions import ClientError`
Techdebt: Replace sure with regular assertions in S3 (#6603) 2023-08-07 12:48:48 -04:00
S3/DynamoDB: Improve PartiQL tests (#6897) 2023-10-10 15:44:00 +00:00			`from . import s3_aws_verified`
Techdebt: Replace sure with regular assertions in S3 (#6603) 2023-08-07 12:48:48 -04:00
S3 Select: Parse null-values (#6343) 2023-05-25 16:37:45 +00:00			`SIMPLE_JSON = {"a1": "b1", "a2": "b2", "a3": None}`
S3: select_object_content() (Experimental) (#6103) 2023-03-21 15:55:19 -01:00			`SIMPLE_JSON2 = {"a1": "b2", "a3": "b3"}`
S3/DynamoDB: Improve PartiQL tests (#6897) 2023-10-10 15:44:00 +00:00			`NESTED_JSON = {"a1": {"b1": "b2"}, "a2": [True, False], "a3": True, "a4": [1, 5]}`
S3 Select: Support nested from-clauses (#6348) 2023-05-30 11:04:20 +00:00			`EXTENSIVE_JSON = [`
			`{`
			`"staff": [`
S3/DynamoDB: Improve PartiQL tests (#6897) 2023-10-10 15:44:00 +00:00			`{`
			`"name": "Janelyn M",`
			`"city": "Chicago",`
			`"kids": [{"Name": "Josh"}, {"Name": "Jay"}],`
			`},`
			`{"name": "Stacy P", "city": "Seattle", "kids": {"Name": "Josh"}},`
S3 Select: Support nested from-clauses (#6348) 2023-05-30 11:04:20 +00:00			`],`
			`"country": "USA",`
			`}`
			`]`
S3: select_object_content() (Experimental) (#6103) 2023-03-21 15:55:19 -01:00			`SIMPLE_LIST = [SIMPLE_JSON, SIMPLE_JSON2]`
			`SIMPLE_CSV = """a,b,c`
			`e,r,f`
			`y,u,i`
			`q,w,y"""`


S3/DynamoDB: Improve PartiQL tests (#6897) 2023-10-10 15:44:00 +00:00			`def create_test_files(bucket_name):`
			`client = boto3.client("s3", "us-east-1")`
			`client.put_object(`
			`Bucket=bucket_name, Key="simple.json", Body=json.dumps(SIMPLE_JSON)`
			`)`
			`client.put_object(Bucket=bucket_name, Key="list.json", Body=json.dumps(SIMPLE_LIST))`
			`client.put_object(Bucket=bucket_name, Key="simple_csv", Body=SIMPLE_CSV)`
			`client.put_object(`
			`Bucket=bucket_name,`
			`Key="extensive.json",`
			`Body=json.dumps(EXTENSIVE_JSON),`
			`)`
			`client.put_object(`
			`Bucket=bucket_name,`
			`Key="nested.json",`
			`Body=json.dumps(NESTED_JSON),`
			`)`
S3: select_object_content() now supports Compressed requests and CSV outputs (#7514) 2024-03-25 05:34:06 -01:00			`client.put_object(`
			`Bucket=bucket_name,`
			`Key="json.gzip",`
			`Body=gzip.compress(json.dumps(NESTED_JSON).encode("utf-8")),`
			`)`
			`client.put_object(`
			`Bucket=bucket_name,`
			`Key="json.bz2",`
			`Body=bz2.compress(json.dumps(NESTED_JSON).encode("utf-8")),`
			`)`
			`client.put_object(`
			`Bucket=bucket_name,`
			`Key="csv.gzip",`
			`Body=gzip.compress(SIMPLE_CSV.encode("utf-8")),`
			`)`
			`client.put_object(`
			`Bucket=bucket_name, Key="csv.bz2", Body=bz2.compress(SIMPLE_CSV.encode("utf-8"))`
			`)`
S3/DynamoDB: Improve PartiQL tests (#6897) 2023-10-10 15:44:00 +00:00

			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`def test_query_all(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`create_test_files(bucket_name)`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="simple.json",`
			`Expression="SELECT * FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"JSON": {"Type": "DOCUMENT"}},`
			`OutputSerialization={"JSON": {"RecordDelimiter": ","}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b'{"a1":"b1","a2":"b2","a3":null},'}} in result`

			`# Verify result is valid JSON`
			`json.loads(result[0]["Records"]["Payload"][0:-1].decode("utf-8"))`

			`# Verify result contains metadata`
			`stats = [res for res in result if "Stats" in res][0]["Stats"]`
			`assert "BytesScanned" in stats["Details"]`
			`assert "BytesProcessed" in stats["Details"]`
			`assert "BytesReturned" in stats["Details"]`
			`assert {"End": {}} in result`


			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`def test_count_function(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`create_test_files(bucket_name)`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="simple.json",`
			`Expression="SELECT count(*) FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"JSON": {"Type": "DOCUMENT"}},`
			`OutputSerialization={"JSON": {"RecordDelimiter": ","}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b'{"_1":1},'}} in result`


			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`@pytest.mark.xfail(message="Not yet implement in our parser")`
			`def test_count_as(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`create_test_files(bucket_name)`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="simple.json",`
			`Expression="SELECT count(*) as cnt FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"JSON": {"Type": "DOCUMENT"}},`
			`OutputSerialization={"JSON": {"RecordDelimiter": ","}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b'{"cnt":1},'}} in result`


			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`@pytest.mark.xfail(message="Not yet implement in our parser")`
			`def test_count_list_as(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`create_test_files(bucket_name)`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="list.json",`
			`Expression="SELECT count(*) as cnt FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"JSON": {"Type": "DOCUMENT"}},`
			`OutputSerialization={"JSON": {"RecordDelimiter": ","}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b'{"cnt":1},'}} in result`


			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`def test_count_csv(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`create_test_files(bucket_name)`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="simple_csv",`
			`Expression="SELECT count(*) FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"CSV": {"FileHeaderInfo": "USE", "FieldDelimiter": ","}},`
			`OutputSerialization={"JSON": {"RecordDelimiter": ","}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b'{"_1":3},'}} in result`


			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`def test_default_record_delimiter(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`create_test_files(bucket_name)`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="simple_csv",`
			`Expression="SELECT count(*) FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"CSV": {"FileHeaderInfo": "USE", "FieldDelimiter": ","}},`
			`# RecordDelimiter is not specified - should default to new line (\n)`
			`OutputSerialization={"JSON": {}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b'{"_1":3}\n'}} in result`


			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`def test_extensive_json__select_list(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`create_test_files(bucket_name)`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="extensive.json",`
			`Expression="select * from s3object[].staff[] s",`
			`ExpressionType="SQL",`
			`InputSerialization={"JSON": {"Type": "DOCUMENT"}},`
			`OutputSerialization={"JSON": {"RecordDelimiter": ","}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b"{},"}} in result`


			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`def test_extensive_json__select_all(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`create_test_files(bucket_name)`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="extensive.json",`
			`Expression="select * from s3object s",`
			`ExpressionType="SQL",`
			`InputSerialization={"JSON": {"Type": "DOCUMENT"}},`
			`OutputSerialization={"JSON": {"RecordDelimiter": ","}},`
			`)`
			`result = list(content["Payload"])`
			`records = [res for res in result if "Records" in res][0]["Records"][`
			`"Payload"`
			`].decode("utf-8")`

			`# For some reason, AWS returns records with a comma at the end`
			`assert records[-1] == ","`

			`# Because the original doc is a list, it is returned like this`
			`assert json.loads(records[:-1]) == {"_1": EXTENSIVE_JSON}`


			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`def test_nested_json__select_all(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`create_test_files(bucket_name)`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="nested.json",`
			`Expression="select * from s3object s",`
			`ExpressionType="SQL",`
			`InputSerialization={"JSON": {"Type": "DOCUMENT"}},`
			`OutputSerialization={"JSON": {"RecordDelimiter": ","}},`
			`)`
			`result = list(content["Payload"])`
			`records = [res for res in result if "Records" in res][0]["Records"][`
			`"Payload"`
			`].decode("utf-8")`

			`# For some reason, AWS returns records with a comma at the end`
			`assert records[-1] == ","`

			`assert json.loads(records[:-1]) == NESTED_JSON`
S3: select_object_content() now supports Compressed requests and CSV outputs (#7514) 2024-03-25 05:34:06 -01:00

			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`def test_gzipped_json(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`create_test_files(bucket_name)`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="json.gzip",`
			`Expression="SELECT count(*) FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"JSON": {"Type": "DOCUMENT"}, "CompressionType": "GZIP"},`
			`OutputSerialization={"JSON": {"RecordDelimiter": ","}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b'{"_1":1},'}} in result`


			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`def test_bzipped_json(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`create_test_files(bucket_name)`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="json.bz2",`
			`Expression="SELECT count(*) FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"JSON": {"Type": "DOCUMENT"}, "CompressionType": "BZIP2"},`
			`OutputSerialization={"JSON": {"RecordDelimiter": ","}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b'{"_1":1},'}} in result`


			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`def test_bzipped_csv_to_csv(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`create_test_files(bucket_name)`

			`# Count Records`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="csv.bz2",`
			`Expression="SELECT count(*) FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"CSV": {}, "CompressionType": "BZIP2"},`
			`OutputSerialization={"CSV": {"RecordDelimiter": "_", "FieldDelimiter": ":"}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b"4_"}} in result`

			`# Count Records`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="csv.bz2",`
			`Expression="SELECT count(*) FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"CSV": {}, "CompressionType": "BZIP2"},`
			`OutputSerialization={"CSV": {}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b"4\n"}} in result`

			`# Mirror records`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="csv.bz2",`
			`Expression="SELECT * FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"CSV": {}, "CompressionType": "BZIP2"},`
			`OutputSerialization={"CSV": {}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b"a,b,c\ne,r,f\ny,u,i\nq,w,y\n"}} in result`

			`# Mirror records, specifying output format`
			`content = client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="csv.bz2",`
			`Expression="SELECT * FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"CSV": {}, "CompressionType": "BZIP2"},`
			`OutputSerialization={"CSV": {"RecordDelimiter": "\n", "FieldDelimiter": ":"}},`
			`)`
			`result = list(content["Payload"])`
			`assert {"Records": {"Payload": b"a:b:c\ne:r:f\ny:u:i\nq:w:y\n"}} in result`


			`@pytest.mark.aws_verified`
			`@s3_aws_verified`
			`def test_select_unknown_key(bucket_name=None):`
			`client = boto3.client("s3", "us-east-1")`
			`with pytest.raises(ClientError) as exc:`
			`client.select_object_content(`
			`Bucket=bucket_name,`
			`Key="unknown",`
			`Expression="SELECT count(*) FROM S3Object",`
			`ExpressionType="SQL",`
			`InputSerialization={"CSV": {}, "CompressionType": "BZIP2"},`
			`OutputSerialization={`
			`"CSV": {"RecordDelimiter": "\n", "FieldDelimiter": ":"}`
			`},`
			`)`
			`err = exc.value.response["Error"]`
			`assert err["Code"] == "NoSuchKey"`
			`assert err["Message"] == "The specified key does not exist."`
			`assert err["Key"] == "unknown"`