From 85a069c0ecabc2e25f8943096609362e45525156 Mon Sep 17 00:00:00 2001 From: Bert Blommers Date: Tue, 30 May 2023 11:04:20 +0000 Subject: [PATCH] S3 Select: Support nested from-clauses (#6348) --- setup.cfg | 12 ++++----- tests/test_s3/test_s3_select.py | 43 +++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/setup.cfg b/setup.cfg index d807a85df..103fb04d5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -52,7 +52,7 @@ all = openapi-spec-validator>=0.2.8 pyparsing>=3.0.7 jsondiff>=1.1.2 - py-partiql-parser==0.3.1 + py-partiql-parser==0.3.3 aws-xray-sdk!=0.96,>=0.93 setuptools server = @@ -66,7 +66,7 @@ server = openapi-spec-validator>=0.2.8 pyparsing>=3.0.7 jsondiff>=1.1.2 - py-partiql-parser==0.3.1 + py-partiql-parser==0.3.3 aws-xray-sdk!=0.96,>=0.93 setuptools flask!=2.2.0,!=2.2.1 @@ -100,7 +100,7 @@ cloudformation = openapi-spec-validator>=0.2.8 pyparsing>=3.0.7 jsondiff>=1.1.2 - py-partiql-parser==0.3.1 + py-partiql-parser==0.3.3 aws-xray-sdk!=0.96,>=0.93 setuptools cloudfront = @@ -123,10 +123,10 @@ dms = ds = sshpubkeys>=3.1.0 dynamodb = docker>=3.0.0 - py-partiql-parser==0.3.1 + py-partiql-parser==0.3.3 dynamodbstreams = docker>=3.0.0 - py-partiql-parser==0.3.1 + py-partiql-parser==0.3.3 ebs = sshpubkeys>=3.1.0 ec2 = sshpubkeys>=3.1.0 ec2instanceconnect = @@ -183,7 +183,7 @@ route53 = route53resolver = sshpubkeys>=3.1.0 s3 = PyYAML>=5.1 - py-partiql-parser==0.3.1 + py-partiql-parser==0.3.3 s3control = sagemaker = sdb = diff --git a/tests/test_s3/test_s3_select.py b/tests/test_s3/test_s3_select.py index 8b9d77a3f..2a67de892 100644 --- a/tests/test_s3/test_s3_select.py +++ b/tests/test_s3/test_s3_select.py @@ -8,6 +8,15 @@ from uuid import uuid4 SIMPLE_JSON = {"a1": "b1", "a2": "b2", "a3": None} SIMPLE_JSON2 = {"a1": "b2", "a3": "b3"} +EXTENSIVE_JSON = [ + { + "staff": [ + {"name": "Janelyn M", "city": "Chicago", "kids": 2}, + {"name": "Stacy P", "city": "Seattle", "kids": 1}, + ], + "country": "USA", + } +] SIMPLE_LIST = [SIMPLE_JSON, SIMPLE_JSON2] SIMPLE_CSV = """a,b,c e,r,f @@ -30,11 +39,17 @@ class TestS3Select(TestCase): self.client.put_object( Bucket=self.bucket_name, Key="simple_csv", Body=SIMPLE_CSV ) + self.client.put_object( + Bucket=self.bucket_name, + Key="extensive.json", + Body=json.dumps(EXTENSIVE_JSON), + ) def tearDown(self) -> None: self.client.delete_object(Bucket=self.bucket_name, Key="list.json") self.client.delete_object(Bucket=self.bucket_name, Key="simple.json") self.client.delete_object(Bucket=self.bucket_name, Key="simple_csv") + self.client.delete_object(Bucket=self.bucket_name, Key="extensive.json") self.client.delete_bucket(Bucket=self.bucket_name) def test_query_all(self): @@ -119,3 +134,31 @@ class TestS3Select(TestCase): ) result = list(x["Payload"]) result.should.contain({"Records": {"Payload": b'{"_1":3},'}}) + + def test_extensive_json__select_list(self): + x = self.client.select_object_content( + Bucket=self.bucket_name, + Key="extensive.json", + Expression="select * from s3object[*].staff[*] s", + ExpressionType="SQL", + InputSerialization={"JSON": {"Type": "DOCUMENT"}}, + OutputSerialization={"JSON": {"RecordDelimiter": ","}}, + ) + result = list(x["Payload"]) + assert {"Records": {"Payload": b"{},"}} in result + + def test_extensive_json__select_all(self): + x = self.client.select_object_content( + Bucket=self.bucket_name, + Key="extensive.json", + Expression="select * from s3object s", + ExpressionType="SQL", + InputSerialization={"JSON": {"Type": "DOCUMENT"}}, + OutputSerialization={"JSON": {"RecordDelimiter": ","}}, + ) + result = list(x["Payload"]) + assert { + "Records": { + "Payload": b'{"_1":[{"staff":[{"name":"Janelyn M","city":"Chicago","kids":2},{"name":"Stacy P","city":"Seattle","kids":1}],"country":"USA"}]},' + } + } in result