S3 Select: Support nested from-clauses (#6348)

This commit is contained in:
Bert Blommers 2023-05-30 11:04:20 +00:00 committed by GitHub
parent 4abf251f2c
commit 85a069c0ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 49 additions and 6 deletions

View File

@ -52,7 +52,7 @@ all =
openapi-spec-validator>=0.2.8 openapi-spec-validator>=0.2.8
pyparsing>=3.0.7 pyparsing>=3.0.7
jsondiff>=1.1.2 jsondiff>=1.1.2
py-partiql-parser==0.3.1 py-partiql-parser==0.3.3
aws-xray-sdk!=0.96,>=0.93 aws-xray-sdk!=0.96,>=0.93
setuptools setuptools
server = server =
@ -66,7 +66,7 @@ server =
openapi-spec-validator>=0.2.8 openapi-spec-validator>=0.2.8
pyparsing>=3.0.7 pyparsing>=3.0.7
jsondiff>=1.1.2 jsondiff>=1.1.2
py-partiql-parser==0.3.1 py-partiql-parser==0.3.3
aws-xray-sdk!=0.96,>=0.93 aws-xray-sdk!=0.96,>=0.93
setuptools setuptools
flask!=2.2.0,!=2.2.1 flask!=2.2.0,!=2.2.1
@ -100,7 +100,7 @@ cloudformation =
openapi-spec-validator>=0.2.8 openapi-spec-validator>=0.2.8
pyparsing>=3.0.7 pyparsing>=3.0.7
jsondiff>=1.1.2 jsondiff>=1.1.2
py-partiql-parser==0.3.1 py-partiql-parser==0.3.3
aws-xray-sdk!=0.96,>=0.93 aws-xray-sdk!=0.96,>=0.93
setuptools setuptools
cloudfront = cloudfront =
@ -123,10 +123,10 @@ dms =
ds = sshpubkeys>=3.1.0 ds = sshpubkeys>=3.1.0
dynamodb = dynamodb =
docker>=3.0.0 docker>=3.0.0
py-partiql-parser==0.3.1 py-partiql-parser==0.3.3
dynamodbstreams = dynamodbstreams =
docker>=3.0.0 docker>=3.0.0
py-partiql-parser==0.3.1 py-partiql-parser==0.3.3
ebs = sshpubkeys>=3.1.0 ebs = sshpubkeys>=3.1.0
ec2 = sshpubkeys>=3.1.0 ec2 = sshpubkeys>=3.1.0
ec2instanceconnect = ec2instanceconnect =
@ -183,7 +183,7 @@ route53 =
route53resolver = sshpubkeys>=3.1.0 route53resolver = sshpubkeys>=3.1.0
s3 = s3 =
PyYAML>=5.1 PyYAML>=5.1
py-partiql-parser==0.3.1 py-partiql-parser==0.3.3
s3control = s3control =
sagemaker = sagemaker =
sdb = sdb =

View File

@ -8,6 +8,15 @@ from uuid import uuid4
SIMPLE_JSON = {"a1": "b1", "a2": "b2", "a3": None} SIMPLE_JSON = {"a1": "b1", "a2": "b2", "a3": None}
SIMPLE_JSON2 = {"a1": "b2", "a3": "b3"} SIMPLE_JSON2 = {"a1": "b2", "a3": "b3"}
# Nested fixture document: a single-element list whose record holds a "staff"
# list of per-person dicts next to a scalar "country" field. It is serialized
# with json.dumps and stored under the key "extensive.json"; the select tests
# query it, and one of them pins the serialized key order, so keep the order.
EXTENSIVE_JSON = [
    {
        "staff": [
            {"name": "Janelyn M", "city": "Chicago", "kids": 2},
            {"name": "Stacy P", "city": "Seattle", "kids": 1},
        ],
        "country": "USA",
    }
]
SIMPLE_LIST = [SIMPLE_JSON, SIMPLE_JSON2] SIMPLE_LIST = [SIMPLE_JSON, SIMPLE_JSON2]
SIMPLE_CSV = """a,b,c SIMPLE_CSV = """a,b,c
e,r,f e,r,f
@ -30,11 +39,17 @@ class TestS3Select(TestCase):
self.client.put_object( self.client.put_object(
Bucket=self.bucket_name, Key="simple_csv", Body=SIMPLE_CSV Bucket=self.bucket_name, Key="simple_csv", Body=SIMPLE_CSV
) )
self.client.put_object(
Bucket=self.bucket_name,
Key="extensive.json",
Body=json.dumps(EXTENSIVE_JSON),
)
def tearDown(self) -> None: def tearDown(self) -> None:
self.client.delete_object(Bucket=self.bucket_name, Key="list.json") self.client.delete_object(Bucket=self.bucket_name, Key="list.json")
self.client.delete_object(Bucket=self.bucket_name, Key="simple.json") self.client.delete_object(Bucket=self.bucket_name, Key="simple.json")
self.client.delete_object(Bucket=self.bucket_name, Key="simple_csv") self.client.delete_object(Bucket=self.bucket_name, Key="simple_csv")
self.client.delete_object(Bucket=self.bucket_name, Key="extensive.json")
self.client.delete_bucket(Bucket=self.bucket_name) self.client.delete_bucket(Bucket=self.bucket_name)
def test_query_all(self): def test_query_all(self):
@ -119,3 +134,31 @@ class TestS3Select(TestCase):
) )
result = list(x["Payload"]) result = list(x["Payload"])
result.should.contain({"Records": {"Payload": b'{"_1":3},'}}) result.should.contain({"Records": {"Payload": b'{"_1":3},'}})
def test_extensive_json__select_list(self):
    """Query the nested ``staff`` lists through a nested from-clause.

    Runs ``select * from s3object[*].staff[*] s`` against the
    ``extensive.json`` object and checks that the returned payload events
    include a ``Records`` entry whose payload is ``{},``.
    """
    select_kwargs = {
        "Bucket": self.bucket_name,
        "Key": "extensive.json",
        "Expression": "select * from s3object[*].staff[*] s",
        "ExpressionType": "SQL",
        "InputSerialization": {"JSON": {"Type": "DOCUMENT"}},
        "OutputSerialization": {"JSON": {"RecordDelimiter": ","}},
    }
    response = self.client.select_object_content(**select_kwargs)
    # Materialize the payload iterable so membership can be checked on a list.
    events = list(response["Payload"])
    expected_event = {"Records": {"Payload": b"{},"}}
    assert expected_event in events
def test_extensive_json__select_all(self):
    """Select the whole ``extensive.json`` document with a plain from-clause.

    Runs ``select * from s3object s`` and checks that the returned payload
    events include a ``Records`` entry carrying the full document wrapped
    under the ``_1`` key, followed by the ``,`` record delimiter.
    """
    select_kwargs = {
        "Bucket": self.bucket_name,
        "Key": "extensive.json",
        "Expression": "select * from s3object s",
        "ExpressionType": "SQL",
        "InputSerialization": {"JSON": {"Type": "DOCUMENT"}},
        "OutputSerialization": {"JSON": {"RecordDelimiter": ","}},
    }
    response = self.client.select_object_content(**select_kwargs)
    # Materialize the payload iterable so membership can be checked on a list.
    events = list(response["Payload"])
    expected_payload = b'{"_1":[{"staff":[{"name":"Janelyn M","city":"Chicago","kids":2},{"name":"Stacy P","city":"Seattle","kids":1}],"country":"USA"}]},'
    expected_event = {"Records": {"Payload": expected_payload}}
    assert expected_event in events