# moto/tests/test_s3/test_s3_select.py

import boto3
import json
import pytest
from moto import mock_s3
from unittest import TestCase
from uuid import uuid4
# Fixture payloads uploaded to the mocked bucket by the tests in this module.

# A single flat JSON document; "a3" is serialised as JSON null.
SIMPLE_JSON = {"a1": "b1", "a2": "b2", "a3": None}
# A second flat document sharing some keys with SIMPLE_JSON.
SIMPLE_JSON2 = {"a1": "b2", "a3": "b3"}
# A nested document: a list holding one object that itself holds a list.
EXTENSIVE_JSON = [
    {
        "staff": [
            {"name": "Janelyn M", "city": "Chicago", "kids": 2},
            {"name": "Stacy P", "city": "Seattle", "kids": 1},
        ],
        "country": "USA",
    }
]
# A JSON array made of the two flat documents above.
SIMPLE_LIST = [SIMPLE_JSON, SIMPLE_JSON2]
# CSV text with four lines and no trailing newline.
SIMPLE_CSV = """a,b,c
e,r,f
y,u,i
q,w,y"""


@mock_s3
class TestS3Select(TestCase):
    """Tests for ``select_object_content`` running under the ``mock_s3`` decorator.

    ``setUp`` creates a uniquely named bucket and uploads the module-level
    fixtures into it; ``tearDown`` deletes those objects and the bucket.
    """

    def setUp(self) -> None:
        """Create the client, a fresh bucket, and upload every fixture object."""
        self.client = boto3.client("s3", "us-east-1")
        # A random name keeps each test's bucket distinct.
        self.bucket_name = str(uuid4())
        self.client.create_bucket(Bucket=self.bucket_name)
        self.client.put_object(
            Bucket=self.bucket_name, Key="simple.json", Body=json.dumps(SIMPLE_JSON)
        )
        self.client.put_object(
            Bucket=self.bucket_name, Key="list.json", Body=json.dumps(SIMPLE_LIST)
        )
        self.client.put_object(
            Bucket=self.bucket_name, Key="simple_csv", Body=SIMPLE_CSV
        )
        self.client.put_object(
            Bucket=self.bucket_name,
            Key="extensive.json",
            Body=json.dumps(EXTENSIVE_JSON),
        )

    def tearDown(self) -> None:
        """Delete every object uploaded by ``setUp``, then the bucket itself."""
        for key in ("list.json", "simple.json", "simple_csv", "extensive.json"):
            self.client.delete_object(Bucket=self.bucket_name, Key=key)
        self.client.delete_bucket(Bucket=self.bucket_name)

    def _select(self, key: str, expression: str, input_serialization=None) -> list:
        """Run ``expression`` against ``key`` and return all payload events.

        :param key: key of the object in the test bucket to query.
        :param expression: the SQL expression to evaluate.
        :param input_serialization: value passed as ``InputSerialization``;
            defaults to ``{"JSON": {"Type": "DOCUMENT"}}``.
        :return: the events of the response ``Payload`` collected into a list.
        """
        if input_serialization is None:
            input_serialization = {"JSON": {"Type": "DOCUMENT"}}
        response = self.client.select_object_content(
            Bucket=self.bucket_name,
            Key=key,
            Expression=expression,
            ExpressionType="SQL",
            InputSerialization=input_serialization,
            # Every record in the output is followed by a comma.
            OutputSerialization={"JSON": {"RecordDelimiter": ","}},
        )
        return list(response["Payload"])

    def test_query_all(self):
        """``SELECT *`` returns the document plus Stats and End events."""
        result = self._select("simple.json", "SELECT * FROM S3Object")
        assert {
            "Records": {"Payload": b'{"a1":"b1","a2":"b2","a3":null},'}
        } in result

        # Verify result is valid JSON (strip the trailing record delimiter first)
        json.loads(result[0]["Records"]["Payload"][0:-1].decode("utf-8"))

        # Verify result contains metadata
        assert {
            "Stats": {
                "Details": {
                    "BytesScanned": 24,
                    "BytesProcessed": 24,
                    "BytesReturned": 22,
                }
            }
        } in result
        assert {"End": {}} in result

    def test_count_function(self):
        """``count(*)`` without an alias is reported under the key ``_1``."""
        result = self._select("simple.json", "SELECT count(*) FROM S3Object")
        assert {"Records": {"Payload": b'{"_1":1},'}} in result

    # pytest reads the explanation from ``reason``.
    @pytest.mark.xfail(reason="Not yet implement in our parser")
    def test_count_as(self):
        """``count(*) as cnt`` should report the count under the alias ``cnt``."""
        result = self._select("simple.json", "SELECT count(*) as cnt FROM S3Object")
        assert {"Records": {"Payload": b'{"cnt":1},'}} in result

    # pytest reads the explanation from ``reason``.
    @pytest.mark.xfail(reason="Not yet implement in our parser")
    def test_count_list_as(self):
        """Aliased ``count(*)`` over the JSON-list fixture."""
        result = self._select("list.json", "SELECT count(*) as cnt FROM S3Object")
        assert {"Records": {"Payload": b'{"cnt":1},'}} in result

    def test_count_csv(self):
        """``count(*)`` over the CSV fixture, with the first line used as header."""
        result = self._select(
            "simple_csv",
            "SELECT count(*) FROM S3Object",
            input_serialization={
                "CSV": {"FileHeaderInfo": "USE", "FieldDelimiter": ","}
            },
        )
        assert {"Records": {"Payload": b'{"_1":3},'}} in result

    def test_extensive_json__select_list(self):
        """Selecting ``s3object[*].staff[*]`` yields an empty-object record."""
        result = self._select("extensive.json", "select * from s3object[*].staff[*] s")
        assert {"Records": {"Payload": b"{},"}} in result

    def test_extensive_json__select_all(self):
        """Selecting the whole nested document wraps it under the key ``_1``."""
        result = self._select("extensive.json", "select * from s3object s")
        assert {
            "Records": {
                "Payload": b'{"_1":[{"staff":[{"name":"Janelyn M","city":"Chicago","kids":2},{"name":"Stacy P","city":"Seattle","kids":1}],"country":"USA"}]},'
            }
        } in result