moto/tests/test_s3/test_s3_select.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

178 lines
6.4 KiB
Python
Raw Normal View History

import json
from unittest import TestCase
from uuid import uuid4
import boto3
import pytest
from moto import mock_s3
# Fixture payloads uploaded to the test bucket by TestS3Select.setUp.

# Flat JSON document; "a3" is None so it is serialised as JSON null.
SIMPLE_JSON = {"a1": "b1", "a2": "b2", "a3": None}

# Second flat document, sharing only some keys with SIMPLE_JSON.
SIMPLE_JSON2 = {"a1": "b2", "a3": "b3"}

# Nested document: a list holding one object with a nested "staff" list.
EXTENSIVE_JSON = [
    {
        "staff": [
            {"name": "Janelyn M", "city": "Chicago", "kids": 2},
            {"name": "Stacy P", "city": "Seattle", "kids": 1},
        ],
        "country": "USA",
    }
]

# JSON array built from the two flat documents above.
SIMPLE_LIST = [SIMPLE_JSON, SIMPLE_JSON2]

# CSV text: one header row followed by three data rows, no trailing newline.
SIMPLE_CSV = """a,b,c
e,r,f
y,u,i
q,w,y"""
@mock_s3
class TestS3Select(TestCase):
    """Tests for ``select_object_content`` run under moto's ``mock_s3``.

    Each test gets a freshly created, uniquely named bucket containing the
    JSON and CSV fixtures defined at module level; the bucket and its objects
    are removed again in ``tearDown``.
    """

    def setUp(self) -> None:
        """Create the bucket and upload the four fixture objects."""
        self.client = boto3.client("s3", "us-east-1")
        # A random bucket name keeps tests from colliding with each other.
        self.bucket_name = str(uuid4())
        self.client.create_bucket(Bucket=self.bucket_name)
        fixtures = {
            "simple.json": json.dumps(SIMPLE_JSON),
            "list.json": json.dumps(SIMPLE_LIST),
            "simple_csv": SIMPLE_CSV,
            "extensive.json": json.dumps(EXTENSIVE_JSON),
        }
        for key, body in fixtures.items():
            self.client.put_object(Bucket=self.bucket_name, Key=key, Body=body)

    def tearDown(self) -> None:
        """Delete every fixture object, then the bucket itself."""
        for key in ("list.json", "simple.json", "simple_csv", "extensive.json"):
            self.client.delete_object(Bucket=self.bucket_name, Key=key)
        self.client.delete_bucket(Bucket=self.bucket_name)

    def _select(
        self,
        key: str,
        expression: str,
        input_serialization=None,
        output_serialization=None,
    ) -> list:
        """Run a SQL expression against ``key`` and return the events as a list.

        :param key: Key of the object in the test bucket to query.
        :param expression: SQL expression passed to ``select_object_content``.
        :param input_serialization: ``InputSerialization`` argument; defaults
            to a JSON document (``{"JSON": {"Type": "DOCUMENT"}}``).
        :param output_serialization: ``OutputSerialization`` argument;
            defaults to JSON records delimited by a comma.
        :return: Every event yielded by the response's ``Payload`` stream.
        """
        # Build the defaults per call so no dict is shared between requests.
        if input_serialization is None:
            input_serialization = {"JSON": {"Type": "DOCUMENT"}}
        if output_serialization is None:
            output_serialization = {"JSON": {"RecordDelimiter": ","}}
        content = self.client.select_object_content(
            Bucket=self.bucket_name,
            Key=key,
            Expression=expression,
            ExpressionType="SQL",
            InputSerialization=input_serialization,
            OutputSerialization=output_serialization,
        )
        return list(content["Payload"])

    def test_query_all(self):
        """``SELECT *`` returns the document plus Stats and End events."""
        result = self._select("simple.json", "SELECT * FROM S3Object")
        assert {"Records": {"Payload": b'{"a1":"b1","a2":"b2","a3":null},'}} in result

        # Verify result is valid JSON (the trailing record delimiter is dropped)
        json.loads(result[0]["Records"]["Payload"][0:-1].decode("utf-8"))

        # Verify result contains metadata
        assert {
            "Stats": {
                "Details": {
                    "BytesScanned": 24,
                    "BytesProcessed": 24,
                    "BytesReturned": 22,
                }
            }
        } in result
        assert {"End": {}} in result

    def test_count_function(self):
        """``count(*)`` over a single JSON document is returned under ``_1``."""
        result = self._select("simple.json", "SELECT count(*) FROM S3Object")
        assert {"Records": {"Payload": b'{"_1":1},'}} in result

    # pytest reads the explanation from ``reason``; other keywords are not
    # part of the documented xfail signature.
    @pytest.mark.xfail(reason="Not yet implement in our parser")
    def test_count_as(self):
        """An aliased ``count(*)`` should be returned under the alias."""
        result = self._select("simple.json", "SELECT count(*) as cnt FROM S3Object")
        assert {"Records": {"Payload": b'{"cnt":1},'}} in result

    @pytest.mark.xfail(reason="Not yet implement in our parser")
    def test_count_list_as(self):
        """An aliased ``count(*)`` over the JSON list fixture."""
        result = self._select("list.json", "SELECT count(*) as cnt FROM S3Object")
        assert {"Records": {"Payload": b'{"cnt":1},'}} in result

    def test_count_csv(self):
        """``count(*)`` over the CSV fixture counts the three data rows."""
        result = self._select(
            "simple_csv",
            "SELECT count(*) FROM S3Object",
            input_serialization={
                "CSV": {"FileHeaderInfo": "USE", "FieldDelimiter": ","}
            },
        )
        assert {"Records": {"Payload": b'{"_1":3},'}} in result

    def test_default_record_delimiter(self):
        """Without an explicit delimiter, records end with a newline."""
        result = self._select(
            "simple_csv",
            "SELECT count(*) FROM S3Object",
            input_serialization={
                "CSV": {"FileHeaderInfo": "USE", "FieldDelimiter": ","}
            },
            # RecordDelimiter is not specified - should default to new line (\n)
            output_serialization={"JSON": {}},
        )
        assert {"Records": {"Payload": b'{"_1":3}\n'}} in result

    def test_extensive_json__select_list(self):
        """Selecting from a nested list path; an empty record is expected."""
        result = self._select(
            "extensive.json", "select * from s3object[*].staff[*] s"
        )
        assert {"Records": {"Payload": b"{},"}} in result

    def test_extensive_json__select_all(self):
        """Selecting everything from the nested fixture wraps it under ``_1``."""
        result = self._select("extensive.json", "select * from s3object s")
        assert {
            "Records": {
                "Payload": b'{"_1":[{"staff":[{"name":"Janelyn M","city":"Chicago","kids":2},{"name":"Stacy P","city":"Seattle","kids":1}],"country":"USA"}]},'
            }
        } in result