2021-08-26 09:49:41 +00:00
|
|
|
import json
|
2023-11-30 15:55:51 +00:00
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
2018-07-10 17:50:47 +00:00
|
|
|
import boto3
|
2023-11-30 15:55:51 +00:00
|
|
|
import pytest
|
2018-07-11 15:39:40 +00:00
|
|
|
from botocore.client import ClientError
|
2021-01-11 13:10:18 +00:00
|
|
|
from freezegun import freeze_time
|
2018-10-02 16:25:14 +00:00
|
|
|
|
2021-01-11 13:10:18 +00:00
|
|
|
from moto import mock_glue, settings
|
2022-08-13 09:49:43 +00:00
|
|
|
from moto.core import DEFAULT_ACCOUNT_ID as ACCOUNT_ID
|
2018-07-10 17:50:47 +00:00
|
|
|
|
2023-11-30 15:55:51 +00:00
|
|
|
from . import helpers
|
2018-07-10 17:50:47 +00:00
|
|
|
|
2023-02-09 13:42:16 +00:00
|
|
|
# Fixed, timezone-aware instant that tests pin the clock to via @freeze_time,
# so a CreateTime reported by the mock can be compared against an exact value.
FROZEN_CREATE_TIME = datetime(2015, 1, 1, 0, 0, 0, tzinfo=timezone.utc)
|
2021-01-11 13:10:18 +00:00
|
|
|
|
|
|
|
|
2018-07-10 17:50:47 +00:00
|
|
|
@mock_glue
@freeze_time(FROZEN_CREATE_TIME)
def test_create_database():
    """Create a database and check that get_database reflects the input."""
    glue = boto3.client("glue", region_name="us-east-1")
    name = "myspecialdatabase"
    catalog_id = ACCOUNT_ID
    expected = helpers.create_database_input(name)
    helpers.create_database(glue, name, expected, catalog_id)

    actual = helpers.get_database(glue, name)["Database"]

    assert actual["Name"] == name
    assert actual["CatalogId"] == ACCOUNT_ID
    # Optional fields: compare with .get() so a key absent on both sides matches.
    for key in ("Description", "LocationUri", "Parameters"):
        assert actual.get(key) == expected.get(key)
    # The frozen clock is only checked when not running in server mode.
    if not settings.TEST_SERVER_MODE:
        assert actual["CreateTime"].timestamp() == FROZEN_CREATE_TIME.timestamp()
    assert actual["CreateTableDefaultPermissions"] == expected.get(
        "CreateTableDefaultPermissions"
    )
    assert actual.get("TargetDatabase") == expected.get("TargetDatabase")
    assert actual.get("CatalogId") == catalog_id
|
2018-07-11 15:39:40 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_create_database_already_exists():
    """A second create_database call with the same name is rejected."""
    glue = boto3.client("glue", region_name="us-east-1")
    duplicate_name = "cantcreatethisdatabasetwice"

    # The first creation succeeds; repeating it must raise.
    helpers.create_database(glue, duplicate_name)
    with pytest.raises(ClientError) as raised:
        helpers.create_database(glue, duplicate_name)

    error = raised.value.response["Error"]
    assert error["Code"] == "AlreadyExistsException"
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_get_database_not_exits():
    """Fetching a database that was never created raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")
    missing_name = "nosuchdatabase"

    with pytest.raises(ClientError) as raised:
        helpers.get_database(glue, missing_name)

    error = raised.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    assert error["Message"] == "Database nosuchdatabase not found."
|
2018-07-26 21:05:09 +00:00
|
|
|
|
|
|
|
|
2020-04-21 18:10:39 +00:00
|
|
|
@mock_glue
def test_get_databases():
    """List databases before and after creating two of them.

    The listing must be empty at first and afterwards contain exactly the
    two created databases, each reporting the default account as CatalogId.
    """
    client = boto3.client("glue", region_name="us-east-1")

    # Nothing has been created yet, so the listing starts out empty.
    response = client.get_databases()
    assert len(response["DatabaseList"]) == 0

    # The same client is reused; building a second identical one is unnecessary.
    database_name_1, database_name_2 = "firstdatabase", "seconddatabase"
    helpers.create_database(client, database_name_1, {"Name": database_name_1})
    helpers.create_database(client, database_name_2, {"Name": database_name_2})

    # Sort by name so the assertions do not depend on listing order.
    database_list = sorted(
        client.get_databases()["DatabaseList"], key=lambda x: x["Name"]
    )
    assert len(database_list) == 2
    assert database_list[0]["Name"] == database_name_1
    assert database_list[0]["CatalogId"] == ACCOUNT_ID
    assert database_list[1]["Name"] == database_name_2
    assert database_list[1]["CatalogId"] == ACCOUNT_ID
|
2020-04-21 18:10:39 +00:00
|
|
|
|
|
|
|
|
2022-08-24 09:24:05 +00:00
|
|
|
@mock_glue
def test_update_database():
    """update_database replaces Description and LocationUri of an existing database."""
    glue = boto3.client("glue", region_name="us-east-1")
    name = "existingdatabase"
    catalog_id = ACCOUNT_ID
    helpers.create_database(glue, name, {"Name": name}, catalog_id)

    # Created with only a name, so the optional fields are absent.
    before = helpers.get_database(glue, name)["Database"]
    assert before.get("CatalogId") == catalog_id
    assert before.get("Description") is None
    assert before.get("LocationUri") is None

    new_input = {
        "Name": name,
        "Description": "desc",
        "LocationUri": "s3://bucket/existingdatabase/",
    }
    glue.update_database(CatalogId=catalog_id, Name=name, DatabaseInput=new_input)

    after = helpers.get_database(glue, name)["Database"]
    assert after.get("CatalogId") == catalog_id
    assert after.get("Description") == "desc"
    assert after.get("LocationUri") == "s3://bucket/existingdatabase/"
|
2022-08-24 09:24:05 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_update_unknown_database():
    """Updating a database that does not exist raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")

    with pytest.raises(ClientError) as raised:
        glue.update_database(Name="x", DatabaseInput={"Name": "x"})

    error = raised.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    assert error["Message"] == "Database x not found."
|
2022-08-24 09:24:05 +00:00
|
|
|
|
|
|
|
|
2021-11-24 13:21:45 +00:00
|
|
|
@mock_glue
def test_delete_database():
    """Deleting one of two databases leaves only the other in the listing."""
    glue = boto3.client("glue", region_name="us-east-1")
    doomed, survivor = "firstdatabase", "seconddatabase"

    for name in (doomed, survivor):
        helpers.create_database(glue, name, {"Name": name})

    glue.delete_database(Name=doomed)

    remaining = sorted(glue.get_databases()["DatabaseList"], key=lambda db: db["Name"])
    assert [db["Name"] for db in remaining] == [survivor]
|
2021-11-24 13:21:45 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_delete_unknown_database():
    """Deleting a database that does not exist raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")

    with pytest.raises(ClientError) as raised:
        glue.delete_database(Name="x")

    error = raised.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    assert error["Message"] == "Database x not found."
|
2021-11-24 13:21:45 +00:00
|
|
|
|
|
|
|
|
2018-07-26 21:05:09 +00:00
|
|
|
@mock_glue
@freeze_time(FROZEN_CREATE_TIME)
def test_create_table():
    """Create a table and check that get_table reflects the input."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)

    tbl_name = "myspecialtable"
    expected = helpers.create_table_input(db_name, tbl_name)
    helpers.create_table(glue, db_name, tbl_name, expected)

    actual = helpers.get_table(glue, db_name, tbl_name)["Table"]

    # The frozen clock is only checked when not running in server mode.
    if not settings.TEST_SERVER_MODE:
        assert actual["CreateTime"].timestamp() == FROZEN_CREATE_TIME.timestamp()

    for key in ("Name", "StorageDescriptor", "PartitionKeys"):
        assert actual[key] == expected[key]
|
2018-07-26 21:05:09 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_create_table_already_exists():
    """A second create_table call with the same name is rejected."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)

    duplicate_table = "cantcreatethistabletwice"
    # The first creation succeeds; repeating it must raise.
    helpers.create_table(glue, db_name, duplicate_table)
    with pytest.raises(ClientError) as raised:
        helpers.create_table(glue, db_name, duplicate_table)

    error = raised.value.response["Error"]
    assert error["Code"] == "AlreadyExistsException"
|
2018-07-26 21:05:09 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_get_tables():
    """get_tables returns every created table with the data it was created from."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)

    # Remember each table's input so the listing can be checked against it.
    expected_inputs = {}
    for name in ["myfirsttable", "mysecondtable", "mythirdtable"]:
        expected_inputs[name] = helpers.create_table_input(db_name, name)
        helpers.create_table(glue, db_name, name, expected_inputs[name])

    listed_tables = helpers.get_tables(glue, db_name)["TableList"]

    assert len(listed_tables) == 3

    for listed in listed_tables:
        listed_name = listed["Name"]
        expected = expected_inputs[listed_name]
        assert listed_name == expected["Name"]
        assert listed["StorageDescriptor"] == expected["StorageDescriptor"]
        assert listed["PartitionKeys"] == expected["PartitionKeys"]
        assert listed["CatalogId"] == ACCOUNT_ID
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
2022-09-28 10:44:01 +00:00
|
|
|
@mock_glue
def test_get_tables_expression():
    """get_tables filters table names by the given expression.

    Eight tables are created, then each expression is passed to
    helpers.get_tables and the number of returned tables is compared with
    the expected count. The assertion message carries the expression so a
    failure identifies which pattern misbehaved.
    """
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    helpers.create_database(client, database_name)

    table_names = [
        "mytableprefix_123",
        "mytableprefix_something_test",
        "something_mytablepostfix",
        "test_catchthis123_test",
        "asduas6781catchthisasdas",
        "fakecatchthisfake",
        "trailingtest.",
        "trailingtest...",
    ]
    for table_name in table_names:
        table_input = helpers.create_table_input(database_name, table_name)
        helpers.create_table(client, database_name, table_name, table_input)

    # Expression -> number of tables it is expected to match.
    expected_counts = {
        # regex-style expressions
        "mytableprefix_\\w+": 2,
        "\\w+_mytablepostfix": 1,
        "\\w+catchthis\\w+": 3,
        # even though * is an invalid regex, sadly glue api treats it as a
        # glob-like wildcard
        "*": 8,
        "mytable*": 2,
        "*table*": 3,
        "*catch*is*": 3,
        ".*catch*is*": 3,
        "trailing*.*": 2,
    }

    # Issue every query first, then assert, so one failing expression does
    # not prevent the remaining requests from being made.
    results = {
        expression: helpers.get_tables(client, database_name, expression)["TableList"]
        for expression in expected_counts
    }
    for expression, expected in expected_counts.items():
        assert len(results[expression]) == expected, expression
|
2022-09-28 10:44:01 +00:00
|
|
|
|
|
|
|
|
2018-10-02 16:25:14 +00:00
|
|
|
@mock_glue
def test_get_table_versions():
    """Each update_table call adds a version; get_table returns the latest."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)

    tbl_name = "myfirsttable"
    # VersionId (as a string) -> the table input that produced that version.
    inputs_by_version = {}

    initial_input = helpers.create_table_input(db_name, tbl_name)
    helpers.create_table(glue, db_name, tbl_name, initial_input)
    inputs_by_version["1"] = initial_input

    # Right after creation, get_table reports version 1 with no columns
    current = glue.get_table(DatabaseName=db_name, Name=tbl_name)["Table"]
    assert current["StorageDescriptor"]["Columns"] == []
    assert current["VersionId"] == "1"
    assert current["CatalogId"] == ACCOUNT_ID

    columns = [{"Name": "country", "Type": "string"}]
    updated_input = helpers.create_table_input(db_name, tbl_name, columns=columns)
    helpers.update_table(glue, db_name, tbl_name, updated_input)
    inputs_by_version["2"] = updated_input

    # Updating with an identical input should still create a new version
    helpers.update_table(glue, db_name, tbl_name, updated_input)
    inputs_by_version["3"] = updated_input

    versions = helpers.get_table_versions(glue, db_name, tbl_name)["TableVersions"]

    assert len(versions) == 3
    assert versions[0]["Table"]["StorageDescriptor"]["Columns"] == []
    assert versions[-1]["Table"]["StorageDescriptor"]["Columns"] == columns

    # Versions are expected in order, numbered from "1".
    for number, entry in enumerate(versions, start=1):
        version_id = str(number)
        expected = inputs_by_version[version_id]
        assert entry["VersionId"] == version_id
        assert entry["Table"]["VersionId"] == version_id
        assert entry["Table"]["Name"] == tbl_name
        assert entry["Table"]["StorageDescriptor"] == expected["StorageDescriptor"]
        assert entry["Table"]["PartitionKeys"] == expected["PartitionKeys"]
        assert "UpdateTime" in entry["Table"]

    single = helpers.get_table_version(glue, db_name, tbl_name, "3")["TableVersion"]

    assert single["VersionId"] == "3"
    assert single["Table"]["Name"] == tbl_name
    assert single["Table"]["StorageDescriptor"]["Columns"] == columns

    # get_table should retrieve the latest version
    current = glue.get_table(DatabaseName=db_name, Name=tbl_name)["Table"]
    assert current["StorageDescriptor"]["Columns"] == columns
    assert current["VersionId"] == "3"

    # ... and so should the entry returned by get_tables
    listed = glue.get_tables(DatabaseName=db_name)["TableList"][0]
    assert listed["StorageDescriptor"]["Columns"] == columns
    assert listed["VersionId"] == "3"
|
2023-03-03 19:43:44 +00:00
|
|
|
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
@mock_glue
def test_get_table_version_not_found():
    """Requesting a version id the table never had raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    tbl_name = "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    # Only one version exists right after creation; "20" is well out of range.
    with pytest.raises(ClientError) as raised:
        helpers.get_table_version(glue, db_name, tbl_name, "20")

    error = raised.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    assert error["Message"] == "Version not found."
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_get_table_version_invalid_input():
    """A non-numeric version id is rejected with InvalidInputException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    tbl_name = "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    with pytest.raises(ClientError) as raised:
        helpers.get_table_version(glue, db_name, tbl_name, "10not-an-int")

    error = raised.value.response["Error"]
    assert error["Code"] == "InvalidInputException"
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
2023-02-24 21:24:01 +00:00
|
|
|
@mock_glue
def test_delete_table_version():
    """Deleting one table version removes only that version from the listing.

    Three versions are produced (one create plus two updates), version "2"
    is deleted, and the remaining version ids must be "1" and "3".
    """
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    helpers.create_database(client, database_name)

    table_name = "myfirsttable"

    # Version 1: the table as created.
    table_input = helpers.create_table_input(database_name, table_name)
    helpers.create_table(client, database_name, table_name, table_input)

    # Version 2: same table with one column added.
    columns = [{"Name": "country", "Type": "string"}]
    table_input = helpers.create_table_input(database_name, table_name, columns=columns)
    helpers.update_table(client, database_name, table_name, table_input)

    # Updating with an identical input should still create a new version
    helpers.update_table(client, database_name, table_name, table_input)

    response = helpers.get_table_versions(client, database_name, table_name)
    vers = response["TableVersions"]
    assert len(vers) == 3

    client.delete_table_version(
        DatabaseName=database_name, TableName=table_name, VersionId="2"
    )

    response = helpers.get_table_versions(client, database_name, table_name)
    vers = response["TableVersions"]
    assert len(vers) == 2
    assert [v["VersionId"] for v in vers] == ["1", "3"]
|
2023-02-24 21:24:01 +00:00
|
|
|
|
|
|
|
|
2018-10-02 16:25:14 +00:00
|
|
|
@mock_glue
def test_get_table_not_exits():
    """Fetching a missing table from an existing database raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)

    with pytest.raises(ClientError) as raised:
        helpers.get_table(glue, db_name, "myfirsttable")

    error = raised.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    assert error["Message"] == "Table myfirsttable not found."
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_get_table_when_database_not_exits():
    """Fetching a table from a missing database reports the database as not found."""
    glue = boto3.client("glue", region_name="us-east-1")
    missing_db = "nosuchdatabase"

    with pytest.raises(ClientError) as raised:
        helpers.get_table(glue, missing_db, "myfirsttable")

    error = raised.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    # The message names the database, not the table.
    assert error["Message"] == "Database nosuchdatabase not found."
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
2019-05-25 09:58:41 +00:00
|
|
|
@mock_glue
def test_delete_table():
    """delete_table returns HTTP 200 and the table can no longer be fetched."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)

    tbl_name = "myspecialtable"
    helpers.create_table(
        glue, db_name, tbl_name, helpers.create_table_input(db_name, tbl_name)
    )

    deletion = glue.delete_table(DatabaseName=db_name, Name=tbl_name)
    assert deletion["ResponseMetadata"]["HTTPStatusCode"] == 200

    # A follow-up lookup must fail, proving the table is gone.
    with pytest.raises(ClientError) as raised:
        helpers.get_table(glue, db_name, tbl_name)

    error = raised.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    assert error["Message"] == "Table myspecialtable not found."
|
2019-10-31 15:44:26 +00:00
|
|
|
|
2019-05-25 09:58:41 +00:00
|
|
|
|
2019-06-10 19:14:30 +00:00
|
|
|
@mock_glue
def test_batch_delete_table():
    """batch_delete_table returns HTTP 200 and the table can no longer be fetched."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)

    tbl_name = "myspecialtable"
    helpers.create_table(
        glue, db_name, tbl_name, helpers.create_table_input(db_name, tbl_name)
    )

    deletion = glue.batch_delete_table(DatabaseName=db_name, TablesToDelete=[tbl_name])
    assert deletion["ResponseMetadata"]["HTTPStatusCode"] == 200

    # A follow-up lookup must fail, proving the table is gone.
    with pytest.raises(ClientError) as raised:
        helpers.get_table(glue, db_name, tbl_name)

    error = raised.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    assert error["Message"] == "Table myspecialtable not found."
|
2019-06-10 19:14:30 +00:00
|
|
|
|
2019-05-25 09:58:41 +00:00
|
|
|
|
2018-10-02 16:25:14 +00:00
|
|
|
@mock_glue
def test_get_partitions_empty():
    """A freshly created table has no partitions."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    partitions = glue.get_partitions(DatabaseName=db_name, TableName=tbl_name)[
        "Partitions"
    ]

    assert len(partitions) == 0
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_create_partition():
    """Create one partition and check that get_partitions returns it unchanged."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    partition_values = ["2018-10-01"]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    # Bracket the creation with two timestamps to bound CreationTime below.
    started = datetime.now(timezone.utc)
    expected = helpers.create_partition_input(
        db_name, tbl_name, values=partition_values
    )
    helpers.create_partition(glue, db_name, tbl_name, expected)
    finished = datetime.now(timezone.utc)

    listed = glue.get_partitions(DatabaseName=db_name, TableName=tbl_name)[
        "Partitions"
    ]

    assert len(listed) == 1

    actual = listed[0]
    assert actual["TableName"] == tbl_name
    assert actual["StorageDescriptor"] == expected["StorageDescriptor"]
    assert actual["Values"] == partition_values
    assert actual["CreationTime"] > started
    assert actual["CreationTime"] < finished
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_create_partition_already_exist():
    """Creating a partition with values that already exist is rejected."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    partition_values = ["2018-10-01"]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    # The first creation succeeds; repeating it with the same values must raise.
    helpers.create_partition(glue, db_name, tbl_name, values=partition_values)
    with pytest.raises(ClientError) as raised:
        helpers.create_partition(glue, db_name, tbl_name, values=partition_values)

    error = raised.value.response["Error"]
    assert error["Code"] == "AlreadyExistsException"
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_get_partition_not_found():
    """Fetching a partition that was never created raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    partition_values = ["2018-10-01"]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    with pytest.raises(ClientError) as raised:
        helpers.get_partition(glue, db_name, tbl_name, partition_values)

    error = raised.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    # Only the presence of the word is checked, not the exact wording.
    assert "partition" in error["Message"]
|
2019-10-31 15:44:26 +00:00
|
|
|
|
2018-10-02 16:25:14 +00:00
|
|
|
|
2019-06-07 08:28:10 +00:00
|
|
|
@mock_glue
def test_batch_create_partition():
    """Create 20 partitions in one batch call and check each comes back unchanged.

    Partition values are zero-padded dates "2018-10-01" .. "2018-10-20".
    The listing is compared with the inputs pairwise by position, i.e. the
    test expects get_partitions to return partitions in creation order.
    """
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    table_name = "myfirsttable"
    helpers.create_database(client, database_name)
    helpers.create_table(client, database_name, table_name)

    # Bracket the creation with two timestamps to bound CreationTime below.
    before = datetime.now(timezone.utc)

    # ":02" zero-pads the day; a bare ":2" would pad with a space instead
    # and yield values such as "2018-10- 1".
    partition_inputs = [
        helpers.create_partition_input(
            database_name, table_name, values=[f"2018-10-{day:02}"]
        )
        for day in range(1, 21)
    ]

    client.batch_create_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionInputList=partition_inputs,
    )

    after = datetime.now(timezone.utc)

    response = client.get_partitions(DatabaseName=database_name, TableName=table_name)
    partitions = response["Partitions"]

    # The length check makes the pairwise zip below exhaustive.
    assert len(partitions) == 20

    for partition, partition_input in zip(partitions, partition_inputs):
        assert partition["TableName"] == table_name
        assert partition["StorageDescriptor"] == partition_input["StorageDescriptor"]
        assert partition["Values"] == partition_input["Values"]
        assert partition["CreationTime"] > before
        assert partition["CreationTime"] < after
|
2019-06-07 08:28:10 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_batch_create_partition_already_exist():
    """Batch-creating an existing partition yields one AlreadyExists error."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    existing_values = ["2018-10-01"]

    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)
    helpers.create_partition(glue, db_name, tbl_name, values=existing_values)

    # Submit the very same partition values again through the batch API.
    duplicate_input = helpers.create_partition_input(
        db_name, tbl_name, values=existing_values
    )
    result = glue.batch_create_partition(
        DatabaseName=db_name,
        TableName=tbl_name,
        PartitionInputList=[duplicate_input],
    )

    errors = result["Errors"]
    assert len(errors) == 1
    assert errors[0]["PartitionValues"] == existing_values
    assert errors[0]["ErrorDetail"]["ErrorCode"] == "AlreadyExistsException"
|
2019-06-07 08:28:10 +00:00
|
|
|
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
@mock_glue
def test_get_partition():
    """Fetch one of two stored partitions by its values."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    first_values, second_values = ["2018-10-01"], ["2018-09-01"]
    helpers.create_partition(glue, db_name, tbl_name, values=first_values)
    helpers.create_partition(glue, db_name, tbl_name, values=second_values)

    # Ask for the partition that was created second.
    fetched = glue.get_partition(
        DatabaseName=db_name, TableName=tbl_name, PartitionValues=second_values
    )["Partition"]

    assert fetched["TableName"] == tbl_name
    assert fetched["Values"] == second_values
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
2019-07-17 19:07:19 +00:00
|
|
|
@mock_glue
def test_batch_get_partition():
    """Request two stored partitions in one batch and inspect the second."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    first_values, second_values = ["2018-10-01"], ["2018-09-01"]
    helpers.create_partition(glue, db_name, tbl_name, values=first_values)
    helpers.create_partition(glue, db_name, tbl_name, values=second_values)

    result = glue.batch_get_partition(
        DatabaseName=db_name,
        TableName=tbl_name,
        PartitionsToGet=[{"Values": first_values}, {"Values": second_values}],
    )

    found = result["Partitions"]
    assert len(found) == 2

    second = found[1]
    assert second["TableName"] == tbl_name
    assert second["Values"] == second_values
|
2019-07-17 19:07:19 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_batch_get_partition_missing_partition():
    """A batch get returns only the partitions that were actually created."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    present_a, absent, present_b = ["2018-10-01"], ["2018-09-01"], ["2018-08-01"]

    # Only the first and third value lists are created; the middle one is not.
    helpers.create_partition(glue, db_name, tbl_name, values=present_a)
    helpers.create_partition(glue, db_name, tbl_name, values=present_b)

    requested = [{"Values": vals} for vals in (present_a, absent, present_b)]
    result = glue.batch_get_partition(
        DatabaseName=db_name,
        TableName=tbl_name,
        PartitionsToGet=requested,
    )

    found = result["Partitions"]
    assert len(found) == 2

    assert found[0]["Values"] == present_a
    assert found[1]["Values"] == present_b
|
2019-07-17 19:07:19 +00:00
|
|
|
|
|
|
|
|
2018-10-02 16:25:14 +00:00
|
|
|
@mock_glue
def test_update_partition_not_found_moving():
    """Moving a partition that was never created raises EntityNotFound."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"

    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    with pytest.raises(ClientError) as err:
        helpers.update_partition(
            glue,
            db_name,
            tbl_name,
            old_values=["0000-00-00"],
            values=["2018-10-02"],
        )

    error = err.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    assert "partition" in error["Message"]
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_update_partition_not_found_change_in_place():
    """Updating a missing partition in place raises EntityNotFound."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    partition_values = ["2018-10-01"]

    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    # Old and new values are identical, and no partition was ever created.
    with pytest.raises(ClientError) as err:
        helpers.update_partition(
            glue,
            db_name,
            tbl_name,
            old_values=partition_values,
            values=partition_values,
        )

    error = err.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    assert "partition" in error["Message"]
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_update_partition_cannot_overwrite():
    """Moving a partition onto another existing one raises AlreadyExists."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    source_values, target_values = ["2018-10-01"], ["2018-09-01"]
    helpers.create_partition(glue, db_name, tbl_name, values=source_values)
    helpers.create_partition(glue, db_name, tbl_name, values=target_values)

    with pytest.raises(ClientError) as err:
        helpers.update_partition(
            glue,
            db_name,
            tbl_name,
            old_values=source_values,
            values=target_values,
        )

    assert err.value.response["Error"]["Code"] == "AlreadyExistsException"
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_update_partition():
    """Update a partition in place and verify the new column definition.

    The partition keeps the same values; only its storage descriptor columns
    are replaced, and the stored partition is read back to confirm this.
    """
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    table_name = "myfirsttable"
    values = ["2018-10-01"]

    helpers.create_database(client, database_name)
    helpers.create_table(client, database_name, table_name)
    helpers.create_partition(client, database_name, table_name, values=values)

    # The return value was previously bound to `response` and immediately
    # overwritten below, so it is no longer captured.
    helpers.update_partition(
        client,
        database_name,
        table_name,
        old_values=values,
        values=values,
        columns=[{"Name": "country", "Type": "string"}],
    )

    response = client.get_partition(
        DatabaseName=database_name, TableName=table_name, PartitionValues=values
    )
    partition = response["Partition"]

    assert partition["TableName"] == table_name
    assert partition["StorageDescriptor"]["Columns"] == [
        {"Name": "country", "Type": "string"}
    ]
|
2018-10-02 16:25:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_update_partition_move():
    """Move a partition to new values and verify old/new lookups.

    After the update, fetching by the old values must raise
    EntityNotFoundException, while fetching by the new values returns the
    partition with the updated columns.
    """
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    table_name = "myfirsttable"
    values = ["2018-10-01"]
    new_values = ["2018-09-01"]

    helpers.create_database(client, database_name)
    helpers.create_table(client, database_name, table_name)
    helpers.create_partition(client, database_name, table_name, values=values)

    # The return value was previously bound to `response` but never read
    # before being overwritten, so it is no longer captured.
    helpers.update_partition(
        client,
        database_name,
        table_name,
        old_values=values,
        values=new_values,
        columns=[{"Name": "country", "Type": "string"}],
    )

    with pytest.raises(ClientError) as exc:
        helpers.get_partition(client, database_name, table_name, values)

    # Old partition shouldn't exist anymore
    assert exc.value.response["Error"]["Code"] == "EntityNotFoundException"

    response = client.get_partition(
        DatabaseName=database_name, TableName=table_name, PartitionValues=new_values
    )
    partition = response["Partition"]

    assert partition["TableName"] == table_name
    assert partition["StorageDescriptor"]["Columns"] == [
        {"Name": "country", "Type": "string"}
    ]
|
2018-10-02 16:25:14 +00:00
|
|
|
|
2019-06-11 19:14:28 +00:00
|
|
|
|
2020-12-10 20:03:37 +00:00
|
|
|
@mock_glue
def test_batch_update_partition():
    """Move three partitions to new values with a single batch update.

    Each old value list must afterwards raise EntityNotFoundException, and
    each new value list must resolve to a partition with the updated columns.
    """
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    table_name = "myfirsttable"

    values = [
        ["2020-12-04"],
        ["2020-12-05"],
        ["2020-12-06"],
    ]

    new_values = [
        ["2020-11-04"],
        ["2020-11-05"],
        ["2020-11-06"],
    ]

    helpers.create_database(client, database_name)
    helpers.create_table(client, database_name, table_name)

    # Create each original partition and queue its move to the new values.
    batch_update_values = []
    for value, new_value in zip(values, new_values):
        helpers.create_partition(client, database_name, table_name, values=value)
        batch_update_values.append(
            {
                "PartitionValueList": value,
                "PartitionInput": helpers.create_partition_input(
                    database_name,
                    table_name,
                    values=new_value,
                    columns=[{"Name": "country", "Type": "string"}],
                ),
            }
        )

    # The result was previously bound to an unused `response` local.
    client.batch_update_partition(
        DatabaseName=database_name, TableName=table_name, Entries=batch_update_values
    )

    for value in values:
        with pytest.raises(ClientError) as exc:
            helpers.get_partition(client, database_name, table_name, value)
        assert exc.value.response["Error"]["Code"] == "EntityNotFoundException"

    for value in new_values:
        response = client.get_partition(
            DatabaseName=database_name, TableName=table_name, PartitionValues=value
        )
        partition = response["Partition"]

        assert partition["TableName"] == table_name
        assert partition["StorageDescriptor"]["Columns"] == [
            {"Name": "country", "Type": "string"}
        ]
|
2020-12-10 20:03:37 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_batch_update_partition_missing_partition():
    """A batch update with one unknown partition reports exactly that entry."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    new_columns = [{"Name": "country", "Type": "string"}]

    old_value_lists = [["2020-12-05"], ["2020-12-06"]]
    new_value_lists = [["2020-11-05"], ["2020-11-06"]]

    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    entries = []
    for position, old_values in enumerate(old_value_lists):
        helpers.create_partition(glue, db_name, tbl_name, values=old_values)
        replacement = helpers.create_partition_input(
            db_name,
            tbl_name,
            values=new_value_lists[position],
            columns=[{"Name": "country", "Type": "string"}],
        )
        entries.append(
            {"PartitionValueList": old_values, "PartitionInput": replacement}
        )

    # add a non-existent partition to the batch update values
    missing_replacement = helpers.create_partition_input(
        db_name,
        tbl_name,
        values=["2019-09-09"],
        columns=new_columns,
    )
    entries.append(
        {
            "PartitionValueList": ["2020-10-10"],
            "PartitionInput": missing_replacement,
        }
    )

    result = glue.batch_update_partition(
        DatabaseName=db_name, TableName=tbl_name, Entries=entries
    )

    errors = result["Errors"]
    assert len(errors) == 1
    assert errors[0]["PartitionValueList"] == ["2020-10-10"]
|
2020-12-10 20:03:37 +00:00
|
|
|
|
|
|
|
|
2019-06-11 19:14:28 +00:00
|
|
|
@mock_glue
def test_delete_partition():
    """Deleting the only partition leaves the table with no partitions."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    partition_values = ["2018-10-01"]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    # Build the input explicitly and hand it to the helper positionally.
    prepared_input = helpers.create_partition_input(
        db_name, tbl_name, values=partition_values
    )
    helpers.create_partition(glue, db_name, tbl_name, prepared_input)

    glue.delete_partition(
        DatabaseName=db_name, TableName=tbl_name, PartitionValues=partition_values
    )

    remaining = glue.get_partitions(DatabaseName=db_name, TableName=tbl_name)[
        "Partitions"
    ]
    assert remaining == []
|
2019-06-11 19:14:28 +00:00
|
|
|
|
2019-10-31 15:44:26 +00:00
|
|
|
|
2019-06-11 19:14:28 +00:00
|
|
|
@mock_glue
def test_delete_partition_bad_partition():
    """Deleting a partition that was never created raises EntityNotFound."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    missing_values = ["2018-10-01"]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    with pytest.raises(ClientError) as err:
        glue.delete_partition(
            DatabaseName=db_name, TableName=tbl_name, PartitionValues=missing_values
        )

    assert err.value.response["Error"]["Code"] == "EntityNotFoundException"
|
2019-10-31 15:44:26 +00:00
|
|
|
|
2019-06-11 19:14:28 +00:00
|
|
|
|
|
|
|
@mock_glue
def test_batch_delete_partition():
    """Batch-delete 20 existing partitions and expect no errors reported."""
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    table_name = "myfirsttable"
    helpers.create_database(client, database_name)
    helpers.create_table(client, database_name, table_name)

    # Zero-pad the day with ":02d"; the former ":2" spec padded with a space
    # and produced values such as "2018-10- 0".
    partition_inputs = [
        helpers.create_partition_input(
            database_name, table_name, values=[f"2018-10-{day:02d}"]
        )
        for day in range(1, 21)
    ]

    client.batch_create_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionInputList=partition_inputs,
    )

    partition_values = [{"Values": p["Values"]} for p in partition_inputs]

    response = client.batch_delete_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionsToDelete=partition_values,
    )

    assert "Errors" not in response
|
2019-10-31 15:44:26 +00:00
|
|
|
|
2019-06-11 19:14:28 +00:00
|
|
|
|
|
|
|
@mock_glue
def test_batch_delete_partition_with_bad_partitions():
    """Batch delete with three unknown partitions reports exactly those three.

    Twenty real partitions are created, three non-existent value lists are
    mixed into the delete request, and the response errors are checked.
    """
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    table_name = "myfirsttable"
    helpers.create_database(client, database_name)
    helpers.create_table(client, database_name, table_name)

    # Zero-pad the day with ":02d"; the former ":2" spec padded with a space
    # and produced values such as "2018-10- 0".
    partition_inputs = [
        helpers.create_partition_input(
            database_name, table_name, values=[f"2018-10-{day:02d}"]
        )
        for day in range(1, 21)
    ]

    client.batch_create_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionInputList=partition_inputs,
    )

    partition_values = [{"Values": p["Values"]} for p in partition_inputs]

    # Interleave partitions that were never created.
    partition_values.insert(5, {"Values": ["2018-11-01"]})
    partition_values.insert(10, {"Values": ["2018-11-02"]})
    partition_values.insert(15, {"Values": ["2018-11-03"]})

    response = client.batch_delete_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionsToDelete=partition_values,
    )

    assert len(response["Errors"]) == 3
    # Use a list rather than a one-shot map iterator: `in` on an iterator
    # consumes it, so the checks below would otherwise pass only when the
    # errors happen to arrive in exactly this order.
    error_partitions = [error["PartitionValues"] for error in response["Errors"]]
    assert ["2018-11-01"] in error_partitions
    assert ["2018-11-02"] in error_partitions
    assert ["2018-11-03"] in error_partitions
|
2021-08-26 09:49:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
@freeze_time(FROZEN_CREATE_TIME)
def test_create_crawler_scheduled():
    """Create a crawler with a cron schedule and verify every stored field."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    role_arn = "arn:aws:iam::123456789012:role/Glue/Role"
    db_name = "my_database_name"
    crawler_description = "my crawler description"
    crawl_targets = {
        "S3Targets": [{"Path": "s3://my-source-bucket/"}],
        "JdbcTargets": [],
        "MongoDBTargets": [],
        "DynamoDBTargets": [],
        "CatalogTargets": [],
    }
    cron_expression = "cron(15 12 * * ? *)"
    classifier_names = []
    prefix = "my_table_prefix_"
    change_policy = {"UpdateBehavior": "LOG", "DeleteBehavior": "LOG"}
    recrawl = {"RecrawlBehavior": "CRAWL_NEW_FOLDERS_ONLY"}
    lineage = {"CrawlerLineageSettings": "DISABLE"}
    config_json = json.dumps(
        {
            "Version": 1.0,
            "CrawlerOutput": {
                "Partitions": {"AddOrUpdateBehavior": "InheritFromTable"},
            },
            "Grouping": {"TableGroupingPolicy": "CombineCompatibleSchemas"},
        }
    )
    security_config = "my_security_configuration"
    crawler_tags = {"tag_key": "tag_value"}

    helpers.create_crawler(
        glue,
        crawler_name,
        role_arn,
        crawl_targets,
        database_name=db_name,
        description=crawler_description,
        schedule=cron_expression,
        classifiers=classifier_names,
        table_prefix=prefix,
        schema_change_policy=change_policy,
        recrawl_policy=recrawl,
        lineage_configuration=lineage,
        configuration=config_json,
        crawler_security_configuration=security_config,
        tags=crawler_tags,
    )

    crawler = glue.get_crawler(Name=crawler_name)["Crawler"]

    # Fields that should echo the creation arguments.
    assert crawler.get("Name") == crawler_name
    assert crawler.get("Role") == role_arn
    assert crawler.get("DatabaseName") == db_name
    assert crawler.get("Description") == crawler_description
    assert crawler.get("Targets") == crawl_targets
    assert crawler["Schedule"] == {
        "ScheduleExpression": cron_expression,
        "State": "SCHEDULED",
    }
    assert crawler.get("Classifiers") == classifier_names
    assert crawler.get("TablePrefix") == prefix
    assert crawler.get("SchemaChangePolicy") == change_policy
    assert crawler.get("RecrawlPolicy") == recrawl
    assert crawler.get("LineageConfiguration") == lineage
    assert crawler.get("Configuration") == config_json
    assert crawler["CrawlerSecurityConfiguration"] == security_config

    # Fields expected on a freshly created crawler.
    assert crawler.get("State") == "READY"
    assert crawler.get("CrawlElapsedTime") == 0
    assert crawler.get("Version") == 1
    # Timestamps are compared with the frozen time only outside server mode.
    if not settings.TEST_SERVER_MODE:
        frozen_ts = FROZEN_CREATE_TIME.timestamp()
        assert crawler["CreationTime"].timestamp() == frozen_ts
        assert crawler["LastUpdated"].timestamp() == frozen_ts

    assert "LastCrawl" not in crawler
|
2021-08-26 09:49:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
@freeze_time(FROZEN_CREATE_TIME)
def test_create_crawler_unscheduled():
    """Create a crawler without a schedule and verify every stored field."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    role_arn = "arn:aws:iam::123456789012:role/Glue/Role"
    db_name = "my_database_name"
    crawler_description = "my crawler description"
    crawl_targets = {
        "S3Targets": [{"Path": "s3://my-source-bucket/"}],
        "JdbcTargets": [],
        "MongoDBTargets": [],
        "DynamoDBTargets": [],
        "CatalogTargets": [],
    }
    classifier_names = []
    prefix = "my_table_prefix_"
    change_policy = {"UpdateBehavior": "LOG", "DeleteBehavior": "LOG"}
    recrawl = {"RecrawlBehavior": "CRAWL_NEW_FOLDERS_ONLY"}
    lineage = {"CrawlerLineageSettings": "DISABLE"}
    config_json = json.dumps(
        {
            "Version": 1.0,
            "CrawlerOutput": {
                "Partitions": {"AddOrUpdateBehavior": "InheritFromTable"},
            },
            "Grouping": {"TableGroupingPolicy": "CombineCompatibleSchemas"},
        }
    )
    security_config = "my_security_configuration"
    crawler_tags = {"tag_key": "tag_value"}

    # No `schedule` argument is passed here.
    helpers.create_crawler(
        glue,
        crawler_name,
        role_arn,
        crawl_targets,
        database_name=db_name,
        description=crawler_description,
        classifiers=classifier_names,
        table_prefix=prefix,
        schema_change_policy=change_policy,
        recrawl_policy=recrawl,
        lineage_configuration=lineage,
        configuration=config_json,
        crawler_security_configuration=security_config,
        tags=crawler_tags,
    )

    crawler = glue.get_crawler(Name=crawler_name)["Crawler"]

    # Fields that should echo the creation arguments.
    assert crawler.get("Name") == crawler_name
    assert crawler.get("Role") == role_arn
    assert crawler.get("DatabaseName") == db_name
    assert crawler.get("Description") == crawler_description
    assert crawler.get("Targets") == crawl_targets
    assert "Schedule" not in crawler
    assert crawler.get("Classifiers") == classifier_names
    assert crawler.get("TablePrefix") == prefix
    assert crawler.get("SchemaChangePolicy") == change_policy
    assert crawler.get("RecrawlPolicy") == recrawl
    assert crawler.get("LineageConfiguration") == lineage
    assert crawler.get("Configuration") == config_json
    assert crawler["CrawlerSecurityConfiguration"] == security_config

    # Fields expected on a freshly created crawler.
    assert crawler.get("State") == "READY"
    assert crawler.get("CrawlElapsedTime") == 0
    assert crawler.get("Version") == 1
    # Timestamps are compared with the frozen time only outside server mode.
    if not settings.TEST_SERVER_MODE:
        frozen_ts = FROZEN_CREATE_TIME.timestamp()
        assert crawler["CreationTime"].timestamp() == frozen_ts
        assert crawler["LastUpdated"].timestamp() == frozen_ts

    assert "LastCrawl" not in crawler
|
2021-08-26 09:49:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_create_crawler_already_exists():
    """Creating a crawler twice under one name raises AlreadyExists."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)

    # Second creation with the same name must be rejected.
    with pytest.raises(ClientError) as err:
        helpers.create_crawler(glue, crawler_name)

    error_code = err.value.response["Error"]["Code"]
    assert error_code == "AlreadyExistsException"
|
2021-08-26 09:49:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_get_crawler_not_exits():
    """Fetching an unknown crawler raises EntityNotFound with a fixed message."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"

    with pytest.raises(ClientError) as err:
        glue.get_crawler(Name=crawler_name)

    error = err.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    assert error["Message"] == "Crawler my_crawler_name not found."
|
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_get_crawlers_empty():
    """Listing crawlers before any are created returns an empty list."""
    glue = boto3.client("glue", region_name="us-east-1")
    listed = glue.get_crawlers()["Crawlers"]
    assert len(listed) == 0
|
2021-08-26 09:49:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_get_crawlers_several_items():
    """Listing crawlers returns both created crawlers."""
    glue = boto3.client("glue", region_name="us-east-1")
    first_name = "my_crawler_name_1"
    second_name = "my_crawler_name_2"

    for crawler_name in (first_name, second_name):
        helpers.create_crawler(glue, crawler_name)

    # Sort by name so the assertions do not depend on listing order.
    listed = glue.get_crawlers()["Crawlers"]
    by_name = sorted(listed, key=lambda item: item["Name"])
    assert len(by_name) == 2
    assert by_name[0].get("Name") == first_name
    assert by_name[1].get("Name") == second_name
|
2021-08-26 09:49:41 +00:00
|
|
|
|
|
|
|
|
2021-10-28 21:20:08 +00:00
|
|
|
@mock_glue
def test_start_crawler():
    """Starting a crawler moves it to the RUNNING state."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)

    glue.start_crawler(Name=crawler_name)

    state = glue.get_crawler(Name=crawler_name)["Crawler"].get("State")
    assert state == "RUNNING"
|
2021-10-28 21:20:08 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_start_crawler_should_raise_exception_if_already_running():
    """Starting an already started crawler raises CrawlerRunningException."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)

    # First start succeeds; the second one is expected to fail.
    glue.start_crawler(Name=crawler_name)
    with pytest.raises(ClientError) as err:
        glue.start_crawler(Name=crawler_name)

    error_code = err.value.response["Error"]["Code"]
    assert error_code == "CrawlerRunningException"
|
2021-10-28 21:20:08 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_stop_crawler():
    """Stopping a started crawler moves it to the STOPPING state."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)
    glue.start_crawler(Name=crawler_name)

    glue.stop_crawler(Name=crawler_name)

    state = glue.get_crawler(Name=crawler_name)["Crawler"].get("State")
    assert state == "STOPPING"
|
2021-10-28 21:20:08 +00:00
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_stop_crawler_should_raise_exception_if_not_running():
    """Stopping a crawler that was never started raises CrawlerNotRunning."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)

    with pytest.raises(ClientError) as err:
        glue.stop_crawler(Name=crawler_name)

    error_code = err.value.response["Error"]["Code"]
    assert error_code == "CrawlerNotRunningException"
|
2021-10-28 21:20:08 +00:00
|
|
|
|
|
|
|
|
2021-08-26 09:49:41 +00:00
|
|
|
@mock_glue
def test_delete_crawler():
    """Deleting a crawler returns HTTP 200 and makes later lookups fail."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)

    deletion = glue.delete_crawler(Name=crawler_name)
    assert deletion["ResponseMetadata"]["HTTPStatusCode"] == 200

    # confirm crawler is deleted
    with pytest.raises(ClientError) as err:
        glue.get_crawler(Name=crawler_name)

    error = err.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    assert error["Message"] == "Crawler my_crawler_name not found."
|
|
|
|
|
|
|
|
|
|
|
|
@mock_glue
def test_delete_crawler_not_exists():
    """Deleting an unknown crawler raises EntityNotFound with a fixed message."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"

    with pytest.raises(ClientError) as err:
        glue.delete_crawler(Name=crawler_name)

    error = err.value.response["Error"]
    assert error["Code"] == "EntityNotFoundException"
    assert error["Message"] == "Crawler my_crawler_name not found."
|