# moto/tests/test_glue/test_datacatalog.py
#
# 1419 lines
# 47 KiB
# Python

import sure # noqa # pylint: disable=unused-import
import re
import pytest
import json
import boto3
from botocore.client import ClientError
from datetime import datetime, timezone
from freezegun import freeze_time
from moto import mock_glue, settings
from moto.core import DEFAULT_ACCOUNT_ID as ACCOUNT_ID
from . import helpers
# Fixed UTC instant handed to freeze_time so creation timestamps can be
# compared exactly in the tests below.
FROZEN_CREATE_TIME = datetime(year=2015, month=1, day=1, tzinfo=timezone.utc)
@mock_glue
@freeze_time(FROZEN_CREATE_TIME)
def test_create_database():
    """Create a database and check that get_database returns what was sent.

    Time is frozen at FROZEN_CREATE_TIME so CreateTime can be compared exactly.
    """
    glue = boto3.client("glue", region_name="us-east-1")
    name = "myspecialdatabase"
    catalog_id = ACCOUNT_ID
    requested = helpers.create_database_input(name)
    helpers.create_database(glue, name, requested, catalog_id)

    stored = helpers.get_database(glue, name)["Database"]

    stored.get("Name").should.equal(name)
    for field in ("Description", "LocationUri", "Parameters"):
        stored.get(field).should.equal(requested.get(field))
    # CreateTime is only compared outside server mode (presumably freeze_time
    # cannot reach a separately running server -- confirm).
    if not settings.TEST_SERVER_MODE:
        created_at = stored.get("CreateTime").timestamp()
        created_at.should.equal(FROZEN_CREATE_TIME.timestamp())
    for field in ("CreateTableDefaultPermissions", "TargetDatabase"):
        stored.get(field).should.equal(requested.get(field))
    stored.get("CatalogId").should.equal(catalog_id)
@mock_glue
def test_create_database_already_exists():
    """Creating the same database name twice raises AlreadyExistsException."""
    glue = boto3.client("glue", region_name="us-east-1")
    duplicate_name = "cantcreatethisdatabasetwice"
    helpers.create_database(glue, duplicate_name)

    with pytest.raises(ClientError) as raised:
        helpers.create_database(glue, duplicate_name)

    error = raised.value.response["Error"]
    error["Code"].should.equal("AlreadyExistsException")
@mock_glue
def test_get_database_not_exits():
    """Fetching an unknown database raises EntityNotFoundException naming it."""
    glue = boto3.client("glue", region_name="us-east-1")
    missing_name = "nosuchdatabase"

    with pytest.raises(ClientError) as raised:
        helpers.get_database(glue, missing_name)

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("Database nosuchdatabase not found")
@mock_glue
def test_get_databases_empty():
    """get_databases returns an empty DatabaseList when nothing was created."""
    glue = boto3.client("glue", region_name="us-east-1")
    listed = glue.get_databases()["DatabaseList"]
    listed.should.have.length_of(0)
@mock_glue
def test_get_databases_several_items():
    """get_databases lists every database that was created."""
    glue = boto3.client("glue", region_name="us-east-1")
    expected_names = ["firstdatabase", "seconddatabase"]
    for name in expected_names:
        helpers.create_database(glue, name, {"Name": name})

    # Sort by name so the comparison does not depend on the listing order.
    listed = list(glue.get_databases()["DatabaseList"])
    listed.sort(key=lambda entry: entry["Name"])

    listed.should.have.length_of(2)
    for entry, expected in zip(listed, expected_names):
        entry["Name"].should.equal(expected)
@mock_glue
def test_update_database():
    """update_database sets Description and LocationUri on an existing database."""
    glue = boto3.client("glue", region_name="us-east-1")
    name = "existingdatabase"
    catalog_id = ACCOUNT_ID
    helpers.create_database(glue, name, {"Name": name}, catalog_id)

    # Only the name was supplied at creation, so the optional fields are unset.
    before = helpers.get_database(glue, name)["Database"]
    before.get("CatalogId").should.equal(catalog_id)
    before.get("Description").should.be.none
    before.get("LocationUri").should.be.none

    glue.update_database(
        CatalogId=catalog_id,
        Name=name,
        DatabaseInput={
            "Name": name,
            "Description": "desc",
            "LocationUri": "s3://bucket/existingdatabase/",
        },
    )

    after = helpers.get_database(glue, name)["Database"]
    after.get("CatalogId").should.equal(catalog_id)
    after.get("Description").should.equal("desc")
    after.get("LocationUri").should.equal("s3://bucket/existingdatabase/")
@mock_glue
def test_update_unknown_database():
    """Updating a database that was never created raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")

    with pytest.raises(ClientError) as raised:
        glue.update_database(Name="x", DatabaseInput={"Name": "x"})

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.equal("Database x not found.")
@mock_glue
def test_delete_database():
    """delete_database removes only the named database from the listing."""
    glue = boto3.client("glue", region_name="us-east-1")
    doomed, survivor = "firstdatabase", "seconddatabase"
    for name in (doomed, survivor):
        helpers.create_database(glue, name, {"Name": name})

    glue.delete_database(Name=doomed)

    remaining = sorted(
        glue.get_databases()["DatabaseList"], key=lambda entry: entry["Name"]
    )
    remaining_names = [entry["Name"] for entry in remaining]
    remaining_names.should.equal([survivor])
@mock_glue
def test_delete_unknown_database():
    """Deleting a database that was never created raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")

    with pytest.raises(ClientError) as raised:
        glue.delete_database(Name="x")

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.equal("Database x not found.")
@mock_glue
@freeze_time(FROZEN_CREATE_TIME)
def test_create_table():
    """Create a table and check that get_table returns the submitted definition.

    Time is frozen at FROZEN_CREATE_TIME so CreateTime can be compared exactly.
    """
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)
    tbl_name = "myspecialtable"
    requested = helpers.create_table_input(db_name, tbl_name)
    helpers.create_table(glue, db_name, tbl_name, requested)

    stored = helpers.get_table(glue, db_name, tbl_name)["Table"]

    # CreateTime is only compared outside server mode.
    if not settings.TEST_SERVER_MODE:
        created_at = stored["CreateTime"].timestamp()
        created_at.should.equal(FROZEN_CREATE_TIME.timestamp())
    for field in ("Name", "StorageDescriptor", "PartitionKeys"):
        stored[field].should.equal(requested[field])
@mock_glue
def test_create_table_already_exists():
    """Creating the same table name twice raises AlreadyExistsException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)
    duplicate_name = "cantcreatethistabletwice"
    helpers.create_table(glue, db_name, duplicate_name)

    with pytest.raises(ClientError) as raised:
        helpers.create_table(glue, db_name, duplicate_name)

    error = raised.value.response["Error"]
    error["Code"].should.equal("AlreadyExistsException")
@mock_glue
def test_get_tables():
    """get_tables returns every created table with its submitted definition."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)

    # Remember each submitted input so the listing can be checked against it.
    inputs_by_name = {}
    for name in ("myfirsttable", "mysecondtable", "mythirdtable"):
        inputs_by_name[name] = helpers.create_table_input(db_name, name)
        helpers.create_table(glue, db_name, name, inputs_by_name[name])

    listed = helpers.get_tables(glue, db_name)["TableList"]

    listed.should.have.length_of(3)
    for entry in listed:
        expected = inputs_by_name[entry["Name"]]
        entry["Name"].should.equal(expected["Name"])
        for field in ("StorageDescriptor", "PartitionKeys"):
            entry[field].should.equal(expected[field])
@mock_glue
def test_get_tables_expression():
    """get_tables filters table names by the given expression.

    Covers ordinary regular expressions as well as expressions containing a
    bare ``*``, and checks how many of the created tables each one selects.
    """
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)

    table_names = [
        "mytableprefix_123",
        "mytableprefix_something_test",
        "something_mytablepostfix",
        "test_catchthis123_test",
        "asduas6781catchthisasdas",
        "fakecatchthisfake",
        "trailingtest.",
        "trailingtest...",
    ]
    for name in table_names:
        table_input = helpers.create_table_input(db_name, name)
        helpers.create_table(glue, db_name, name, table_input)

    # (expression, number of tables expected to match)
    expectations = [
        ("mytableprefix_\\w+", 2),
        ("\\w+_mytablepostfix", 1),
        ("\\w+catchthis\\w+", 3),
        # even though * is an invalid regex, sadly glue api treats it as a
        # glob-like wildcard
        ("*", 8),
        ("mytable*", 2),
        ("*table*", 3),
        ("*catch*is*", 3),
        (".*catch*is*", 3),
        ("trailing*.*", 2),
    ]

    # Issue every query first, then assert, so one failing expectation does
    # not hide a request error in a later expression.
    matches = [
        helpers.get_tables(glue, db_name, expression)["TableList"]
        for expression, _ in expectations
    ]
    for found, (_, expected_count) in zip(matches, expectations):
        found.should.have.length_of(expected_count)
@mock_glue
def test_get_table_versions():
    """Every update_table call adds a version; reads return the latest one."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)
    tbl_name = "myfirsttable"
    inputs_by_version = {}

    initial_input = helpers.create_table_input(db_name, tbl_name)
    helpers.create_table(glue, db_name, tbl_name, initial_input)
    inputs_by_version["1"] = initial_input

    # get_table should retrieve the first version
    current = glue.get_table(DatabaseName=db_name, Name=tbl_name)["Table"]
    current["StorageDescriptor"]["Columns"].should.equal([])
    current["VersionId"].should.equal("1")

    country_column = [{"Name": "country", "Type": "string"}]
    updated_input = helpers.create_table_input(
        db_name, tbl_name, columns=country_column
    )
    helpers.update_table(glue, db_name, tbl_name, updated_input)
    inputs_by_version["2"] = updated_input

    # Updating with an identical input should still create a new version
    helpers.update_table(glue, db_name, tbl_name, updated_input)
    inputs_by_version["3"] = updated_input

    versions = helpers.get_table_versions(glue, db_name, tbl_name)["TableVersions"]
    versions.should.have.length_of(3)
    versions[0]["Table"]["StorageDescriptor"]["Columns"].should.equal([])
    versions[-1]["Table"]["StorageDescriptor"]["Columns"].should.equal(
        country_column
    )

    # Version ids are the strings "1", "2", "3" in listing order.
    for position, entry in enumerate(versions, start=1):
        version_id = str(position)
        expected = inputs_by_version[version_id]
        table = entry["Table"]
        entry["VersionId"].should.equal(version_id)
        table["VersionId"].should.equal(version_id)
        table["Name"].should.equal(tbl_name)
        table["StorageDescriptor"].should.equal(expected["StorageDescriptor"])
        table["PartitionKeys"].should.equal(expected["PartitionKeys"])
        table.should.have.key("UpdateTime")

    third = helpers.get_table_version(glue, db_name, tbl_name, "3")["TableVersion"]
    third["VersionId"].should.equal("3")
    third["Table"]["Name"].should.equal(tbl_name)
    third["Table"]["StorageDescriptor"]["Columns"].should.equal(country_column)

    # get_table should retrieve the latest version
    current = glue.get_table(DatabaseName=db_name, Name=tbl_name)["Table"]
    current["StorageDescriptor"]["Columns"].should.equal(country_column)
    current["VersionId"].should.equal("3")

    # ... and so should get_tables
    listed = glue.get_tables(DatabaseName=db_name)["TableList"][0]
    listed["StorageDescriptor"]["Columns"].should.equal(country_column)
    listed["VersionId"].should.equal("3")
@mock_glue
def test_get_table_version_not_found():
    """Requesting a version id that does not exist raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    tbl_name = "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    with pytest.raises(ClientError) as raised:
        helpers.get_table_version(glue, db_name, "myfirsttable", "20")

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    # The message must mention "version", in any letter case.
    error["Message"].should.match("version", re.I)
@mock_glue
def test_get_table_version_invalid_input():
    """A non-integer version id is rejected with InvalidInputException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    tbl_name = "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    with pytest.raises(ClientError) as raised:
        helpers.get_table_version(glue, db_name, "myfirsttable", "10not-an-int")

    error = raised.value.response["Error"]
    error["Code"].should.equal("InvalidInputException")
@mock_glue
def test_delete_table_version():
    """delete_table_version removes one version and leaves the others intact."""
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    helpers.create_database(client, database_name)
    table_name = "myfirsttable"

    # Version "1": the table as originally created.
    table_input = helpers.create_table_input(database_name, table_name)
    helpers.create_table(client, database_name, table_name, table_input)

    # Version "2": same table with one column added.
    columns = [{"Name": "country", "Type": "string"}]
    table_input = helpers.create_table_input(database_name, table_name, columns=columns)
    helpers.update_table(client, database_name, table_name, table_input)

    # Version "3": updating with an identical input should still create a new
    # version.
    helpers.update_table(client, database_name, table_name, table_input)

    response = helpers.get_table_versions(client, database_name, table_name)
    response["TableVersions"].should.have.length_of(3)

    client.delete_table_version(
        DatabaseName=database_name, TableName=table_name, VersionId="2"
    )

    response = helpers.get_table_versions(client, database_name, table_name)
    vers = response["TableVersions"]
    vers.should.have.length_of(2)
    [v["VersionId"] for v in vers].should.equal(["1", "3"])
@mock_glue
def test_get_table_not_exits():
    """Fetching an unknown table from an existing database raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)

    with pytest.raises(ClientError) as raised:
        helpers.get_table(glue, db_name, "myfirsttable")

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("Table myfirsttable not found")
@mock_glue
def test_get_table_when_database_not_exits():
    """Fetching a table from an unknown database reports the missing database."""
    glue = boto3.client("glue", region_name="us-east-1")
    missing_db = "nosuchdatabase"

    with pytest.raises(ClientError) as raised:
        helpers.get_table(glue, missing_db, "myfirsttable")

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("Database nosuchdatabase not found")
@mock_glue
def test_delete_table():
    """delete_table succeeds and the table can no longer be fetched."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)
    tbl_name = "myspecialtable"
    tbl_input = helpers.create_table_input(db_name, tbl_name)
    helpers.create_table(glue, db_name, tbl_name, tbl_input)

    outcome = glue.delete_table(DatabaseName=db_name, Name=tbl_name)
    status = outcome["ResponseMetadata"]["HTTPStatusCode"]
    status.should.equal(200)

    # confirm table is deleted
    with pytest.raises(ClientError) as raised:
        helpers.get_table(glue, db_name, tbl_name)

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("Table myspecialtable not found")
@mock_glue
def test_batch_delete_table():
    """batch_delete_table succeeds and the listed table can no longer be fetched."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name = "myspecialdatabase"
    helpers.create_database(glue, db_name)
    tbl_name = "myspecialtable"
    tbl_input = helpers.create_table_input(db_name, tbl_name)
    helpers.create_table(glue, db_name, tbl_name, tbl_input)

    outcome = glue.batch_delete_table(
        DatabaseName=db_name, TablesToDelete=[tbl_name]
    )
    status = outcome["ResponseMetadata"]["HTTPStatusCode"]
    status.should.equal(200)

    # confirm table is deleted
    with pytest.raises(ClientError) as raised:
        helpers.get_table(glue, db_name, tbl_name)

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("Table myspecialtable not found")
@mock_glue
def test_get_partitions_empty():
    """get_partitions returns no partitions for a freshly created table."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    listed = glue.get_partitions(DatabaseName=db_name, TableName=tbl_name)
    listed["Partitions"].should.have.length_of(0)
@mock_glue
def test_create_partition():
    """A created partition is listed with its input data and a creation time."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    partition_values = ["2018-10-01"]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    # Bracket the creation call so CreationTime can be range-checked.
    started = datetime.now(timezone.utc)
    requested = helpers.create_partition_input(
        db_name, tbl_name, values=partition_values
    )
    helpers.create_partition(glue, db_name, tbl_name, requested)
    finished = datetime.now(timezone.utc)

    listed = glue.get_partitions(DatabaseName=db_name, TableName=tbl_name)[
        "Partitions"
    ]
    listed.should.have.length_of(1)

    stored = listed[0]
    stored["TableName"].should.equal(tbl_name)
    stored["StorageDescriptor"].should.equal(requested["StorageDescriptor"])
    stored["Values"].should.equal(partition_values)
    stored["CreationTime"].should.be.greater_than(started)
    stored["CreationTime"].should.be.lower_than(finished)
@mock_glue
def test_create_partition_already_exist():
    """Creating a partition with values already in use raises AlreadyExistsException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    duplicate_values = ["2018-10-01"]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)
    helpers.create_partition(glue, db_name, tbl_name, values=duplicate_values)

    with pytest.raises(ClientError) as raised:
        helpers.create_partition(glue, db_name, tbl_name, values=duplicate_values)

    error = raised.value.response["Error"]
    error["Code"].should.equal("AlreadyExistsException")
@mock_glue
def test_get_partition_not_found():
    """Fetching a partition that was never created raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    missing_values = ["2018-10-01"]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    with pytest.raises(ClientError) as raised:
        helpers.get_partition(glue, db_name, tbl_name, missing_values)

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("partition")
@mock_glue
def test_batch_create_partition():
    """batch_create_partition stores all 20 partitions with their input data."""
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    table_name = "myfirsttable"
    helpers.create_database(client, database_name)
    helpers.create_table(client, database_name, table_name)

    # Bracket the creation call so CreationTime can be range-checked.
    before = datetime.now(timezone.utc)

    # Zero-padded days 01..20 give well-formed date strings; the previous
    # "{i:2}" format space-padded and started at day 0 ("2018-10- 0").
    partition_inputs = [
        helpers.create_partition_input(
            database_name, table_name, values=[f"2018-10-{day:02d}"]
        )
        for day in range(1, 21)
    ]
    client.batch_create_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionInputList=partition_inputs,
    )

    after = datetime.now(timezone.utc)

    response = client.get_partitions(DatabaseName=database_name, TableName=table_name)
    partitions = response["Partitions"]
    partitions.should.have.length_of(20)

    # Partitions are compared positionally against the inputs that were sent.
    for partition, partition_input in zip(partitions, partition_inputs):
        partition["TableName"].should.equal(table_name)
        partition["StorageDescriptor"].should.equal(
            partition_input["StorageDescriptor"]
        )
        partition["Values"].should.equal(partition_input["Values"])
        partition["CreationTime"].should.be.greater_than(before)
        partition["CreationTime"].should.be.lower_than(after)
@mock_glue
def test_batch_create_partition_already_exist():
    """batch_create_partition reports an existing partition under Errors."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    existing_values = ["2018-10-01"]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)
    helpers.create_partition(glue, db_name, tbl_name, values=existing_values)

    duplicate_input = helpers.create_partition_input(
        db_name, tbl_name, values=existing_values
    )
    outcome = glue.batch_create_partition(
        DatabaseName=db_name,
        TableName=tbl_name,
        PartitionInputList=[duplicate_input],
    )

    # The duplicate is returned as an error entry rather than raised.
    outcome.should.have.key("Errors")
    errors = outcome["Errors"]
    errors.should.have.length_of(1)
    only_error = errors[0]
    only_error["PartitionValues"].should.equal(existing_values)
    only_error["ErrorDetail"]["ErrorCode"].should.equal("AlreadyExistsException")
@mock_glue
def test_get_partition():
    """get_partition returns the partition matching the requested values."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    first_values, second_values = ["2018-10-01"], ["2018-09-01"]
    for partition_values in (first_values, second_values):
        helpers.create_partition(glue, db_name, tbl_name, values=partition_values)

    # Ask for the second partition to show the lookup is by value.
    fetched = glue.get_partition(
        DatabaseName=db_name, TableName=tbl_name, PartitionValues=second_values
    )["Partition"]

    fetched["TableName"].should.equal(tbl_name)
    fetched["Values"].should.equal(second_values)
@mock_glue
def test_batch_get_partition():
    """batch_get_partition returns each requested partition."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    all_values = [["2018-10-01"], ["2018-09-01"]]
    for partition_values in all_values:
        helpers.create_partition(glue, db_name, tbl_name, values=partition_values)

    outcome = glue.batch_get_partition(
        DatabaseName=db_name,
        TableName=tbl_name,
        PartitionsToGet=[{"Values": wanted} for wanted in all_values],
    )

    returned = outcome["Partitions"]
    returned.should.have.length_of(2)
    second = returned[1]
    second["TableName"].should.equal(tbl_name)
    second["Values"].should.equal(all_values[1])
@mock_glue
def test_batch_get_partition_missing_partition():
    """batch_get_partition omits requested partitions that do not exist."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    # Only the first and last are created; the middle one stays missing.
    present_a, absent, present_b = ["2018-10-01"], ["2018-09-01"], ["2018-08-01"]
    for partition_values in (present_a, present_b):
        helpers.create_partition(glue, db_name, tbl_name, values=partition_values)

    outcome = glue.batch_get_partition(
        DatabaseName=db_name,
        TableName=tbl_name,
        PartitionsToGet=[
            {"Values": wanted} for wanted in (present_a, absent, present_b)
        ],
    )

    returned = outcome["Partitions"]
    returned.should.have.length_of(2)
    returned[0]["Values"].should.equal(present_a)
    returned[1]["Values"].should.equal(present_b)
@mock_glue
def test_update_partition_not_found_moving():
    """Moving a partition that does not exist raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    # Old and new values differ, and no partition exists under the old ones.
    with pytest.raises(ClientError) as raised:
        helpers.update_partition(
            glue,
            db_name,
            tbl_name,
            old_values=["0000-00-00"],
            values=["2018-10-02"],
        )

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("partition")
@mock_glue
def test_update_partition_not_found_change_in_place():
    """Updating a missing partition without changing its values also fails."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    same_values = ["2018-10-01"]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    with pytest.raises(ClientError) as raised:
        helpers.update_partition(
            glue, db_name, tbl_name, old_values=same_values, values=same_values
        )

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("partition")
@mock_glue
def test_update_partition_cannot_overwrite():
    """Moving a partition onto values held by another one raises AlreadyExistsException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    source_values, occupied_values = ["2018-10-01"], ["2018-09-01"]
    for partition_values in (source_values, occupied_values):
        helpers.create_partition(glue, db_name, tbl_name, values=partition_values)

    with pytest.raises(ClientError) as raised:
        helpers.update_partition(
            glue,
            db_name,
            tbl_name,
            old_values=source_values,
            values=occupied_values,
        )

    error = raised.value.response["Error"]
    error["Code"].should.equal("AlreadyExistsException")
@mock_glue
def test_update_partition():
    """Updating a partition in place changes its storage-descriptor columns."""
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    table_name = "myfirsttable"
    values = ["2018-10-01"]
    columns = [{"Name": "country", "Type": "string"}]

    helpers.create_database(client, database_name)
    helpers.create_table(client, database_name, table_name)
    helpers.create_partition(client, database_name, table_name, values=values)

    # old_values == values: the partition keeps its key and only gains columns.
    # The return value is not inspected, so it is not bound to a name.
    helpers.update_partition(
        client,
        database_name,
        table_name,
        old_values=values,
        values=values,
        columns=columns,
    )

    response = client.get_partition(
        DatabaseName=database_name, TableName=table_name, PartitionValues=values
    )
    partition = response["Partition"]
    partition["TableName"].should.equal(table_name)
    partition["StorageDescriptor"]["Columns"].should.equal(columns)
@mock_glue
def test_update_partition_move():
    """Updating a partition with new values moves it; the old key disappears."""
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    table_name = "myfirsttable"
    values = ["2018-10-01"]
    new_values = ["2018-09-01"]
    columns = [{"Name": "country", "Type": "string"}]

    helpers.create_database(client, database_name)
    helpers.create_table(client, database_name, table_name)
    helpers.create_partition(client, database_name, table_name, values=values)

    # The return value is not inspected, so it is not bound to a name.
    helpers.update_partition(
        client,
        database_name,
        table_name,
        old_values=values,
        values=new_values,
        columns=columns,
    )

    # Old partition shouldn't exist anymore
    with pytest.raises(ClientError) as exc:
        helpers.get_partition(client, database_name, table_name, values)
    exc.value.response["Error"]["Code"].should.equal("EntityNotFoundException")

    # The partition is now reachable under the new values, with the new columns.
    response = client.get_partition(
        DatabaseName=database_name, TableName=table_name, PartitionValues=new_values
    )
    partition = response["Partition"]
    partition["TableName"].should.equal(table_name)
    partition["StorageDescriptor"]["Columns"].should.equal(columns)
@mock_glue
def test_batch_update_partition():
    """batch_update_partition moves every listed partition to its new values."""
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    table_name = "myfirsttable"
    values = [
        ["2020-12-04"],
        ["2020-12-05"],
        ["2020-12-06"],
    ]
    new_values = [
        ["2020-11-04"],
        ["2020-11-05"],
        ["2020-11-06"],
    ]
    columns = [{"Name": "country", "Type": "string"}]

    helpers.create_database(client, database_name)
    helpers.create_table(client, database_name, table_name)

    # Create each partition under its old values and queue its move to the
    # matching new values.
    batch_update_values = []
    for old_value, new_value in zip(values, new_values):
        helpers.create_partition(client, database_name, table_name, values=old_value)
        batch_update_values.append(
            {
                "PartitionValueList": old_value,
                "PartitionInput": helpers.create_partition_input(
                    database_name,
                    table_name,
                    values=new_value,
                    columns=columns,
                ),
            }
        )

    # The return value is not inspected, so it is not bound to a name.
    client.batch_update_partition(
        DatabaseName=database_name, TableName=table_name, Entries=batch_update_values
    )

    # None of the old keys may resolve any more.
    for value in values:
        with pytest.raises(ClientError) as exc:
            helpers.get_partition(client, database_name, table_name, value)
        exc.value.response["Error"]["Code"].should.equal("EntityNotFoundException")

    # Every new key resolves to a partition carrying the updated columns.
    for value in new_values:
        response = client.get_partition(
            DatabaseName=database_name, TableName=table_name, PartitionValues=value
        )
        partition = response["Partition"]
        partition["TableName"].should.equal(table_name)
        partition["StorageDescriptor"]["Columns"].should.equal(columns)
@mock_glue
def test_batch_update_partition_missing_partition():
    """batch_update_partition reports a non-existent partition under Errors."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    old_values = [
        ["2020-12-05"],
        ["2020-12-06"],
    ]
    new_values = [
        ["2020-11-05"],
        ["2020-11-06"],
    ]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    entries = []
    for old, new in zip(old_values, new_values):
        helpers.create_partition(glue, db_name, tbl_name, values=old)
        replacement = helpers.create_partition_input(
            db_name,
            tbl_name,
            values=new,
            columns=[{"Name": "country", "Type": "string"}],
        )
        entries.append({"PartitionValueList": old, "PartitionInput": replacement})

    # add a non-existent partition to the batch update values
    phantom_input = helpers.create_partition_input(
        db_name,
        tbl_name,
        values=["2019-09-09"],
        columns=[{"Name": "country", "Type": "string"}],
    )
    entries.append(
        {"PartitionValueList": ["2020-10-10"], "PartitionInput": phantom_input}
    )

    outcome = glue.batch_update_partition(
        DatabaseName=db_name, TableName=tbl_name, Entries=entries
    )

    # Only the phantom entry is reported as an error.
    outcome.should.have.key("Errors")
    errors = outcome["Errors"]
    errors.should.have.length_of(1)
    errors[0]["PartitionValueList"].should.equal(["2020-10-10"])
@mock_glue
def test_delete_partition():
    """delete_partition removes the partition from the table's listing."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    partition_values = ["2018-10-01"]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)
    requested = helpers.create_partition_input(
        db_name, tbl_name, values=partition_values
    )
    helpers.create_partition(glue, db_name, tbl_name, requested)

    glue.delete_partition(
        DatabaseName=db_name, TableName=tbl_name, PartitionValues=partition_values
    )

    remaining = glue.get_partitions(DatabaseName=db_name, TableName=tbl_name)[
        "Partitions"
    ]
    remaining.should.equal([])
@mock_glue
def test_delete_partition_bad_partition():
    """Deleting a partition that was never created raises EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")
    db_name, tbl_name = "myspecialdatabase", "myfirsttable"
    missing_values = ["2018-10-01"]
    helpers.create_database(glue, db_name)
    helpers.create_table(glue, db_name, tbl_name)

    with pytest.raises(ClientError) as raised:
        glue.delete_partition(
            DatabaseName=db_name, TableName=tbl_name, PartitionValues=missing_values
        )

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
@mock_glue
def test_batch_delete_partition():
    """batch_delete_partition deletes 20 existing partitions without Errors."""
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    table_name = "myfirsttable"
    helpers.create_database(client, database_name)
    helpers.create_table(client, database_name, table_name)

    # Zero-padded days 01..20 give well-formed date strings; the previous
    # "{i:2}" format space-padded and started at day 0 ("2018-10- 0").
    partition_inputs = [
        helpers.create_partition_input(
            database_name, table_name, values=[f"2018-10-{day:02d}"]
        )
        for day in range(1, 21)
    ]
    client.batch_create_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionInputList=partition_inputs,
    )

    # Delete exactly the partitions that were just created.
    partition_values = [{"Values": p["Values"]} for p in partition_inputs]
    response = client.batch_delete_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionsToDelete=partition_values,
    )

    response.should_not.have.key("Errors")
@mock_glue
def test_batch_delete_partition_with_bad_partitions():
    """Unknown partitions in a batch delete are reported under ``Errors``.

    Twenty partitions are created, then a delete request is issued that mixes
    those twenty with three value lists that were never created. The response
    must list exactly those three under ``Errors``.
    """
    client = boto3.client("glue", region_name="us-east-1")
    database_name = "myspecialdatabase"
    table_name = "myfirsttable"
    helpers.create_database(client, database_name)
    helpers.create_table(client, database_name, table_name)

    # ``:02`` zero-pads the day. A bare width of ``:2`` pads with a space and
    # yields values such as "2018-10- 0". Days run 1..20 so that every value is
    # a well-formed date string.
    partition_inputs = [
        helpers.create_partition_input(
            database_name, table_name, values=[f"2018-10-{day:02}"]
        )
        for day in range(1, 21)
    ]

    client.batch_create_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionInputList=partition_inputs,
    )

    partition_values = [{"Values": p["Values"]} for p in partition_inputs]

    # Interleave three partitions that do not exist among the valid ones.
    partition_values.insert(5, {"Values": ["2018-11-01"]})
    partition_values.insert(10, {"Values": ["2018-11-02"]})
    partition_values.insert(15, {"Values": ["2018-11-03"]})

    response = client.batch_delete_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionsToDelete=partition_values,
    )

    response.should.have.key("Errors")
    response["Errors"].should.have.length_of(3)

    # Materialize as a list: a ``map`` object is a one-shot iterator, so
    # repeated membership checks against it would depend on the order in which
    # the errors are returned.
    error_partitions = [err["PartitionValues"] for err in response["Errors"]]
    ["2018-11-01"].should.be.within(error_partitions)
    ["2018-11-02"].should.be.within(error_partitions)
    ["2018-11-03"].should.be.within(error_partitions)
@mock_glue
@freeze_time(FROZEN_CREATE_TIME)
def test_create_crawler_scheduled():
    """Create a crawler with a cron schedule and verify what get_crawler returns.

    Every setting passed at creation must be echoed back, the schedule must be
    reported in the SCHEDULED state, and the crawler must start out READY at
    version 1 with no LastCrawl entry.
    """
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    iam_role = "arn:aws:iam::123456789012:role/Glue/Role"
    crawl_targets = {
        "S3Targets": [{"Path": "s3://my-source-bucket/"}],
        "JdbcTargets": [],
        "MongoDBTargets": [],
        "DynamoDBTargets": [],
        "CatalogTargets": [],
    }
    cron_schedule = "cron(15 12 * * ? *)"

    # Optional settings, forwarded to the helper as keyword arguments.
    crawler_options = {
        "database_name": "my_database_name",
        "description": "my crawler description",
        "schedule": cron_schedule,
        "classifiers": [],
        "table_prefix": "my_table_prefix_",
        "schema_change_policy": {"UpdateBehavior": "LOG", "DeleteBehavior": "LOG"},
        "recrawl_policy": {"RecrawlBehavior": "CRAWL_NEW_FOLDERS_ONLY"},
        "lineage_configuration": {"CrawlerLineageSettings": "DISABLE"},
        "configuration": json.dumps(
            {
                "Version": 1.0,
                "CrawlerOutput": {
                    "Partitions": {"AddOrUpdateBehavior": "InheritFromTable"},
                },
                "Grouping": {"TableGroupingPolicy": "CombineCompatibleSchemas"},
            }
        ),
        "crawler_security_configuration": "my_security_configuration",
        "tags": {"tag_key": "tag_value"},
    }
    helpers.create_crawler(
        glue, crawler_name, iam_role, crawl_targets, **crawler_options
    )

    crawler = glue.get_crawler(Name=crawler_name)["Crawler"]

    # Response field -> value it must carry, checked in this order.
    expected_fields = {
        "Name": crawler_name,
        "Role": iam_role,
        "DatabaseName": crawler_options["database_name"],
        "Description": crawler_options["description"],
        "Targets": crawl_targets,
        "Schedule": {"ScheduleExpression": cron_schedule, "State": "SCHEDULED"},
        "Classifiers": crawler_options["classifiers"],
        "TablePrefix": crawler_options["table_prefix"],
        "SchemaChangePolicy": crawler_options["schema_change_policy"],
        "RecrawlPolicy": crawler_options["recrawl_policy"],
        "LineageConfiguration": crawler_options["lineage_configuration"],
        "Configuration": crawler_options["configuration"],
        "CrawlerSecurityConfiguration": crawler_options[
            "crawler_security_configuration"
        ],
        "State": "READY",
        "CrawlElapsedTime": 0,
        "Version": 1,
    }
    for field, expected in expected_fields.items():
        crawler.get(field).should.equal(expected)

    # Timestamps are only compared when not running in server mode.
    if not settings.TEST_SERVER_MODE:
        frozen_ts = FROZEN_CREATE_TIME.timestamp()
        for stamp_field in ("CreationTime", "LastUpdated"):
            crawler.get(stamp_field).timestamp().should.equal(frozen_ts)

    crawler.should.not_have.key("LastCrawl")
@mock_glue
@freeze_time(FROZEN_CREATE_TIME)
def test_create_crawler_unscheduled():
    """Create a crawler without a schedule and verify what get_crawler returns.

    Every setting passed at creation must be echoed back, the response must
    carry no Schedule key, and the crawler must start out READY at version 1
    with no LastCrawl entry.
    """
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    iam_role = "arn:aws:iam::123456789012:role/Glue/Role"
    crawl_targets = {
        "S3Targets": [{"Path": "s3://my-source-bucket/"}],
        "JdbcTargets": [],
        "MongoDBTargets": [],
        "DynamoDBTargets": [],
        "CatalogTargets": [],
    }

    # Optional settings, forwarded to the helper as keyword arguments.
    # No "schedule" entry is supplied in this test.
    crawler_options = {
        "database_name": "my_database_name",
        "description": "my crawler description",
        "classifiers": [],
        "table_prefix": "my_table_prefix_",
        "schema_change_policy": {"UpdateBehavior": "LOG", "DeleteBehavior": "LOG"},
        "recrawl_policy": {"RecrawlBehavior": "CRAWL_NEW_FOLDERS_ONLY"},
        "lineage_configuration": {"CrawlerLineageSettings": "DISABLE"},
        "configuration": json.dumps(
            {
                "Version": 1.0,
                "CrawlerOutput": {
                    "Partitions": {"AddOrUpdateBehavior": "InheritFromTable"},
                },
                "Grouping": {"TableGroupingPolicy": "CombineCompatibleSchemas"},
            }
        ),
        "crawler_security_configuration": "my_security_configuration",
        "tags": {"tag_key": "tag_value"},
    }
    helpers.create_crawler(
        glue, crawler_name, iam_role, crawl_targets, **crawler_options
    )

    crawler = glue.get_crawler(Name=crawler_name)["Crawler"]

    # Fields checked before the schedule assertion.
    leading_fields = {
        "Name": crawler_name,
        "Role": iam_role,
        "DatabaseName": crawler_options["database_name"],
        "Description": crawler_options["description"],
        "Targets": crawl_targets,
    }
    for field, expected in leading_fields.items():
        crawler.get(field).should.equal(expected)

    crawler.should.not_have.key("Schedule")

    # Fields checked after the schedule assertion.
    trailing_fields = {
        "Classifiers": crawler_options["classifiers"],
        "TablePrefix": crawler_options["table_prefix"],
        "SchemaChangePolicy": crawler_options["schema_change_policy"],
        "RecrawlPolicy": crawler_options["recrawl_policy"],
        "LineageConfiguration": crawler_options["lineage_configuration"],
        "Configuration": crawler_options["configuration"],
        "CrawlerSecurityConfiguration": crawler_options[
            "crawler_security_configuration"
        ],
        "State": "READY",
        "CrawlElapsedTime": 0,
        "Version": 1,
    }
    for field, expected in trailing_fields.items():
        crawler.get(field).should.equal(expected)

    # Timestamps are only compared when not running in server mode.
    if not settings.TEST_SERVER_MODE:
        frozen_ts = FROZEN_CREATE_TIME.timestamp()
        for stamp_field in ("CreationTime", "LastUpdated"):
            crawler.get(stamp_field).timestamp().should.equal(frozen_ts)

    crawler.should.not_have.key("LastCrawl")
@mock_glue
def test_create_crawler_already_exists():
    """Creating a second crawler under the same name is an AlreadyExistsException."""
    glue = boto3.client("glue", region_name="us-east-1")
    duplicate_name = "my_crawler_name"
    helpers.create_crawler(glue, duplicate_name)

    # The second creation under the same name must be rejected.
    with pytest.raises(ClientError) as raised:
        helpers.create_crawler(glue, duplicate_name)

    error_code = raised.value.response["Error"]["Code"]
    error_code.should.equal("AlreadyExistsException")
@mock_glue
def test_get_crawler_not_exits():
    """Fetching a crawler that was never created is an EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")

    with pytest.raises(ClientError) as raised:
        glue.get_crawler(Name="my_crawler_name")

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("Crawler my_crawler_name not found")
@mock_glue
def test_get_crawlers_empty():
    """With no crawlers created, get_crawlers returns an empty Crawlers list."""
    glue = boto3.client("glue", region_name="us-east-1")
    listed = glue.get_crawlers()["Crawlers"]
    listed.should.have.length_of(0)
@mock_glue
def test_get_crawlers_several_items():
    """get_crawlers returns every crawler that was created."""
    glue = boto3.client("glue", region_name="us-east-1")
    expected_names = ["my_crawler_name_1", "my_crawler_name_2"]
    for crawler_name in expected_names:
        helpers.create_crawler(glue, crawler_name)

    # Order by name before comparing entries with the expected names.
    listed = list(glue.get_crawlers()["Crawlers"])
    listed.sort(key=lambda entry: entry["Name"])

    listed.should.have.length_of(2)
    for entry, expected_name in zip(listed, expected_names):
        entry.get("Name").should.equal(expected_name)
@mock_glue
def test_start_crawler():
    """Starting a freshly created crawler puts it in the RUNNING state."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)

    glue.start_crawler(Name=crawler_name)

    state = glue.get_crawler(Name=crawler_name)["Crawler"].get("State")
    state.should.equal("RUNNING")
@mock_glue
def test_start_crawler_should_raise_exception_if_already_running():
    """Starting a crawler a second time raises CrawlerRunningException."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)
    glue.start_crawler(Name=crawler_name)

    # The crawler has already been started once, so this call must fail.
    with pytest.raises(ClientError) as raised:
        glue.start_crawler(Name=crawler_name)

    error_code = raised.value.response["Error"]["Code"]
    error_code.should.equal("CrawlerRunningException")
@mock_glue
def test_stop_crawler():
    """Stopping a started crawler puts it in the STOPPING state."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)

    # Start the crawler first, then stop it.
    glue.start_crawler(Name=crawler_name)
    glue.stop_crawler(Name=crawler_name)

    state = glue.get_crawler(Name=crawler_name)["Crawler"].get("State")
    state.should.equal("STOPPING")
@mock_glue
def test_stop_crawler_should_raise_exception_if_not_running():
    """Stopping a crawler that was never started raises CrawlerNotRunningException."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)

    # The crawler was created but never started.
    with pytest.raises(ClientError) as raised:
        glue.stop_crawler(Name=crawler_name)

    error_code = raised.value.response["Error"]["Code"]
    error_code.should.equal("CrawlerNotRunningException")
@mock_glue
def test_delete_crawler():
    """Deleting a crawler returns HTTP 200 and later lookups fail as not found."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)

    delete_response = glue.delete_crawler(Name=crawler_name)
    status_code = delete_response["ResponseMetadata"]["HTTPStatusCode"]
    status_code.should.equal(200)

    # The crawler must no longer be retrievable once it has been deleted.
    with pytest.raises(ClientError) as raised:
        glue.get_crawler(Name=crawler_name)

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("Crawler my_crawler_name not found")
@mock_glue
def test_delete_crawler_not_exists():
    """Deleting a crawler that was never created is an EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")

    with pytest.raises(ClientError) as raised:
        glue.delete_crawler(Name="my_crawler_name")

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("Crawler my_crawler_name not found")