2018-12-21 11:28:56 +00:00
|
|
|
import copy
|
|
|
|
|
2023-11-30 15:55:51 +00:00
|
|
|
from .fixtures.datacatalog import DATABASE_INPUT, PARTITION_INPUT, TABLE_INPUT
|
2022-08-04 15:48:08 +00:00
|
|
|
from .fixtures.schema_registry import (
|
2022-08-05 20:59:01 +00:00
|
|
|
TEST_AVRO_DATA_FORMAT,
|
|
|
|
TEST_AVRO_SCHEMA_DEFINITION,
|
2023-11-30 15:55:51 +00:00
|
|
|
TEST_BACKWARD_COMPATIBILITY,
|
2022-08-05 20:59:01 +00:00
|
|
|
TEST_NEW_AVRO_SCHEMA_DEFINITION,
|
2023-11-30 15:55:51 +00:00
|
|
|
TEST_REGISTRY_NAME,
|
|
|
|
TEST_SCHEMA_ID,
|
|
|
|
TEST_SCHEMA_NAME,
|
2022-08-04 15:48:08 +00:00
|
|
|
)
|
2018-12-21 11:28:56 +00:00
|
|
|
|
|
|
|
|
2021-01-11 13:10:18 +00:00
|
|
|
def create_database_input(database_name):
    """Build a Glue ``DatabaseInput`` dict for *database_name*.

    The shared ``DATABASE_INPUT`` fixture is deep-copied (so the fixture
    itself is never mutated across tests) before the name and a
    deterministic S3 location URI are filled in.
    """
    result = copy.deepcopy(DATABASE_INPUT)
    result.update(
        Name=database_name,
        LocationUri=f"s3://my-bucket/{database_name}",
    )
    return result
|
|
|
|
|
|
|
|
|
2024-02-09 22:21:39 +00:00
|
|
|
def create_database(
    client, database_name, database_input=None, catalog_id=None, tags=None
):
    """Create a Glue database via *client*.

    A ``DatabaseInput`` is generated from *database_name* when none is
    supplied; *catalog_id* and *tags* are forwarded only when given,
    mirroring boto3's optional parameters.
    """
    effective_input = database_input
    if effective_input is None:
        effective_input = create_database_input(database_name)

    kwargs = {"DatabaseInput": effective_input}
    if catalog_id is not None:
        kwargs["CatalogId"] = catalog_id
    if tags is not None:
        kwargs["Tags"] = tags
    return client.create_database(**kwargs)
|
2018-12-21 11:28:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_database(client, database_name):
    """Look up a single Glue database by its name."""
    response = client.get_database(Name=database_name)
    return response
|
|
|
|
|
|
|
|
|
2021-10-18 19:44:29 +00:00
|
|
|
def create_table_input(database_name, table_name, columns=None, partition_keys=None):
    """Build a Glue ``TableInput`` dict for *table_name* in *database_name*.

    The shared ``TABLE_INPUT`` fixture is deep-copied, then the name,
    partition keys, columns and a deterministic S3 location are filled
    in.  Falsy *columns*/*partition_keys* fall back to empty lists.
    """
    result = copy.deepcopy(TABLE_INPUT)
    result["Name"] = table_name
    result["PartitionKeys"] = [] if not partition_keys else partition_keys
    descriptor = result["StorageDescriptor"]
    descriptor["Columns"] = [] if not columns else columns
    descriptor["Location"] = f"s3://my-bucket/{database_name}/{table_name}"
    return result
|
|
|
|
|
|
|
|
|
|
|
|
def create_table(client, database_name, table_name, table_input=None, **kwargs):
    """Create a Glue table via *client*.

    When *table_input* is omitted, one is generated from the remaining
    keyword arguments via ``create_table_input``.
    """
    effective_input = (
        create_table_input(database_name, table_name, **kwargs)
        if table_input is None
        else table_input
    )
    return client.create_table(
        DatabaseName=database_name, TableInput=effective_input
    )
|
2018-12-21 11:28:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
def update_table(client, database_name, table_name, table_input=None, **kwargs):
    """Update a Glue table via *client*.

    When *table_input* is omitted, one is generated from the remaining
    keyword arguments via ``create_table_input``.
    """
    effective_input = (
        create_table_input(database_name, table_name, **kwargs)
        if table_input is None
        else table_input
    )
    return client.update_table(
        DatabaseName=database_name, TableInput=effective_input
    )
|
2018-12-21 11:28:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_table(client, database_name, table_name):
    """Fetch a single Glue table by database and table name."""
    response = client.get_table(DatabaseName=database_name, Name=table_name)
    return response
|
2018-12-21 11:28:56 +00:00
|
|
|
|
|
|
|
|
2022-09-28 10:44:01 +00:00
|
|
|
def get_tables(client, database_name, expression=None):
    """List tables in *database_name*, optionally filtered by *expression*.

    The ``Expression`` kwarg is only forwarded when *expression* is
    truthy, matching boto3's optional-parameter behaviour.
    """
    kwargs = {"DatabaseName": database_name}
    if expression:
        kwargs["Expression"] = expression
    return client.get_tables(**kwargs)
|
2018-12-21 11:28:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_table_versions(client, database_name, table_name):
    """List all stored versions of a Glue table."""
    response = client.get_table_versions(
        DatabaseName=database_name, TableName=table_name
    )
    return response
|
2018-12-21 11:28:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_table_version(client, database_name, table_name, version_id):
    """Fetch one specific version of a Glue table."""
    response = client.get_table_version(
        DatabaseName=database_name,
        TableName=table_name,
        VersionId=version_id,
    )
    return response
|
|
|
|
|
|
|
|
|
2022-04-14 19:37:10 +00:00
|
|
|
def create_column(name, type_, comment=None, parameters=None):
    """Build a Glue ``Column`` dict.

    ``Comment`` and ``Parameters`` are included only when not None, so
    an explicit empty string/dict is still emitted.
    """
    column = {"Name": name, "Type": type_}
    optional = {"Comment": comment, "Parameters": parameters}
    column.update({key: val for key, val in optional.items() if val is not None})
    return column
|
|
|
|
|
|
|
|
|
2021-10-18 19:44:29 +00:00
|
|
|
def create_partition_input(database_name, table_name, values=None, columns=None):
    """Build a Glue ``PartitionInput`` dict rooted under the table's S3 path.

    The shared ``PARTITION_INPUT`` fixture is deep-copied before being
    customised, so the fixture is never mutated.
    """
    result = copy.deepcopy(PARTITION_INPUT)
    result["Values"] = values or []
    descriptor = result["StorageDescriptor"]
    descriptor["Columns"] = columns or []
    descriptor["SerdeInfo"]["Parameters"]["path"] = (
        f"s3://my-bucket/{database_name}/{table_name}"
    )
    return result
|
|
|
|
|
|
|
|
|
|
|
|
def create_partition(client, database_name, table_name, partiton_input=None, **kwargs):
    """Create a Glue partition via *client*.

    NOTE(review): the ``partiton_input`` misspelling is part of the
    public keyword interface and is deliberately preserved for existing
    callers.
    """
    effective_input = (
        create_partition_input(database_name, table_name, **kwargs)
        if partiton_input is None
        else partiton_input
    )
    return client.create_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionInput=effective_input,
    )
|
|
|
|
|
|
|
|
|
2019-12-23 07:38:53 +00:00
|
|
|
def update_partition(
    client, database_name, table_name, old_values=None, partiton_input=None, **kwargs
):
    """Update a Glue partition identified by *old_values*.

    NOTE(review): the ``partiton_input`` misspelling is part of the
    public keyword interface and is deliberately preserved for existing
    callers.  A missing *old_values* is sent as an empty list.
    """
    effective_input = (
        create_partition_input(database_name, table_name, **kwargs)
        if partiton_input is None
        else partiton_input
    )
    return client.update_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionInput=effective_input,
        PartitionValueList=old_values or [],
    )
|
|
|
|
|
|
|
|
|
|
|
|
def get_partition(client, database_name, table_name, values):
    """Fetch one Glue partition by its value list."""
    response = client.get_partition(
        DatabaseName=database_name,
        TableName=table_name,
        PartitionValues=values,
    )
    return response
|
2021-08-26 09:49:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
def create_crawler(
    client, crawler_name, crawler_role=None, crawler_targets=None, **kwargs
):
    """Create a Glue crawler via *client*.

    Optional snake_case kwargs are translated to their boto3 CamelCase
    equivalents and forwarded only when present and not None.  A dummy
    IAM role ARN and an empty target set are substituted when the
    caller does not supply them.
    """
    if crawler_role is None:
        crawler_role = "arn:aws:iam::123456789012:role/Glue/Role"

    if crawler_targets is None:
        crawler_targets = {
            "S3Targets": [],
            "JdbcTargets": [],
            "MongoDBTargets": [],
            "DynamoDBTargets": [],
            "CatalogTargets": [],
        }

    # snake_case kwarg -> boto3 request key
    optional_param_map = {
        "database_name": "DatabaseName",
        "description": "Description",
        "schedule": "Schedule",
        "classifiers": "Classifiers",
        "table_prefix": "TablePrefix",
        "schema_change_policy": "SchemaChangePolicy",
        "recrawl_policy": "RecrawlPolicy",
        "lineage_configuration": "LineageConfiguration",
        "configuration": "Configuration",
        "crawler_security_configuration": "CrawlerSecurityConfiguration",
        "tags": "Tags",
    }
    extra_params = {}
    for snake_name, boto3_name in optional_param_map.items():
        value = kwargs.get(snake_name)
        if value is not None:
            extra_params[boto3_name] = value

    return client.create_crawler(
        Name=crawler_name,
        Role=crawler_role,
        Targets=crawler_targets,
        **extra_params,
    )
|
2022-08-04 15:48:08 +00:00
|
|
|
|
|
|
|
|
2022-08-05 20:59:01 +00:00
|
|
|
def create_registry(client, registry_name=TEST_REGISTRY_NAME):
    """Create a schema registry named *registry_name* (fixture default)."""
    response = client.create_registry(RegistryName=registry_name)
    return response
|
|
|
|
|
|
|
|
|
|
|
|
def create_schema(
    client,
    registry_id,
    data_format=TEST_AVRO_DATA_FORMAT,
    compatibility=TEST_BACKWARD_COMPATIBILITY,
    schema_definition=TEST_AVRO_SCHEMA_DEFINITION,
):
    """Create the fixture schema inside *registry_id*.

    Format, compatibility mode and definition default to the shared
    Avro test fixtures; the schema name is always ``TEST_SCHEMA_NAME``.
    """
    request = {
        "RegistryId": registry_id,
        "SchemaName": TEST_SCHEMA_NAME,
        "DataFormat": data_format,
        "Compatibility": compatibility,
        "SchemaDefinition": schema_definition,
    }
    return client.create_schema(**request)
|
2022-08-05 20:59:01 +00:00
|
|
|
|
|
|
|
|
|
|
|
def register_schema_version(client):
    """Register the fixture's updated Avro definition as a new schema version."""
    response = client.register_schema_version(
        SchemaId=TEST_SCHEMA_ID,
        SchemaDefinition=TEST_NEW_AVRO_SCHEMA_DEFINITION,
    )
    return response
|