| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | import copy | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-11 13:10:18 +00:00
										 |  |  | from .fixtures.datacatalog import TABLE_INPUT, PARTITION_INPUT, DATABASE_INPUT | 
					
						
							| 
									
										
										
										
											2022-08-04 08:48:08 -07:00
										 |  |  | from .fixtures.schema_registry import ( | 
					
						
							| 
									
										
										
										
											2022-08-05 13:59:01 -07:00
										 |  |  |     TEST_REGISTRY_NAME, | 
					
						
							|  |  |  |     TEST_SCHEMA_NAME, | 
					
						
							|  |  |  |     TEST_BACKWARD_COMPATIBILITY, | 
					
						
							|  |  |  |     TEST_AVRO_DATA_FORMAT, | 
					
						
							|  |  |  |     TEST_AVRO_SCHEMA_DEFINITION, | 
					
						
							|  |  |  |     TEST_SCHEMA_ID, | 
					
						
							|  |  |  |     TEST_NEW_AVRO_SCHEMA_DEFINITION, | 
					
						
							| 
									
										
										
										
											2022-08-04 08:48:08 -07:00
										 |  |  | ) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-11 13:10:18 +00:00
										 |  |  | def create_database_input(database_name): | 
					
						
							|  |  |  |     database_input = copy.deepcopy(DATABASE_INPUT) | 
					
						
							|  |  |  |     database_input["Name"] = database_name | 
					
						
							| 
									
										
										
										
											2022-11-17 21:41:08 -01:00
										 |  |  |     database_input["LocationUri"] = f"s3://my-bucket/{database_name}" | 
					
						
							| 
									
										
										
										
											2021-01-11 13:10:18 +00:00
										 |  |  |     return database_input | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-07-27 14:38:08 +02:00
										 |  |  | def create_database(client, database_name, database_input=None, catalog_id=None): | 
					
						
							| 
									
										
										
										
											2021-01-11 13:10:18 +00:00
										 |  |  |     if database_input is None: | 
					
						
							|  |  |  |         database_input = create_database_input(database_name) | 
					
						
							| 
									
										
										
										
											2022-07-27 14:38:08 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     database_kwargs = {"DatabaseInput": database_input} | 
					
						
							|  |  |  |     if catalog_id is not None: | 
					
						
							|  |  |  |         database_kwargs["CatalogId"] = catalog_id | 
					
						
							|  |  |  |     return client.create_database(**database_kwargs) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_database(client, database_name): | 
					
						
							|  |  |  |     return client.get_database(Name=database_name) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  | def create_table_input(database_name, table_name, columns=None, partition_keys=None): | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     table_input = copy.deepcopy(TABLE_INPUT) | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     table_input["Name"] = table_name | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  |     table_input["PartitionKeys"] = partition_keys or [] | 
					
						
							|  |  |  |     table_input["StorageDescriptor"]["Columns"] = columns or [] | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     table_input["StorageDescriptor"][ | 
					
						
							|  |  |  |         "Location" | 
					
						
							| 
									
										
										
										
											2022-11-17 21:41:08 -01:00
										 |  |  |     ] = f"s3://my-bucket/{database_name}/{table_name}" | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     return table_input | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def create_table(client, database_name, table_name, table_input=None, **kwargs): | 
					
						
							|  |  |  |     if table_input is None: | 
					
						
							|  |  |  |         table_input = create_table_input(database_name, table_name, **kwargs) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     return client.create_table(DatabaseName=database_name, TableInput=table_input) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def update_table(client, database_name, table_name, table_input=None, **kwargs): | 
					
						
							|  |  |  |     if table_input is None: | 
					
						
							|  |  |  |         table_input = create_table_input(database_name, table_name, **kwargs) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     return client.update_table(DatabaseName=database_name, TableInput=table_input) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_table(client, database_name, table_name): | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     return client.get_table(DatabaseName=database_name, Name=table_name) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-28 11:44:01 +01:00
										 |  |  | def get_tables(client, database_name, expression=None): | 
					
						
							|  |  |  |     if expression: | 
					
						
							|  |  |  |         return client.get_tables(DatabaseName=database_name, Expression=expression) | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         return client.get_tables(DatabaseName=database_name) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_table_versions(client, database_name, table_name): | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     return client.get_table_versions(DatabaseName=database_name, TableName=table_name) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_table_version(client, database_name, table_name, version_id): | 
					
						
							|  |  |  |     return client.get_table_version( | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |         DatabaseName=database_name, TableName=table_name, VersionId=version_id | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-14 21:37:10 +02:00
										 |  |  | def create_column(name, type_, comment=None, parameters=None): | 
					
						
							|  |  |  |     column = {"Name": name, "Type": type_} | 
					
						
							|  |  |  |     if comment is not None: | 
					
						
							|  |  |  |         column["Comment"] = comment | 
					
						
							|  |  |  |     if parameters is not None: | 
					
						
							|  |  |  |         column["Parameters"] = parameters | 
					
						
							|  |  |  |     return column | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  | def create_partition_input(database_name, table_name, values=None, columns=None): | 
					
						
							| 
									
										
										
										
											2022-11-17 21:41:08 -01:00
										 |  |  |     root_path = f"s3://my-bucket/{database_name}/{table_name}" | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     part_input = copy.deepcopy(PARTITION_INPUT) | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  |     part_input["Values"] = values or [] | 
					
						
							|  |  |  |     part_input["StorageDescriptor"]["Columns"] = columns or [] | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     part_input["StorageDescriptor"]["SerdeInfo"]["Parameters"]["path"] = root_path | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     return part_input | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def create_partition(client, database_name, table_name, partiton_input=None, **kwargs): | 
					
						
							|  |  |  |     if partiton_input is None: | 
					
						
							|  |  |  |         partiton_input = create_partition_input(database_name, table_name, **kwargs) | 
					
						
							|  |  |  |     return client.create_partition( | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |         DatabaseName=database_name, TableName=table_name, PartitionInput=partiton_input | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  | def update_partition( | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  |     client, database_name, table_name, old_values=None, partiton_input=None, **kwargs | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  | ): | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     if partiton_input is None: | 
					
						
							|  |  |  |         partiton_input = create_partition_input(database_name, table_name, **kwargs) | 
					
						
							|  |  |  |     return client.update_partition( | 
					
						
							|  |  |  |         DatabaseName=database_name, | 
					
						
							|  |  |  |         TableName=table_name, | 
					
						
							|  |  |  |         PartitionInput=partiton_input, | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  |         PartitionValueList=old_values or [], | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_partition(client, database_name, table_name, values): | 
					
						
							|  |  |  |     return client.get_partition( | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |         DatabaseName=database_name, TableName=table_name, PartitionValues=values | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2021-08-26 10:49:41 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def create_crawler( | 
					
						
							|  |  |  |     client, crawler_name, crawler_role=None, crawler_targets=None, **kwargs | 
					
						
							|  |  |  | ): | 
					
						
							|  |  |  |     optional_param_map = { | 
					
						
							|  |  |  |         "database_name": "DatabaseName", | 
					
						
							|  |  |  |         "description": "Description", | 
					
						
							|  |  |  |         "schedule": "Schedule", | 
					
						
							|  |  |  |         "classifiers": "Classifiers", | 
					
						
							|  |  |  |         "table_prefix": "TablePrefix", | 
					
						
							|  |  |  |         "schema_change_policy": "SchemaChangePolicy", | 
					
						
							|  |  |  |         "recrawl_policy": "RecrawlPolicy", | 
					
						
							|  |  |  |         "lineage_configuration": "LineageConfiguration", | 
					
						
							|  |  |  |         "configuration": "Configuration", | 
					
						
							|  |  |  |         "crawler_security_configuration": "CrawlerSecurityConfiguration", | 
					
						
							|  |  |  |         "tags": "Tags", | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     params = { | 
					
						
							|  |  |  |         boto3_key: kwargs.get(key) | 
					
						
							|  |  |  |         for key, boto3_key in optional_param_map.items() | 
					
						
							|  |  |  |         if kwargs.get(key) is not None | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if crawler_role is None: | 
					
						
							|  |  |  |         crawler_role = "arn:aws:iam::123456789012:role/Glue/Role" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if crawler_targets is None: | 
					
						
							|  |  |  |         crawler_targets = { | 
					
						
							|  |  |  |             "S3Targets": [], | 
					
						
							|  |  |  |             "JdbcTargets": [], | 
					
						
							|  |  |  |             "MongoDBTargets": [], | 
					
						
							|  |  |  |             "DynamoDBTargets": [], | 
					
						
							|  |  |  |             "CatalogTargets": [], | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return client.create_crawler( | 
					
						
							| 
									
										
										
										
											2022-03-10 13:39:59 -01:00
										 |  |  |         Name=crawler_name, Role=crawler_role, Targets=crawler_targets, **params | 
					
						
							| 
									
										
										
										
											2021-08-26 10:49:41 +01:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2022-08-04 08:48:08 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-05 13:59:01 -07:00
										 |  |  | def create_registry(client, registry_name=TEST_REGISTRY_NAME): | 
					
						
							| 
									
										
										
										
											2022-08-04 08:48:08 -07:00
										 |  |  |     return client.create_registry(RegistryName=registry_name) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def create_schema( | 
					
						
							|  |  |  |     client, | 
					
						
							|  |  |  |     registry_id, | 
					
						
							| 
									
										
										
										
											2022-08-05 13:59:01 -07:00
										 |  |  |     data_format=TEST_AVRO_DATA_FORMAT, | 
					
						
							|  |  |  |     compatibility=TEST_BACKWARD_COMPATIBILITY, | 
					
						
							|  |  |  |     schema_definition=TEST_AVRO_SCHEMA_DEFINITION, | 
					
						
							| 
									
										
										
										
											2022-08-04 08:48:08 -07:00
										 |  |  | ): | 
					
						
							|  |  |  |     return client.create_schema( | 
					
						
							|  |  |  |         RegistryId=registry_id, | 
					
						
							| 
									
										
										
										
											2022-08-05 13:59:01 -07:00
										 |  |  |         SchemaName=TEST_SCHEMA_NAME, | 
					
						
							| 
									
										
										
										
											2022-08-04 08:48:08 -07:00
										 |  |  |         DataFormat=data_format, | 
					
						
							|  |  |  |         Compatibility=compatibility, | 
					
						
							|  |  |  |         SchemaDefinition=schema_definition, | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2022-08-05 13:59:01 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def register_schema_version(client): | 
					
						
							|  |  |  |     return client.register_schema_version( | 
					
						
							|  |  |  |         SchemaId=TEST_SCHEMA_ID, SchemaDefinition=TEST_NEW_AVRO_SCHEMA_DEFINITION | 
					
						
							|  |  |  |     ) |