| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | import copy | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-11 13:10:18 +00:00
										 |  |  | from .fixtures.datacatalog import TABLE_INPUT, PARTITION_INPUT, DATABASE_INPUT | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-11 13:10:18 +00:00
										 |  |  | def create_database_input(database_name): | 
					
						
							|  |  |  |     database_input = copy.deepcopy(DATABASE_INPUT) | 
					
						
							|  |  |  |     database_input["Name"] = database_name | 
					
						
							|  |  |  |     database_input["LocationUri"] = "s3://my-bucket/{database_name}".format( | 
					
						
							|  |  |  |         database_name=database_name | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     return database_input | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def create_database(client, database_name, database_input=None): | 
					
						
							|  |  |  |     if database_input is None: | 
					
						
							|  |  |  |         database_input = create_database_input(database_name) | 
					
						
							|  |  |  |     return client.create_database(DatabaseInput=database_input) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_database(client, database_name): | 
					
						
							|  |  |  |     return client.get_database(Name=database_name) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  | def create_table_input(database_name, table_name, columns=None, partition_keys=None): | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     table_input = copy.deepcopy(TABLE_INPUT) | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     table_input["Name"] = table_name | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  |     table_input["PartitionKeys"] = partition_keys or [] | 
					
						
							|  |  |  |     table_input["StorageDescriptor"]["Columns"] = columns or [] | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     table_input["StorageDescriptor"][ | 
					
						
							|  |  |  |         "Location" | 
					
						
							|  |  |  |     ] = "s3://my-bucket/{database_name}/{table_name}".format( | 
					
						
							|  |  |  |         database_name=database_name, table_name=table_name | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     ) | 
					
						
							|  |  |  |     return table_input | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def create_table(client, database_name, table_name, table_input=None, **kwargs): | 
					
						
							|  |  |  |     if table_input is None: | 
					
						
							|  |  |  |         table_input = create_table_input(database_name, table_name, **kwargs) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     return client.create_table(DatabaseName=database_name, TableInput=table_input) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def update_table(client, database_name, table_name, table_input=None, **kwargs): | 
					
						
							|  |  |  |     if table_input is None: | 
					
						
							|  |  |  |         table_input = create_table_input(database_name, table_name, **kwargs) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     return client.update_table(DatabaseName=database_name, TableInput=table_input) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_table(client, database_name, table_name): | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     return client.get_table(DatabaseName=database_name, Name=table_name) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_tables(client, database_name): | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     return client.get_tables(DatabaseName=database_name) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_table_versions(client, database_name, table_name): | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     return client.get_table_versions(DatabaseName=database_name, TableName=table_name) | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_table_version(client, database_name, table_name, version_id): | 
					
						
							|  |  |  |     return client.get_table_version( | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |         DatabaseName=database_name, TableName=table_name, VersionId=version_id | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-14 21:37:10 +02:00
										 |  |  | def create_column(name, type_, comment=None, parameters=None): | 
					
						
							|  |  |  |     column = {"Name": name, "Type": type_} | 
					
						
							|  |  |  |     if comment is not None: | 
					
						
							|  |  |  |         column["Comment"] = comment | 
					
						
							|  |  |  |     if parameters is not None: | 
					
						
							|  |  |  |         column["Parameters"] = parameters | 
					
						
							|  |  |  |     return column | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  | def create_partition_input(database_name, table_name, values=None, columns=None): | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     root_path = "s3://my-bucket/{database_name}/{table_name}".format( | 
					
						
							|  |  |  |         database_name=database_name, table_name=table_name | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     part_input = copy.deepcopy(PARTITION_INPUT) | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  |     part_input["Values"] = values or [] | 
					
						
							|  |  |  |     part_input["StorageDescriptor"]["Columns"] = columns or [] | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |     part_input["StorageDescriptor"]["SerdeInfo"]["Parameters"]["path"] = root_path | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     return part_input | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def create_partition(client, database_name, table_name, partiton_input=None, **kwargs): | 
					
						
							|  |  |  |     if partiton_input is None: | 
					
						
							|  |  |  |         partiton_input = create_partition_input(database_name, table_name, **kwargs) | 
					
						
							|  |  |  |     return client.create_partition( | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |         DatabaseName=database_name, TableName=table_name, PartitionInput=partiton_input | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  | def update_partition( | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  |     client, database_name, table_name, old_values=None, partiton_input=None, **kwargs | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  | ): | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     if partiton_input is None: | 
					
						
							|  |  |  |         partiton_input = create_partition_input(database_name, table_name, **kwargs) | 
					
						
							|  |  |  |     return client.update_partition( | 
					
						
							|  |  |  |         DatabaseName=database_name, | 
					
						
							|  |  |  |         TableName=table_name, | 
					
						
							|  |  |  |         PartitionInput=partiton_input, | 
					
						
							| 
									
										
										
										
											2021-10-18 19:44:29 +00:00
										 |  |  |         PartitionValueList=old_values or [], | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_partition(client, database_name, table_name, values): | 
					
						
							|  |  |  |     return client.get_partition( | 
					
						
							| 
									
										
										
										
											2019-12-23 08:38:53 +01:00
										 |  |  |         DatabaseName=database_name, TableName=table_name, PartitionValues=values | 
					
						
							| 
									
										
										
										
											2018-12-21 12:28:56 +01:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2021-08-26 10:49:41 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def create_crawler( | 
					
						
							|  |  |  |     client, crawler_name, crawler_role=None, crawler_targets=None, **kwargs | 
					
						
							|  |  |  | ): | 
					
						
							|  |  |  |     optional_param_map = { | 
					
						
							|  |  |  |         "database_name": "DatabaseName", | 
					
						
							|  |  |  |         "description": "Description", | 
					
						
							|  |  |  |         "schedule": "Schedule", | 
					
						
							|  |  |  |         "classifiers": "Classifiers", | 
					
						
							|  |  |  |         "table_prefix": "TablePrefix", | 
					
						
							|  |  |  |         "schema_change_policy": "SchemaChangePolicy", | 
					
						
							|  |  |  |         "recrawl_policy": "RecrawlPolicy", | 
					
						
							|  |  |  |         "lineage_configuration": "LineageConfiguration", | 
					
						
							|  |  |  |         "configuration": "Configuration", | 
					
						
							|  |  |  |         "crawler_security_configuration": "CrawlerSecurityConfiguration", | 
					
						
							|  |  |  |         "tags": "Tags", | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     params = { | 
					
						
							|  |  |  |         boto3_key: kwargs.get(key) | 
					
						
							|  |  |  |         for key, boto3_key in optional_param_map.items() | 
					
						
							|  |  |  |         if kwargs.get(key) is not None | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if crawler_role is None: | 
					
						
							|  |  |  |         crawler_role = "arn:aws:iam::123456789012:role/Glue/Role" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if crawler_targets is None: | 
					
						
							|  |  |  |         crawler_targets = { | 
					
						
							|  |  |  |             "S3Targets": [], | 
					
						
							|  |  |  |             "JdbcTargets": [], | 
					
						
							|  |  |  |             "MongoDBTargets": [], | 
					
						
							|  |  |  |             "DynamoDBTargets": [], | 
					
						
							|  |  |  |             "CatalogTargets": [], | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return client.create_crawler( | 
					
						
							| 
									
										
										
										
											2022-03-10 13:39:59 -01:00
										 |  |  |         Name=crawler_name, Role=crawler_role, Targets=crawler_targets, **params | 
					
						
							| 
									
										
										
										
											2021-08-26 10:49:41 +01:00
										 |  |  |     ) |