Glue - Implemented create_crawler, get_crawler, get_crawlers, delete_crawler. Updated IMPLEMENTATION_COVERAGE.md. (#4222)
This commit is contained in:
parent
cbbeaff23e
commit
21021a6a03
@ -1027,7 +1027,7 @@
|
|||||||
|
|
||||||
## batch
|
## batch
|
||||||
<details>
|
<details>
|
||||||
<summary>78% implemented</summary>
|
<summary>84% implemented</summary>
|
||||||
|
|
||||||
- [X] cancel_job
|
- [X] cancel_job
|
||||||
- [X] create_compute_environment
|
- [X] create_compute_environment
|
||||||
@ -3797,7 +3797,7 @@
|
|||||||
- [ ] modify_vpc_endpoint_connection_notification
|
- [ ] modify_vpc_endpoint_connection_notification
|
||||||
- [ ] modify_vpc_endpoint_service_configuration
|
- [ ] modify_vpc_endpoint_service_configuration
|
||||||
- [ ] modify_vpc_endpoint_service_permissions
|
- [ ] modify_vpc_endpoint_service_permissions
|
||||||
- [ ] modify_vpc_peering_connection_options
|
- [X] modify_vpc_peering_connection_options
|
||||||
- [X] modify_vpc_tenancy
|
- [X] modify_vpc_tenancy
|
||||||
- [ ] modify_vpn_connection
|
- [ ] modify_vpn_connection
|
||||||
- [ ] modify_vpn_connection_options
|
- [ ] modify_vpn_connection_options
|
||||||
@ -4904,7 +4904,7 @@
|
|||||||
|
|
||||||
## glue
|
## glue
|
||||||
<details>
|
<details>
|
||||||
<summary>4% implemented</summary>
|
<summary>7% implemented</summary>
|
||||||
|
|
||||||
- [ ] batch_create_partition
|
- [ ] batch_create_partition
|
||||||
- [ ] batch_delete_connection
|
- [ ] batch_delete_connection
|
||||||
@ -4923,7 +4923,7 @@
|
|||||||
- [ ] check_schema_version_validity
|
- [ ] check_schema_version_validity
|
||||||
- [ ] create_classifier
|
- [ ] create_classifier
|
||||||
- [ ] create_connection
|
- [ ] create_connection
|
||||||
- [ ] create_crawler
|
- [X] create_crawler
|
||||||
- [X] create_database
|
- [X] create_database
|
||||||
- [ ] create_dev_endpoint
|
- [ ] create_dev_endpoint
|
||||||
- [ ] create_job
|
- [ ] create_job
|
||||||
@ -4942,7 +4942,7 @@
|
|||||||
- [ ] delete_column_statistics_for_partition
|
- [ ] delete_column_statistics_for_partition
|
||||||
- [ ] delete_column_statistics_for_table
|
- [ ] delete_column_statistics_for_table
|
||||||
- [ ] delete_connection
|
- [ ] delete_connection
|
||||||
- [ ] delete_crawler
|
- [X] delete_crawler
|
||||||
- [ ] delete_database
|
- [ ] delete_database
|
||||||
- [ ] delete_dev_endpoint
|
- [ ] delete_dev_endpoint
|
||||||
- [ ] delete_job
|
- [ ] delete_job
|
||||||
@ -4966,9 +4966,9 @@
|
|||||||
- [ ] get_column_statistics_for_table
|
- [ ] get_column_statistics_for_table
|
||||||
- [ ] get_connection
|
- [ ] get_connection
|
||||||
- [ ] get_connections
|
- [ ] get_connections
|
||||||
- [ ] get_crawler
|
- [X] get_crawler
|
||||||
- [ ] get_crawler_metrics
|
- [ ] get_crawler_metrics
|
||||||
- [ ] get_crawlers
|
- [X] get_crawlers
|
||||||
- [ ] get_data_catalog_encryption_settings
|
- [ ] get_data_catalog_encryption_settings
|
||||||
- [X] get_database
|
- [X] get_database
|
||||||
- [X] get_databases
|
- [X] get_databases
|
||||||
@ -10589,7 +10589,7 @@
|
|||||||
|
|
||||||
## ssm
|
## ssm
|
||||||
<details>
|
<details>
|
||||||
<summary>16% implemented</summary>
|
<summary>17% implemented</summary>
|
||||||
|
|
||||||
- [X] add_tags_to_resource
|
- [X] add_tags_to_resource
|
||||||
- [ ] associate_ops_item_related_item
|
- [ ] associate_ops_item_related_item
|
||||||
@ -10626,7 +10626,7 @@
|
|||||||
- [ ] describe_automation_step_executions
|
- [ ] describe_automation_step_executions
|
||||||
- [ ] describe_available_patches
|
- [ ] describe_available_patches
|
||||||
- [X] describe_document
|
- [X] describe_document
|
||||||
- [ ] describe_document_permission
|
- [X] describe_document_permission
|
||||||
- [ ] describe_effective_instance_associations
|
- [ ] describe_effective_instance_associations
|
||||||
- [ ] describe_effective_patches_for_patch_baseline
|
- [ ] describe_effective_patches_for_patch_baseline
|
||||||
- [ ] describe_instance_associations_status
|
- [ ] describe_instance_associations_status
|
||||||
@ -10692,7 +10692,7 @@
|
|||||||
- [ ] list_resource_compliance_summaries
|
- [ ] list_resource_compliance_summaries
|
||||||
- [ ] list_resource_data_sync
|
- [ ] list_resource_data_sync
|
||||||
- [X] list_tags_for_resource
|
- [X] list_tags_for_resource
|
||||||
- [ ] modify_document_permission
|
- [X] modify_document_permission
|
||||||
- [ ] put_compliance_items
|
- [ ] put_compliance_items
|
||||||
- [ ] put_inventory
|
- [ ] put_inventory
|
||||||
- [X] put_parameter
|
- [X] put_parameter
|
||||||
|
@ -28,6 +28,11 @@ class PartitionAlreadyExistsException(AlreadyExistsException):
|
|||||||
super(PartitionAlreadyExistsException, self).__init__("Partition")
|
super(PartitionAlreadyExistsException, self).__init__("Partition")
|
||||||
|
|
||||||
|
|
||||||
|
class CrawlerAlreadyExistsException(AlreadyExistsException):
    """Error raised for an attempt to create a crawler that already exists."""

    def __init__(self):
        # The parent class receives the resource kind as its only argument.
        resource_kind = "Crawler"
        super(CrawlerAlreadyExistsException, self).__init__(resource_kind)
|
||||||
|
|
||||||
|
|
||||||
class EntityNotFoundException(GlueClientError):
|
class EntityNotFoundException(GlueClientError):
|
||||||
def __init__(self, msg):
|
def __init__(self, msg):
|
||||||
super(GlueClientError, self).__init__("EntityNotFoundException", msg)
|
super(GlueClientError, self).__init__("EntityNotFoundException", msg)
|
||||||
@ -48,6 +53,13 @@ class PartitionNotFoundException(EntityNotFoundException):
|
|||||||
super(PartitionNotFoundException, self).__init__("Cannot find partition.")
|
super(PartitionNotFoundException, self).__init__("Cannot find partition.")
|
||||||
|
|
||||||
|
|
||||||
|
class CrawlerNotFoundException(EntityNotFoundException):
    """Error raised when no crawler is stored under the requested name."""

    def __init__(self, crawler):
        # `crawler` is interpolated into the error message unchanged.
        message = "Crawler %s not found." % crawler
        super(CrawlerNotFoundException, self).__init__(message)
|
||||||
|
|
||||||
|
|
||||||
class VersionNotFoundException(EntityNotFoundException):
|
class VersionNotFoundException(EntityNotFoundException):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(VersionNotFoundException, self).__init__("Version not found.")
|
super(VersionNotFoundException, self).__init__("Version not found.")
|
||||||
|
@ -7,6 +7,8 @@ from moto.core import BaseBackend, BaseModel
|
|||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from .exceptions import (
|
from .exceptions import (
|
||||||
JsonRESTError,
|
JsonRESTError,
|
||||||
|
CrawlerAlreadyExistsException,
|
||||||
|
CrawlerNotFoundException,
|
||||||
DatabaseAlreadyExistsException,
|
DatabaseAlreadyExistsException,
|
||||||
DatabaseNotFoundException,
|
DatabaseNotFoundException,
|
||||||
TableAlreadyExistsException,
|
TableAlreadyExistsException,
|
||||||
@ -20,6 +22,7 @@ from .exceptions import (
|
|||||||
class GlueBackend(BaseBackend):
|
class GlueBackend(BaseBackend):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.databases = OrderedDict()
|
self.databases = OrderedDict()
|
||||||
|
self.crawlers = OrderedDict()
|
||||||
|
|
||||||
def create_database(self, database_name, database_input):
|
def create_database(self, database_name, database_input):
|
||||||
if database_name in self.databases:
|
if database_name in self.databases:
|
||||||
@ -67,6 +70,59 @@ class GlueBackend(BaseBackend):
|
|||||||
raise TableNotFoundException(table_name)
|
raise TableNotFoundException(table_name)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
def create_crawler(
    self,
    name,
    role,
    database_name,
    description,
    targets,
    schedule,
    classifiers,
    table_prefix,
    schema_change_policy,
    recrawl_policy,
    lineage_configuration,
    configuration,
    crawler_security_configuration,
    tags,
):
    """Store a new crawler under ``name``.

    Raises CrawlerAlreadyExistsException when ``name`` is already a key of
    ``self.crawlers``. Otherwise a FakeCrawler is built from the arguments
    and saved in ``self.crawlers``. Nothing is returned.
    """
    if name in self.crawlers:
        raise CrawlerAlreadyExistsException()

    # All arguments are forwarded to FakeCrawler one-to-one, by keyword.
    crawler_fields = dict(
        name=name,
        role=role,
        database_name=database_name,
        description=description,
        targets=targets,
        schedule=schedule,
        classifiers=classifiers,
        table_prefix=table_prefix,
        schema_change_policy=schema_change_policy,
        recrawl_policy=recrawl_policy,
        lineage_configuration=lineage_configuration,
        configuration=configuration,
        crawler_security_configuration=crawler_security_configuration,
        tags=tags,
    )
    self.crawlers[name] = FakeCrawler(**crawler_fields)
|
||||||
|
|
||||||
|
def get_crawler(self, name):
    """Return the crawler stored under ``name``.

    Raises CrawlerNotFoundException when ``name`` is not a key of
    ``self.crawlers``.
    """
    if name not in self.crawlers:
        raise CrawlerNotFoundException(name)
    return self.crawlers[name]
|
||||||
|
|
||||||
|
def get_crawlers(self):
    """Return every stored crawler as a new list, in insertion order.

    An empty store yields an empty list, so no special case is needed.
    """
    return list(self.crawlers.values())
|
||||||
|
|
||||||
|
def delete_crawler(self, name):
    """Remove the crawler stored under ``name``.

    Raises CrawlerNotFoundException when ``name`` is not a key of
    ``self.crawlers``. Nothing is returned.
    """
    if name not in self.crawlers:
        raise CrawlerNotFoundException(name)
    self.crawlers.pop(name)
|
||||||
|
|
||||||
|
|
||||||
class FakeDatabase(BaseModel):
|
class FakeDatabase(BaseModel):
|
||||||
def __init__(self, database_name, database_input):
|
def __init__(self, database_name, database_input):
|
||||||
@ -177,4 +233,100 @@ class FakePartition(BaseModel):
|
|||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
|
class FakeCrawler(BaseModel):
    """In-memory record of a crawler and the settings it was created with."""

    def __init__(
        self,
        name,
        role,
        database_name,
        description,
        targets,
        schedule,
        classifiers,
        table_prefix,
        schema_change_policy,
        recrawl_policy,
        lineage_configuration,
        configuration,
        crawler_security_configuration,
        tags,
    ):
        """Store the given settings and initialise the bookkeeping fields."""
        # Settings supplied by the caller, stored unchanged.
        self.name = name
        self.role = role
        self.database_name = database_name
        self.description = description
        self.targets = targets
        self.schedule = schedule
        self.classifiers = classifiers
        self.table_prefix = table_prefix
        self.schema_change_policy = schema_change_policy
        self.recrawl_policy = recrawl_policy
        self.lineage_configuration = lineage_configuration
        self.configuration = configuration
        self.crawler_security_configuration = crawler_security_configuration
        self.tags = tags

        # Bookkeeping fields for a freshly created crawler.
        self.state = "READY"
        # NOTE(review): relies on `datetime` being imported at module level;
        # the import is not visible here - confirm.
        self.creation_time = datetime.utcnow()
        self.last_updated = self.creation_time
        self.version = 1
        self.crawl_elapsed_time = 0
        self.last_crawl_info = None

    def as_dict(self):
        """Return the crawler as a dict keyed by the response field names.

        ``LastCrawl`` is always present and is ``None`` when
        ``last_crawl_info`` is unset. ``Schedule`` is added only when a
        schedule is set. ``tags`` is not part of the result.
        """
        # Computed once; this value is the only source for "LastCrawl".
        last_crawl = self.last_crawl_info.as_dict() if self.last_crawl_info else None
        data = {
            "Name": self.name,
            "Role": self.role,
            "Targets": self.targets,
            "DatabaseName": self.database_name,
            "Description": self.description,
            "Classifiers": self.classifiers,
            "RecrawlPolicy": self.recrawl_policy,
            "SchemaChangePolicy": self.schema_change_policy,
            "LineageConfiguration": self.lineage_configuration,
            "State": self.state,
            "TablePrefix": self.table_prefix,
            "CrawlElapsedTime": self.crawl_elapsed_time,
            "CreationTime": self.creation_time.isoformat(),
            "LastUpdated": self.last_updated.isoformat(),
            "LastCrawl": last_crawl,
            "Version": self.version,
            "Configuration": self.configuration,
            "CrawlerSecurityConfiguration": self.crawler_security_configuration,
        }

        if self.schedule:
            data["Schedule"] = {
                "ScheduleExpression": self.schedule,
                "State": "SCHEDULED",
            }

        return data
|
||||||
|
|
||||||
|
|
||||||
|
class LastCrawlInfo(BaseModel):
    """Holds the details of a crawler's last crawl."""

    def __init__(
        self, error_message, log_group, log_stream, message_prefix, start_time, status,
    ):
        """Store each argument on the instance unchanged."""
        self.status = status
        self.start_time = start_time
        self.message_prefix = message_prefix
        self.log_stream = log_stream
        self.log_group = log_group
        self.error_message = error_message

    def as_dict(self):
        """Return the stored values as a dict keyed by the response field names."""
        summary = {}
        summary["ErrorMessage"] = self.error_message
        summary["LogGroup"] = self.log_group
        summary["LogStream"] = self.log_stream
        summary["MessagePrefix"] = self.message_prefix
        summary["StartTime"] = self.start_time
        summary["Status"] = self.status
        return summary
|
||||||
|
|
||||||
|
|
||||||
glue_backend = GlueBackend()
|
glue_backend = GlueBackend()
|
||||||
|
@ -274,3 +274,38 @@ class GlueResponse(BaseResponse):
|
|||||||
out["Errors"] = errors_output
|
out["Errors"] = errors_output
|
||||||
|
|
||||||
return json.dumps(out)
|
return json.dumps(out)
|
||||||
|
|
||||||
|
def create_crawler(self):
    """Create a crawler from the request parameters.

    Each request key is read with ``.get`` (missing keys become ``None``)
    and passed to the backend by keyword. Returns an empty string.
    """
    request = self.parameters
    # (backend keyword, request key) pairs, in the backend's argument order.
    argument_keys = (
        ("name", "Name"),
        ("role", "Role"),
        ("database_name", "DatabaseName"),
        ("description", "Description"),
        ("targets", "Targets"),
        ("schedule", "Schedule"),
        ("classifiers", "Classifiers"),
        ("table_prefix", "TablePrefix"),
        ("schema_change_policy", "SchemaChangePolicy"),
        ("recrawl_policy", "RecrawlPolicy"),
        ("lineage_configuration", "LineageConfiguration"),
        ("configuration", "Configuration"),
        ("crawler_security_configuration", "CrawlerSecurityConfiguration"),
        ("tags", "Tags"),
    )
    backend_kwargs = {
        keyword: request.get(request_key) for keyword, request_key in argument_keys
    }
    self.glue_backend.create_crawler(**backend_kwargs)
    return ""
|
||||||
|
|
||||||
|
def get_crawler(self):
    """Return the crawler named by the ``Name`` parameter as a JSON string.

    The crawler's ``as_dict()`` output is wrapped under the ``Crawler`` key.
    """
    crawler_name = self.parameters.get("Name")
    payload = {"Crawler": self.glue_backend.get_crawler(crawler_name).as_dict()}
    return json.dumps(payload)
|
||||||
|
|
||||||
|
def get_crawlers(self):
    """Return all backend crawlers as a JSON string.

    Each crawler's ``as_dict()`` output is listed under the ``Crawlers`` key.
    """
    serialized = [item.as_dict() for item in self.glue_backend.get_crawlers()]
    payload = {"Crawlers": serialized}
    return json.dumps(payload)
|
||||||
|
|
||||||
|
def delete_crawler(self):
    """Delete the crawler named by the ``Name`` parameter.

    Returns an empty string.
    """
    self.glue_backend.delete_crawler(self.parameters.get("Name"))
    return ""
|
||||||
|
@ -106,3 +106,43 @@ def get_partition(client, database_name, table_name, values):
|
|||||||
return client.get_partition(
|
return client.get_partition(
|
||||||
DatabaseName=database_name, TableName=table_name, PartitionValues=values
|
DatabaseName=database_name, TableName=table_name, PartitionValues=values
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def create_crawler(
    client, crawler_name, crawler_role=None, crawler_targets=None, **kwargs
):
    """Call ``client.create_crawler`` with defaults filled in.

    A missing role or targets argument is replaced by a fixed role ARN or
    a targets dict of empty lists. Recognised keyword options are renamed
    to their API parameter names and forwarded only when not ``None``.
    Unrecognised keyword options are ignored. Returns the client's result.
    """
    optional_param_map = {
        "database_name": "DatabaseName",
        "description": "Description",
        "schedule": "Schedule",
        "classifiers": "Classifiers",
        "table_prefix": "TablePrefix",
        "schema_change_policy": "SchemaChangePolicy",
        "recrawl_policy": "RecrawlPolicy",
        "lineage_configuration": "LineageConfiguration",
        "configuration": "Configuration",
        "crawler_security_configuration": "CrawlerSecurityConfiguration",
        "tags": "Tags",
    }

    # Keep only the options the caller actually supplied.
    extra_params = {}
    for option, api_name in optional_param_map.items():
        value = kwargs.get(option)
        if value is not None:
            extra_params[api_name] = value

    role = crawler_role
    if role is None:
        role = "arn:aws:iam::123456789012:role/Glue/Role"

    targets = crawler_targets
    if targets is None:
        targets = {
            "S3Targets": [],
            "JdbcTargets": [],
            "MongoDBTargets": [],
            "DynamoDBTargets": [],
            "CatalogTargets": [],
        }

    return client.create_crawler(
        Name=crawler_name, Role=role, Targets=targets, **extra_params
    )
|
||||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import sure # noqa
|
import sure # noqa
|
||||||
import re
|
import re
|
||||||
import pytest
|
import pytest
|
||||||
|
import json
|
||||||
import boto3
|
import boto3
|
||||||
from botocore.client import ClientError
|
from botocore.client import ClientError
|
||||||
|
|
||||||
@ -905,3 +906,247 @@ def test_batch_delete_partition_with_bad_partitions():
|
|||||||
["2018-11-01"].should.be.within(error_partitions)
|
["2018-11-01"].should.be.within(error_partitions)
|
||||||
["2018-11-02"].should.be.within(error_partitions)
|
["2018-11-02"].should.be.within(error_partitions)
|
||||||
["2018-11-03"].should.be.within(error_partitions)
|
["2018-11-03"].should.be.within(error_partitions)
|
||||||
|
|
||||||
|
|
||||||
|
@mock_glue
@freeze_time(FROZEN_CREATE_TIME)
def test_create_crawler_scheduled():
    """A crawler created with a schedule is returned with all its settings."""
    glue = boto3.client("glue", region_name="us-east-1")
    name = "my_crawler_name"
    role = "arn:aws:iam::123456789012:role/Glue/Role"
    targets = {
        "S3Targets": [{"Path": "s3://my-source-bucket/"}],
        "JdbcTargets": [],
        "MongoDBTargets": [],
        "DynamoDBTargets": [],
        "CatalogTargets": [],
    }
    schedule = "cron(15 12 * * ? *)"
    options = {
        "database_name": "my_database_name",
        "description": "my crawler description",
        "schedule": schedule,
        "classifiers": [],
        "table_prefix": "my_table_prefix_",
        "schema_change_policy": {"UpdateBehavior": "LOG", "DeleteBehavior": "LOG"},
        "recrawl_policy": {"RecrawlBehavior": "CRAWL_NEW_FOLDERS_ONLY"},
        "lineage_configuration": {"CrawlerLineageSettings": "DISABLE"},
        "configuration": json.dumps(
            {
                "Version": 1.0,
                "CrawlerOutput": {
                    "Partitions": {"AddOrUpdateBehavior": "InheritFromTable"},
                },
                "Grouping": {"TableGroupingPolicy": "CombineCompatibleSchemas"},
            }
        ),
        "crawler_security_configuration": "my_security_configuration",
        "tags": {"tag_key": "tag_value"},
    }
    helpers.create_crawler(glue, name, role, targets, **options)

    crawler = glue.get_crawler(Name=name)["Crawler"]

    # Checked in this order so the first mismatch reported stays the same.
    expected_fields = [
        ("Name", name),
        ("Role", role),
        ("DatabaseName", options["database_name"]),
        ("Description", options["description"]),
        ("Targets", targets),
        ("Schedule", {"ScheduleExpression": schedule, "State": "SCHEDULED"}),
        ("Classifiers", options["classifiers"]),
        ("TablePrefix", options["table_prefix"]),
        ("SchemaChangePolicy", options["schema_change_policy"]),
        ("RecrawlPolicy", options["recrawl_policy"]),
        ("LineageConfiguration", options["lineage_configuration"]),
        ("Configuration", options["configuration"]),
        ("CrawlerSecurityConfiguration", options["crawler_security_configuration"]),
        ("State", "READY"),
        ("CrawlElapsedTime", 0),
        ("Version", 1),
    ]
    for field, value in expected_fields:
        crawler.get(field).should.equal(value)

    # Timestamps are only compared outside server mode.
    if not settings.TEST_SERVER_MODE:
        for field in ("CreationTime", "LastUpdated"):
            crawler.get(field).should.equal(FROZEN_CREATE_TIME)

    crawler.should.not_have.key("LastCrawl")
|
||||||
|
|
||||||
|
|
||||||
|
@mock_glue
@freeze_time(FROZEN_CREATE_TIME)
def test_create_crawler_unscheduled():
    """A crawler created without a schedule is returned with no Schedule key."""
    glue = boto3.client("glue", region_name="us-east-1")
    name = "my_crawler_name"
    role = "arn:aws:iam::123456789012:role/Glue/Role"
    targets = {
        "S3Targets": [{"Path": "s3://my-source-bucket/"}],
        "JdbcTargets": [],
        "MongoDBTargets": [],
        "DynamoDBTargets": [],
        "CatalogTargets": [],
    }
    options = {
        "database_name": "my_database_name",
        "description": "my crawler description",
        "classifiers": [],
        "table_prefix": "my_table_prefix_",
        "schema_change_policy": {"UpdateBehavior": "LOG", "DeleteBehavior": "LOG"},
        "recrawl_policy": {"RecrawlBehavior": "CRAWL_NEW_FOLDERS_ONLY"},
        "lineage_configuration": {"CrawlerLineageSettings": "DISABLE"},
        "configuration": json.dumps(
            {
                "Version": 1.0,
                "CrawlerOutput": {
                    "Partitions": {"AddOrUpdateBehavior": "InheritFromTable"},
                },
                "Grouping": {"TableGroupingPolicy": "CombineCompatibleSchemas"},
            }
        ),
        "crawler_security_configuration": "my_security_configuration",
        "tags": {"tag_key": "tag_value"},
    }
    helpers.create_crawler(glue, name, role, targets, **options)

    crawler = glue.get_crawler(Name=name)["Crawler"]

    # Checked in this order so the first mismatch reported stays the same.
    fields_before_schedule = [
        ("Name", name),
        ("Role", role),
        ("DatabaseName", options["database_name"]),
        ("Description", options["description"]),
        ("Targets", targets),
    ]
    for field, value in fields_before_schedule:
        crawler.get(field).should.equal(value)

    crawler.should.not_have.key("Schedule")

    fields_after_schedule = [
        ("Classifiers", options["classifiers"]),
        ("TablePrefix", options["table_prefix"]),
        ("SchemaChangePolicy", options["schema_change_policy"]),
        ("RecrawlPolicy", options["recrawl_policy"]),
        ("LineageConfiguration", options["lineage_configuration"]),
        ("Configuration", options["configuration"]),
        ("CrawlerSecurityConfiguration", options["crawler_security_configuration"]),
        ("State", "READY"),
        ("CrawlElapsedTime", 0),
        ("Version", 1),
    ]
    for field, value in fields_after_schedule:
        crawler.get(field).should.equal(value)

    # Timestamps are only compared outside server mode.
    if not settings.TEST_SERVER_MODE:
        for field in ("CreationTime", "LastUpdated"):
            crawler.get(field).should.equal(FROZEN_CREATE_TIME)

    crawler.should.not_have.key("LastCrawl")
|
||||||
|
|
||||||
|
|
||||||
|
@mock_glue
def test_create_crawler_already_exists():
    """Creating a second crawler with the same name fails with AlreadyExistsException."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)

    with pytest.raises(ClientError) as raised:
        helpers.create_crawler(glue, crawler_name)

    error = raised.value.response["Error"]
    error["Code"].should.equal("AlreadyExistsException")
|
||||||
|
|
||||||
|
|
||||||
|
@mock_glue
def test_get_crawler_not_exits():
    """Fetching an unknown crawler fails with EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"

    with pytest.raises(ClientError) as raised:
        glue.get_crawler(Name=crawler_name)

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("Crawler my_crawler_name not found")
|
||||||
|
|
||||||
|
|
||||||
|
@mock_glue
def test_get_crawlers_empty():
    """With no crawlers created, get_crawlers returns an empty list."""
    glue = boto3.client("glue", region_name="us-east-1")
    listed = glue.get_crawlers()["Crawlers"]
    listed.should.have.length_of(0)
|
||||||
|
|
||||||
|
|
||||||
|
@mock_glue
def test_get_crawlers_several_items():
    """get_crawlers returns every crawler that was created."""
    glue = boto3.client("glue", region_name="us-east-1")
    first_name = "my_crawler_name_1"
    second_name = "my_crawler_name_2"

    helpers.create_crawler(glue, first_name)
    helpers.create_crawler(glue, second_name)

    # Sorted by name so the assertions do not depend on the listing order.
    listed = glue.get_crawlers()["Crawlers"]
    crawlers = sorted(listed, key=lambda item: item["Name"])
    crawlers.should.have.length_of(2)
    crawlers[0].get("Name").should.equal(first_name)
    crawlers[1].get("Name").should.equal(second_name)
|
||||||
|
|
||||||
|
|
||||||
|
@mock_glue
def test_delete_crawler():
    """Deleting a crawler succeeds and the crawler can no longer be fetched."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"
    helpers.create_crawler(glue, crawler_name)

    deletion = glue.delete_crawler(Name=crawler_name)
    status_code = deletion["ResponseMetadata"]["HTTPStatusCode"]
    status_code.should.equal(200)

    # A follow-up lookup must now fail.
    with pytest.raises(ClientError) as raised:
        glue.get_crawler(Name=crawler_name)

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("Crawler my_crawler_name not found")
|
||||||
|
|
||||||
|
|
||||||
|
@mock_glue
def test_delete_crawler_not_exists():
    """Deleting an unknown crawler fails with EntityNotFoundException."""
    glue = boto3.client("glue", region_name="us-east-1")
    crawler_name = "my_crawler_name"

    with pytest.raises(ClientError) as raised:
        glue.delete_crawler(Name=crawler_name)

    error = raised.value.response["Error"]
    error["Code"].should.equal("EntityNotFoundException")
    error["Message"].should.match("Crawler my_crawler_name not found")
|
||||||
|
Loading…
Reference in New Issue
Block a user