list crawler implementation (#5123)
This commit is contained in:
parent
31737bc81e
commit
1cb2c80bf2
@ -24,6 +24,12 @@ from ..utilities.paginator import paginate
|
||||
|
||||
class GlueBackend(BaseBackend):
|
||||
PAGINATION_MODEL = {
|
||||
"list_crawlers": {
|
||||
"input_token": "next_token",
|
||||
"limit_key": "max_results",
|
||||
"limit_default": 100,
|
||||
"unique_attribute": "name",
|
||||
},
|
||||
"list_jobs": {
|
||||
"input_token": "next_token",
|
||||
"limit_key": "max_results",
|
||||
@ -143,6 +149,10 @@ class GlueBackend(BaseBackend):
|
||||
def get_crawlers(self):
    """Return every registered crawler as a list.

    Returns an empty list when no crawlers exist — the ``if self.crawlers``
    guard in the old code was redundant, since iterating an empty dict
    already yields an empty list.
    """
    return list(self.crawlers.values())
|
||||
|
||||
@paginate(pagination_model=PAGINATION_MODEL)
def list_crawlers(self):
    """Return all crawlers for the ListCrawlers API.

    The full list is returned here; the ``@paginate`` decorator applies
    ``next_token``/``max_results`` slicing per PAGINATION_MODEL["list_crawlers"].
    The original body iterated ``.items()`` and threw the keys away
    (``for _, crawler in ...``); ``.values()`` says the same thing directly.
    """
    return list(self.crawlers.values())
|
||||
|
||||
def start_crawler(self, name):
    """Start the crawler registered under *name*.

    Propagates whatever ``get_crawler`` raises when no such crawler
    exists (lookup is not guarded here).
    """
    self.get_crawler(name).start_crawler()
|
||||
@ -380,6 +390,9 @@ class FakeCrawler(BaseModel):
|
||||
self.crawl_elapsed_time = 0
|
||||
self.last_crawl_info = None
|
||||
|
||||
def get_name(self):
    """Return this crawler's name."""
    return self.name
|
||||
|
||||
def as_dict(self):
|
||||
last_crawl = self.last_crawl_info.as_dict() if self.last_crawl_info else None
|
||||
data = {
|
||||
|
@ -307,6 +307,30 @@ class GlueResponse(BaseResponse):
|
||||
crawlers = self.glue_backend.get_crawlers()
|
||||
return json.dumps({"Crawlers": [crawler.as_dict() for crawler in crawlers]})
|
||||
|
||||
def list_crawlers(self):
    """Handle the ListCrawlers API call.

    Reads NextToken / MaxResults / Tags from the request, fetches one page
    of crawlers from the backend, filters that page by tags, and returns
    the JSON response body with the crawler names and the next token.

    NOTE: tag filtering happens after pagination, so a filtered page may
    contain fewer than MaxResults names.
    """
    next_token = self._get_param("NextToken")
    max_results = self._get_int_param("MaxResults")
    tags = self._get_param("Tags")
    crawlers, next_token = self.glue_backend.list_crawlers(
        next_token=next_token, max_results=max_results
    )
    # filter_crawlers_by_tags already returns a list of names;
    # the old identity comprehension just copied it.
    filtered_crawler_names = self.filter_crawlers_by_tags(crawlers, tags)
    return json.dumps(
        dict(
            CrawlerNames=filtered_crawler_names,
            NextToken=next_token,
        )
    )
|
||||
|
||||
def filter_crawlers_by_tags(self, crawlers, tags):
    """Return the names of the *crawlers* that satisfy the *tags* filter.

    A falsy filter (None or an empty mapping) matches every crawler, and in
    that case no crawler's tags are inspected at all.
    """
    matching_names = (
        crawler.get_name()
        for crawler in crawlers
        if not tags or self.is_tags_match(crawler.tags, tags)
    )
    return list(matching_names)
|
||||
|
||||
def start_crawler(self):
|
||||
name = self.parameters.get("Name")
|
||||
self.glue_backend.start_crawler(name)
|
||||
@ -402,9 +426,9 @@ class GlueResponse(BaseResponse):
|
||||
return [job.get_name() for job in jobs if self.is_tags_match(job.tags, tags)]
|
||||
|
||||
@staticmethod
|
||||
def is_tags_match(job_tags, tags):
|
||||
mutual_keys = set(job_tags).intersection(tags)
|
||||
def is_tags_match(glue_resource_tags, tags):
|
||||
mutual_keys = set(glue_resource_tags).intersection(tags)
|
||||
for key in mutual_keys:
|
||||
if job_tags[key] == tags[key]:
|
||||
if glue_resource_tags[key] == tags[key]:
|
||||
return True
|
||||
return False
|
||||
|
@ -193,7 +193,7 @@ def test_list_jobs_with_tags():
|
||||
|
||||
|
||||
@mock_glue
|
||||
def test_next_token_logic_does_not_create_infinite_loop():
|
||||
def test_list_jobs_next_token_logic_does_not_create_infinite_loop():
|
||||
client = create_glue_client()
|
||||
create_test_jobs(client, 4)
|
||||
first_response = client.list_jobs(MaxResults=1)
|
||||
@ -228,3 +228,66 @@ def create_test_job_w_all_attributes(client, **job_attributes):
|
||||
def create_test_jobs(client, number_of_jobs):
|
||||
for _ in range(number_of_jobs):
|
||||
create_test_job(client)
|
||||
|
||||
|
||||
def create_test_crawler(client, tags=None):
    """Create a crawler with a random unique name and return that name.

    *tags* defaults to an empty mapping when not supplied.
    """
    name = str(uuid4())
    crawler_kwargs = {
        "Name": name,
        "Role": "test_role",
        "Targets": {"S3Targets": [{"Path": "s3://tests3target"}]},
        "Tags": tags or {},
    }
    client.create_crawler(**crawler_kwargs)
    return name
|
||||
|
||||
|
||||
def create_test_crawlers(client, number_of_crawlers):
    """Create *number_of_crawlers* crawlers, each with a random unique name."""
    created = 0
    while created < number_of_crawlers:
        create_test_crawler(client)
        created += 1
|
||||
|
||||
|
||||
@mock_glue
def test_list_crawlers_with_max_results():
    """MaxResults caps the page size and produces a NextToken."""
    client = create_glue_client()
    create_test_crawlers(client, 4)
    resp = client.list_crawlers(MaxResults=2)
    resp["CrawlerNames"].should.have.length_of(2)
    resp.should.have.key("NextToken")
|
||||
|
||||
|
||||
@mock_glue
def test_list_crawlers_from_next_token():
    """Passing page one's NextToken returns all remaining crawlers."""
    client = create_glue_client()
    create_test_crawlers(client, 10)
    page_one = client.list_crawlers(MaxResults=3)
    page_two = client.list_crawlers(NextToken=page_one["NextToken"])
    page_two["CrawlerNames"].should.have.length_of(7)
||||
|
||||
|
||||
@mock_glue
def test_list_crawlers_with_max_results_greater_than_actual_results():
    """A MaxResults larger than the crawler count returns everything."""
    client = create_glue_client()
    create_test_crawlers(client, 4)
    resp = client.list_crawlers(MaxResults=10)
    resp["CrawlerNames"].should.have.length_of(4)
||||
|
||||
|
||||
@mock_glue
def test_list_crawlers_with_tags():
    """Only crawlers carrying the requested tag are listed."""
    client = create_glue_client()
    create_test_crawler(client)  # untagged: should be filtered out
    create_test_crawler(client, {"string": "string"})
    resp = client.list_crawlers(Tags={"string": "string"})
    resp["CrawlerNames"].should.have.length_of(1)
||||
|
||||
|
||||
@mock_glue
def test_list_crawlers_next_token_logic_does_not_create_infinite_loop():
    """Walking NextToken pages to exhaustion must terminate."""
    client = create_glue_client()
    create_test_crawlers(client, 4)
    token = client.list_crawlers(MaxResults=1)["NextToken"]
    while token:
        token = client.list_crawlers(NextToken=token).get("NextToken")
    assert not token
||||
|
Loading…
Reference in New Issue
Block a user