From 1cb2c80bf2cc5141ec6814d48085b41340f0e312 Mon Sep 17 00:00:00 2001 From: joshuaghezzi <104036091+joshuaghezzi@users.noreply.github.com> Date: Thu, 12 May 2022 22:17:17 +1200 Subject: [PATCH] list crawler implementation (#5123) --- moto/glue/models.py | 13 ++++++++ moto/glue/responses.py | 30 +++++++++++++++-- tests/test_glue/test_glue.py | 65 +++++++++++++++++++++++++++++++++++- 3 files changed, 104 insertions(+), 4 deletions(-) diff --git a/moto/glue/models.py b/moto/glue/models.py index 0c00d817d..08c0542a8 100644 --- a/moto/glue/models.py +++ b/moto/glue/models.py @@ -24,6 +24,12 @@ from ..utilities.paginator import paginate class GlueBackend(BaseBackend): PAGINATION_MODEL = { + "list_crawlers": { + "input_token": "next_token", + "limit_key": "max_results", + "limit_default": 100, + "unique_attribute": "name", + }, "list_jobs": { "input_token": "next_token", "limit_key": "max_results", @@ -143,6 +149,10 @@ class GlueBackend(BaseBackend): def get_crawlers(self): return [self.crawlers[key] for key in self.crawlers] if self.crawlers else [] + @paginate(pagination_model=PAGINATION_MODEL) + def list_crawlers(self): + return [crawler for _, crawler in self.crawlers.items()] + def start_crawler(self, name): crawler = self.get_crawler(name) crawler.start_crawler() @@ -380,6 +390,9 @@ class FakeCrawler(BaseModel): self.crawl_elapsed_time = 0 self.last_crawl_info = None + def get_name(self): + return self.name + def as_dict(self): last_crawl = self.last_crawl_info.as_dict() if self.last_crawl_info else None data = { diff --git a/moto/glue/responses.py b/moto/glue/responses.py index 78a97c651..3160c84ba 100644 --- a/moto/glue/responses.py +++ b/moto/glue/responses.py @@ -307,6 +307,30 @@ class GlueResponse(BaseResponse): crawlers = self.glue_backend.get_crawlers() return json.dumps({"Crawlers": [crawler.as_dict() for crawler in crawlers]}) + def list_crawlers(self): + next_token = self._get_param("NextToken") + max_results = self._get_int_param("MaxResults") + tags = self._get_param("Tags") + crawlers, next_token = self.glue_backend.list_crawlers( + next_token=next_token, max_results=max_results + ) + filtered_crawler_names = self.filter_crawlers_by_tags(crawlers, tags) + return json.dumps( + dict( + CrawlerNames=[crawler_name for crawler_name in filtered_crawler_names], + NextToken=next_token, + ) + ) + + def filter_crawlers_by_tags(self, crawlers, tags): + if not tags: + return [crawler.get_name() for crawler in crawlers] + return [ + crawler.get_name() + for crawler in crawlers + if self.is_tags_match(crawler.tags, tags) + ] + def start_crawler(self): name = self.parameters.get("Name") self.glue_backend.start_crawler(name) @@ -402,9 +426,9 @@ class GlueResponse(BaseResponse): return [job.get_name() for job in jobs if self.is_tags_match(job.tags, tags)] @staticmethod - def is_tags_match(job_tags, tags): - mutual_keys = set(job_tags).intersection(tags) + def is_tags_match(glue_resource_tags, tags): + mutual_keys = set(glue_resource_tags).intersection(tags) for key in mutual_keys: - if job_tags[key] == tags[key]: + if glue_resource_tags[key] == tags[key]: return True return False diff --git a/tests/test_glue/test_glue.py b/tests/test_glue/test_glue.py index 8d39550e5..fa2395bd2 100644 --- a/tests/test_glue/test_glue.py +++ b/tests/test_glue/test_glue.py @@ -193,7 +193,7 @@ def test_list_jobs_with_tags(): @mock_glue -def test_next_token_logic_does_not_create_infinite_loop(): +def test_list_jobs_next_token_logic_does_not_create_infinite_loop(): client = create_glue_client() create_test_jobs(client, 4) first_response = client.list_jobs(MaxResults=1) @@ -228,3 +228,66 @@ def create_test_job_w_all_attributes(client, **job_attributes): def create_test_jobs(client, number_of_jobs): for _ in range(number_of_jobs): create_test_job(client) + + +def create_test_crawler(client, tags=None): + crawler_name = str(uuid4()) + client.create_crawler( + Name=crawler_name, + Role="test_role", + Targets={"S3Targets": [{"Path": "s3://tests3target"}]}, + Tags=tags or {}, + ) + return crawler_name + + +def create_test_crawlers(client, number_of_crawlers): + for _ in range(number_of_crawlers): + create_test_crawler(client) + + +@mock_glue +def test_list_crawlers_with_max_results(): + client = create_glue_client() + create_test_crawlers(client, 4) + response = client.list_crawlers(MaxResults=2) + response["CrawlerNames"].should.have.length_of(2) + response.should.have.key("NextToken") + + +@mock_glue +def test_list_crawlers_from_next_token(): + client = create_glue_client() + create_test_crawlers(client, 10) + first_response = client.list_crawlers(MaxResults=3) + response = client.list_crawlers(NextToken=first_response["NextToken"]) + response["CrawlerNames"].should.have.length_of(7) + + +@mock_glue +def test_list_crawlers_with_max_results_greater_than_actual_results(): + client = create_glue_client() + create_test_crawlers(client, 4) + response = client.list_crawlers(MaxResults=10) + response["CrawlerNames"].should.have.length_of(4) + + +@mock_glue +def test_list_crawlers_with_tags(): + client = create_glue_client() + create_test_crawler(client) + create_test_crawler(client, {"string": "string"}) + response = client.list_crawlers(Tags={"string": "string"}) + response["CrawlerNames"].should.have.length_of(1) + + +@mock_glue +def test_list_crawlers_next_token_logic_does_not_create_infinite_loop(): + client = create_glue_client() + create_test_crawlers(client, 4) + first_response = client.list_crawlers(MaxResults=1) + next_token = first_response["NextToken"] + while next_token: + response = client.list_crawlers(NextToken=next_token) + next_token = response.get("NextToken") + assert not next_token