list crawler implementation (#5123)
This commit is contained in:
parent
31737bc81e
commit
1cb2c80bf2
@ -24,6 +24,12 @@ from ..utilities.paginator import paginate
|
|||||||
|
|
||||||
class GlueBackend(BaseBackend):
|
class GlueBackend(BaseBackend):
|
||||||
PAGINATION_MODEL = {
|
PAGINATION_MODEL = {
|
||||||
|
"list_crawlers": {
|
||||||
|
"input_token": "next_token",
|
||||||
|
"limit_key": "max_results",
|
||||||
|
"limit_default": 100,
|
||||||
|
"unique_attribute": "name",
|
||||||
|
},
|
||||||
"list_jobs": {
|
"list_jobs": {
|
||||||
"input_token": "next_token",
|
"input_token": "next_token",
|
||||||
"limit_key": "max_results",
|
"limit_key": "max_results",
|
||||||
@ -143,6 +149,10 @@ class GlueBackend(BaseBackend):
|
|||||||
def get_crawlers(self):
    """Return every registered crawler as a list (empty when none exist)."""
    return list(self.crawlers.values())
|
|
||||||
|
@paginate(pagination_model=PAGINATION_MODEL)
def list_crawlers(self):
    """Return all crawlers for the ListCrawlers API.

    Pagination (next_token / max_results) is applied by the @paginate
    decorator using PAGINATION_MODEL["list_crawlers"].
    """
    # Use .values() directly instead of iterating .items() and
    # discarding the keys (perflint PERF102).
    return list(self.crawlers.values())
|
|
||||||
def start_crawler(self, name):
    """Start the crawler registered under *name*.

    The lookup is delegated to self.get_crawler; any error it raises for
    an unknown name propagates to the caller.
    """
    self.get_crawler(name).start_crawler()
@ -380,6 +390,9 @@ class FakeCrawler(BaseModel):
|
|||||||
self.crawl_elapsed_time = 0
|
self.crawl_elapsed_time = 0
|
||||||
self.last_crawl_info = None
|
self.last_crawl_info = None
|
||||||
|
|
||||||
|
def get_name(self):
    """Accessor for this crawler's name attribute."""
    return self.name
|
|
||||||
def as_dict(self):
|
def as_dict(self):
|
||||||
last_crawl = self.last_crawl_info.as_dict() if self.last_crawl_info else None
|
last_crawl = self.last_crawl_info.as_dict() if self.last_crawl_info else None
|
||||||
data = {
|
data = {
|
||||||
|
@ -307,6 +307,30 @@ class GlueResponse(BaseResponse):
|
|||||||
crawlers = self.glue_backend.get_crawlers()
|
crawlers = self.glue_backend.get_crawlers()
|
||||||
return json.dumps({"Crawlers": [crawler.as_dict() for crawler in crawlers]})
|
return json.dumps({"Crawlers": [crawler.as_dict() for crawler in crawlers]})
|
||||||
|
|
||||||
|
def list_crawlers(self):
    """Handle the ListCrawlers API call.

    Reads NextToken / MaxResults / Tags from the request parameters,
    pages through the backend, filters the page by tags, and returns
    the JSON response body with CrawlerNames and NextToken.
    """
    next_token = self._get_param("NextToken")
    max_results = self._get_int_param("MaxResults")
    tags = self._get_param("Tags")
    crawlers, next_token = self.glue_backend.list_crawlers(
        next_token=next_token, max_results=max_results
    )
    # filter_crawlers_by_tags already returns a fresh list of names, so
    # re-copying it with an identity comprehension (C416) is unnecessary.
    filtered_crawler_names = self.filter_crawlers_by_tags(crawlers, tags)
    return json.dumps(
        dict(
            CrawlerNames=filtered_crawler_names,
            NextToken=next_token,
        )
    )
|
|
||||||
|
def filter_crawlers_by_tags(self, crawlers, tags):
    """Return the names of *crawlers*.

    When *tags* is non-empty, only crawlers whose tags satisfy
    is_tags_match against the filter are included.
    """
    selected_names = []
    for crawler in crawlers:
        if not tags or self.is_tags_match(crawler.tags, tags):
            selected_names.append(crawler.get_name())
    return selected_names
|
|
||||||
def start_crawler(self):
|
def start_crawler(self):
|
||||||
name = self.parameters.get("Name")
|
name = self.parameters.get("Name")
|
||||||
self.glue_backend.start_crawler(name)
|
self.glue_backend.start_crawler(name)
|
||||||
@ -402,9 +426,9 @@ class GlueResponse(BaseResponse):
|
|||||||
return [job.get_name() for job in jobs if self.is_tags_match(job.tags, tags)]
|
return [job.get_name() for job in jobs if self.is_tags_match(job.tags, tags)]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_tags_match(job_tags, tags):
|
def is_tags_match(glue_resource_tags, tags):
|
||||||
mutual_keys = set(job_tags).intersection(tags)
|
mutual_keys = set(glue_resource_tags).intersection(tags)
|
||||||
for key in mutual_keys:
|
for key in mutual_keys:
|
||||||
if job_tags[key] == tags[key]:
|
if glue_resource_tags[key] == tags[key]:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
@ -193,7 +193,7 @@ def test_list_jobs_with_tags():
|
|||||||
|
|
||||||
|
|
||||||
@mock_glue
|
@mock_glue
|
||||||
def test_next_token_logic_does_not_create_infinite_loop():
|
def test_list_jobs_next_token_logic_does_not_create_infinite_loop():
|
||||||
client = create_glue_client()
|
client = create_glue_client()
|
||||||
create_test_jobs(client, 4)
|
create_test_jobs(client, 4)
|
||||||
first_response = client.list_jobs(MaxResults=1)
|
first_response = client.list_jobs(MaxResults=1)
|
||||||
@ -228,3 +228,66 @@ def create_test_job_w_all_attributes(client, **job_attributes):
|
|||||||
def create_test_jobs(client, number_of_jobs):
    """Register *number_of_jobs* default test jobs through *client*."""
    for _job in range(number_of_jobs):
        create_test_job(client)
|
|
||||||
|
|
||||||
|
def create_test_crawler(client, tags=None):
    """Create a crawler with a random unique name and return that name.

    *tags* defaults to an empty tag set when falsy.
    """
    name = str(uuid4())
    crawler_args = {
        "Name": name,
        "Role": "test_role",
        "Targets": {"S3Targets": [{"Path": "s3://tests3target"}]},
        "Tags": tags or {},
    }
    client.create_crawler(**crawler_args)
    return name
|
|
||||||
|
|
||||||
|
def create_test_crawlers(client, number_of_crawlers):
    """Register *number_of_crawlers* untagged test crawlers through *client*."""
    for _crawler in range(number_of_crawlers):
        create_test_crawler(client)
|
|
||||||
|
|
||||||
|
@mock_glue
def test_list_crawlers_with_max_results():
    """MaxResults=2 against 4 crawlers returns 2 names plus a NextToken."""
    glue = create_glue_client()
    create_test_crawlers(glue, 4)
    page = glue.list_crawlers(MaxResults=2)
    page["CrawlerNames"].should.have.length_of(2)
    page.should.have.key("NextToken")
|
|
||||||
|
|
||||||
|
@mock_glue
def test_list_crawlers_from_next_token():
    """A follow-up call with NextToken returns the remaining crawlers."""
    glue = create_glue_client()
    create_test_crawlers(glue, 10)
    first_page = glue.list_crawlers(MaxResults=3)
    second_page = glue.list_crawlers(NextToken=first_page["NextToken"])
    second_page["CrawlerNames"].should.have.length_of(7)
|
|
||||||
|
|
||||||
|
@mock_glue
def test_list_crawlers_with_max_results_greater_than_actual_results():
    """MaxResults larger than the crawler count returns everything."""
    glue = create_glue_client()
    create_test_crawlers(glue, 4)
    page = glue.list_crawlers(MaxResults=10)
    page["CrawlerNames"].should.have.length_of(4)
|
|
||||||
|
|
||||||
|
@mock_glue
def test_list_crawlers_with_tags():
    """Only crawlers carrying the requested tag pair are listed."""
    glue = create_glue_client()
    create_test_crawler(glue)
    create_test_crawler(glue, {"string": "string"})
    tagged_page = glue.list_crawlers(Tags={"string": "string"})
    tagged_page["CrawlerNames"].should.have.length_of(1)
|
|
||||||
|
|
||||||
|
@mock_glue
def test_list_crawlers_next_token_logic_does_not_create_infinite_loop():
    """Exhausting the pagination must terminate with an absent NextToken."""
    glue = create_glue_client()
    create_test_crawlers(glue, 4)
    token = glue.list_crawlers(MaxResults=1)["NextToken"]
    while token:
        token = glue.list_crawlers(NextToken=token).get("NextToken")
    assert not token
|
Loading…
Reference in New Issue
Block a user