Batch simple jobs inform log stream name (#5825)

This commit is contained in:
Roc Granada Verdú 2023-01-11 21:45:09 +01:00 committed by GitHub
parent c5a65448f8
commit 6da12892a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 23 additions and 8 deletions

View File

@@ -473,6 +473,8 @@ class Job(threading.Thread, BaseModel, DockerModel, ManagedState):
         self.name = "MOTO-BATCH-" + self.job_id
         self._log_backend = log_backend
+        self._log_group = "/aws/batch/job"
+        self._stream_name = f"{self.job_definition.name}/default/{self.job_id}"
         self.log_stream_name: Optional[str] = None
         self.attempts: List[Dict[str, Any]] = []
@@ -729,12 +731,12 @@ class Job(threading.Thread, BaseModel, DockerModel, ManagedState):
         logs = sorted(logs, key=lambda log: log["timestamp"])
         # Send to cloudwatch
-        log_group = "/aws/batch/job"
-        stream_name = f"{self.job_definition.name}/default/{self.job_id}"
-        self.log_stream_name = stream_name
-        self._log_backend.ensure_log_group(log_group, None)
-        self._log_backend.create_log_stream(log_group, stream_name)
-        self._log_backend.put_log_events(log_group, stream_name, logs)
+        self.log_stream_name = self._stream_name
+        self._log_backend.ensure_log_group(self._log_group, None)
+        self._log_backend.create_log_stream(self._log_group, self._stream_name)
+        self._log_backend.put_log_events(
+            self._log_group, self._stream_name, logs
+        )
         result = container.wait() or {}
         exit_code = result.get("StatusCode", 0)

View File

@@ -79,6 +79,7 @@ class BatchSimpleBackend(BaseBackend):
         # We don't want to actually run the job - just mark it as succeeded
         job.job_started_at = datetime.datetime.now()
+        job.log_stream_name = job._stream_name
         job._start_attempt()
         job._mark_stopped(success=True)

View File

@@ -234,6 +234,7 @@ def test_terminate_job():
     resp["jobs"][0]["jobName"].should.equal("test1")
     resp["jobs"][0]["status"].should.equal("FAILED")
     resp["jobs"][0]["statusReason"].should.equal("test_terminate")
+    resp["jobs"][0]["container"].should.have.key("logStreamName")

     ls_name = f"{job_def_name}/default/{job_id}"
@@ -308,6 +309,7 @@ def test_cancel_pending_job():
     resp = batch_client.describe_jobs(jobs=[job_id])
     resp["jobs"][0]["jobName"].should.equal("test_job_name")
     resp["jobs"][0]["statusReason"].should.equal("test_cancel")
+    resp["jobs"][0]["container"].shouldnt.have.key("logStreamName")

 @mock_logs
@@ -341,6 +343,7 @@ def test_cancel_running_job():
     resp = batch_client.describe_jobs(jobs=[job_id])
     resp["jobs"][0]["jobName"].should.equal("test_job_name")
     resp["jobs"][0].shouldnt.have.key("statusReason")
+    resp["jobs"][0]["container"].should.have.key("logStreamName")

 @mock_batch
@@ -399,7 +402,7 @@ def test_failed_job():
     _, _, _, iam_arn = _setup(ec2_client, iam_client)

     job_def_name = "exit-1"
-    commands = ["exit", "1"]
+    commands = ["kill"]
     job_def_arn, queue_arn = prepare_job(batch_client, commands, iam_arn, job_def_name)

     resp = batch_client.submit_job(
@@ -413,6 +416,7 @@ def test_failed_job():
     resp = batch_client.describe_jobs(jobs=[job_id])

     if resp["jobs"][0]["status"] == "FAILED":
+        resp["jobs"][0]["container"].should.have.key("logStreamName")
         break
     if resp["jobs"][0]["status"] == "SUCCEEDED":
         raise RuntimeError("Batch job succeeded even though it had exit code 1")
@@ -552,7 +556,7 @@ def test_failed_dependencies():
             "image": "busybox:latest",
             "vcpus": 1,
             "memory": 128,
-            "command": ["exit", "1"],
+            "command": ["kill"],
         },
     )
     job_def_arn_failure = resp["jobDefinitionArn"]
@@ -592,6 +596,13 @@ def test_failed_dependencies():
         assert resp["jobs"][1]["status"] != "SUCCEEDED", "Job 3 cannot succeed"

         if resp["jobs"][1]["status"] == "FAILED":
+            assert resp["jobs"][0]["container"].should.have.key(
+                "logStreamName"
+            ), "Job 2 should have logStreamName because it FAILED but was in RUNNING state"
+            assert resp["jobs"][1]["container"].shouldnt.have.key(
+                "logStreamName"
+            ), "Job 3 shouldn't have logStreamName because it was never in RUNNING state"
             break
         time.sleep(0.5)

View File

@@ -70,6 +70,7 @@ def test_submit_job_by_name():
     job["status"].should.equal("SUCCEEDED")
     job.should.contain("container")
     job["container"].should.contain("command")
+    job["container"].should.contain("logStreamName")

 @mock_batch_simple