From 65e33b947e1539d42f2bde12380de5106239e876 Mon Sep 17 00:00:00 2001 From: odncode Date: Fri, 22 May 2026 14:01:32 +0100 Subject: [PATCH 1/2] fix: make IAM role optional for AWS Batch job submission On EC2 compute environments that use instance profiles, jobRoleArn is not required. The AWS Batch API documents it as optional, but Metaflow raised unconditionally when iam_role was None. Two changes: - Remove the unconditional raise in BatchJob.execute() when iam_role is None - Make jobRoleArn conditional in _register_job_definition so None is not passed to boto3 (which rejects None for string fields) Closes #3208 --- metaflow/plugins/aws/batch/batch_client.py | 8 +-- test/unit/test_batch_optional_iam_role.py | 79 ++++++++++++++++++++++ 2 files changed, 81 insertions(+), 6 deletions(-) create mode 100644 test/unit/test_batch_optional_iam_role.py diff --git a/metaflow/plugins/aws/batch/batch_client.py b/metaflow/plugins/aws/batch/batch_client.py index 8675ad4a7a9..9a7016b1211 100644 --- a/metaflow/plugins/aws/batch/batch_client.py +++ b/metaflow/plugins/aws/batch/batch_client.py @@ -79,11 +79,7 @@ def execute(self): raise BatchJobException( "Unable to launch AWS Batch job. No docker image specified." ) - if self._iam_role is None: - raise BatchJobException( - "Unable to launch AWS Batch job. No IAM role specified." - ) - + # Multinode if getattr(self, "num_parallel", 0) >= 1: num_nodes = self.num_parallel @@ -188,7 +184,7 @@ def _register_job_definition( "type": "container", "containerProperties": { "image": image, - "jobRoleArn": job_role, + **({"jobRoleArn": job_role} if job_role else {}), "command": ["echo", "hello world"], "resourceRequirements": [ {"value": "1", "type": "VCPU"}, diff --git a/test/unit/test_batch_optional_iam_role.py b/test/unit/test_batch_optional_iam_role.py new file mode 100644 index 00000000000..11542d0cf1d --- /dev/null +++ b/test/unit/test_batch_optional_iam_role.py @@ -0,0 +1,79 @@ +""" +Regression test for optional IAM role in AWS Batch job submission. + +On EC2 compute environments that use instance profiles for container +credentials, jobRoleArn is not required. The AWS Batch API documents +it as optional, but Metaflow raised unconditionally when it was None. + +See: https://github.com/Netflix/metaflow/issues/3208 +""" + +import pytest + +from metaflow.plugins.aws.batch.batch_client import BatchJob + + +def test_execute_does_not_raise_when_iam_role_is_none(): + """ + BatchJob.execute() should not raise when iam_role is None. + EC2 compute environments can use instance profiles instead. + """ + job = BatchJob.__new__(BatchJob) + job.payload = { + "containerOverrides": {}, + "tags": {}, + } + job._image = "python:3.10" + job._iam_role = None + job.num_parallel = 0 + + # execute() will fail later (no client, no queue, etc.) but it + # must NOT raise "No IAM role specified" — that's the bug. + # We catch any exception and check it's not the IAM one. + try: + job.execute() + except Exception as e: + assert "No IAM role specified" not in str(e), ( + "execute() still raises when iam_role is None — " + "EC2 instance profile credentials should be allowed" + ) + + +def test_execute_still_requires_image(): + """ + The docker image check should remain — only the IAM role + check was removed. + """ + job = BatchJob.__new__(BatchJob) + job.payload = { + "containerOverrides": {}, + "tags": {}, + } + job._image = None + job._iam_role = None + + with pytest.raises(Exception, match="No docker image specified"): + job.execute() + + +def test_job_definition_omits_job_role_arn_when_none(): + """ + When job_role is None, jobRoleArn should not be present in the + job definition at all. boto3 rejects None for string fields. + """ + from pathlib import Path + + batch_client_path = Path(__file__).resolve().parents[2] / ( + "metaflow/plugins/aws/batch/batch_client.py" + ) + source_text = batch_client_path.read_text() + + assert "jobRoleArn" in source_text, ( + "jobRoleArn reference not found in batch_client.py" + ) + # The fix uses conditional dict unpacking: **({"jobRoleArn": ...} if ... else {}) + # If jobRoleArn is assigned unconditionally, this pattern won't be present + assert "if job_role" in source_text, ( + "jobRoleArn is not conditionally included — it will be passed as None " + "to boto3, which rejects None for string fields" + ) \ No newline at end of file From fcece71914b66d000479c974c6ffca13aeb164be Mon Sep 17 00:00:00 2001 From: odncode Date: Tue, 9 Jun 2026 08:05:27 +0100 Subject: [PATCH 2/2] style: fix pre-commit formatting (black, EOF, trailing whitespace) Signed-off-by: odncode --- metaflow/plugins/aws/batch/batch_client.py | 2 +- test/unit/test_batch_optional_iam_role.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/metaflow/plugins/aws/batch/batch_client.py b/metaflow/plugins/aws/batch/batch_client.py index 9a7016b1211..f782ed4133a 100644 --- a/metaflow/plugins/aws/batch/batch_client.py +++ b/metaflow/plugins/aws/batch/batch_client.py @@ -79,7 +79,7 @@ def execute(self): raise BatchJobException( "Unable to launch AWS Batch job. No docker image specified." ) - + # Multinode if getattr(self, "num_parallel", 0) >= 1: num_nodes = self.num_parallel diff --git a/test/unit/test_batch_optional_iam_role.py b/test/unit/test_batch_optional_iam_role.py index 11542d0cf1d..d75b4e8ab77 100644 --- a/test/unit/test_batch_optional_iam_role.py +++ b/test/unit/test_batch_optional_iam_role.py @@ -68,12 +68,12 @@ def test_job_definition_omits_job_role_arn_when_none(): ) source_text = batch_client_path.read_text() - assert "jobRoleArn" in source_text, ( - "jobRoleArn reference not found in batch_client.py" - ) + assert ( + "jobRoleArn" in source_text + ), "jobRoleArn reference not found in batch_client.py" # The fix uses conditional dict unpacking: **({"jobRoleArn": ...} if ... else {}) # If jobRoleArn is assigned unconditionally, this pattern won't be present assert "if job_role" in source_text, ( "jobRoleArn is not conditionally included — it will be passed as None " "to boto3, which rejects None for string fields" - ) \ No newline at end of file + )