From f9e298f01dce614434ce0ba48f27737fa9de0966 Mon Sep 17 00:00:00 2001 From: odncode Date: Fri, 22 May 2026 08:41:16 +0100 Subject: [PATCH 1/3] fix: apply user-defined aws_batch_tags regardless of BATCH_EMIT_TAGS User-defined tags (aws_batch_tags and METAFLOW_BATCH_DEFAULT_TAGS) were trapped inside the 'if BATCH_EMIT_TAGS:' guard, which controls Metaflow's internal observability tags. This caused user tags to be silently dropped when BATCH_EMIT_TAGS was not set. In enterprise deployments with mandatory AWS tagging policies (SCPs), this results in Batch job submission failures with no indication that the root cause is a missing Metaflow config flag. Fix: move the aws_batch_tags block outside the BATCH_EMIT_TAGS guard. Internal tags remain gated; user-defined tags are always applied. Includes an AST-based regression test that verifies aws_batch_tags is not referenced inside any BATCH_EMIT_TAGS conditional block. Closes #3209 --- metaflow/plugins/aws/batch/batch.py | 11 +++--- test/unit/test_batch_user_tags.py | 61 +++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 test/unit/test_batch_user_tags.py diff --git a/metaflow/plugins/aws/batch/batch.py b/metaflow/plugins/aws/batch/batch.py index 865ec9c1ea6..6c8be968453 100644 --- a/metaflow/plugins/aws/batch/batch.py +++ b/metaflow/plugins/aws/batch/batch.py @@ -388,7 +388,6 @@ def create_job( ]: if key in attrs: job.tag(key, attrs.get(key)) - # As some values can be affected by users, sanitize them so they adhere to AWS tagging restrictions. for key in [ "metaflow.version", "metaflow.user", @@ -398,10 +397,12 @@ def create_job( k, v = sanitize_batch_tag(key, attrs.get(key)) job.tag(k, v) - if aws_batch_tags is not None: - for key, value in aws_batch_tags.items(): - job.tag(key, value) - + # User-defined tags (e.g. for cost attribution or compliance) should + # always be applied, regardless of BATCH_EMIT_TAGS which controls + # Metaflow's internal observability tags. + if aws_batch_tags is not None: + for key, value in aws_batch_tags.items(): + job.tag(key, value) return job def launch_job( diff --git a/test/unit/test_batch_user_tags.py b/test/unit/test_batch_user_tags.py new file mode 100644 index 00000000000..7b15e73fb61 --- /dev/null +++ b/test/unit/test_batch_user_tags.py @@ -0,0 +1,61 @@ +""" +Regression test for user-defined AWS Batch tags. + +User-defined tags (aws_batch_tags) and METAFLOW_BATCH_DEFAULT_TAGS should +always be applied to Batch jobs, regardless of the BATCH_EMIT_TAGS setting. +BATCH_EMIT_TAGS controls only Metaflow's internal observability tags +(app=metaflow, metaflow.flow_name, etc.), not user-specified tags. + +See: https://github.com/Netflix/metaflow/issues/3209 +""" + +import ast +import inspect +import textwrap + +import pytest + + +def test_user_tags_not_inside_emit_tags_guard(): + """ + Verify that the aws_batch_tags block in batch.py is NOT inside + the BATCH_EMIT_TAGS conditional. This is a structural test that + checks the source code's AST to ensure user tags are always applied. + """ + from metaflow.plugins.aws.batch.batch import Batch + + source = inspect.getsource(Batch.create_job) + # dedent because inspect.getsource preserves class indentation + source = textwrap.dedent(source) + tree = ast.parse(source) + + # Find all If nodes that test BATCH_EMIT_TAGS + def find_emit_tags_guards(node): + """Find all 'if BATCH_EMIT_TAGS:' blocks and return their AST nodes.""" + guards = [] + for child in ast.walk(node): + if isinstance(child, ast.If): + # Check if the test is just 'BATCH_EMIT_TAGS' + test = child.test + if isinstance(test, ast.Name) and test.id == "BATCH_EMIT_TAGS": + guards.append(child) + return guards + + def contains_aws_batch_tags_usage(node): + """Check if an AST node contains reference to 'aws_batch_tags'.""" + for child in ast.walk(node): + if isinstance(child, ast.Name) and child.id == "aws_batch_tags": + return True + return False + + guards = find_emit_tags_guards(tree) + assert guards, "Could not find 'if BATCH_EMIT_TAGS:' block in create_job" + + for guard in guards: + # Check the body of each BATCH_EMIT_TAGS guard + for stmt in guard.body: + assert not contains_aws_batch_tags_usage(stmt), ( + "aws_batch_tags is used inside 'if BATCH_EMIT_TAGS:' block. " + "User-defined tags should be applied unconditionally, outside " + "the BATCH_EMIT_TAGS guard." + ) \ No newline at end of file From 56a7330afc7e2354a00505ea1e48ac402e36ec3e Mon Sep 17 00:00:00 2001 From: odncode Date: Fri, 22 May 2026 08:56:30 +0100 Subject: [PATCH 2/3] test: add positive assertion for aws_batch_tags presence Addresses review feedback: previous test only checked the negative (tags not inside guard) without verifying they exist at all. New test ensures aws_batch_tags is referenced in create_job, guarding against silent removal of the feature. --- test/unit/test_batch_user_tags.py | 78 ++++++++++++++++++------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/test/unit/test_batch_user_tags.py b/test/unit/test_batch_user_tags.py index 7b15e73fb61..b4e8d96db3e 100644 --- a/test/unit/test_batch_user_tags.py +++ b/test/unit/test_batch_user_tags.py @@ -16,46 +16,60 @@ import pytest -def test_user_tags_not_inside_emit_tags_guard(): - """ - Verify that the aws_batch_tags block in batch.py is NOT inside - the BATCH_EMIT_TAGS conditional. This is a structural test that - checks the source code's AST to ensure user tags are always applied. - """ +def _get_create_job_ast(): + """Parse create_job source into an AST tree.""" from metaflow.plugins.aws.batch.batch import Batch source = inspect.getsource(Batch.create_job) - # dedent because inspect.getsource preserves class indentation source = textwrap.dedent(source) - tree = ast.parse(source) - - # Find all If nodes that test BATCH_EMIT_TAGS - def find_emit_tags_guards(node): - """Find all 'if BATCH_EMIT_TAGS:' blocks and return their AST nodes.""" - guards = [] - for child in ast.walk(node): - if isinstance(child, ast.If): - # Check if the test is just 'BATCH_EMIT_TAGS' - test = child.test - if isinstance(test, ast.Name) and test.id == "BATCH_EMIT_TAGS": - guards.append(child) - return guards - - def contains_aws_batch_tags_usage(node): - """Check if an AST node contains reference to 'aws_batch_tags'.""" - for child in ast.walk(node): - if isinstance(child, ast.Name) and child.id == "aws_batch_tags": - return True - return False - - guards = find_emit_tags_guards(tree) + return ast.parse(source) + + +def _find_emit_tags_guards(tree): + """Find all 'if BATCH_EMIT_TAGS:' blocks.""" + guards = [] + for child in ast.walk(tree): + if isinstance(child, ast.If): + test = child.test + if isinstance(test, ast.Name) and test.id == "BATCH_EMIT_TAGS": + guards.append(child) + return guards + + +def _contains_name(node, name): + """Check if an AST node contains a reference to the given name.""" + for child in ast.walk(node): + if isinstance(child, ast.Name) and child.id == name: + return True + return False + + +def test_user_tags_not_inside_emit_tags_guard(): + """ + Verify that the aws_batch_tags block in batch.py is NOT inside + the BATCH_EMIT_TAGS conditional. + """ + tree = _get_create_job_ast() + guards = _find_emit_tags_guards(tree) assert guards, "Could not find 'if BATCH_EMIT_TAGS:' block in create_job" for guard in guards: - # Check the body of each BATCH_EMIT_TAGS guard for stmt in guard.body: - assert not contains_aws_batch_tags_usage(stmt), ( + assert not _contains_name(stmt, "aws_batch_tags"), ( "aws_batch_tags is used inside 'if BATCH_EMIT_TAGS:' block. " "User-defined tags should be applied unconditionally, outside " "the BATCH_EMIT_TAGS guard." - ) \ No newline at end of file + ) + + +def test_user_tags_are_applied_in_create_job(): + """ + Verify that aws_batch_tags is actually referenced in create_job + at the top level (not just absent from the guard). Guards against + silent removal of the entire feature. + """ + tree = _get_create_job_ast() + assert _contains_name(tree, "aws_batch_tags"), ( + "aws_batch_tags is not referenced anywhere in create_job. " + "User-defined tag application may have been accidentally removed." + ) \ No newline at end of file From 9ac9ecafff0cd63a55cad3c44fdfedb37acaf2db Mon Sep 17 00:00:00 2001 From: odncode Date: Tue, 9 Jun 2026 08:37:07 +0100 Subject: [PATCH 3/3] style: add missing EOF newline (pre-commit) Signed-off-by: odncode --- test/unit/test_batch_user_tags.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/test_batch_user_tags.py b/test/unit/test_batch_user_tags.py index b4e8d96db3e..8fc34f4208d 100644 --- a/test/unit/test_batch_user_tags.py +++ b/test/unit/test_batch_user_tags.py @@ -72,4 +72,4 @@ def test_user_tags_are_applied_in_create_job(): assert _contains_name(tree, "aws_batch_tags"), ( "aws_batch_tags is not referenced anywhere in create_job. " "User-defined tag application may have been accidentally removed." - ) \ No newline at end of file + )