From 79a8d070a3b6f64839f0e72835ed2e4086bb22c5 Mon Sep 17 00:00:00 2001 From: Dheeren Mohta Date: Thu, 11 Jun 2026 14:25:31 +0530 Subject: [PATCH] fix(client): allow non-numeric run/task IDs for Argo and Step Functions Run IDs are not always numeric. Orchestrators produce prefixed string IDs: - Argo Workflows: "argo-" (e.g. "argo-moviestatsflow-68z2h") - AWS Step Functions: "sfn-" The numeric-only check would cause Run('FlowName/argo-...') and Run('FlowName/sfn-...') to raise MetaflowInvalidPathspec, breaking all users who access runs created via orchestrators through the Client API. Validate run and task IDs with a permissive alphanumeric pattern instead of a strict numeric one; it also allows hyphens, underscores and dots (Argo workflow names are Kubernetes DNS subdomain names and may contain '.', e.g. for @project deployments), matching all current ID formats. Fixes #948 (partial) --- metaflow/client/core.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/metaflow/client/core.py b/metaflow/client/core.py index 76b4a472316..b8601d30cfe 100644 --- a/metaflow/client/core.py +++ b/metaflow/client/core.py @@ -2,6 +2,7 @@ import json import os +import re import tarfile from collections import namedtuple from datetime import datetime @@ -60,6 +61,13 @@ current_metadata = False +# Run IDs can be plain integers (local runs) or prefixed strings for orchestrators: +# "argo-" for Argo Workflows (workflow names may contain dots) +# "sfn-" for AWS Step Functions +# Task IDs follow the same pattern — numeric by default but may be prefixed by +# orchestrators that pass their own identifiers. \Z (not $) rejects a trailing newline. +_RUN_ID_PATTERN = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._\-]*\Z") + def metadata(ms: str) -> str: """ @@ -336,6 +344,24 @@ def __init__( "Expects DataArtifact('FlowName/RunID/StepName/TaskID/ArtifactName')" ) + # Validate run ID and task ID format. + # Run IDs are numeric for local runs but orchestrators (Argo Workflows, + # AWS Step Functions) produce prefixed string IDs like "argo-*" and "sfn-*". + # The same applies to task IDs. 
+ if len(ids) >= 2 and not _RUN_ID_PATTERN.match(ids[1]): + raise MetaflowInvalidPathspec( + "Invalid run ID '%s' in pathspec '%s'. " + "Run IDs must be alphanumeric and may contain hyphens, dots or " + "underscores (e.g. '123', 'argo-myflow-abc12', 'sfn-exec')." + % (ids[1], pathspec) + ) + if len(ids) >= 4 and not _RUN_ID_PATTERN.match(ids[3]): + raise MetaflowInvalidPathspec( + "Invalid task ID '%s' in pathspec '%s'. " + "Task IDs must be alphanumeric and may contain hyphens, dots or underscores." + % (ids[3], pathspec) + ) + self.id = ids[-1] self._pathspec = pathspec self._object = self._get_object(*ids)