diff --git a/metaflow/client/core.py b/metaflow/client/core.py
index 76b4a472316..b8601d30cfe 100644
--- a/metaflow/client/core.py
+++ b/metaflow/client/core.py
@@ -2,6 +2,7 @@
 
 import json
 import os
+import re
 import tarfile
 from collections import namedtuple
 from datetime import datetime
@@ -60,6 +61,18 @@
 
 current_metadata = False
 
+# Run and task IDs are plain integers for local runs, but orchestrators supply
+# their own prefixed string identifiers:
+#   "argo-" for Argo Workflows
+#   "sfn-"  for AWS Step Functions
+# An ID must start with an alphanumeric character and may then contain
+# alphanumerics, hyphens, or underscores.
+# Apply this with fullmatch(): with match(), the trailing "$" would also accept
+# an ID that ends in a newline.
+# NOTE(review): confirm that no supported orchestrator emits IDs containing
+# other characters (e.g. "." in workflow names) before enforcing this.
+_RUN_ID_PATTERN = re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9\-_]*$')
+
 
 
 def metadata(ms: str) -> str:
     """
@@ -336,6 +349,24 @@ def __init__(
                     "Expects DataArtifact('FlowName/RunID/StepName/TaskID/ArtifactName')"
                 )
 
+        # Validate run ID and task ID format.
+        # Run IDs are numeric for local runs but orchestrators (Argo Workflows,
+        # AWS Step Functions) produce prefixed string IDs like "argo-*" and "sfn-*".
+        # The same applies to task IDs. fullmatch() rejects a trailing newline.
+        if len(ids) >= 2 and not _RUN_ID_PATTERN.fullmatch(ids[1]):
+            raise MetaflowInvalidPathspec(
+                "Invalid run ID '%s' in pathspec '%s'. "
+                "Run IDs must be alphanumeric and may contain hyphens or "
+                "underscores (e.g. '123', 'argo-myflow-abc12', 'sfn-exec')."
+                % (ids[1], pathspec)
+            )
+        if len(ids) >= 4 and not _RUN_ID_PATTERN.fullmatch(ids[3]):
+            raise MetaflowInvalidPathspec(
+                "Invalid task ID '%s' in pathspec '%s'. "
+                "Task IDs must be alphanumeric and may contain hyphens or underscores."
+                % (ids[3], pathspec)
+            )
+
         self.id = ids[-1]
         self._pathspec = pathspec
         self._object = self._get_object(*ids)