Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/11625.enhance.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Preserve the original `str` form of `start_command` in model definitions all the way to the kernel runner so the kernel runner's existing `[shell, "-c", str]` wrapping handles shell semantics (line continuations, `$VAR` expansion, pipes) — eliminating the need to manually strip backslashes from copy-pasted multi-line vendor recipes (e.g. vLLM).
12 changes: 8 additions & 4 deletions docs/manager/graphql-reference/supergraph.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -9839,8 +9839,10 @@ type ModelServiceConfig
"""
preStartActions: [PreStartAction!]!

"""Command to start the model service."""
startCommand: [String!]
"""
Command to start the model service. A list is exec'ed directly as argv; a string is wrapped as ``[shell, '-c', str]`` by the kernel runner so shell semantics (line continuations, ``$VAR`` expansion, pipes) apply.
"""
startCommand: JSON

"""Shell configured for the model service."""
shell: String!
Expand All @@ -9863,8 +9865,10 @@ input ModelServiceConfigInput
"""
preStartActions: [PreStartActionInput!] = null

"""Command to start the model service."""
startCommand: [String!] = null
"""
Command to start the model service. A JSON array (``list[str]``) is exec'ed directly as argv; a JSON string is wrapped as ``[shell, '-c', str]`` by the kernel runner so shell semantics (line continuations, ``$VAR`` expansion, pipes) apply.
"""
startCommand: JSON = null

"""Shell configured for the model service."""
shell: String = null
Expand Down
12 changes: 8 additions & 4 deletions docs/manager/graphql-reference/v2-schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -6592,8 +6592,10 @@ type ModelServiceConfig {
"""
preStartActions: [PreStartAction!]!

"""Command to start the model service."""
startCommand: [String!]
"""
Command to start the model service. A list is exec'ed directly as argv; a string is wrapped as ``[shell, '-c', str]`` by the kernel runner so shell semantics (line continuations, ``$VAR`` expansion, pipes) apply.
"""
startCommand: JSON

"""Shell configured for the model service."""
shell: String!
Expand All @@ -6614,8 +6616,10 @@ input ModelServiceConfigInput {
"""
preStartActions: [PreStartActionInput!] = null

"""Command to start the model service."""
startCommand: [String!] = null
"""
Command to start the model service. A JSON array (``list[str]``) is exec'ed directly as argv; a JSON string is wrapped as ``[shell, '-c', str]`` by the kernel runner so shell semantics (line continuations, ``$VAR`` expansion, pipes) apply.
"""
startCommand: JSON = null

"""Shell configured for the model service."""
shell: String = null
Expand Down
75 changes: 40 additions & 35 deletions src/ai/backend/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
AliasChoices,
ConfigDict,
Field,
field_validator,
)

from . import validators as tx
Expand Down Expand Up @@ -144,16 +143,6 @@ def snake_to_kebab_case(string: str) -> str:
agent_selector_config_iv = t.Dict({}) | agent_selector_globalconfig_iv


def _normalize_start_command(value: Any) -> Any:
"""Coerce legacy ``str`` ``start_command`` into argv list via
:func:`shlex.split`. Lists, ``None``, and other types pass through so
the schema's strict check rejects them.
"""
if isinstance(value, str):
return shlex.split(value)
return value


model_definition_iv = t.Dict({
t.Key("models"): t.List(
t.Dict({
Expand All @@ -170,8 +159,9 @@ def _normalize_start_command(value: Any) -> Any:
t.Key("args"): t.Dict().allow_extra("*"),
})
),
t.Key("start_command", default=None): (t.Null | t.String | t.List(t.String))
>> _normalize_start_command,
# A ``str`` ``start_command`` is preserved as-is so the
# kernel runner can wrap it as ``[shell, "-c", str]``.
t.Key("start_command", default=None): t.Null | t.String | t.List(t.String),
t.Key("shell", default="/bin/bash"): t.String,
t.Key("port"): t.ToInt[1:],
t.Key("health_check", default=None): t.Null
Expand Down Expand Up @@ -258,14 +248,23 @@ class ModelServiceConfig(BaseConfigModel):
default_factory=list,
description="List of pre-start actions to execute before starting the model service.",
)
start_command: list[str] | None = Field(
start_command: str | list[str] | None = Field(
default=None,
description=(
"Argv list to start the model service. ``{model_path}`` in any "
"token is replaced per-token with the resolved ``model_path`` "
"before launch. ``None`` falls back to the image's default CMD."
"Command to start the model service. Two forms are accepted: "
"an argv list (``list[str]``) which is exec'ed directly, or a "
"single shell script string which the kernel runner wraps as "
"``[shell, '-c', str]`` (giving the user full shell semantics "
"such as line continuations, ``$VAR`` expansion, and pipes). "
"``{model_path}`` is replaced with the resolved ``model_path`` "
"before launch — per-token for the list form, in-place for the "
"string form. ``None`` falls back to the image's default CMD."
),
examples=[["python", "service.py"], ["vllm", "serve", "{model_path}"]],
examples=[
["python", "service.py"],
["vllm", "serve", "{model_path}"],
"vllm serve {model_path} --tensor-parallel-size 2",
],
)
shell: str = Field(
default="/bin/bash",
Expand All @@ -282,11 +281,6 @@ class ModelServiceConfig(BaseConfigModel):
description="Health check configuration for the model service.",
)

@field_validator("start_command", mode="before")
@classmethod
def _coerce_start_command(cls, value: Any) -> Any:
return _normalize_start_command(value)


class ModelMetadata(BaseConfigModel):
author: str | None = Field(
Expand Down Expand Up @@ -498,7 +492,12 @@ def health_check_config(self) -> ModelHealthCheck | None:

def with_args_appended(self, args: list[str]) -> ModelDefinition:
"""Return a copy with ``args`` appended to each model's
``service.start_command`` as separate argv tokens.
``service.start_command``.

For the list form, ``args`` are concatenated as separate argv
tokens. For the string form, ``args`` are shell-quoted via
:func:`shlex.join` and appended after a single space so they are
parsed by the same shell that runs the user's script.

Models with ``service is None`` are passed through unchanged;
a model whose ``start_command`` is ``None`` receives ``args``
Expand All @@ -512,9 +511,14 @@ def with_args_appended(self, args: list[str]) -> ModelDefinition:
if model.service is None:
new_models.append(model)
continue
existing = model.service.start_command or []
existing = model.service.start_command
merged: str | list[str]
if isinstance(existing, str):
merged = f"{existing} {shlex.join(args)}"
else:
merged = (existing or []) + args
new_service = model.service.model_copy(
update={"start_command": existing + args},
update={"start_command": merged},
)
new_models.append(model.model_copy(update={"service": new_service}))
return self.model_copy(update={"models": new_models})
Expand Down Expand Up @@ -548,16 +552,11 @@ def to_resolved(self) -> ModelHealthCheck:

class ModelServiceConfigDraft(BaseConfigModel):
pre_start_actions: list[PreStartAction] | None = None
start_command: list[str] | None = None
start_command: str | list[str] | None = None
shell: str | None = None
port: int | None = None
health_check: ModelHealthCheckDraft | None = None

@field_validator("start_command", mode="before")
@classmethod
def _coerce_start_command(cls, value: Any) -> Any:
return _normalize_start_command(value)

def to_resolved(self) -> ModelServiceConfig:
# Drop unset (None) scalars so the strict type's ``Field(default=...)``
# declarations remain the single source of truth for default values;
Expand All @@ -583,9 +582,15 @@ def to_resolved(self) -> ModelConfig:
# ``start_command`` are resolved here, at the same moment the
# draft becomes a strict ``ModelConfig`` and ``model_path`` is
# finalized. Placeholders therefore never propagate downstream.
service.start_command = [
token.replace("{model_path}", self.model_path) for token in service.start_command
]
if isinstance(service.start_command, str):
service.start_command = service.start_command.replace(
"{model_path}", self.model_path
)
else:
service.start_command = [
token.replace("{model_path}", self.model_path)
for token in service.start_command
]
payload = self.model_dump(exclude_none=True, exclude={"service"})
payload["service"] = service
return ModelConfig.model_validate(payload)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class ModelMetadataInput(BaseRequestModel):

class ModelServiceConfigInput(BaseRequestModel):
pre_start_actions: list[PreStartAction] | None = None
start_command: list[str] | None = None
start_command: str | list[str] | None = None
shell: str | None = None
port: int | None = None
health_check: ModelHealthCheckInput | None = None
Expand Down
10 changes: 8 additions & 2 deletions src/ai/backend/common/dto/manager/v2/deployment/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,8 +299,14 @@ class ModelServiceConfigInfoDTO(BaseResponseModel):
default_factory=list,
description="List of pre-start actions to execute before starting the model service.",
)
start_command: list[str] | None = Field(
default=None, description="Command to start the model service."
start_command: str | list[str] | None = Field(
default=None,
description=(
"Command to start the model service. A list is exec'ed directly "
"as argv; a string is wrapped as ``[shell, '-c', str]`` by the "
"kernel runner so shell semantics (line continuations, ``$VAR`` "
"expansion, pipes) apply."
),
)
shell: str = Field(
default="/bin/bash",
Expand Down
20 changes: 16 additions & 4 deletions src/ai/backend/manager/api/gql/deployment/types/revision.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,8 +383,14 @@ class ModelServiceConfigGQL:
pre_start_actions: list[PreStartActionGQL] = gql_field(
description="List of pre-start actions to execute before starting the model service."
)
start_command: list[str] | None = gql_field(
description="Command to start the model service.", default=None
start_command: JSON | None = gql_field(
description=(
"Command to start the model service. A JSON array (``list[str]``) "
"is exec'ed directly as argv; a JSON string is wrapped as "
"``[shell, '-c', str]`` by the kernel runner so shell semantics "
"(line continuations, ``$VAR`` expansion, pipes) apply."
),
default=None,
)
shell: str = gql_field(description="Shell configured for the model service.")
port: int = gql_field(description="Port number for the model service.")
Expand Down Expand Up @@ -891,8 +897,14 @@ class ModelServiceConfigInputGQL(PydanticInputMixin[ModelServiceConfigInputDTO])
description="List of pre-start actions to execute before starting the model service.",
default=None,
)
start_command: list[str] | None = gql_field(
description="Command to start the model service.", default=None
start_command: JSON | None = gql_field(
description=(
"Command to start the model service. A JSON array (``list[str]``) "
"is exec'ed directly as argv; a JSON string is wrapped as "
"``[shell, '-c', str]`` by the kernel runner so shell semantics "
"(line continuations, ``$VAR`` expansion, pipes) apply."
),
default=None,
)
shell: str | None = gql_field(
description="Shell configured for the model service.", default=None
Expand Down
63 changes: 63 additions & 0 deletions tests/unit/common/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,45 @@ def test_to_resolved_leaves_start_command_with_no_placeholder_unchanged(self) ->
assert service is not None
assert service.start_command == ["my-server", "--bind", "0.0.0.0"]

def test_to_resolved_preserves_str_start_command(self) -> None:
# When ``start_command`` is given as a single shell-script string,
# the validator must NOT split it into argv tokens; the kernel
# runner is responsible for wrapping it via ``[shell, "-c", str]``.
# This protects copy-pasted multi-line vendor recipes (e.g. vLLM)
# where backslash-newline must be interpreted by the shell, not
# by ``shlex.split``.
script = "vllm serve {model_path} \\\n --tensor-parallel-size 2"
draft = ModelDefinitionDraft.model_validate({
"models": [
{
"name": "demo",
"model_path": "/data",
"service": {
"start_command": script,
"port": 8000,
},
}
]
})

resolved = draft.to_resolved()

service = resolved.models[0].service
assert service is not None
assert isinstance(service.start_command, str)
assert service.start_command == ("vllm serve /data \\\n --tensor-parallel-size 2")

def test_model_service_config_accepts_str_start_command(self) -> None:
# Strict ``ModelServiceConfig`` (post-merge) accepts the str form
# too — required so the resolved revision can carry it all the
# way to the agent and kernel runner unchanged.
service = ModelServiceConfig.model_validate({
"start_command": "python -m foo --bar",
"port": 8000,
})

assert service.start_command == "python -m foo --bar"


class TestModelDefinitionWithArgsAppended:
@pytest.fixture
Expand Down Expand Up @@ -438,3 +477,27 @@ async def test_each_model_receives_args(
assert first is not None and second is not None
assert first.start_command == ["a", "--shared", "true"]
assert second.start_command == ["b", "--shared", "true"]

async def test_appends_args_to_str_start_command_with_shell_quoting(self) -> None:
# Preset ARGS are appended to a string ``start_command`` after a
# single space, with each argument passed through ``shlex.join``
# so values containing whitespace or quotes are safely parsed by
# the same shell that will run the script.
definition = ModelDefinition(
models=[
ModelConfig(
name="demo",
model_path="/models",
service=ModelServiceConfig(
port=8000,
start_command="vllm serve /models",
),
)
]
)

result = definition.with_args_appended(["--prompt", "hello world"])

service = result.models[0].service
assert service is not None
assert service.start_command == "vllm serve /models --prompt 'hello world'"
Loading