Skip to content
Merged
1 change: 1 addition & 0 deletions changes/11863.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add an `enable` flag to model-deployment health checks (default off), making health checks opt-in. The custom runtime variant ships with a disabled default health check, and a model-definition file that declares a health check is treated as enabled.
10 changes: 10 additions & 0 deletions docs/manager/graphql-reference/supergraph.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -9673,6 +9673,11 @@ input ModelDeploymentNetworkAccessInput
type ModelHealthCheck
@join__type(graph: STRAWBERRY)
{
"""
Whether the route is health-checked. When false the route activates immediately.
"""
enable: Boolean!

"""Interval in seconds between health checks."""
interval: Float!

Expand All @@ -9696,6 +9701,11 @@ type ModelHealthCheck
input ModelHealthCheckInput
@join__type(graph: STRAWBERRY)
{
"""
Whether the route should be health-checked. When false the route activates immediately and the remaining fields are ignored.
"""
enable: Boolean! = false

"""Interval in seconds between health checks."""
interval: Float = null

Expand Down
10 changes: 10 additions & 0 deletions docs/manager/graphql-reference/v2-schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -6424,6 +6424,11 @@ input ModelDeploymentNetworkAccessInput {

"""Added in 26.4.2. Health check configuration for a model service."""
type ModelHealthCheck {
"""
Whether the route is health-checked. When false the route activates immediately.
"""
enable: Boolean!

"""Interval in seconds between health checks."""
interval: Float!

Expand All @@ -6445,6 +6450,11 @@ type ModelHealthCheck {

"""Added in 26.4.0. Health check configuration for a model service."""
input ModelHealthCheckInput {
"""
Whether the route should be health-checked. When false the route activates immediately and the remaining fields are ignored.
"""
enable: Boolean! = false

"""Interval in seconds between health checks."""
interval: Float = null

Expand Down
20 changes: 19 additions & 1 deletion fixtures/manager/example-runtime-variants.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"start_command": ["vllm", "serve", "{model_path}"],
"port": 8000,
"health_check": {
"enable": true,
"path": "/health",
"interval": 10.0,
"max_retries": 10,
Expand All @@ -33,6 +34,7 @@
"service": {
"port": 8000,
"health_check": {
"enable": true,
"path": "/v1/health/ready",
"interval": 10.0,
"max_retries": 10,
Expand Down Expand Up @@ -70,6 +72,7 @@
"start_command": ["text-generation-launcher", "--model-id", "{model_path}"],
"port": 3000,
"health_check": {
"enable": true,
"path": "/info",
"interval": 10.0,
"max_retries": 10,
Expand Down Expand Up @@ -98,6 +101,7 @@
],
"port": 9001,
"health_check": {
"enable": true,
"path": "/health",
"interval": 10.0,
"max_retries": 10,
Expand All @@ -120,6 +124,7 @@
"start_command": ["max", "serve", "--model", "{model_path}"],
"port": 8000,
"health_check": {
"enable": true,
"path": "/health",
"interval": 10.0,
"max_retries": 10,
Expand All @@ -135,7 +140,20 @@
"description": "Custom (Default)",
"reads_vfolder_config_files": true,
"default_model_definition": {
"models": null
"models": [
{
"name": "custom-model",
"service": {
"health_check": {
"enable": false,
"path": "/health",
"interval": 10.0,
"max_retries": 10,
"initial_delay": 1800.0
}
}
}
]
}
}
]
Expand Down
48 changes: 39 additions & 9 deletions src/ai/backend/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,21 @@ class PreStartAction(BaseConfigModel):


class ModelHealthCheck(BaseConfigModel):
enable: bool = Field(
default=False,
description=(
"Whether the route should be health-checked. When false the route "
"becomes active immediately and the remaining fields are ignored."
),
examples=[False],
)
interval: float = Field(
default=10.0,
description="Interval in seconds between health checks.",
examples=[10.0],
)
path: str = Field(
default="/health",
description="Path to check for health status.",
examples=["/health"],
)
Expand Down Expand Up @@ -351,6 +360,7 @@ def _merge_service_config(
hb, ho = base.health_check, override.health_check
hs = ho.model_fields_set
health_check = ModelHealthCheck.model_construct(
enable=_pick(hb.enable, ho.enable, "enable" in hs),
interval=_pick(hb.interval, ho.interval, "interval" in hs),
path=_pick(hb.path, ho.path, "path" in hs),
max_retries=_pick(hb.max_retries, ho.max_retries, "max_retries" in hs),
Expand Down Expand Up @@ -441,9 +451,8 @@ def merge(self, override: ModelDefinition) -> ModelDefinition:

def health_check_config(self) -> ModelHealthCheck | None:
    """Return the first enabled health check declared by any model, if one exists.

    Models are scanned in order. A model contributes only when it has a
    service, that service declares a health check, and that health check
    has ``enable`` set to a truthy value.

    Returns:
        The first matching health check, or ``None`` when no model declares
        an enabled one.
    """
    for model in self.models:
        service = model.service
        if not service:
            continue
        health_check = service.health_check
        # Health checks are opt-in: a declared-but-disabled block must not be
        # reported, otherwise callers would probe a route that asked not to be.
        if health_check and health_check.enable:
            return health_check
    return None

def with_args_appended(self, args: list[str]) -> ModelDefinition:
Expand Down Expand Up @@ -481,6 +490,7 @@ def with_args_appended(self, args: list[str]) -> ModelDefinition:


class ModelHealthCheckDraft(BaseConfigModel):
enable: bool | None = None
interval: float | None = None
path: str | None = None
max_retries: int | None = None
Expand All @@ -491,8 +501,6 @@ class ModelHealthCheckDraft(BaseConfigModel):
def to_resolved(self) -> ModelHealthCheck:
    """Convert this draft into a validated ``ModelHealthCheck``.

    Fields left as ``None`` are omitted from the payload, so the strict
    type's own ``Field(default=...)`` declarations stay the single source
    of truth for default values.
    """
    explicit_fields = self.model_dump(exclude_none=True)
    # Validation is delegated to the strict type; any failure it raises
    # propagates to the caller unchanged.
    return ModelHealthCheck.model_validate(explicit_fields)


Expand All @@ -511,10 +519,9 @@ def _wrap_str_start_command(cls, data: Any) -> Any:
def to_resolved(self) -> ModelServiceConfig:
    """Convert this draft into a validated ``ModelServiceConfig``.

    Scalar fields left as ``None`` are omitted so the strict type's
    ``Field(default=...)`` declarations remain the single source of truth
    for default values. The nested ``health_check`` draft is resolved
    separately through its own ``to_resolved``.
    """
    # Dump everything except the nested draft, which is resolved explicitly below.
    fields = self.model_dump(exclude_none=True, exclude={"health_check"})
    if self.health_check:
        fields["health_check"] = self.health_check.to_resolved()
    else:
        fields["health_check"] = None
    # Missing required fields (e.g. ``port``) surface as
    # ``BackendAISchemaValidationFailed`` from this validation step.
    return ModelServiceConfig.model_validate(fields)
Expand Down Expand Up @@ -547,6 +554,7 @@ def _merge_health_check_draft(
) -> ModelHealthCheckDraft:
s = override.model_fields_set
return ModelHealthCheckDraft.model_construct(
enable=_pick(base.enable, override.enable, "enable" in s),
interval=_pick(base.interval, override.interval, "interval" in s),
path=_pick(base.path, override.path, "path" in s),
max_retries=_pick(base.max_retries, override.max_retries, "max_retries" in s),
Expand Down Expand Up @@ -650,6 +658,28 @@ def to_resolved(self) -> ModelDefinition:
"models": [m.to_resolved() for m in (self.models or [])],
})

@classmethod
def from_file_payload(cls, payload: Mapping[str, Any]) -> ModelDefinitionDraft:
    """Build a draft from a model-definition file, normalizing ``health_check`` blocks.

    For every model whose ``service`` contains a ``health_check`` key:

    * an empty value (``null`` or ``{}``) becomes ``{"enable": False}``;
    * a non-empty block with no ``enable`` value gets ``enable`` set to ``True``.

    Models without a ``service``, or whose service has no ``health_check``
    key, are left untouched.
    """
    # Round-trip through the model and dump by field name so the key lookups
    # below match regardless of snake/kebab input.
    parsed = cls.model_validate(dict(payload))
    data = parsed.model_dump(exclude_unset=True, by_alias=False)
    for entry in data.get("models") or []:
        service = entry.get("service")
        if service is None or "health_check" not in service:
            continue
        block = service["health_check"]
        if block:
            # Declaring a non-empty block opts in; only fill ``enable`` when unset.
            if block.get("enable") is None:
                block["enable"] = True
        else:
            # An empty block is an explicit opt-out; disable it so it overrides
            # any enabled baseline instead of inheriting one.
            service["health_check"] = {"enable": False}
    return cls.model_validate(data)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this draft resolved?


def find_config_file(daemon_name: str) -> Path:
toml_path_from_env = os.environ.get("BACKEND_CONFIG_FILE", None)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@


class ModelHealthCheckInput(BaseRequestModel):
enable: bool = False
interval: float | None = None
path: str | None = None
max_retries: int | None = None
Expand Down
4 changes: 4 additions & 0 deletions src/ai/backend/common/dto/manager/v2/deployment/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,10 @@ class PreStartActionInfoDTO(BaseResponseModel):
class ModelHealthCheckInfoDTO(BaseResponseModel):
"""Output DTO for model health check configuration."""

enable: bool = Field(
default=False,
description="Whether the route is health-checked. When false the route activates immediately.",
)
interval: float = Field(description="Interval in seconds between health checks.")
path: str = Field(description="Path to check for health status.")
max_retries: int = Field(description="Maximum number of retries for health check.")
Expand Down
20 changes: 19 additions & 1 deletion src/ai/backend/install/fixtures/example-runtime-variants.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"start_command": ["vllm", "serve", "{model_path}"],
"port": 8000,
"health_check": {
"enable": true,
"path": "/health",
"interval": 10.0,
"max_retries": 10,
Expand All @@ -33,6 +34,7 @@
"service": {
"port": 8000,
"health_check": {
"enable": true,
"path": "/v1/health/ready",
"interval": 10.0,
"max_retries": 10,
Expand Down Expand Up @@ -70,6 +72,7 @@
"start_command": ["text-generation-launcher", "--model-id", "{model_path}"],
"port": 3000,
"health_check": {
"enable": true,
"path": "/info",
"interval": 10.0,
"max_retries": 10,
Expand Down Expand Up @@ -98,6 +101,7 @@
],
"port": 9001,
"health_check": {
"enable": true,
"path": "/health",
"interval": 10.0,
"max_retries": 10,
Expand All @@ -120,6 +124,7 @@
"start_command": ["max", "serve", "--model", "{model_path}"],
"port": 8000,
"health_check": {
"enable": true,
"path": "/health",
"interval": 10.0,
"max_retries": 10,
Expand All @@ -135,7 +140,20 @@
"description": "Custom (Default)",
"reads_vfolder_config_files": true,
"default_model_definition": {
"models": null
"models": [
{
"name": "custom-model",
"service": {
"health_check": {
"enable": false,
"path": "/health",
"interval": 10.0,
"max_retries": 10,
"initial_delay": 1800.0
}
}
}
]
}
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def _pre_start_action_to_dto(action: PreStartAction) -> PreStartActionInfoDTO:

def _model_health_check_to_dto(check: ModelHealthCheck) -> ModelHealthCheckInfoDTO:
return ModelHealthCheckInfoDTO(
enable=check.enable,
interval=check.interval,
path=check.path,
max_retries=check.max_retries,
Expand Down
17 changes: 17 additions & 0 deletions src/ai/backend/manager/api/gql/deployment/types/revision.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,16 @@ class PreStartActionGQL:
name="ModelHealthCheck",
)
class ModelHealthCheckGQL:
enable: bool = gql_added_field(
BackendAIGQLMeta(
added_version=NEXT_RELEASE_VERSION,
description=(
"Whether the route is health-checked. When false the route activates "
"immediately and the remaining fields are ignored."
),
),
default=False,
)
interval: float = gql_field(description="Interval in seconds between health checks.")
path: str = gql_field(description="Path to check for health status.")
max_retries: int = gql_field(description="Maximum number of retries for health check.")
Expand Down Expand Up @@ -861,6 +871,13 @@ class PreStartActionInputGQL(PydanticInputMixin[PreStartActionDTO]):
name="ModelHealthCheckInput",
)
class ModelHealthCheckInputGQL(PydanticInputMixin[ModelHealthCheckInputDTO]):
enable: bool = gql_field(
description=(
"Whether the route should be health-checked. When false the route activates "
"immediately and the remaining fields are ignored."
),
default=False,
)
interval: float | None = gql_field(
description="Interval in seconds between health checks.", default=None
)
Expand Down
Loading
Loading