diff --git a/changes/11863.feature.md b/changes/11863.feature.md new file mode 100644 index 00000000000..597bb4e6a4f --- /dev/null +++ b/changes/11863.feature.md @@ -0,0 +1 @@ +Add an `enable` flag to model-deployment health checks (default off), making health checks opt-in. The custom runtime variant ships with a disabled default health check, and a model-definition file that declares a health check is treated as enabled. diff --git a/docs/manager/graphql-reference/supergraph.graphql b/docs/manager/graphql-reference/supergraph.graphql index 3b51f010246..a3620e4e6e1 100644 --- a/docs/manager/graphql-reference/supergraph.graphql +++ b/docs/manager/graphql-reference/supergraph.graphql @@ -9673,6 +9673,11 @@ input ModelDeploymentNetworkAccessInput type ModelHealthCheck @join__type(graph: STRAWBERRY) { + """ + Whether the route is health-checked. When false the route activates immediately. + """ + enable: Boolean! + """Interval in seconds between health checks.""" interval: Float! @@ -9696,6 +9701,11 @@ type ModelHealthCheck input ModelHealthCheckInput @join__type(graph: STRAWBERRY) { + """ + Whether the route should be health-checked. When false the route activates immediately and the remaining fields are ignored. + """ + enable: Boolean! = false + """Interval in seconds between health checks.""" interval: Float = null diff --git a/docs/manager/graphql-reference/v2-schema.graphql b/docs/manager/graphql-reference/v2-schema.graphql index 5178f2dbf48..24271549e60 100644 --- a/docs/manager/graphql-reference/v2-schema.graphql +++ b/docs/manager/graphql-reference/v2-schema.graphql @@ -6424,6 +6424,11 @@ input ModelDeploymentNetworkAccessInput { """Added in 26.4.2. Health check configuration for a model service.""" type ModelHealthCheck { + """ + Whether the route is health-checked. When false the route activates immediately. + """ + enable: Boolean! + """Interval in seconds between health checks.""" interval: Float! @@ -6445,6 +6450,11 @@ type ModelHealthCheck { """Added in 26.4.0. Health check configuration for a model service.""" input ModelHealthCheckInput { + """ + Whether the route should be health-checked. When false the route activates immediately and the remaining fields are ignored. + """ + enable: Boolean! = false + """Interval in seconds between health checks.""" interval: Float = null diff --git a/fixtures/manager/example-runtime-variants.json b/fixtures/manager/example-runtime-variants.json index 34e5aaabfaf..aeae8d2f9d7 100644 --- a/fixtures/manager/example-runtime-variants.json +++ b/fixtures/manager/example-runtime-variants.json @@ -12,6 +12,7 @@ "start_command": ["vllm", "serve", "{model_path}"], "port": 8000, "health_check": { + "enable": false, "path": "/health", "interval": 10.0, "max_retries": 10, @@ -33,6 +34,7 @@ "service": { "port": 8000, "health_check": { + "enable": false, "path": "/v1/health/ready", "interval": 10.0, "max_retries": 10, @@ -70,6 +72,7 @@ "start_command": ["text-generation-launcher", "--model-id", "{model_path}"], "port": 3000, "health_check": { + "enable": false, "path": "/info", "interval": 10.0, "max_retries": 10, @@ -98,6 +101,7 @@ ], "port": 9001, "health_check": { + "enable": false, "path": "/health", "interval": 10.0, "max_retries": 10, @@ -120,6 +124,7 @@ "start_command": ["max", "serve", "--model", "{model_path}"], "port": 8000, "health_check": { + "enable": false, "path": "/health", "interval": 10.0, "max_retries": 10, @@ -135,7 +140,20 @@ "description": "Custom (Default)", "reads_vfolder_config_files": true, "default_model_definition": { - "models": null + "models": [ + { + "name": "custom-model", + "service": { + "health_check": { + "enable": false, + "path": "/health", + "interval": 10.0, + "max_retries": 10, + "initial_delay": 1800.0 + } + } + } + ] } } ] diff --git a/src/ai/backend/common/config.py b/src/ai/backend/common/config.py index a7f84ce9562..c56445f3644 100644 --- a/src/ai/backend/common/config.py +++ b/src/ai/backend/common/config.py @@ -170,12 +170,21 @@ class PreStartAction(BaseConfigModel): class ModelHealthCheck(BaseConfigModel): + enable: bool = Field( + default=False, + description=( + "Whether the route should be health-checked. When false the route " + "becomes active immediately and the remaining fields are ignored." + ), + examples=[False], + ) interval: float = Field( default=10.0, description="Interval in seconds between health checks.", examples=[10.0], ) path: str = Field( + default="/health", description="Path to check for health status.", examples=["/health"], ) @@ -351,6 +360,7 @@ def _merge_service_config( hb, ho = base.health_check, override.health_check hs = ho.model_fields_set health_check = ModelHealthCheck.model_construct( + enable=_pick(hb.enable, ho.enable, "enable" in hs), interval=_pick(hb.interval, ho.interval, "interval" in hs), path=_pick(hb.path, ho.path, "path" in hs), max_retries=_pick(hb.max_retries, ho.max_retries, "max_retries" in hs), @@ -441,9 +451,8 @@ def merge(self, override: ModelDefinition) -> ModelDefinition: def health_check_config(self) -> ModelHealthCheck | None: for model in self.models: - if model.service and model.service.health_check: - if model.service.health_check is not None: - return model.service.health_check + if model.service and model.service.health_check and model.service.health_check.enable: + return model.service.health_check return None def with_args_appended(self, args: list[str]) -> ModelDefinition: @@ -481,6 +490,7 @@ def with_args_appended(self, args: list[str]) -> ModelDefinition: class ModelHealthCheckDraft(BaseConfigModel): + enable: bool | None = None interval: float | None = None path: str | None = None max_retries: int | None = None @@ -491,8 +501,6 @@ class ModelHealthCheckDraft(BaseConfigModel): def to_resolved(self) -> ModelHealthCheck: # Drop unset (None) fields so the strict type's ``Field(default=...)`` # declarations remain the single source of truth for default values. - # Missing required fields (e.g. ``path``) surface as the strict - # type's ``BackendAISchemaValidationFailed`` via ``model_validate``. return ModelHealthCheck.model_validate(self.model_dump(exclude_none=True)) @@ -511,10 +519,9 @@ def _wrap_str_start_command(cls, data: Any) -> Any: def to_resolved(self) -> ModelServiceConfig: # Drop unset (None) scalars so the strict type's ``Field(default=...)`` # declarations remain the single source of truth for default values; - # resolve the nested ``health_check`` draft explicitly so its own - # required-field check (``path``) fires through its own - # ``model_validate``. Missing required fields (e.g. ``port``) - # surface as ``BackendAISchemaValidationFailed``. + # resolve the nested ``health_check`` draft explicitly. Missing + # required fields (e.g. ``port``) surface as + # ``BackendAISchemaValidationFailed``. payload = self.model_dump(exclude_none=True, exclude={"health_check"}) payload["health_check"] = self.health_check.to_resolved() if self.health_check else None return ModelServiceConfig.model_validate(payload) @@ -547,6 +554,7 @@ def _merge_health_check_draft( ) -> ModelHealthCheckDraft: s = override.model_fields_set return ModelHealthCheckDraft.model_construct( + enable=_pick(base.enable, override.enable, "enable" in s), interval=_pick(base.interval, override.interval, "interval" in s), path=_pick(base.path, override.path, "path" in s), max_retries=_pick(base.max_retries, override.max_retries, "max_retries" in s), @@ -650,6 +658,34 @@ def to_resolved(self) -> ModelDefinition: "models": [m.to_resolved() for m in (self.models or [])], }) + @classmethod + def from_file_payload(cls, payload: Mapping[str, Any]) -> ModelDefinitionDraft: + """Parse a model-definition file into a draft. + + A declared ``health_check`` block implies opt-in, so ``enable`` is + defaulted to ``True`` when unset. + """ + draft = cls.model_validate(dict(payload)) + if not draft.models: + return draft + new_models: list[ModelConfigDraft] = [] + changed = False + for model in draft.models: + service = model.service + if ( + service is not None + and service.health_check is not None + and (service.health_check.enable is None) + ): + new_health_check = service.health_check.model_copy(update={"enable": True}) + new_service = service.model_copy(update={"health_check": new_health_check}) + model = model.model_copy(update={"service": new_service}) + changed = True + new_models.append(model) + if not changed: + return draft + return draft.model_copy(update={"models": new_models}) + def find_config_file(daemon_name: str) -> Path: toml_path_from_env = os.environ.get("BACKEND_CONFIG_FILE", None) diff --git a/src/ai/backend/common/dto/manager/v2/deployment/request.py b/src/ai/backend/common/dto/manager/v2/deployment/request.py index 4c0f91af314..6c0f49a2f81 100644 --- a/src/ai/backend/common/dto/manager/v2/deployment/request.py +++ b/src/ai/backend/common/dto/manager/v2/deployment/request.py @@ -122,6 +122,7 @@ class ModelHealthCheckInput(BaseRequestModel): + enable: bool = False interval: float | None = None path: str | None = None max_retries: int | None = None diff --git a/src/ai/backend/common/dto/manager/v2/deployment/types.py b/src/ai/backend/common/dto/manager/v2/deployment/types.py index 80412484835..944d91ee2e1 100644 --- a/src/ai/backend/common/dto/manager/v2/deployment/types.py +++ b/src/ai/backend/common/dto/manager/v2/deployment/types.py @@ -278,6 +278,10 @@ class PreStartActionInfoDTO(BaseResponseModel): class ModelHealthCheckInfoDTO(BaseResponseModel): """Output DTO for model health check configuration.""" + enable: bool = Field( + default=False, + description="Whether the route is health-checked. When false the route activates immediately.", + ) interval: float = Field(description="Interval in seconds between health checks.") path: str = Field(description="Path to check for health status.") max_retries: int = Field(description="Maximum number of retries for health check.") diff --git a/src/ai/backend/install/fixtures/example-runtime-variants.json b/src/ai/backend/install/fixtures/example-runtime-variants.json index 34e5aaabfaf..aeae8d2f9d7 100644 --- a/src/ai/backend/install/fixtures/example-runtime-variants.json +++ b/src/ai/backend/install/fixtures/example-runtime-variants.json @@ -12,6 +12,7 @@ "start_command": ["vllm", "serve", "{model_path}"], "port": 8000, "health_check": { + "enable": false, "path": "/health", "interval": 10.0, "max_retries": 10, @@ -33,6 +34,7 @@ "service": { "port": 8000, "health_check": { + "enable": false, "path": "/v1/health/ready", "interval": 10.0, "max_retries": 10, @@ -70,6 +72,7 @@ "start_command": ["text-generation-launcher", "--model-id", "{model_path}"], "port": 3000, "health_check": { + "enable": false, "path": "/info", "interval": 10.0, "max_retries": 10, @@ -98,6 +101,7 @@ ], "port": 9001, "health_check": { + "enable": false, "path": "/health", "interval": 10.0, "max_retries": 10, @@ -120,6 +124,7 @@ "start_command": ["max", "serve", "--model", "{model_path}"], "port": 8000, "health_check": { + "enable": false, "path": "/health", "interval": 10.0, "max_retries": 10, @@ -135,7 +140,20 @@ "description": "Custom (Default)", "reads_vfolder_config_files": true, "default_model_definition": { - "models": null + "models": [ + { + "name": "custom-model", + "service": { + "health_check": { + "enable": false, + "path": "/health", + "interval": 10.0, + "max_retries": 10, + "initial_delay": 1800.0 + } + } + } + ] } } ] diff --git a/src/ai/backend/manager/api/adapters/deployment_revision_preset/adapter.py b/src/ai/backend/manager/api/adapters/deployment_revision_preset/adapter.py index c165dca3712..02efd69aacf 100644 --- a/src/ai/backend/manager/api/adapters/deployment_revision_preset/adapter.py +++ b/src/ai/backend/manager/api/adapters/deployment_revision_preset/adapter.py @@ -135,6 +135,7 @@ def _pre_start_action_to_dto(action: PreStartAction) -> PreStartActionInfoDTO: def _model_health_check_to_dto(check: ModelHealthCheck) -> ModelHealthCheckInfoDTO: return ModelHealthCheckInfoDTO( + enable=check.enable, interval=check.interval, path=check.path, max_retries=check.max_retries, diff --git a/src/ai/backend/manager/api/gql/deployment/types/revision.py b/src/ai/backend/manager/api/gql/deployment/types/revision.py index 5b4c4b8b12f..264a3e0ac62 100644 --- a/src/ai/backend/manager/api/gql/deployment/types/revision.py +++ b/src/ai/backend/manager/api/gql/deployment/types/revision.py @@ -357,6 +357,16 @@ class PreStartActionGQL: name="ModelHealthCheck", ) class ModelHealthCheckGQL: + enable: bool = gql_added_field( + BackendAIGQLMeta( + added_version=NEXT_RELEASE_VERSION, + description=( + "Whether the route is health-checked. When false the route activates " + "immediately and the remaining fields are ignored." + ), + ), + default=False, + ) interval: float = gql_field(description="Interval in seconds between health checks.") path: str = gql_field(description="Path to check for health status.") max_retries: int = gql_field(description="Maximum number of retries for health check.") @@ -861,6 +871,13 @@ class PreStartActionInputGQL(PydanticInputMixin[PreStartActionDTO]): name="ModelHealthCheckInput", ) class ModelHealthCheckInputGQL(PydanticInputMixin[ModelHealthCheckInputDTO]): + enable: bool = gql_field( + description=( + "Whether the route should be health-checked. When false the route activates " + "immediately and the remaining fields are ignored." + ), + default=False, + ) interval: float | None = gql_field( description="Interval in seconds between health checks.", default=None ) diff --git a/src/ai/backend/manager/models/alembic/versions/ed42bc179b91_set_custom_runtime_variant_default_definition.py b/src/ai/backend/manager/models/alembic/versions/ed42bc179b91_set_custom_runtime_variant_default_definition.py new file mode 100644 index 00000000000..4f639e91ab7 --- /dev/null +++ b/src/ai/backend/manager/models/alembic/versions/ed42bc179b91_set_custom_runtime_variant_default_definition.py @@ -0,0 +1,74 @@ +"""seed the custom runtime_variant default model definition + +``ModelHealthCheck`` gained an ``enable`` flag (default ``False``), so health +checks are now opt-in. The ``custom`` runtime variant shipped with an empty +definition (``{"models": null}``); seed it with a default model definition whose +health check is present but disabled, so a custom deployment can opt in later by +flipping ``enable``. Pre-existing health_check blocks need no backfill: without +the ``enable`` key they read back as disabled, matching the new opt-in default. + +Revision ID: ed42bc179b91 +Revises: eb9d9c018e85 +Create Date: 2026-05-29 + +""" + +# Part of: 26.6.0 + +import json +from typing import Any + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "ed42bc179b91" +down_revision = "eb9d9c018e85" +branch_labels = None +depends_on = None + + +_CUSTOM_DEFINITION: dict[str, Any] = { + "models": [ + { + "name": "custom-model", + "service": { + "health_check": { + "enable": False, + "path": "/health", + "interval": 10.0, + "max_retries": 10, + "initial_delay": 1800.0, + }, + }, + } + ] +} + +_EMPTY_DEFINITION: dict[str, Any] = {"models": None} + + +def upgrade() -> None: + op.get_bind().execute( + sa.text( + "UPDATE runtime_variants " + "SET default_model_definition = CAST(:seed AS JSONB) " + "WHERE name = 'custom' AND default_model_definition = CAST(:empty AS JSONB)" + ).bindparams( + seed=json.dumps(_CUSTOM_DEFINITION), + empty=json.dumps(_EMPTY_DEFINITION), + ) + ) + + +def downgrade() -> None: + op.get_bind().execute( + sa.text( + "UPDATE runtime_variants " + "SET default_model_definition = CAST(:empty AS JSONB) " + "WHERE name = 'custom' AND default_model_definition = CAST(:seed AS JSONB)" + ).bindparams( + seed=json.dumps(_CUSTOM_DEFINITION), + empty=json.dumps(_EMPTY_DEFINITION), + ) + ) diff --git a/src/ai/backend/manager/repositories/deployment/storage_source/storage_source.py b/src/ai/backend/manager/repositories/deployment/storage_source/storage_source.py index ff09cbd5e2f..f3ba9598899 100644 --- a/src/ai/backend/manager/repositories/deployment/storage_source/storage_source.py +++ b/src/ai/backend/manager/repositories/deployment/storage_source/storage_source.py @@ -87,7 +87,7 @@ async def fetch_model_definition( return None return FetchedModelDefinition( path=raw.filename, - model_definition=ModelDefinitionDraft.model_validate(dict(raw.payload)), + model_definition=ModelDefinitionDraft.from_file_payload(raw.payload), ) async def _fetch_config_file_in_candidates( diff --git a/tests/unit/common/test_config.py b/tests/unit/common/test_config.py index 0b63836731d..6fb10f4d284 100644 --- a/tests/unit/common/test_config.py +++ b/tests/unit/common/test_config.py @@ -9,6 +9,7 @@ ModelDefinition, ModelDefinitionDraft, ModelHealthCheck, + ModelHealthCheckDraft, ModelMetadata, ModelServiceConfig, _merge_config, @@ -179,6 +180,100 @@ def test_merge_definition(self) -> None: assert not missing, f"_merge_definition() does not handle: {missing}" +class TestHealthCheckEnable: + """Tests for the ``enable`` flag on ModelHealthCheck.""" + + def test_health_check_config_returns_none_when_disabled(self) -> None: + definition = ModelDefinition.model_validate({ + "models": [ + { + "name": "m", + "model_path": "/m", + "service": {"port": 8080, "health_check": {"path": "/health", "enable": False}}, + } + ] + }) + assert definition.health_check_config() is None + + def test_health_check_config_returns_check_when_enabled(self) -> None: + definition = ModelDefinition.model_validate({ + "models": [ + { + "name": "m", + "model_path": "/m", + "service": {"port": 8080, "health_check": {"path": "/health", "enable": True}}, + } + ] + }) + check = definition.health_check_config() + assert check is not None + assert check.path == "/health" + + def test_enable_defaults_to_false(self) -> None: + check = ModelHealthCheck.model_validate({"path": "/health"}) + assert check.enable is False + + def test_to_resolved_without_path_uses_default(self) -> None: + resolved = ModelHealthCheckDraft(enable=True).to_resolved() + assert resolved.enable is True + assert resolved.path == "/health" + + def test_file_normalization_enables_present_health_check(self) -> None: + normalized = ModelDefinitionDraft.from_file_payload({ + "models": [ + {"name": "m", "service": {"health_check": {"path": "/health"}}}, + ] + }) + assert normalized.models is not None + service = normalized.models[0].service + assert service is not None + hc = service.health_check + assert hc is not None + assert hc.enable is True + + def test_file_normalization_respects_explicit_enable_false(self) -> None: + normalized = ModelDefinitionDraft.from_file_payload({ + "models": [ + {"name": "m", "service": {"health_check": {"path": "/health", "enable": False}}}, + ] + }) + assert normalized.models is not None + service = normalized.models[0].service + assert service is not None + hc = service.health_check + assert hc is not None + assert hc.enable is False + + def test_file_normalization_no_health_check_unchanged(self) -> None: + normalized = ModelDefinitionDraft.from_file_payload({ + "models": [{"name": "m", "service": {"port": 8080}}] + }) + assert normalized.models is not None + service = normalized.models[0].service + assert service is not None + assert service.health_check is None + + def test_merge_override_enables_from_request(self) -> None: + """A disabled baseline is opted in when a higher-priority draft sets enable=True.""" + base = ModelServiceConfig.model_construct( + _fields_set={"health_check"}, + start_command=[], + port=2, + health_check=ModelHealthCheck.model_construct( + _fields_set=set(ModelHealthCheck.model_fields), enable=False, path="/health" + ), + ) + override = ModelServiceConfig.model_construct( + _fields_set={"health_check"}, + start_command=[], + port=2, + health_check=ModelHealthCheck.model_construct(_fields_set={"enable"}, enable=True), + ) + result = _merge_service_config(base, override) + assert result.health_check is not None + assert result.health_check.enable is True + + class TestModelConfigs: def test_sanitize_inline_dicts(self) -> None: sample = """ diff --git a/tests/unit/manager/test_registry.py b/tests/unit/manager/test_registry.py index 1b780047a9a..0a6233ead52 100644 --- a/tests/unit/manager/test_registry.py +++ b/tests/unit/manager/test_registry.py @@ -209,6 +209,7 @@ def test_returns_stored_health_check(self) -> None: start_command=["run"], port=8000, health_check=ModelHealthCheck( + enable=True, path="/custom", interval=5.0, max_retries=3,