From dc67b1d68f4ad7ae59130947ada0b1c1c11eb6bb Mon Sep 17 00:00:00 2001 From: kt Date: Thu, 4 Jun 2026 19:33:35 -0700 Subject: [PATCH 1/5] add checkpoint deployment cli support --- packages/prime/src/prime_cli/api/deployments.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/prime/src/prime_cli/api/deployments.py b/packages/prime/src/prime_cli/api/deployments.py index dee213593..69f0ea58b 100644 --- a/packages/prime/src/prime_cli/api/deployments.py +++ b/packages/prime/src/prime_cli/api/deployments.py @@ -82,6 +82,16 @@ def deploy_adapter(self, adapter_id: str) -> Adapter: raise APIError(f"Failed to deploy adapter: {e.response.text}") raise APIError(f"Failed to deploy adapter: {str(e)}") + def deploy_checkpoint(self, checkpoint_id: str) -> Adapter: + """Deploy a checkpoint by preparing it as an adapter for inference.""" + try: + response = self.client.post(f"/rft/checkpoints/{checkpoint_id}/deploy") + return Adapter.model_validate(response.get("adapter")) + except Exception as e: + if hasattr(e, "response") and hasattr(e.response, "text"): + raise APIError(f"Failed to deploy checkpoint: {e.response.text}") + raise APIError(f"Failed to deploy checkpoint: {str(e)}") + def unload_adapter(self, adapter_id: str) -> Adapter: """Unload an adapter from inference.""" try: From b7e80fd4a35e4f2d6fdbf41f10819829bca771a4 Mon Sep 17 00:00:00 2001 From: kt Date: Thu, 4 Jun 2026 19:34:29 -0700 Subject: [PATCH 2/5] add checkpoint deployment cli support --- .../src/prime_cli/commands/deployments.py | 39 ++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/packages/prime/src/prime_cli/commands/deployments.py b/packages/prime/src/prime_cli/commands/deployments.py index c3fabe1d2..72dbb563e 100644 --- a/packages/prime/src/prime_cli/commands/deployments.py +++ b/packages/prime/src/prime_cli/commands/deployments.py @@ -172,20 +172,31 @@ def list_deployments( def create_deployment( ctx: typer.Context, model_id: Optional[str] = typer.Argument(None, help="Model ID to deploy"), + checkpoint_id: Optional[str] = typer.Option( + None, + "--checkpoint-id", + help="Deploy a Hosted Training checkpoint by checkpoint ID", + ), yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"), ) -> None: """Deploy a model for inference. Makes the trained model available for inference requests. - Model must be in READY status. + Model must be in READY status. To deploy a checkpoint, pass --checkpoint-id. Example: prime deployments create prime deployments create --yes + + prime deployments create --checkpoint-id """ - if model_id is None: + if model_id and checkpoint_id: + console.print("[red]Error:[/red] Use either MODEL_ID or --checkpoint-id, not both.") + raise typer.Exit(1) + + if model_id is None and checkpoint_id is None: console.print(ctx.get_help()) raise typer.Exit(0) @@ -193,6 +204,30 @@ def create_deployment( api_client = APIClient() deployments_client = DeploymentsClient(api_client) + if checkpoint_id: + console.print("[bold]Deploying checkpoint:[/bold]") + console.print(f" Checkpoint ID: {checkpoint_id}") + console.print() + + if not yes: + confirm = typer.confirm("Are you sure you want to deploy this checkpoint?") + if not confirm: + console.print("Cancelled.") + raise typer.Exit(0) + + adapter = deployments_client.deploy_checkpoint(checkpoint_id) + + console.print("[green]Deployment initiated successfully![/green]") + console.print(f"Adapter ID: [cyan]{adapter.id}[/cyan]") + console.print(f"Status: [yellow]{adapter.deployment_status}[/yellow]") + console.print("\n[dim]The model is being deployed. This may take a few minutes.[/dim]") + console.print("[dim]Use 'prime deployments list' to check deployment status.[/dim]") + + _print_inference_usage(adapter.base_model, adapter.id) + return + + assert model_id is not None + # Get model to validate status model = deployments_client.get_adapter(model_id) From e8636f1a27558557f44888b352863ee182bb78df Mon Sep 17 00:00:00 2001 From: kt Date: Thu, 4 Jun 2026 19:35:01 -0700 Subject: [PATCH 3/5] add checkpoint deployment cli support --- packages/prime/tests/test_deployments.py | 115 +++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/packages/prime/tests/test_deployments.py b/packages/prime/tests/test_deployments.py index 759e5de39..122eaee8f 100644 --- a/packages/prime/tests/test_deployments.py +++ b/packages/prime/tests/test_deployments.py @@ -1,12 +1,42 @@ from types import SimpleNamespace from typing import Any +from prime_cli.api.deployments import DeploymentsClient +from prime_cli.client import APIError from prime_cli.main import app from prime_cli.utils import strip_ansi from typer.testing import CliRunner runner = CliRunner() +TEST_ENV = {"PRIME_API_KEY": "dummy", "PRIME_DISABLE_VERSION_CHECK": "1", "COLUMNS": "200"} + + +def _adapter_response( + *, + adapter_id: str = "adapter-123", + base_model: str = "meta-llama/Llama-3.1-8B-Instruct", + deployment_status: str = "DEPLOYING", +) -> dict[str, Any]: + return { + "adapter": { + "id": adapter_id, + "displayName": "Checkpoint Adapter", + "userId": "user-123", + "teamId": None, + "rftRunId": "run-123", + "baseModel": base_model, + "step": 20, + "status": "READY", + "deploymentStatus": deployment_status, + "deployedAt": None, + "deploymentError": None, + "createdAt": "2026-01-01T00:00:00Z", + "updatedAt": "2026-01-01T00:00:00Z", + }, + "message": "Checkpoint adapter deployment started", + } + def test_deployments_create_prints_chat_and_api_key_commands(monkeypatch) -> None: monkeypatch.setenv("PRIME_API_KEY", "dummy") @@ -57,3 +87,88 @@ def deploy_adapter(self, model_id: str) -> Any: assert "export PRIME_API_KEY=" in output assert "PRIME_API_KEY" in output assert "curl -X POST" in output + + +def test_deployments_client_deploy_checkpoint_posts_endpoint() -> None: + captured: dict[str, Any] = {} + + class DummyAPIClient: + def post(self, endpoint: str, json: dict[str, Any] | None = None) -> dict: + captured["endpoint"] = endpoint + captured["json"] = json + return _adapter_response() + + adapter = DeploymentsClient(DummyAPIClient()).deploy_checkpoint("ckpt-123") + + assert captured["endpoint"] == "/rft/checkpoints/ckpt-123/deploy" + assert captured["json"] is None + assert adapter.id == "adapter-123" + + +def test_deployments_create_checkpoint_prints_adapter_result(monkeypatch) -> None: + monkeypatch.setattr("prime_cli.main.check_for_update", lambda: (False, None)) + + adapter = SimpleNamespace( + id="adapter-456", + base_model="Qwen/Qwen3.5-0.8B", + deployment_status="DEPLOYING", + ) + + class DummyDeploymentsClient: + def __init__(self, api_client: Any) -> None: + self.api_client = api_client + + def deploy_checkpoint(self, checkpoint_id: str) -> Any: + assert checkpoint_id == "ckpt-456" + return adapter + + monkeypatch.setattr("prime_cli.commands.deployments.APIClient", lambda: object()) + monkeypatch.setattr( + "prime_cli.commands.deployments.DeploymentsClient", + DummyDeploymentsClient, + ) + + result = runner.invoke( + app, + ["deployments", "create", "--checkpoint-id", "ckpt-456", "--yes"], + env=TEST_ENV, + ) + output = strip_ansi(result.output) + + assert result.exit_code == 0, result.output + assert "Deploying checkpoint:" in output + assert "Checkpoint ID: ckpt-456" in output + assert "Deployment initiated successfully!" in output + assert "Adapter ID: adapter-456" in output + assert "Status: DEPLOYING" in output + assert '"Qwen/Qwen3.5-0.8B:adapter-456"' in output + assert "prime deployments list" in output + + +def test_deployments_create_checkpoint_surfaces_conflict_errors(monkeypatch) -> None: + monkeypatch.setattr("prime_cli.main.check_for_update", lambda: (False, None)) + + class DummyDeploymentsClient: + def __init__(self, api_client: Any) -> None: + self.api_client = api_client + + def deploy_checkpoint(self, checkpoint_id: str) -> Any: + assert checkpoint_id == "ckpt-busy" + raise APIError("HTTP 409: Checkpoint adapter preparation is already in progress") + + monkeypatch.setattr("prime_cli.commands.deployments.APIClient", lambda: object()) + monkeypatch.setattr( + "prime_cli.commands.deployments.DeploymentsClient", + DummyDeploymentsClient, + ) + + result = runner.invoke( + app, + ["deployments", "create", "--checkpoint-id", "ckpt-busy", "--yes"], + env=TEST_ENV, + ) + output = strip_ansi(result.output) + + assert result.exit_code == 1 + assert "Error: HTTP 409" in output + assert "Checkpoint adapter preparation is already in progress" in output From cacdd7c5b8770523de11b48faae20199f13489cd Mon Sep 17 00:00:00 2001 From: Kevin Thomas Date: Sun, 21 Jun 2026 14:56:46 -0700 Subject: [PATCH 4/5] fix checkpoint deployment cli validation --- .../src/prime_cli/commands/deployments.py | 16 ++++++++++++++-- packages/prime/tests/test_deployments.py | 18 ++++++++++++++++-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/packages/prime/src/prime_cli/commands/deployments.py b/packages/prime/src/prime_cli/commands/deployments.py index 72dbb563e..b63d202c6 100644 --- a/packages/prime/src/prime_cli/commands/deployments.py +++ b/packages/prime/src/prime_cli/commands/deployments.py @@ -192,7 +192,19 @@ def create_deployment( prime deployments create --checkpoint-id """ - if model_id and checkpoint_id: + if model_id is not None: + model_id = model_id.strip() + if not model_id: + console.print("[red]Error:[/red] MODEL_ID cannot be empty.") + raise typer.Exit(1) + + if checkpoint_id is not None: + checkpoint_id = checkpoint_id.strip() + if not checkpoint_id: + console.print("[red]Error:[/red] --checkpoint-id cannot be empty.") + raise typer.Exit(1) + + if model_id is not None and checkpoint_id is not None: console.print("[red]Error:[/red] Use either MODEL_ID or --checkpoint-id, not both.") raise typer.Exit(1) @@ -204,7 +216,7 @@ def create_deployment( api_client = APIClient() deployments_client = DeploymentsClient(api_client) - if checkpoint_id: + if checkpoint_id is not None: console.print("[bold]Deploying checkpoint:[/bold]") console.print(f" Checkpoint ID: {checkpoint_id}") console.print() diff --git a/packages/prime/tests/test_deployments.py b/packages/prime/tests/test_deployments.py index 122eaee8f..dd50072a5 100644 --- a/packages/prime/tests/test_deployments.py +++ b/packages/prime/tests/test_deployments.py @@ -1,5 +1,5 @@ from types import SimpleNamespace -from typing import Any +from typing import Any, cast from prime_cli.api.deployments import DeploymentsClient from prime_cli.client import APIError @@ -98,7 +98,7 @@ def post(self, endpoint: str, json: dict[str, Any] | None = None) -> dict: captured["json"] = json return _adapter_response() - adapter = DeploymentsClient(DummyAPIClient()).deploy_checkpoint("ckpt-123") + adapter = DeploymentsClient(cast(Any, DummyAPIClient())).deploy_checkpoint("ckpt-123") assert captured["endpoint"] == "/rft/checkpoints/ckpt-123/deploy" assert captured["json"] is None @@ -145,6 +145,20 @@ def deploy_checkpoint(self, checkpoint_id: str) -> Any: assert "prime deployments list" in output +def test_deployments_create_checkpoint_rejects_empty_checkpoint_id(monkeypatch) -> None: + monkeypatch.setattr("prime_cli.main.check_for_update", lambda: (False, None)) + + result = runner.invoke( + app, + ["deployments", "create", "--checkpoint-id", "", "--yes"], + env=TEST_ENV, + ) + output = strip_ansi(result.output) + + assert result.exit_code == 1 + assert "Error: --checkpoint-id cannot be empty." in output + + def test_deployments_create_checkpoint_surfaces_conflict_errors(monkeypatch) -> None: monkeypatch.setattr("prime_cli.main.check_for_update", lambda: (False, None)) From 6fecae21f6a9148705ae972956d4bccc3b34d722 Mon Sep 17 00:00:00 2001 From: Kevin Thomas Date: Sun, 21 Jun 2026 15:07:49 -0700 Subject: [PATCH 5/5] clarify checkpoint deployment status output --- .../src/prime_cli/commands/deployments.py | 28 +++++++++++--- packages/prime/tests/test_deployments.py | 38 +++++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/packages/prime/src/prime_cli/commands/deployments.py b/packages/prime/src/prime_cli/commands/deployments.py index b63d202c6..8ed62f82b 100644 --- a/packages/prime/src/prime_cli/commands/deployments.py +++ b/packages/prime/src/prime_cli/commands/deployments.py @@ -56,6 +56,24 @@ def _print_inference_usage(base_model: str, adapter_id: str) -> None: ) +def _print_deployment_followup(deployment_status: str) -> None: + console.print() + if deployment_status == "DEPLOYED": + console.print("[dim]The model is deployed and ready for inference.[/dim]") + elif deployment_status == "DEPLOYING": + console.print("[dim]The model is being deployed. This may take a few minutes.[/dim]") + else: + console.print(f"[dim]Deployment status: {deployment_status}[/dim]") + console.print("[dim]Use 'prime deployments list' to check deployment status.[/dim]") + + +def _print_deployment_success(deployment_status: str) -> None: + if deployment_status == "DEPLOYED": + console.print("[green]Deployment is ready![/green]") + else: + console.print("[green]Deployment initiated successfully![/green]") + + @app.command(name="list", epilog=LIST_DEPLOYMENTS_JSON_HELP) def list_deployments( team: Optional[str] = typer.Option(None, "--team", "-t", help="Filter by team ID"), @@ -229,11 +247,10 @@ def create_deployment( adapter = deployments_client.deploy_checkpoint(checkpoint_id) - console.print("[green]Deployment initiated successfully![/green]") + _print_deployment_success(adapter.deployment_status) console.print(f"Adapter ID: [cyan]{adapter.id}[/cyan]") console.print(f"Status: [yellow]{adapter.deployment_status}[/yellow]") - console.print("\n[dim]The model is being deployed. This may take a few minutes.[/dim]") - console.print("[dim]Use 'prime deployments list' to check deployment status.[/dim]") + _print_deployment_followup(adapter.deployment_status) _print_inference_usage(adapter.base_model, adapter.id) return @@ -289,10 +306,9 @@ def create_deployment( # Deploy the model updated_model = deployments_client.deploy_adapter(model_id) - console.print("[green]Deployment initiated successfully![/green]") + _print_deployment_success(updated_model.deployment_status) console.print(f"Status: [yellow]{updated_model.deployment_status}[/yellow]") - console.print("\n[dim]The model is being deployed. This may take a few minutes.[/dim]") - console.print("[dim]Use 'prime deployments list' to check deployment status.[/dim]") + _print_deployment_followup(updated_model.deployment_status) _print_inference_usage(model.base_model, model.id) diff --git a/packages/prime/tests/test_deployments.py b/packages/prime/tests/test_deployments.py index dd50072a5..07b25d79a 100644 --- a/packages/prime/tests/test_deployments.py +++ b/packages/prime/tests/test_deployments.py @@ -145,6 +145,44 @@ def deploy_checkpoint(self, checkpoint_id: str) -> Any: assert "prime deployments list" in output +def test_deployments_create_checkpoint_reports_already_deployed_status(monkeypatch) -> None: + monkeypatch.setattr("prime_cli.main.check_for_update", lambda: (False, None)) + + adapter = SimpleNamespace( + id="adapter-deployed", + base_model="Qwen/Qwen3.5-4B", + deployment_status="DEPLOYED", + ) + + class DummyDeploymentsClient: + def __init__(self, api_client: Any) -> None: + self.api_client = api_client + + def deploy_checkpoint(self, checkpoint_id: str) -> Any: + assert checkpoint_id == "ckpt-deployed" + return adapter + + monkeypatch.setattr("prime_cli.commands.deployments.APIClient", lambda: object()) + monkeypatch.setattr( + "prime_cli.commands.deployments.DeploymentsClient", + DummyDeploymentsClient, + ) + + result = runner.invoke( + app, + ["deployments", "create", "--checkpoint-id", "ckpt-deployed", "--yes"], + env=TEST_ENV, + ) + output = strip_ansi(result.output) + + assert result.exit_code == 0, result.output + assert "Deployment is ready!" in output + assert "Deployment initiated successfully!" not in output + assert "Status: DEPLOYED" in output + assert "The model is deployed and ready for inference." in output + assert "The model is being deployed." not in output + + def test_deployments_create_checkpoint_rejects_empty_checkpoint_id(monkeypatch) -> None: monkeypatch.setattr("prime_cli.main.check_for_update", lambda: (False, None))