Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions docs/agents/runtime-diagnostics.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
---
title: "Runtime diagnostics"
description: "Inspect resolved agent runtime metadata without exposing prompts, tool names, headers, or secrets."
---

Runtime diagnostics help explain which agent runtime configuration Tracecat used for a run.

When an agent run completes, its result may include `runtime_resolution`. The same metadata is also emitted as a `runtime_resolution` stream event before the model call starts, so operators can inspect the selected runtime even if the model call or a later tool call fails.

## What is included

The diagnostic block includes metadata such as:

- runtime type
- selected model provider, model name, and runtime route
- whether custom-provider passthrough was enabled
- instruction, system prompt, and user prompt lengths
- output type shape
- tool, MCP server, approval policy, subagent, and skill counts
- thinking, internet access, resume, fork, and approval-continuation flags

## What is not included

Runtime diagnostics do not include prompt bodies, tool names, MCP headers, OAuth tokens, secret values, or resolved variables.

This keeps the field safe to show in run results and stream events while still making configuration issues easier to debug.

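## Example

The example below is abbreviated. Only fields whose value is unset (`null`) are omitted from real payloads, so actual output also carries default-valued fields such as `passthrough`, `base_url_configured`, `resumed`, and `forked`.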

```json
{
"runtime": "claude_code",
"model_provider": "anthropic",
"model_name": "claude-3-5-sonnet-latest",
"model_route": "anthropic/claude-3-5-sonnet-latest",
"instructions_present": true,
"instructions_length": 482,
"system_prompt_length": 1880,
"actions_count": 6,
"allowed_tools_count": 8,
"mcp_server_count": 2,
"subagent_count": 1,
"skills_count": 3,
"approval_continuation": false
}
```

Use this when a run behaves differently than expected, for example when an instruction override appears missing, a custom provider route is not selected, or the available tool surface is larger than intended.
1 change: 1 addition & 0 deletions docs/docs.json
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
"pages": [
"agents/ai-action",
"agents/ai-agent",
"agents/runtime-diagnostics",
"agents/custom-llm-providers",
"agents/skills",
"agents/secrets-variables"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,7 @@ async def _run_with_agent_executor(
output_tokens=(result.result_usage or {}).get("output_tokens", 0),
),
session_id=self.session_id,
runtime_resolution=result.runtime_resolution,
)

async def _load_terminal_message_history(
Expand Down
149 changes: 149 additions & 0 deletions tests/unit/test_agent_runtime_resolution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
from __future__ import annotations

import uuid
from pathlib import Path
from unittest.mock import MagicMock

from tracecat.agent.common.protocol import RuntimeEventEnvelope, RuntimeInitPayload
from tracecat.agent.common.stream_types import StreamEventType, UnifiedStreamEvent
from tracecat.agent.common.types import RuntimeResolution, SandboxAgentConfig
from tracecat.agent.runtime.claude_code.runtime import ClaudeAgentRuntime


def test_runtime_resolution_is_metadata_only() -> None:
    """Dumped metadata keeps routing/length values and has no body-style keys."""
    metadata = RuntimeResolution(
        runtime="claude_code",
        model_provider="anthropic",
        model_name="claude-3-5-sonnet",
        model_route="anthropic/claude-3-5-sonnet",
        instructions_present=True,
        instructions_length=120,
        system_prompt_length=500,
        actions_count=4,
        allowed_tools_count=6,
        mcp_server_count=2,
    ).to_metadata()

    # Values that were set explicitly come back unchanged.
    assert metadata["runtime"] == "claude_code"
    assert metadata["instructions_length"] == 120

    # Keys that would carry prompt text, tool names, or headers must be absent.
    for forbidden_key in ("system_prompt", "tools", "headers"):
        assert forbidden_key not in metadata


def test_runtime_resolution_round_trips_through_result_envelope() -> None:
    """A resolution attached to a result envelope survives to_dict/from_dict."""
    resolution = RuntimeResolution(
        runtime="claude_code",
        model_provider="anthropic",
        model_name="claude-3-5-sonnet",
        model_route="anthropic/claude-3-5-sonnet",
        user_prompt_length=9,
        allowed_tools_count=1,
    )
    original = RuntimeEventEnvelope.from_result(
        usage={"input_tokens": 10, "output_tokens": 5},
        num_turns=1,
        output="done",
        runtime_resolution=resolution,
    )

    # Serialize to a plain dict and immediately rebuild the envelope from it.
    restored = RuntimeEventEnvelope.from_dict(original.to_dict())

    assert restored.runtime_resolution == resolution
    assert restored.result_output == "done"


def test_runtime_resolution_stream_event_is_metadata_event() -> None:
    """The stream event carries the resolution's metadata dict verbatim."""
    # Explicitly set values plus every non-None default of the model; fields
    # whose default is None are expected to be absent from the dump.
    expected_metadata = {
        "runtime": "pydantic_ai",
        "model_provider": "openai",
        "model_name": "gpt-4.1-mini",
        "passthrough": False,
        "base_url_configured": False,
        "instructions_present": False,
        "instructions_length": 0,
        "system_prompt_fragment_count": 0,
        "user_prompt_length": 0,
        "output_type_kind": "none",
        "approval_policy_count": 0,
        "approvals_enabled": False,
        "mcp_server_count": 0,
        "stdio_mcp_server_count": 0,
        "subagent_count": 0,
        "skills_count": 0,
        "resumed": False,
        "forked": False,
        "approval_continuation": False,
    }
    metadata = RuntimeResolution(
        runtime="pydantic_ai",
        model_provider="openai",
        model_name="gpt-4.1-mini",
    ).to_metadata()

    event = UnifiedStreamEvent.runtime_resolution_event(metadata)

    assert event.type is StreamEventType.RUNTIME_RESOLUTION
    assert event.metadata == expected_metadata


def test_claude_runtime_resolution_counts_resolved_runtime_shape() -> None:
    """ClaudeAgentRuntime reports counts and flags for the resolved runtime."""
    instructions = "Investigate alerts."
    model_route = "anthropic/claude-3-5-sonnet"

    agent_config = SandboxAgentConfig(
        model_name="claude-3-5-sonnet",
        model_provider="anthropic",
        instructions=instructions,
        tool_approvals={"core.http_request": True},
        mcp_servers=[
            {
                "type": "stdio",
                "name": "local-tools",
                "command": "npx",
            }
        ],
        enable_thinking=True,
        enable_internet_access=False,
    )
    payload = RuntimeInitPayload(
        session_id=uuid.uuid4(),
        mcp_auth_token="mcp-token",
        config=agent_config,
        user_prompt="Analyze this alert",
        llm_gateway_auth_token="llm-token",
        allowed_actions={},
        sdk_session_id="previous-session",
        is_fork=True,
        is_approval_continuation=True,
    )

    # The socket writer and transport are stand-ins; nothing is sent here.
    runtime = ClaudeAgentRuntime(
        MagicMock(),
        transport_factory=lambda _: MagicMock(),
        cwd=Path("/tmp/tracecat-agent-test"),
    )
    runtime._configure_runtime_state(payload)

    # Stand-in for the SDK options object, exposing only these four attributes.
    sdk_options = MagicMock(
        model=model_route,
        system_prompt="system prompt",
        allowed_tools=["mcp__tracecat-registry__core__http_request"],
        disallowed_tools=["WebSearch", "WebFetch"],
    )
    prepared = runtime._runtime_resolution(
        payload=payload,
        options=sdk_options,
        resume_session_id="previous-session",
        fork_session=True,
        mcp_servers={"tracecat-registry": {"type": "http", "url": "http://mcp"}},
        stdio_mcp_servers={"local-tools": {"type": "stdio", "command": "npx"}},
        agent_definitions=None,
    )

    # Routing and prompt shape.
    assert prepared.runtime == "claude_code"
    assert prepared.model_route == model_route
    assert prepared.instructions_length == len(instructions)

    # Tool and server counts: one allowed tool, two disallowed tools, one
    # entry in each of the two server mappings, one approval policy.
    assert prepared.allowed_tools_count == 1
    assert prepared.disallowed_tools_count == 2
    assert prepared.mcp_server_count == 1
    assert prepared.stdio_mcp_server_count == 1
    assert prepared.approval_policy_count == 1

    # Session-continuation flags.
    assert prepared.resumed is True
    assert prepared.forked is True
    assert prepared.approval_continuation is True
13 changes: 13 additions & 0 deletions tracecat/agent/common/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from tracecat.agent.common.stream_types import UnifiedStreamEvent
from tracecat.agent.common.types import (
MCPToolDefinition,
RuntimeResolution,
SandboxAgentConfig,
SandboxSubagentConfig,
)
Expand Down Expand Up @@ -157,6 +158,7 @@ class RuntimeEventEnvelope:
result_num_turns: int | None = None
result_duration_ms: int | None = None
result_output: Any = None
runtime_resolution: RuntimeResolution | None = None
# For type="log" - structured log forwarding from sandbox
log_level: str | None = None # "debug", "info", "warning", "error"
log_message: str | None = None
Expand Down Expand Up @@ -185,6 +187,11 @@ def from_dict(cls, data: dict[str, Any]) -> RuntimeEventEnvelope:
"result_output",
data.get("result_structured_output", data.get("result_result")),
),
runtime_resolution=(
RuntimeResolution.model_validate(data["runtime_resolution"])
if data.get("runtime_resolution")
else None
),
log_level=data.get("log_level"),
log_message=data.get("log_message"),
log_extra=data.get("log_extra"),
Expand Down Expand Up @@ -215,6 +222,10 @@ def to_dict(self) -> dict[str, Any]:
result["result_duration_ms"] = self.result_duration_ms
if self.result_output is not None:
result["result_output"] = self.result_output
if self.runtime_resolution is not None:
result["runtime_resolution"] = self.runtime_resolution.model_dump(
mode="json", exclude_none=True
)
if self.log_level is not None:
result["log_level"] = self.log_level
if self.log_message is not None:
Expand Down Expand Up @@ -282,6 +293,7 @@ def from_result(
num_turns: int | None = None,
duration_ms: int | None = None,
output: Any = None,
runtime_resolution: RuntimeResolution | None = None,
) -> RuntimeEventEnvelope:
"""Create a result envelope with usage data from Claude SDK ResultMessage."""
return cls(
Expand All @@ -290,6 +302,7 @@ def from_result(
result_num_turns=num_turns,
result_duration_ms=duration_ms,
result_output=output,
runtime_resolution=runtime_resolution,
)

@classmethod
Expand Down
3 changes: 3 additions & 0 deletions tracecat/agent/common/socket_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from tracecat.agent.common.protocol import RuntimeEventEnvelope
from tracecat.agent.common.stream_types import UnifiedStreamEvent
from tracecat.agent.common.types import RuntimeResolution

# Header size: 1 byte msg_type + 4 bytes length
HEADER_SIZE = 5
Expand Down Expand Up @@ -170,6 +171,7 @@ async def send_result(
num_turns: int | None = None,
duration_ms: int | None = None,
output: Any = None,
runtime_resolution: RuntimeResolution | None = None,
) -> None:
"""Send final result with usage data from Claude SDK ResultMessage."""
await self._send(
Expand All @@ -178,6 +180,7 @@ async def send_result(
num_turns=num_turns,
duration_ms=duration_ms,
output=output,
runtime_resolution=runtime_resolution,
)
)

Expand Down
12 changes: 12 additions & 0 deletions tracecat/agent/common/stream_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class StreamEventType(StrEnum):
# System/status events
COMPACTION = "compaction"
ARTIFACT = "artifact"
RUNTIME_RESOLUTION = "runtime_resolution"

# Control events
ERROR = "error"
Expand Down Expand Up @@ -240,6 +241,17 @@ def compaction_event(
metadata=event_metadata,
)

@classmethod
def runtime_resolution_event(
    cls,
    metadata: dict[str, Any],
) -> UnifiedStreamEvent:
    """Build a RUNTIME_RESOLUTION event whose only payload is ``metadata``.

    Args:
        metadata: Resolved-runtime metadata, stored on the event as given.

    Returns:
        A new event of type ``StreamEventType.RUNTIME_RESOLUTION``.
    """
    event_type = StreamEventType.RUNTIME_RESOLUTION
    return cls(type=event_type, metadata=metadata)

@classmethod
def tool_result_event(
cls,
Expand Down
60 changes: 60 additions & 0 deletions tracecat/agent/common/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ class SandboxAgentConfig(BaseModel):
"""Whether to enable extended thinking for the Claude Code CLI."""
enable_internet_access: bool = False
"""Whether to enable internet access tools (WebSearch, WebFetch)."""
skills_count: int = 0
"""Number of resolved skills staged into the sandbox for this turn."""

@classmethod
def from_agent_config(cls, config: AgentConfig) -> SandboxAgentConfig:
Expand All @@ -193,6 +195,7 @@ def from_agent_config(cls, config: AgentConfig) -> SandboxAgentConfig:
output_type=config.output_type,
enable_thinking=config.enable_thinking,
enable_internet_access=config.enable_internet_access,
skills_count=len(config.resolved_skills or []),
)


Expand All @@ -213,3 +216,60 @@ class SandboxSubagentConfig(BaseModel):
model_route: str | None = None
max_turns: int | None = None
allowed_actions: dict[str, MCPToolDefinition] | None = None


class RuntimeResolution(BaseModel):
    """Shape-and-routing snapshot of the runtime config chosen for an agent turn.

    Only identifiers, counts, lengths, and flags are stored. Prompt bodies,
    tool names, headers, and secret-bearing values are deliberately not part
    of this model.
    """

    # Unknown keys are rejected instead of being stored on the model.
    model_config = ConfigDict(extra="forbid")

    # --- Runtime and model routing ---
    runtime: Literal["claude_code", "pydantic_ai"]
    model_provider: str | None = None
    model_name: str | None = None
    model_route: str | None = None
    passthrough: bool = False
    base_url_configured: bool = False

    # --- Prompt and output shape (lengths and kinds only, never text) ---
    instructions_present: bool = False
    instructions_length: int = 0
    system_prompt_length: int | None = None
    system_prompt_fragment_count: int = 0
    user_prompt_length: int = 0
    output_type_kind: Literal["none", "primitive", "json_schema"] = "none"

    # --- Tool surface and approvals (counts only, never names) ---
    actions_count: int | None = None
    namespaces_count: int | None = None
    allowed_tools_count: int | None = None
    disallowed_tools_count: int | None = None
    approval_policy_count: int = 0
    approvals_enabled: bool = False

    # --- Attached servers, subagents, and skills ---
    mcp_server_count: int = 0
    stdio_mcp_server_count: int = 0
    subagent_count: int = 0
    skills_count: int = 0

    # --- Feature and session-continuation flags ---
    thinking_enabled: bool | None = None
    internet_access_enabled: bool | None = None
    resumed: bool = False
    forked: bool = False
    approval_continuation: bool = False

    def to_metadata(self) -> dict[str, Any]:
        """Dump the model as JSON-compatible metadata for runtime stream events.

        Fields whose value is ``None`` are left out of the result; fields
        holding any other value, including their defaults, are emitted.
        """
        metadata = self.model_dump(mode="json", exclude_none=True)
        return metadata


def output_type_kind(
output_type: str | dict[str, Any] | None,
) -> Literal["none", "primitive", "json_schema"]:
"""Classify output type without exposing a full schema in diagnostics."""
if output_type is None:
return "none"
if isinstance(output_type, dict):
return "json_schema"
return "primitive"
Loading