diff --git a/interface/api/container.py b/interface/api/container.py
index a623d45..1d15bf1 100644
--- a/interface/api/container.py
+++ b/interface/api/container.py
@@ -230,6 +230,7 @@ def __init__(self, settings: Settings | None = None) -> None:
         self.run_council_debate = RunCouncilDebateUseCase(
             debate_port=self.council_debate,
             event_bus=self.council_event_bus,
+            timeout_seconds=self.settings.council_debate_timeout_seconds,
         )
 
         # Engine routing use case (Sprint 4.3, enhanced Sprint 7.4, BUG-002/003 Sprint 23.1)
diff --git a/interface/cli/commands/council.py b/interface/cli/commands/council.py
new file mode 100644
index 0000000..55dc4ad
--- /dev/null
+++ b/interface/cli/commands/council.py
@@ -0,0 +1,68 @@
+"""Council subcommand — run a two-engine debate and let a judge decide.
+
+Two candidate engines argue (capability / cost / risk / approach) over a goal
+and a resolver model resolves the debate with an explicit rationale. The
+resolver model is config-driven (``MORPHIC_COUNCIL_RESOLVER_MODEL``); the
+candidate engines are chosen with ``--engines``.
+"""
+
+from __future__ import annotations
+
+import typer
+from rich.markup import escape
+
+from domain.entities.council import SubtaskBrief
+from domain.value_objects.agent_engine import AgentEngineType
+from domain.value_objects.model_tier import TaskType
+from interface.cli._utils import _get_container, _run
+from interface.cli.formatters import console, print_council_debate, print_error
+
+council_app = typer.Typer()
+
+
+@council_app.command("debate")
+def debate(
+    goal: str = typer.Argument(..., help="The goal/question the engines debate over."),
+    engines: str = typer.Option(
+        "ollama,claude_code",
+        "--engines",
+        "-e",
+        help="Exactly two candidate engines, comma-separated (e.g. ollama,claude_code).",
+    ),
+    task_type: str = typer.Option(
+        "simple_qa", "--type", "-t", help="Task type hint for the brief."
+    ),
+) -> None:
+    """Run a two-engine council debate over GOAL and print the judge's verdict.
+
+    The resolver (judge) model comes from MORPHIC_COUNCIL_RESOLVER_MODEL.
+    """
+    # Validate every option before the container is built.
+    names = [e.strip() for e in engines.split(",") if e.strip()]
+    if len(names) != 2:
+        print_error("--engines requires exactly two engines (e.g. ollama,claude_code).")
+        raise typer.Exit(code=1)
+    try:
+        candidates = [AgentEngineType(n) for n in names]
+    except ValueError:
+        valid = ", ".join(e.value for e in AgentEngineType)
+        print_error(f"Unknown engine in {names!r}. Valid: {valid}.")
+        raise typer.Exit(code=1) from None
+    try:
+        tt = TaskType(task_type)
+    except ValueError:
+        valid = ", ".join(t.value for t in TaskType)
+        print_error(f"Unknown task type {task_type!r}. Valid: {valid}.")
+        raise typer.Exit(code=1) from None
+
+    c = _get_container()
+    subtask = SubtaskBrief(id="council-cli", description=goal, task_type=tt)
+
+    # GOAL is free-form user text; escape it so brackets such as "list[int]"
+    # are printed literally instead of being parsed as Rich markup tags.
+    console.print(f"\n[bold yellow]Council debate:[/] {escape(goal)}")
+    console.print(f"[dim]candidates: {', '.join(names)}[/]\n")
+
+    # ``decision`` is passed through as-is; the formatter handles None.
+    decision = _run(c.run_council_debate.execute(subtask, candidates))
+    print_council_debate(decision, list(c.council_event_bus.events))
diff --git a/interface/cli/formatters.py b/interface/cli/formatters.py
index aeaa337..d0c2716 100644
--- a/interface/cli/formatters.py
+++ b/interface/cli/formatters.py
@@ -8,11 +8,15 @@
 from typing import TYPE_CHECKING
 
 from rich.console import Console
+from rich.markup import escape
+from rich.panel import Panel
+from rich.rule import Rule
 from rich.table import Table
 from rich.tree import Tree
 
 if TYPE_CHECKING:
     from application.use_cases.route_to_engine import EngineStatus
+    from domain.entities.council import Argument
     from domain.entities.execution_record import ExecutionRecord
     from domain.entities.fractal_learning import ErrorPattern, SuccessfulPath
     from domain.entities.memory import MemoryEntry
@@ -20,6 +24,7 @@
     from domain.entities.tool_candidate import ToolCandidate
     from domain.ports.agent_engine import AgentEngineResult
     from domain.ports.execution_record_repository import ExecutionStats
+    from domain.value_objects.council_events import DebateEvent
 
 console = Console()
 
@@ -865,3 +870,77 @@ def print_learning_stats(
     table.add_row("Avg path cost", f"${avg_cost:.4f}")
 
     console.print(table)
+
+
+# Engine display labels for council output (engine value → friendly label/style)
+_COUNCIL_ENGINE_STYLE: dict[str, tuple[str, str]] = {
+    "ollama": ("ollama (local, $0)", "cyan"),
+    "claude_code": ("claude_code (cloud)", "magenta"),
+    "gemini_cli": ("gemini_cli (cloud)", "blue"),
+    "codex_cli": ("codex_cli (cloud)", "yellow"),
+    "openhands": ("openhands (sandbox)", "green"),
+}
+
+
+def _council_argument_panel(arg: Argument) -> Panel:
+    """Build the panel showing one engine's four debate claims.
+
+    The claims are free-form text, so each one is escaped before it is placed
+    in the markup string; unescaped brackets would be parsed as Rich tags.
+    """
+    label, style = _COUNCIL_ENGINE_STYLE.get(arg.engine.value, (arg.engine.value, "white"))
+    body = (
+        f"[bold]Capability[/] {escape(str(arg.capability_claim))}\n\n"
+        f"[bold]Cost[/] {escape(str(arg.cost_claim))}\n\n"
+        f"[bold]Risk[/] {escape(str(arg.risk_claim))}\n\n"
+        f"[bold]Approach[/] {escape(str(arg.recommended_approach))}"
+    )
+    return Panel(body, title=label, border_style=style, padding=(1, 2))
+
+
+def print_council_debate(decision, events: list[DebateEvent]) -> None:  # type: ignore[no-untyped-def]
+    """Render a council debate: each engine's argument + the judge's verdict.
+
+    ``decision`` is None when the debate was abandoned (e.g. resolver model
+    unavailable); the abandon reason is surfaced from the event stream.
+
+    Free-form text (abandon reason, rationale) is escaped so that brackets in
+    it are printed literally rather than interpreted as Rich markup.
+    """
+    if decision is None:
+        # Reason of the first abandon event; an empty or missing reason falls
+        # back to the placeholder rather than printing a blank message.
+        reason = next(
+            (getattr(e, "reason", "") for e in events if e.kind == "debate_abandoned"),
+            "",
+        )
+        reason = str(reason) if reason else "unknown reason"
+        console.print(f"[red]Debate abandoned:[/] {escape(reason)}")
+        return
+
+    # Prefer the arguments attached to the (last) resolved decision; otherwise
+    # fall back to the individually submitted arguments.
+    arguments: list = []
+    for e in events:
+        if e.kind == "decision_resolved":
+            arguments = list(e.arguments)
+    if not arguments:
+        arguments = [e.argument for e in events if e.kind == "argument_submitted"]
+
+    console.print(Rule("[bold]Arguments[/]"))
+    for arg in arguments:
+        console.print(_council_argument_panel(arg))
+
+    label, style = _COUNCIL_ENGINE_STYLE.get(
+        decision.agent_engine.value, (decision.agent_engine.value, "green")
+    )
+    console.print(Rule("[bold]Verdict[/]"))
+    console.print(
+        Panel(
+            f"[bold {style}]-> {label}[/]\n\n{escape(str(decision.rationale))}",
+            title="Judge's decision",
+            border_style="green",
+            padding=(1, 2),
+        )
+    )
+    console.print(f"[dim]events: {' -> '.join(e.kind for e in events)}[/]")
diff --git a/interface/cli/main.py b/interface/cli/main.py
index ccd62d6..36b3165 100644
--- a/interface/cli/main.py
+++ b/interface/cli/main.py
@@ -51,6 +51,7 @@ def _register_commands() -> None:
     from interface.cli.commands.cognitive import cognitive_app
     from interface.cli.commands.context import context_app
     from interface.cli.commands.cost import cost_app
+    from interface.cli.commands.council import council_app
     from interface.cli.commands.doctor import doctor_app
     from interface.cli.commands.engine import engine_app
     from interface.cli.commands.evolution import evolution_app
@@ -70,6 +71,11 @@ def _register_commands() -> None:
     app.add_typer(cost_app, name="cost", help="View cost tracking and budget.")
     app.add_typer(mcp_app, name="mcp", help="Manage MCP server.")
     app.add_typer(engine_app, name="engine", help="Manage agent execution engines.")
+    app.add_typer(
+        council_app,
+        name="council",
+        help="Run a two-engine council debate; a judge resolves with a rationale.",
+    )
     app.add_typer(
         fallback_app,
         name="fallback",
diff --git a/shared/config.py b/shared/config.py
index 880f518..1da33d8 100644
--- a/shared/config.py
+++ b/shared/config.py
@@ -182,6 +182,12 @@ class Settings(BaseSettings):
         default="gemini/gemini-2.5-flash",
         validation_alias="MORPHIC_COUNCIL_RESOLVER_MODEL",
     )
+    # Two local argument generations + one resolver call run sequentially; a
+    # cold Ollama needs headroom beyond the original 15s pilot budget.
+    council_debate_timeout_seconds: float = Field(
+        default=30.0,
+        validation_alias="MORPHIC_COUNCIL_TIMEOUT_SECONDS",
+    )
 
     # ── Evolution ──
     evolution_enabled: bool = True
diff --git a/tests/unit/infrastructure/test_mcp_server.py b/tests/unit/infrastructure/test_mcp_server.py
index ecc3a4a..8de7480 100644
--- a/tests/unit/infrastructure/test_mcp_server.py
+++ b/tests/unit/infrastructure/test_mcp_server.py
@@ -94,6 +94,7 @@ class _FakeSettings:
     # Council pilot (TD-194)
     council_debate_enabled = False
     council_resolver_model = "gemini/gemini-2.5-flash"
+    council_debate_timeout_seconds = 30.0
 
     @property
     def marketplace_safety_threshold_tier(self):  # type: ignore[no-untyped-def]
diff --git a/tests/unit/interface/test_council_cli.py b/tests/unit/interface/test_council_cli.py
new file mode 100644
index 0000000..6bc4188
--- /dev/null
+++ b/tests/unit/interface/test_council_cli.py
@@ -0,0 +1,122 @@
+"""CLI tests for `morphic council debate` (TD-194 surface)."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
+
+from typer.testing import CliRunner
+
+from domain.entities.cognitive import Decision
+from domain.entities.council import Argument, SubtaskBrief
+from domain.value_objects.agent_engine import AgentEngineType
+from domain.value_objects.council_events import (
+    ArgumentSubmitted,
+    DebateStarted,
+    DecisionResolved,
+)
+from domain.value_objects.model_tier import TaskType
+from interface.cli import _utils as cli_utils
+from interface.cli.main import app
+
+runner = CliRunner()
+
+
+def _arg(engine: AgentEngineType) -> Argument:
+    return Argument(
+        engine=engine,
+        capability_claim="cap",
+        cost_claim="cost",
+        risk_claim="risk",
+        recommended_approach="approach",
+    )
+
+
+def _make_container(decision: Decision | None, events: list) -> SimpleNamespace:
+    run_council = SimpleNamespace(execute=AsyncMock(return_value=decision))
+    return SimpleNamespace(
+        run_council_debate=run_council,
+        council_event_bus=SimpleNamespace(events=events),
+    )
+
+
+def test_council_debate_prints_verdict() -> None:
+    decision = Decision(
+        description="x",
+        rationale="claude_code is more decisive for this task",
+        agent_engine=AgentEngineType.CLAUDE_CODE,
+    )
+    args = [_arg(AgentEngineType.OLLAMA), _arg(AgentEngineType.CLAUDE_CODE)]
+    events = [
+        DebateStarted(
+            subtask=SubtaskBrief(id="s", description="g", task_type=TaskType.SIMPLE_QA),
+            candidates=[AgentEngineType.OLLAMA, AgentEngineType.CLAUDE_CODE],
+        ),
+        ArgumentSubmitted(argument=args[0]),
+        ArgumentSubmitted(argument=args[1]),
+        DecisionResolved(decision=decision, arguments=args),
+    ]
+    cli_utils._set_container(_make_container(decision, events))
+    try:
+        result = runner.invoke(app, ["council", "debate", "Python or Go for an MVP?"])
+    finally:
+        cli_utils._set_container(None)
+
+    assert result.exit_code == 0, result.output
+    assert "claude_code" in result.output
+    assert "Verdict" in result.output
+
+
+def test_council_debate_abandoned_shows_reason() -> None:
+    from domain.value_objects.council_events import DebateAbandoned
+
+    events = [DebateAbandoned(reason="resolver model unavailable: bad key")]
+    cli_utils._set_container(_make_container(None, events))
+    try:
+        result = runner.invoke(app, ["council", "debate", "x"])
+    finally:
+        cli_utils._set_container(None)
+
+    assert result.exit_code == 0, result.output
+    assert "abandoned" in result.output.lower()
+    assert "resolver model unavailable" in result.output
+
+
+def test_council_debate_rejects_wrong_engine_count() -> None:
+    cli_utils._set_container(_make_container(None, []))
+    try:
+        result = runner.invoke(app, ["council", "debate", "x", "--engines", "ollama"])
+    finally:
+        cli_utils._set_container(None)
+    assert result.exit_code == 1
+
+
+def test_council_debate_rejects_unknown_engine() -> None:
+    cli_utils._set_container(_make_container(None, []))
+    try:
+        result = runner.invoke(
+            app, ["council", "debate", "x", "--engines", "ollama,nope"]
+        )
+    finally:
+        cli_utils._set_container(None)
+    assert result.exit_code == 1
+
+
+def test_council_debate_prints_bracketed_text_literally() -> None:
+    # Brackets in the goal/rationale must reach the output verbatim, not be
+    # swallowed as Rich markup tags.
+    decision = Decision(
+        description="x",
+        rationale="prefer list[int] here",
+        agent_engine=AgentEngineType.OLLAMA,
+    )
+    events = [DecisionResolved(decision=decision, arguments=[_arg(AgentEngineType.OLLAMA)])]
+    cli_utils._set_container(_make_container(decision, events))
+    try:
+        result = runner.invoke(app, ["council", "debate", "list[int] or tuple[int]?"])
+    finally:
+        cli_utils._set_container(None)
+
+    assert result.exit_code == 0, result.output
+    assert "list[int] or tuple[int]?" in result.output
+    assert "prefer list[int] here" in result.output
diff --git a/tests/unit/interface/test_fractal_container_wiring.py b/tests/unit/interface/test_fractal_container_wiring.py
index 582fcf7..670c483 100644
--- a/tests/unit/interface/test_fractal_container_wiring.py
+++ b/tests/unit/interface/test_fractal_container_wiring.py
@@ -94,6 +94,7 @@ class _FakeSettings:
     # Council pilot (TD-194)
     council_debate_enabled = False
     council_resolver_model = "gemini/gemini-2.5-flash"
+    council_debate_timeout_seconds = 30.0
     # Planner router (TD-195)
     planner_router_mode = "disabled"
     planner_router_haiku_confidence_threshold = 0.7