diff --git a/CLAUDE.md b/CLAUDE.md index fdefadeabd..d76435e441 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -22,18 +22,20 @@ This repo contains the CLI for Entire. ### Command Layout -The CLI is organized around five noun groups plus a small set of top-level -verbs. The groups are the canonical home for each verb; legacy top-level -shortcuts remain functional but hidden, and emit a deprecation hint pointing -at the canonical group form. - -- `session` (alias: `sessions`): `list`, `info`, `stop`, `attach`, `resume`, `current`. +The visible CLI is organized around five noun groups plus a small set of +top-level verbs. The groups are the canonical home for each verb; legacy +top-level shortcuts remain functional but hidden, and emit a deprecation hint +pointing at the canonical group form. Newer experimental command families are +discoverable through `entire labs` and may remain hidden from root help while +their canonical paths are still runnable. + +- `session` (alias: `sessions`): `list`, `info`, `tokens`, `stop`, `attach`, `resume`, `current`. `resume` with a branch arg switches to it and resumes its session; with no arg it opens an interactive picker of stopped sessions (across all worktrees), resolving each to its branch and pointing at the owning worktree when the branch is checked out elsewhere. Resume keeps an existing local session log as-is by default (`--force` overwrites it from the checkpoint). -- `checkpoint` (aliases: `cp`, `checkpoints`): `list`, `explain`, `search`, plus +- `checkpoint` (aliases: `cp`, `checkpoints`): `list`, `explain`, `tokens`, `search`, plus the deprecated `rewind` (functional, prints a cobra deprecation message, will be removed in a future release) - `agent`: bare opens the interactive agent selector, plus `list`, `add`, `remove` @@ -45,6 +47,10 @@ at the canonical group form. current one) and `--all-contexts` (log out of every saved login) - `doctor`: bare runs the scan-and-fix flow, plus `trace`, `logs`, `bundle` +Experimental command families advertised through `entire labs`: + +- `tokens`: `profile` (hidden from root help while token diagnostics mature) + Top-level lifecycle and standalone commands: `enable`, `disable`, `status`, `login`, `logout`, `clean`, `version`, `dispatch`, `activity`, `help`, `configure`. diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index 4db959b202..246556ffc7 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -3,6 +3,7 @@ package cli import ( "context" "encoding/json" + "errors" "fmt" "io" "strconv" @@ -37,7 +38,10 @@ type checkpointTokensComparison struct { TargetCheckpointID string `json:"target_checkpoint_id"` Status string `json:"status"` Total *checkpointTokensMetricDelta `json:"total,omitempty"` + Input *checkpointTokensMetricDelta `json:"input,omitempty"` CacheRead *checkpointTokensMetricDelta `json:"cache_read,omitempty"` + CacheWrite *checkpointTokensMetricDelta `json:"cache_write,omitempty"` + Output *checkpointTokensMetricDelta `json:"output,omitempty"` APICalls *checkpointTokensMetricDelta `json:"api_calls,omitempty"` Qualification string `json:"qualification"` Limitations []string `json:"limitations,omitempty"` @@ -56,11 +60,16 @@ const ( checkpointComparisonStatusObservedReduction = "observed_reduction" checkpointComparisonStatusObservedIncrease = "observed_increase" checkpointComparisonStatusObservedNoChange = "observed_no_change" + + checkpointDeltaDirectionDown = "down" + checkpointDeltaDirectionUp = "up" + checkpointDeltaDirectionUnchanged = "unchanged" ) func newCheckpointTokensCmd() *cobra.Command { var jsonFlag bool var compareFlag string + var agentBriefFlag bool cmd := &cobra.Command{ Use: "tokens ", @@ -77,16 +86,20 @@ Use --compare to compare this checkpoint against a previous checkpoint and qualify observed token reduction or increase.`, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { - return runCheckpointTokens(cmd.Context(), cmd, args[0], jsonFlag, compareFlag) + if jsonFlag && agentBriefFlag { + return errors.New("--json and --agent-brief are mutually exclusive") + } + return runCheckpointTokens(cmd.Context(), cmd, args[0], jsonFlag, compareFlag, agentBriefFlag) }, } cmd.Flags().BoolVar(&jsonFlag, "json", false, "Output as JSON") cmd.Flags().StringVar(&compareFlag, "compare", "", "Compare against a baseline checkpoint ID") + cmd.Flags().BoolVar(&agentBriefFlag, "agent-brief", false, "Output compact next-step guidance for agents") return cmd } -func runCheckpointTokens(ctx context.Context, cmd *cobra.Command, checkpointIDPrefix string, jsonOutput bool, comparePrefix string) error { +func runCheckpointTokens(ctx context.Context, cmd *cobra.Command, checkpointIDPrefix string, jsonOutput bool, comparePrefix string, agentBrief bool) error { report, lookup, err := loadCheckpointTokensReport(ctx, cmd, checkpointIDPrefix) if lookup != nil { defer lookup.Close() @@ -103,12 +116,20 @@ func runCheckpointTokens(ctx context.Context, cmd *cobra.Command, checkpointIDPr if err != nil { return tokenCommandError(err) } + if baselineReport.CheckpointID == report.CheckpointID { + cmd.SilenceUsage = true + return fmt.Errorf("cannot compare checkpoint %s to itself", report.CheckpointID) + } report.Comparison = buildCheckpointTokensComparison(report, baselineReport) } if jsonOutput { return writeCheckpointTokensJSON(cmd.OutOrStdout(), report) } + if agentBrief { + writeCheckpointTokensAgentBrief(cmd.OutOrStdout(), report) + return nil + } writeCheckpointTokensText(cmd.OutOrStdout(), report) return nil } @@ -374,19 +395,53 @@ func buildCheckpointTokensComparison(target, baseline checkpointTokensReport) *c } comparison.Total = buildCheckpointMetricDelta(baseline.Tokens.Total, target.Tokens.Total) + comparison.Input = buildCheckpointMetricDelta(baseline.Tokens.Input, target.Tokens.Input) comparison.CacheRead = buildCheckpointMetricDelta(baseline.Tokens.CacheRead, target.Tokens.CacheRead) + comparison.CacheWrite = buildCheckpointMetricDelta(baseline.Tokens.CacheWrite, target.Tokens.CacheWrite) + comparison.Output = buildCheckpointMetricDelta(baseline.Tokens.Output, target.Tokens.Output) comparison.APICalls = buildCheckpointMetricDelta(baseline.Tokens.APICalls, target.Tokens.APICalls) comparison.Status = checkpointComparisonStatus(comparison.Total) comparison.Qualification = checkpointComparisonQualification(comparison.Status) + if classes := checkpointCostProxyPressureIncreased(comparison); len(classes) > 0 { + comparison.Qualification += fmt.Sprintf(" Cost-proxy pressure increased for %s even though total tokens decreased.", formatTokenClassList(classes)) + } return comparison } +func checkpointCostProxyPressureIncreased(comparison *checkpointTokensComparison) []string { + if comparison == nil || comparison.Total == nil || comparison.Total.Change >= 0 { + return nil + } + var classes []string + if comparison.CacheWrite != nil && comparison.CacheWrite.Change > 0 { + classes = append(classes, "cache write") + } + if comparison.Output != nil && comparison.Output.Change > 0 { + classes = append(classes, "output") + } + return classes +} + +func formatTokenClassList(classes []string) string { + switch len(classes) { + case 0: + return "" + case 1: + return classes[0] + case 2: + return classes[0] + " and " + classes[1] + default: + return strings.Join(classes[:len(classes)-1], ", ") + ", and " + classes[len(classes)-1] + } +} + func buildCheckpointMetricDelta(baseline, current int) *checkpointTokensMetricDelta { + change := saturatingIntSub(current, baseline) delta := &checkpointTokensMetricDelta{ Baseline: baseline, Current: current, - Change: current - baseline, - Direction: checkpointDeltaDirection(current - baseline), + Change: change, + Direction: checkpointDeltaDirection(change), } if baseline != 0 { percent := float64(delta.Change) * 100 / float64(baseline) @@ -395,14 +450,32 @@ func buildCheckpointMetricDelta(baseline, current int) *checkpointTokensMetricDe return delta } +func saturatingIntSub(a, b int) int { + if b < 0 && a > maxInt()+b { + return maxInt() + } + if b > 0 && a < minInt()+b { + return minInt() + } + return a - b +} + +func maxInt() int { + return int(^uint(0) >> 1) +} + +func minInt() int { + return -maxInt() - 1 +} + func checkpointDeltaDirection(change int) string { switch { case change < 0: - return "down" + return checkpointDeltaDirectionDown case change > 0: - return "up" + return checkpointDeltaDirectionUp default: - return "unchanged" + return checkpointDeltaDirectionUnchanged } } @@ -411,9 +484,9 @@ func checkpointComparisonStatus(total *checkpointTokensMetricDelta) string { return checkpointComparisonStatusUnavailable } switch total.Direction { - case "down": + case checkpointDeltaDirectionDown: return checkpointComparisonStatusObservedReduction - case "up": + case checkpointDeltaDirectionUp: return checkpointComparisonStatusObservedIncrease default: return checkpointComparisonStatusObservedNoChange @@ -479,6 +552,45 @@ func writeCheckpointTokensText(w io.Writer, report checkpointTokensReport) { writeTokenLimitations(w, report.Limitations) } +func writeCheckpointTokensAgentBrief(w io.Writer, report checkpointTokensReport) { + fmt.Fprintln(w, "Checkpoint token brief") + fmt.Fprintf(w, "Checkpoint: %s\n", report.CheckpointID) + fmt.Fprintln(w) + fmt.Fprintln(w, agentBriefUsageLine(report.Tokens)) + fmt.Fprintln(w) + fmt.Fprintln(w, "Next best action:") + fmt.Fprintln(w, checkpointAgentBriefNextAction(report)) + + signals := agentBriefSignals(checkpointAgentBriefSessionReport(report)) + if len(signals) > 0 { + fmt.Fprintln(w) + fmt.Fprintln(w, "Signals:") + for _, signal := range signals { + fmt.Fprintf(w, "- %s\n", signal) + } + } +} + +func checkpointAgentBriefNextAction(report checkpointTokensReport) string { + sessionReport := checkpointAgentBriefSessionReport(report) + if hasTokenRecommendation(sessionReport, "no-token-data") { + return "Do not spend extra commands on token optimization for this checkpoint. Continue with the task and capture a newer checkpoint before rechecking tokens." + } + if action, ok := agentBriefOptimizationAction(sessionReport); ok { + return action + } + return "Continue normally; no high-signal token optimization is available from this checkpoint." +} + +func checkpointAgentBriefSessionReport(report checkpointTokensReport) sessionTokensReport { + return sessionTokensReport{ + Tokens: report.Tokens, + Context: report.Context, + Recommendations: report.Recommendations, + Limitations: report.Limitations, + } +} + func writeCheckpointTokenComparison(w io.Writer, comparison *checkpointTokensComparison) { if comparison == nil { return @@ -489,7 +601,10 @@ func writeCheckpointTokenComparison(w io.Writer, comparison *checkpointTokensCom fmt.Fprintf(w, "Baseline: %s\n", comparison.BaselineCheckpointID) if comparison.Status != checkpointComparisonStatusUnavailable { fmt.Fprintf(w, "Total tokens: %s\n", formatCheckpointMetricDelta(comparison.Total, formatTokenCount)) + fmt.Fprintf(w, "Input: %s\n", formatCheckpointMetricDelta(comparison.Input, formatTokenCount)) fmt.Fprintf(w, "Cache/context replay: %s\n", formatCheckpointMetricDelta(comparison.CacheRead, formatTokenCount)) + fmt.Fprintf(w, "Cache write: %s\n", formatCheckpointMetricDelta(comparison.CacheWrite, formatTokenCount)) + fmt.Fprintf(w, "Output: %s\n", formatCheckpointMetricDelta(comparison.Output, formatTokenCount)) fmt.Fprintf(w, "API calls: %s\n", formatCheckpointMetricDelta(comparison.APICalls, formatPlainCount)) } fmt.Fprintln(w) @@ -503,7 +618,7 @@ func formatCheckpointMetricDelta(delta *checkpointTokensMetricDelta, formatValue } from := formatValue(delta.Baseline) to := formatValue(delta.Current) - if delta.Direction == "unchanged" { + if delta.Direction == checkpointDeltaDirectionUnchanged { return fmt.Sprintf("unchanged (%s -> %s)", from, to) } if delta.ChangePercent == nil { diff --git a/cmd/entire/cli/labs.go b/cmd/entire/cli/labs.go index 6143b9c633..3933515f07 100644 --- a/cmd/entire/cli/labs.go +++ b/cmd/entire/cli/labs.go @@ -9,51 +9,61 @@ import ( ) type experimentalCommandInfo struct { - Name string - Invocation string - Summary string + CommandPath []string + Invocation string + Summary string } var experimentalCommands = []experimentalCommandInfo{ { - Name: "review", - Invocation: "entire review", - Summary: "Run configured review skills against the current branch", + CommandPath: []string{"review"}, + Invocation: "entire review", + Summary: "Run configured review skills against the current branch", }, { - Name: "investigate", - Invocation: "entire investigate", - Summary: "Run a multi-agent investigation against a topic, issue, or seed doc", + CommandPath: []string{"investigate"}, + Invocation: "entire investigate", + Summary: "Run a multi-agent investigation against a topic, issue, or seed doc", }, { - Name: "org", - Invocation: "entire org", - Summary: "Manage Entire organizations (create, list)", + CommandPath: []string{"tokens"}, + Invocation: "entire tokens", + Summary: "Analyze experimental token usage diagnostics", }, { - Name: "project", - Invocation: "entire project", - Summary: "Manage Entire projects (create, list)", + CommandPath: []string{"tokens", "profile"}, + Invocation: "entire tokens profile", + Summary: "Aggregate token usage across committed checkpoints", }, { - Name: "repo", - Invocation: "entire repo", - Summary: "Manage Entire repositories (create, list, get, delete)", + CommandPath: []string{"org"}, + Invocation: "entire org", + Summary: "Manage Entire organizations (create, list)", }, { - Name: "grant", - Invocation: "entire grant", - Summary: "Manage access grants and org membership (org, project, repo)", + CommandPath: []string{"project"}, + Invocation: "entire project", + Summary: "Manage Entire projects (create, list)", }, { - Name: "blame", - Invocation: "entire blame", - Summary: "Show which lines came from Entire checkpoints", + CommandPath: []string{"repo"}, + Invocation: "entire repo", + Summary: "Manage Entire repositories (create, list, get, delete)", }, { - Name: "why", - Invocation: "entire why", - Summary: "Show why a line exists (commit, checkpoint, prompt, session)", + CommandPath: []string{"grant"}, + Invocation: "entire grant", + Summary: "Manage access grants and org membership (org, project, repo)", + }, + { + CommandPath: []string{"blame"}, + Invocation: "entire blame", + Summary: "Show which lines came from Entire checkpoints", + }, + { + CommandPath: []string{"why"}, + Invocation: "entire why", + Summary: "Show why a line exists (commit, checkpoint, prompt, session)", }, } @@ -97,6 +107,8 @@ Available experimental commands: Try: entire review --help entire investigate --help + entire tokens --help + entire tokens profile --help entire org --help entire project --help entire repo --help diff --git a/cmd/entire/cli/labs_test.go b/cmd/entire/cli/labs_test.go index 7ec74b7f70..8121fa9e7f 100644 --- a/cmd/entire/cli/labs_test.go +++ b/cmd/entire/cli/labs_test.go @@ -26,6 +26,9 @@ func TestLabsCmd_PrintsExperimentalCommandList(t *testing.T) { "Available experimental commands", "entire review", "entire review --help", + "entire tokens", + "entire tokens profile", + "entire tokens profile --help", } { if !strings.Contains(got, want) { t.Fatalf("entire labs output missing %q:\n%s", want, got) @@ -93,8 +96,13 @@ func TestRootHelp_ShowsLabsButHidesReview(t *testing.T) { if !strings.Contains(got, "labs") || !strings.Contains(got, "Explore experimental Entire workflows") { t.Fatalf("root help should include labs command, got:\n%s", got) } - if strings.Contains(got, "review") { - t.Fatalf("root help should not include review while it is listed in labs, got:\n%s", got) + for _, hiddenExperimentalCommand := range []string{ + "review", + "tokens Analyze token usage across sessions and checkpoints", + } { + if strings.Contains(got, hiddenExperimentalCommand) { + t.Fatalf("root help should not include %q while it is listed in labs, got:\n%s", hiddenExperimentalCommand, got) + } } } @@ -143,12 +151,12 @@ func TestRenderExperimentalCommands_ColumnWidthAdjustsToLongest(t *testing.T) { t.Parallel() short := []experimentalCommandInfo{ - {Name: "a", Invocation: "entire a", Summary: "first"}, - {Name: "b", Invocation: "entire b", Summary: "second"}, + {Invocation: "entire a", Summary: "first"}, + {Invocation: "entire b", Summary: "second"}, } long := []experimentalCommandInfo{ - {Name: "a", Invocation: "entire a", Summary: "first"}, - {Name: "verylongcommand", Invocation: "entire verylongcommand", Summary: "second"}, + {Invocation: "entire a", Summary: "first"}, + {Invocation: "entire verylongcommand", Summary: "second"}, } shortCol := summaryColumns(t, short)[0] @@ -173,8 +181,8 @@ func TestRenderExperimentalCommands_MultiByteInvocationAligns(t *testing.T) { // padding, len("entire ▶▶") == 13 >= 12 would skip padding and misalign the // row; rune-based padding correctly adds 3 spaces. commands := []experimentalCommandInfo{ - {Name: "long", Invocation: "entire aaaaa", Summary: "first"}, - {Name: "multibyte", Invocation: "entire ▶▶", Summary: "second"}, + {Invocation: "entire aaaaa", Summary: "first"}, + {Invocation: "entire ▶▶", Summary: "second"}, } if got := len("entire ▶▶"); got < 12 { @@ -192,12 +200,12 @@ func TestLabsRegistryCommandsExistAtCanonicalPaths(t *testing.T) { root := NewRootCmd() for _, info := range experimentalCommands { - cmd, _, err := root.Find([]string{info.Name}) + cmd, _, err := root.Find(info.CommandPath) if err != nil { - t.Fatalf("labs command %q should exist at canonical path: %v", info.Name, err) + t.Fatalf("labs command %q should exist at canonical path: %v", info.Invocation, err) } if cmd == nil { - t.Fatalf("labs command %q resolved to nil command", info.Name) + t.Fatalf("labs command %q resolved to nil command", info.Invocation) } } } diff --git a/cmd/entire/cli/root.go b/cmd/entire/cli/root.go index b4c26447b5..7145ae14e9 100644 --- a/cmd/entire/cli/root.go +++ b/cmd/entire/cli/root.go @@ -84,6 +84,7 @@ func NewRootCmd() *cobra.Command { // Noun groups (canonical homes for subcommands). cmd.AddCommand(newSessionsCmd()) // 'session' (with 'sessions' as Cobra alias) cmd.AddCommand(newCheckpointGroupCmd()) // 'checkpoint' / 'cp' / 'checkpoints' + cmd.AddCommand(newTokensGroupCmd()) // 'tokens' cmd.AddCommand(newAgentGroupCmd()) // 'agent' cmd.AddCommand(newAuthCmd()) // 'auth' cmd.AddCommand(newDoctorCmd()) // 'doctor' (group: trace/logs/bundle) diff --git a/cmd/entire/cli/session_tokens.go b/cmd/entire/cli/session_tokens.go index 95160f921b..719adea105 100644 --- a/cmd/entire/cli/session_tokens.go +++ b/cmd/entire/cli/session_tokens.go @@ -66,6 +66,8 @@ type tokenRecommendationSignals struct { CheckpointCount int } +const agentBriefCostProxyBatchAction = "Use at most 3 batched reads before answering. Continue only if a named file or test can change the verdict; otherwise answer now. Avoid broad grep, broad diffs, broad tests, and repeated token diagnostics; keep the answer tight." + func newTokensCmd() *cobra.Command { var jsonFlag bool var currentFlag bool @@ -300,6 +302,24 @@ func recommendationRules(signals tokenRecommendationSignals) []sessionTokensReco Signals: []string{"api_call_count"}, }) } + if signals.Tokens != nil && signals.Tokens.Total > 0 && + tokenClassPressure(signals.Tokens.CacheWrite, signals.Tokens.Total, 5000, 10, 50_000) { + recs = append(recs, sessionTokensRecommendation{ + ID: "cache-write-pressure", + Severity: "medium", + Message: "Cache write is elevated; avoid broad new context and narrow the next read before continuing.", + Signals: []string{"cache_write_tokens"}, + }) + } + if signals.Tokens != nil && signals.Tokens.Total > 0 && + tokenClassPressure(signals.Tokens.Output, signals.Tokens.Total, 3000, 2, 10_000) { + recs = append(recs, sessionTokensRecommendation{ + ID: "output-pressure", + Severity: "medium", + Message: "Output tokens are elevated; keep the next answer tight and avoid restating evidence.", + Signals: []string{"output_tokens"}, + }) + } if signals.Tokens != nil && signals.Tokens.SubagentTotal > 0 && signals.Tokens.SubagentTotal*100 >= signals.Tokens.Total*10 { recs = append(recs, sessionTokensRecommendation{ ID: "subagent-heavy", @@ -336,6 +356,16 @@ func recommendationRules(signals tokenRecommendationSignals) []sessionTokensReco return recs } +func tokenClassPressure(value, total, minTokens int, minPercent float64, highTokens int) bool { + if value <= 0 || total <= 0 { + return false + } + if value >= highTokens { + return true + } + return value >= minTokens && tokenPercent(value, total) >= minPercent +} + func tokenPercent(value, total int) float64 { if total <= 0 { return 0 @@ -442,23 +472,40 @@ func formatAPICalls(count int) string { } func agentBriefNextAction(report sessionTokensReport) string { + if hasTokenRecommendation(report, "no-token-data") { + return "Token usage is not available yet. Use this as a context check, not a spend diagnosis; continue after the next checkpoint captures usage." + } + if action, ok := agentBriefOptimizationAction(report); ok { + return action + } + return "Continue normally; no high-signal token optimization is available from this session yet." +} + +func agentBriefOptimizationAction(report sessionTokensReport) (string, bool) { switch { + case (hasTokenRecommendation(report, "cache-write-pressure") || hasTokenRecommendation(report, "output-pressure")) && + (hasTokenRecommendation(report, "context-replay-hotspot") || hasTokenRecommendation(report, "api-call-amplification")): + return agentBriefCostProxyBatchAction, true + case hasTokenRecommendation(report, "cache-write-pressure") && hasTokenRecommendation(report, "output-pressure"): + return agentBriefCostProxyBatchAction, true + case hasTokenRecommendation(report, "cache-write-pressure"): + return "Use at most 3 batched reads and avoid broad new context until you have one narrowed hypothesis.", true + case hasTokenRecommendation(report, "output-pressure"): + return "Keep the next answer tight; cite only necessary evidence and avoid restating prior context.", true case hasTokenRecommendation(report, "context-replay-hotspot") && hasTokenRecommendation(report, "api-call-amplification"): - return "Summarize the useful findings, then batch the next diagnostic step. Avoid more exploratory reads until you have a narrowed hypothesis." + return agentBriefCostProxyBatchAction, true case hasTokenRecommendation(report, "api-call-amplification"): - return "Batch the next diagnostic step around one narrowed hypothesis before making more tool calls." + return agentBriefCostProxyBatchAction, true case hasTokenRecommendation(report, "context-replay-hotspot"): - return "Summarize the current useful findings before continuing, and keep the next prompt narrow." - case hasTokenRecommendation(report, "no-token-data"): - return "Token usage is not available yet. Use this as a context check, not a spend diagnosis; continue after the next checkpoint captures usage." + return "Use at most 2 focused reads only if a named file or test can change the answer; otherwise answer now. Avoid broad grep, broad diffs, and broad tests.", true case hasTokenRecommendation(report, "subagent-heavy"): - return "Keep the next agent or subagent task narrow with a concrete expected output; avoid broad parallel exploration." + return "Do not launch broad subagents. Use one narrowly scoped check with a concrete expected output.", true case hasTokenRecommendation(report, "high-context-pressure"): - return "Preserve the useful findings and compact or restart before adding more broad context." + return "Preserve useful findings, then answer with at most 2 focused reads if more evidence is required.", true case hasTokenRecommendation(report, "long-session"): - return "Compact or restart after summarizing useful findings if older context is no longer needed." + return "Summarize useful findings and stop unless one focused read can change the answer.", true default: - return "Continue normally; no high-signal token optimization is available from this session yet." + return "", false } } @@ -470,6 +517,12 @@ func agentBriefSignals(report sessionTokensReport) []string { if hasTokenRecommendation(report, "api-call-amplification") { signals = append(signals, "API call count is high for one session.") } + if hasTokenRecommendation(report, "cache-write-pressure") { + signals = append(signals, "Cache write/new context pressure is elevated.") + } + if hasTokenRecommendation(report, "output-pressure") { + signals = append(signals, "Output pressure is elevated.") + } if hasTokenRecommendation(report, "subagent-heavy") { signals = append(signals, "Subagent usage is a meaningful part of total tokens.") } @@ -506,8 +559,12 @@ func writeTokenRecommendations(w io.Writer, recs []sessionTokensRecommendation) } func writeTokenUsageSection(w io.Writer, tokens *sessionTokensUsage) { + writeTokenUsageSectionWithTitle(w, "Token usage", tokens) +} + +func writeTokenUsageSectionWithTitle(w io.Writer, title string, tokens *sessionTokensUsage) { fmt.Fprintln(w) - fmt.Fprintln(w, "Token usage") + fmt.Fprintln(w, title) if tokens != nil { fmt.Fprintf(w, "Total: %s tokens\n", formatTokenCount(tokens.Total)) parts := []string{ diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index 14a5338232..57d3ef7bfb 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1165,6 +1165,61 @@ func reportHasSessionRecommendation(report sessionTokensReport, id string) bool return false } +func TestRecommendationRules_CacheWritePressure(t *testing.T) { + t.Parallel() + + recs := recommendationRules(tokenRecommendationSignals{ + Tokens: &sessionTokensUsage{ + Total: 50_000, + CacheWrite: 6_000, + }, + }) + + if !recommendationsIncludeID(recs, "cache-write-pressure") { + t.Fatalf("expected cache-write-pressure recommendation, got %+v", recs) + } +} + +func TestRecommendationRules_OutputPressure(t *testing.T) { + t.Parallel() + + recs := recommendationRules(tokenRecommendationSignals{ + Tokens: &sessionTokensUsage{ + Total: 100_000, + Output: 3_500, + }, + }) + + if !recommendationsIncludeID(recs, "output-pressure") { + t.Fatalf("expected output-pressure recommendation, got %+v", recs) + } +} + +func TestRecommendationRules_OutputPressureWithLargeCacheReplay(t *testing.T) { + t.Parallel() + + recs := recommendationRules(tokenRecommendationSignals{ + Tokens: &sessionTokensUsage{ + Total: 10_000_000, + CacheRead: 9_800_000, + Output: 100_000, + }, + }) + + if !recommendationsIncludeID(recs, "output-pressure") { + t.Fatalf("expected output-pressure recommendation for high absolute output, got %+v", recs) + } +} + +func recommendationsIncludeID(recs []sessionTokensRecommendation, id string) bool { + for _, rec := range recs { + if rec.ID == id { + return true + } + } + return false +} + func TestTokensCmd_AgentBriefPrioritizesNextAction(t *testing.T) { setupStopTestRepo(t) @@ -1198,10 +1253,14 @@ func TestTokensCmd_AgentBriefPrioritizesNextAction(t *testing.T) { "Session: test-tokens-brief", "Token usage: 6213.6k total; 97.4% cache/context replay; 70 API calls.", "Next best action:", - "Summarize the useful findings, then batch the next diagnostic step.", + "Use at most 3 batched reads before answering.", + "Continue only if a named file or test can change the verdict; otherwise answer now.", + "Avoid broad grep, broad diffs, broad tests, and repeated token diagnostics; keep the answer tight.", "Signals:", "- Cache/context replay dominates token volume.", "- API call count is high for one session.", + "- Cache write/new context pressure is elevated.", + "- Output pressure is elevated.", } for _, check := range checks { if !strings.Contains(out, check) { @@ -1245,7 +1304,8 @@ func TestTokensCmd_AgentBriefHighCacheReplayWithoutHighAPICalls(t *testing.T) { out := stdout.String() checks := []string{ "Token usage: 637.7k total; 95.5% cache/context replay; 3 API calls.", - "Summarize the current useful findings before continuing, and keep the next prompt narrow.", + "Use at most 2 focused reads only if a named file or test can change the answer; otherwise answer now.", + "Avoid broad grep, broad diffs, and broad tests.", "- Cache/context replay dominates token volume.", } for _, check := range checks { @@ -1286,7 +1346,8 @@ func TestTokensCmd_AgentBriefHighAPICallsWithoutCacheReplay(t *testing.T) { out := stdout.String() checks := []string{ "Token usage: 11k total; 25 API calls.", - "Batch the next diagnostic step around one narrowed hypothesis before making more tool calls.", + "Use at most 3 batched reads before answering.", + "Avoid broad grep, broad diffs, broad tests, and repeated token diagnostics; keep the answer tight.", "- API call count is high for one session.", } for _, check := range checks { @@ -1339,6 +1400,112 @@ func TestTokensCmd_AgentBriefNoTokenData(t *testing.T) { } } +func TestSessionTokensAgentBriefClassAwareCostProxy(t *testing.T) { + t.Parallel() + + tokens := &sessionTokensUsage{ + Total: 50_000, + CacheWrite: 6_000, + Output: 3_500, + APICalls: 4, + } + report := sessionTokensReport{ + SessionID: "test-cost-proxy-brief", + Tokens: tokens, + Recommendations: recommendationRules(tokenRecommendationSignals{Tokens: tokens}), + } + + var stdout bytes.Buffer + writeSessionTokensAgentBrief(&stdout, report) + + out := stdout.String() + checks := []string{ + "Session token brief", + "Session: test-cost-proxy-brief", + "Use at most 3 batched reads", + "Avoid broad grep, broad diffs, broad tests", + "otherwise answer now", + "keep the answer tight", + "- Cache write/new context pressure is elevated.", + "- Output pressure is elevated.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + +func TestCheckpointTokensAgentBriefClassAwareCostProxy(t *testing.T) { + t.Parallel() + + tokens := &sessionTokensUsage{ + Total: 50_000, + CacheWrite: 6_000, + Output: 3_500, + APICalls: 4, + } + report := checkpointTokensReport{ + CheckpointID: "c05e500cafe0", + Tokens: tokens, + Recommendations: recommendationRules(tokenRecommendationSignals{Tokens: tokens}), + } + + var stdout bytes.Buffer + writeCheckpointTokensAgentBrief(&stdout, report) + + out := stdout.String() + checks := []string{ + "Checkpoint token brief", + "Checkpoint: c05e500cafe0", + "Use at most 3 batched reads", + "Avoid broad grep, broad diffs, broad tests", + "otherwise answer now", + "keep the answer tight", + "- Cache write/new context pressure is elevated.", + "- Output pressure is elevated.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + +func TestCheckpointTokensAgentBriefCombinesOutputAndReplayPressure(t *testing.T) { + t.Parallel() + + tokens := &sessionTokensUsage{ + Total: 10_000_000, + CacheRead: 9_800_000, + Output: 100_000, + APICalls: 25, + } + report := checkpointTokensReport{ + CheckpointID: "c05e501cafe0", + Tokens: tokens, + Recommendations: recommendationRules(tokenRecommendationSignals{Tokens: tokens}), + } + + var stdout bytes.Buffer + writeCheckpointTokensAgentBrief(&stdout, report) + + out := stdout.String() + checks := []string{ + "Use at most 3 batched reads", + "Avoid broad grep, broad diffs, broad tests", + "keep the answer tight", + "- Cache/context replay dominates token volume.", + "- API call count is high for one session.", + "- Output pressure is elevated.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + func TestSessionsCmd_TokensSubcommand(t *testing.T) { setupStopTestRepo(t) @@ -1438,6 +1605,21 @@ func TestTokensCmd_JSONAndAgentBriefAreMutuallyExclusive(t *testing.T) { } } +func TestCheckpointTokensCmd_JSONAndAgentBriefAreMutuallyExclusive(t *testing.T) { + t.Parallel() + + cmd := newCheckpointGroupCmd() + cmd.SetArgs([]string{"tokens", "abc123", "--json", "--agent-brief"}) + + err := cmd.ExecuteContext(context.Background()) + if err == nil { + t.Fatal("expected error for --json with --agent-brief") + } + if !strings.Contains(err.Error(), "mutually exclusive") { + t.Fatalf("expected mutually exclusive error, got: %v", err) + } +} + func TestTokensCmd_PrioritizesContextReplayHotspot(t *testing.T) { setupStopTestRepo(t) @@ -1641,6 +1823,97 @@ func TestCheckpointTokensCmd_TextOutputWithRealCheckpointShape(t *testing.T) { } } +func TestCheckpointTokensCmd_AgentBriefGivesOperationalBudget(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + cpID := id.MustCheckpointID("b1efbeefcafe") + if err := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()).WriteCommitted(ctx, checkpoint.WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "checkpoint-token-brief", + Strategy: strategy.StrategyNameManualCommit, + Branch: "e2e-triage-fix", + Agent: testAgentClaude, + Transcript: redact.AlreadyRedacted([]byte(`{"type":"user","message":{"content":[{"type":"text","text":"why is slack failing"}]}}` + "\n")), + AuthorName: "Test", + AuthorEmail: "test@example.com", + TokenUsage: &agent.TokenUsage{ + InputTokens: 94, + CacheCreationTokens: 122171, + CacheReadTokens: 6052424, + OutputTokens: 38956, + APICallCount: 70, + }, + }); err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "b1efbeef", "--agent-brief"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Checkpoint token brief", + "Checkpoint: b1efbeefcafe", + "Token usage: 6213.6k total; 97.4% cache/context replay; 70 API calls.", + "Next best action:", + "Use at most 3 batched reads before answering.", + "Continue only if a named file or test can change the verdict; otherwise answer now.", + "Avoid broad grep, broad diffs, broad tests, and repeated token diagnostics; keep the answer tight.", + "Signals:", + "- Cache/context replay dominates token volume.", + "- API call count is high for one session.", + "- Cache write/new context pressure is elevated.", + "- Output pressure is elevated.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } + for _, verboseSection := range []string{"Recommendations", "Likely contributors", "Limitations"} { + if strings.Contains(out, verboseSection) { + t.Fatalf("expected agent brief to omit %s section, got:\n%s", verboseSection, out) + } + } +} + +func TestCheckpointTokensCmd_AgentBriefMissingTokenData(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) + cpID := id.MustCheckpointID("deadcafebeef") + writeCommittedTokenCheckpoint(ctx, t, store, cpID, "checkpoint-token-missing-brief", nil) + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "deadcafe", "--agent-brief"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Checkpoint token brief", + "Checkpoint: deadcafebeef", + "Token usage: unavailable.", + "Do not spend extra commands on token optimization for this checkpoint.", + "- Token usage is unavailable for this session.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + func TestCheckpointTokensCmd_TextOutputWithMultipleSessionsUsesAggregateScope(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() @@ -1889,6 +2162,7 @@ func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { InputTokens: 200_000, CacheCreationTokens: 50_000, CacheReadTokens: 750_000, + OutputTokens: 10_000, APICallCount: 10, }, }); err != nil { @@ -1905,8 +2179,9 @@ func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { AuthorEmail: "test@example.com", TokenUsage: &agent.TokenUsage{ InputTokens: 150_000, - CacheCreationTokens: 50_000, + CacheCreationTokens: 25_000, CacheReadTokens: 300_000, + OutputTokens: 25_000, APICallCount: 4, }, }); err != nil { @@ -1930,12 +2205,16 @@ func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { "Total: 500k tokens", "Comparison", "Baseline: aaa111bbb222", - "Total tokens: down 50% (1000k -> 500k)", + "Total tokens: down 50.5% (1010k -> 500k)", + "Input: down 25% (200k -> 150k)", "Cache/context replay: down 60% (750k -> 300k)", + "Cache write: down 50% (50k -> 25k)", + "Output: up 150% (10k -> 25k)", "API calls: down 60% (10 -> 4)", "Qualification", "Observed token use decreased for this checkpoint comparison.", "This does not prove quality was preserved", + "Cost-proxy pressure increased for output", } for _, check := range checks { if !strings.Contains(out, check) { @@ -1944,6 +2223,35 @@ func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { } } +func TestCheckpointTokensCmd_RejectsSelfComparison(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) + cpID := id.MustCheckpointID("abc222abc222") + + writeCommittedTokenCheckpoint(ctx, t, store, cpID, "checkpoint-token-self-compare", &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 50, + APICallCount: 1, + }) + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "abc222", "--compare", "abc222abc222"}) + + err := cmd.ExecuteContext(ctx) + if err == nil { + t.Fatal("expected self-comparison error, got nil") + } + if !strings.Contains(err.Error(), "cannot compare checkpoint abc222abc222 to itself") { + t.Fatalf("expected self-comparison error, got: %v", err) + } + if stdout.Len() != 0 { + t.Fatalf("expected no report output for self-comparison, got:\n%s", stdout.String()) + } +} + func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() @@ -1960,10 +2268,11 @@ func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { AuthorName: "Test", AuthorEmail: "test@example.com", TokenUsage: &agent.TokenUsage{ - InputTokens: 100, - CacheReadTokens: 300, - OutputTokens: 100, - APICallCount: 5, + InputTokens: 100, + CacheCreationTokens: 50, + CacheReadTokens: 300, + OutputTokens: 100, + APICallCount: 5, }, }); err != nil { t.Fatalf("WriteCommitted() baseline error = %v", err) @@ -1977,10 +2286,11 @@ func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { AuthorName: "Test", AuthorEmail: "test@example.com", TokenUsage: &agent.TokenUsage{ - InputTokens: 120, - CacheReadTokens: 480, - OutputTokens: 200, - APICallCount: 8, + InputTokens: 120, + CacheCreationTokens: 80, + CacheReadTokens: 480, + OutputTokens: 200, + APICallCount: 8, }, }); err != nil { t.Fatalf("WriteCommitted() current error = %v", err) @@ -2014,18 +2324,77 @@ func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { if result.Comparison.Total == nil { t.Fatalf("expected total delta, got nil") } - if result.Comparison.Total.Baseline != 500 || result.Comparison.Total.Current != 800 { + if result.Comparison.Total.Baseline != 550 || result.Comparison.Total.Current != 880 { t.Fatalf("unexpected total delta: %+v", result.Comparison.Total) } - if result.Comparison.Total.Change != 300 { - t.Fatalf("expected total change 300, got %+v", result.Comparison.Total) + if result.Comparison.Total.Change != 330 { + t.Fatalf("expected total change 330, got %+v", result.Comparison.Total) } - if result.Comparison.Total.Direction != "up" { + if result.Comparison.Total.Direction != checkpointDeltaDirectionUp { t.Fatalf("expected total direction up, got %+v", result.Comparison.Total) } if result.Comparison.Total.ChangePercent == nil || *result.Comparison.Total.ChangePercent != 60 { t.Fatalf("expected total change percent 60, got %+v", result.Comparison.Total) } + if result.Comparison.Input == nil || result.Comparison.Input.Change != 20 { + t.Fatalf("expected input change 20, got %+v", result.Comparison.Input) + } + if result.Comparison.CacheWrite == nil || result.Comparison.CacheWrite.Change != 30 { + t.Fatalf("expected cache write change 30, got %+v", result.Comparison.CacheWrite) + } + if result.Comparison.Output == nil || result.Comparison.Output.Change != 100 { + t.Fatalf("expected output change 100, got %+v", result.Comparison.Output) + } +} + +func TestCheckpointTokensCmd_JSONComparisonQualifiesCostProxyPressure(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) + baselineID := id.MustCheckpointID("c0a111c0a111") + currentID := id.MustCheckpointID("c0a222c0a222") + + writeCommittedTokenCheckpoint(ctx, t, store, baselineID, "checkpoint-token-cost-proxy-baseline", &agent.TokenUsage{ + InputTokens: 100_000, + CacheReadTokens: 100_000, + APICallCount: 6, + }) + writeCommittedTokenCheckpoint(ctx, t, store, currentID, "checkpoint-token-cost-proxy-current", &agent.TokenUsage{ + InputTokens: 50_000, + CacheCreationTokens: 30_000, + OutputTokens: 30_000, + APICallCount: 4, + }) + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "c0a222", "--compare", "c0a111", "--json"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + var result checkpointTokensReport + if err := json.Unmarshal(stdout.Bytes(), &result); err != nil { + t.Fatalf("expected valid JSON, got parse error: %v\noutput: %s", err, stdout.String()) + } + if result.Comparison == nil { + t.Fatalf("expected comparison, got nil") + } + if result.Comparison.Status != checkpointComparisonStatusObservedReduction { + t.Fatalf("expected observed reduction, got %q", result.Comparison.Status) + } + checks := []string{ + "Cost-proxy pressure increased", + "cache write", + "output", + } + for _, check := range checks { + if !strings.Contains(result.Comparison.Qualification, check) { + t.Fatalf("expected %q in qualification, got %q", check, result.Comparison.Qualification) + } + } } func TestCheckpointTokensCmd_ComparisonNoChange(t *testing.T) { @@ -2070,6 +2439,29 @@ func TestCheckpointTokensCmd_ComparisonNoChange(t *testing.T) { } } +func TestBuildCheckpointMetricDeltaClampsChangeOverflow(t *testing.T) { + t.Parallel() + + maxInt := int(^uint(0) >> 1) + minInt := -maxInt - 1 + + up := buildCheckpointMetricDelta(minInt, maxInt) + if up.Change != maxInt { + t.Fatalf("upward overflow change = %d, want %d", up.Change, maxInt) + } + if up.Direction != checkpointDeltaDirectionUp { + t.Fatalf("upward overflow direction = %q, want up", up.Direction) + } + + down := buildCheckpointMetricDelta(maxInt, minInt) + if down.Change != minInt { + t.Fatalf("downward overflow change = %d, want %d", down.Change, minInt) + } + if down.Direction != checkpointDeltaDirectionDown { + t.Fatalf("downward overflow direction = %q, want down", down.Direction) + } +} + func TestCheckpointTokensCmd_ComparisonUnavailableWhenBaselineTokenDataMissing(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() diff --git a/cmd/entire/cli/tokens_profile.go b/cmd/entire/cli/tokens_profile.go new file mode 100644 index 0000000000..c888db7506 --- /dev/null +++ b/cmd/entire/cli/tokens_profile.go @@ -0,0 +1,416 @@ +package cli + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + + "github.com/entireio/cli/cmd/entire/cli/agent" + "github.com/entireio/cli/cmd/entire/cli/checkpoint" + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/spf13/cobra" +) + +type tokensProfileReport struct { + Source string `json:"source"` + UsageScope string `json:"usage_scope"` + CheckpointsAvailable int `json:"checkpoints_available"` + CheckpointsAnalyzed int `json:"checkpoints_analyzed"` + CheckpointsWithTokenData int `json:"checkpoints_with_token_data"` + MissingTokenData int `json:"missing_token_data"` + MetadataReadWarnings int `json:"metadata_read_warnings,omitempty"` + Tokens *sessionTokensUsage `json:"tokens,omitempty"` + Signals []tokensProfileSignal `json:"signals,omitempty"` + Recommendations []sessionTokensRecommendation `json:"recommendations,omitempty"` + Limitations []string `json:"limitations,omitempty"` +} + +type tokensProfileSignal struct { + ID string `json:"id"` + Label string `json:"label"` + Count int `json:"count"` + Percent int `json:"percent"` + CheckpointIDs []string `json:"checkpoint_ids,omitempty"` +} + +type tokensProfileSignalDefinition struct { + id string + label string +} + +var tokensProfileSignalDefinitions = []tokensProfileSignalDefinition{ + {id: "context-replay-hotspot", label: "Cache/context replay hotspot"}, + {id: "api-call-amplification", label: "API call amplification"}, + {id: "subagent-heavy", label: "Subagent-heavy sessions"}, + {id: "missing-token-data", label: "Missing token data"}, +} + +const tokensProfileUsageScopeCheckpointObserved = "checkpoint_observed" + +func newTokensGroupCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "tokens", + Short: "Analyze token usage across sessions and checkpoints", + Hidden: true, + Long: `Analyze token usage across sessions and checkpoints. + +Commands: + profile Aggregate token usage across committed checkpoints + +Examples: + entire tokens profile + entire tokens profile --json`, + RunE: func(cmd *cobra.Command, _ []string) error { + return cmd.Help() + }, + } + + cmd.AddCommand(newTokensProfileCmd()) + return cmd +} + +func newTokensProfileCmd() *cobra.Command { + var jsonFlag bool + var limitFlag int + var allFlag bool + + cmd := &cobra.Command{ + Use: "profile", + Short: "Aggregate token usage and recommendations across checkpoint history", + Long: `Aggregate token usage and recommendations across committed checkpoint history. + +The profile reads committed checkpoint metadata only. It does not inspect +transcripts or source files, so it is deterministic and avoids adding token +cost while diagnosing token usage. By default it scans the latest 50 committed +checkpoints; use --limit or --all to change the scope.`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + limit := limitFlag + if allFlag { + limit = 0 + } else if limit <= 0 { + return errors.New("--limit must be positive unless --all is used") + } + return runTokensProfile(cmd.Context(), cmd, jsonFlag, limit) + }, + } + + cmd.Flags().BoolVar(&jsonFlag, "json", false, "Output as JSON") + cmd.Flags().IntVar(&limitFlag, "limit", 50, "Maximum committed checkpoints to analyze") + cmd.Flags().BoolVar(&allFlag, "all", false, "Analyze all committed checkpoints") + cmd.MarkFlagsMutuallyExclusive("limit", "all") + return cmd +} + +func runTokensProfile(ctx context.Context, cmd *cobra.Command, jsonOutput bool, limit int) error { + repo, err := openRepository(ctx) + if err != nil { + cmd.SilenceUsage = true + fmt.Fprintln(cmd.ErrOrStderr(), "Not a git repository.") + return NewSilentError(err) + } + defer repo.Close() + + store := checkpoint.NewGitStore(repo, checkpoint.ResolveCommittedRefs(ctx)) + store.SetBlobFetcher(FetchBlobsByHash) + infos, err := store.ListCommitted(ctx) + if err != nil { + return fmt.Errorf("failed to list checkpoints: %w", err) + } + + report, err := buildTokensProfileReport(ctx, store, infos, limit) + if err != nil { + return err + } + + if jsonOutput { + return writeTokensProfileJSON(cmd.OutOrStdout(), report) + } + writeTokensProfileText(cmd.OutOrStdout(), report) + return nil +} + +func buildTokensProfileReport(ctx context.Context, store *checkpoint.GitStore, infos []checkpoint.CommittedInfo, limit int) (tokensProfileReport, error) { + checkpointsAvailable := len(infos) + infos = limitTokensProfileCheckpoints(infos, limit) + report := tokensProfileReport{ + Source: "committed_checkpoints", + UsageScope: tokensProfileUsageScopeCheckpointObserved, + CheckpointsAvailable: checkpointsAvailable, + CheckpointsAnalyzed: len(infos), + } + signals := make(map[string]*tokensProfileSignal, len(tokensProfileSignalDefinitions)) + var aggregate *agent.TokenUsage + + for _, info := range infos { + if err := ctx.Err(); err != nil { + return tokensProfileReport{}, err //nolint:wrapcheck // Propagating context cancellation. + } + + summary, err := store.ReadCommitted(ctx, info.CheckpointID) + if err != nil { + return tokensProfileReport{}, fmt.Errorf("failed to read checkpoint %s: %w", info.CheckpointID, err) + } + if summary == nil { + report.MissingTokenData++ + addTokensProfileSignal(signals, "missing-token-data", info.CheckpointID, report.CheckpointsAnalyzed) + continue + } + + usage, metadataReadWarning, err := tokensProfileCheckpointUsage(ctx, store, info.CheckpointID, summary) + if err != nil { + return tokensProfileReport{}, err + } + if metadataReadWarning { + report.MetadataReadWarnings++ + } + tokens := buildSessionTokensUsage(usage) + if tokens == nil { + report.MissingTokenData++ + addTokensProfileSignal(signals, "missing-token-data", info.CheckpointID, report.CheckpointsAnalyzed) + continue + } + + report.CheckpointsWithTokenData++ + aggregate = addCheckpointTokenUsage(aggregate, usage) + addTokensProfileTokenSignals(signals, info.CheckpointID, tokens, report.CheckpointsAnalyzed) + } + + report.Tokens = buildSessionTokensUsage(aggregate) + report.Signals = orderedTokensProfileSignals(signals) + report.Recommendations = tokensProfileRecommendations(report) + report.Limitations = tokensProfileLimitations(report) + return report, nil +} + +func limitTokensProfileCheckpoints(infos []checkpoint.CommittedInfo, limit int) []checkpoint.CommittedInfo { + if limit <= 0 || len(infos) <= limit { + return infos + } + return infos[:limit] +} + +func tokensProfileCheckpointUsage(ctx context.Context, store *checkpoint.GitStore, checkpointID id.CheckpointID, summary *checkpoint.CheckpointSummary) (*agent.TokenUsage, bool, error) { + if summary == nil { + return nil, false, nil + } + + metas := make([]*checkpoint.CommittedMetadata, 0, len(summary.Sessions)) + metadataReadWarning := false + for i := range len(summary.Sessions) { + meta, err := store.ReadSessionMetadata(ctx, checkpointID, i) + if err != nil { + if ctxErr := ctx.Err(); ctxErr != nil { + return nil, false, ctxErr //nolint:wrapcheck // Propagating context cancellation. + } + metadataReadWarning = true + continue + } + metas = append(metas, meta) + } + sessionUsage := aggregateCheckpointTokenUsage(metas) + if !metadataReadWarning && sessionUsage != nil { + return sessionUsage, false, nil + } + if summary.TokenUsage != nil { + return summary.TokenUsage, metadataReadWarning, nil + } + return sessionUsage, metadataReadWarning, nil +} + +func addTokensProfileTokenSignals(signals map[string]*tokensProfileSignal, checkpointID id.CheckpointID, tokens *sessionTokensUsage, denominator int) { + if tokens == nil { + return + } + if tokens.Total > 0 && tokenPercent(tokens.CacheRead, tokens.Total) >= 80 { + addTokensProfileSignal(signals, "context-replay-hotspot", checkpointID, denominator) + } + if tokens.APICalls >= 20 { + addTokensProfileSignal(signals, "api-call-amplification", checkpointID, denominator) + } + if tokens.Total > 0 && tokens.SubagentTotal*100 >= tokens.Total*10 { + addTokensProfileSignal(signals, "subagent-heavy", checkpointID, denominator) + } +} + +func addTokensProfileSignal(signals map[string]*tokensProfileSignal, signalID string, checkpointID id.CheckpointID, denominator int) { + signal := signals[signalID] + if signal == nil { + definition := tokensProfileSignalDefinitionFor(signalID) + signal = &tokensProfileSignal{ + ID: definition.id, + Label: definition.label, + } + signals[signalID] = signal + } + signal.Count++ + if denominator > 0 { + signal.Percent = roundedPercent(signal.Count, denominator) + } + if checkpointID != "" { + signal.CheckpointIDs = append(signal.CheckpointIDs, checkpointID.String()) + } +} + +func tokensProfileSignalDefinitionFor(signalID string) tokensProfileSignalDefinition { + for _, definition := range tokensProfileSignalDefinitions { + if definition.id == signalID { + return definition + } + } + return tokensProfileSignalDefinition{id: signalID, label: signalID} +} + +func orderedTokensProfileSignals(signals map[string]*tokensProfileSignal) []tokensProfileSignal { + ordered := make([]tokensProfileSignal, 0, len(signals)) + for _, definition := range tokensProfileSignalDefinitions { + if signal := signals[definition.id]; signal != nil { + ordered = append(ordered, *signal) + } + } + return ordered +} + +func tokensProfileRecommendations(report tokensProfileReport) []sessionTokensRecommendation { + var recs []sessionTokensRecommendation + + if report.CheckpointsAnalyzed == 0 { + return []sessionTokensRecommendation{{ + ID: "no-checkpoints", + Severity: "low", + Message: "Create checkpoints first; token profiling needs committed checkpoint metadata to identify patterns.", + Signals: []string{"empty_checkpoint_history"}, + }} + } + + if tokensProfileSignalCount(report.Signals, "context-replay-hotspot") > 0 || + tokensProfileSignalCount(report.Signals, "api-call-amplification") > 0 { + recs = append(recs, sessionTokensRecommendation{ + ID: "search-before-reinvestigation", + Severity: "high", + Message: "Use `entire search` for prior decisions/checkpoints before broad re-investigation.", + Signals: []string{"cache_read_tokens", "api_call_count"}, + }) + } + if tokensProfileSignalCount(report.Signals, "api-call-amplification") > 0 { + recs = append(recs, sessionTokensRecommendation{ + ID: "batch-diagnostics", + Severity: "medium", + Message: "Batch diagnostic reads around one narrowed hypothesis when API call amplification repeats.", + Signals: []string{"api_call_count"}, + }) + } + if tokensProfileSignalCount(report.Signals, "context-replay-hotspot") > 0 { + recs = append(recs, sessionTokensRecommendation{ + ID: "preserve-then-compact", + Severity: "medium", + Message: "Summarize useful findings before continuing large-context work; compact or restart only after preserving relevant context.", + Signals: []string{"cache_read_tokens"}, + }) + } + if tokensProfileSignalCount(report.Signals, "subagent-heavy") > 0 { + recs = append(recs, sessionTokensRecommendation{ + ID: "scope-subagents", + Severity: "medium", + Message: "Scope subagent tasks tightly with a narrow objective and expected output.", + Signals: []string{"subagent_tokens"}, + }) + } + if report.MissingTokenData > 0 { + recs = append(recs, sessionTokensRecommendation{ + ID: "improve-token-coverage", + Severity: "low", + Message: "Increase token coverage by using agents and checkpoints that report token usage.", + Signals: []string{"missing_token_usage"}, + }) + } + + if len(recs) == 0 { + recs = append(recs, sessionTokensRecommendation{ + ID: "no-repeated-hotspots", + Severity: "low", + Message: "No repeated token hotspots were visible in committed checkpoint metadata.", + Signals: []string{"checkpoint_token_metadata"}, + }) + } + return recs +} + +func tokensProfileSignalCount(signals []tokensProfileSignal, signalID string) int { + for _, signal := range signals { + if signal.ID == signalID { + return signal.Count + } + } + return 0 +} + +func tokensProfileLimitations(report tokensProfileReport) []string { + var limitations []string + if report.CheckpointsAvailable > report.CheckpointsAnalyzed { + limitations = append(limitations, fmt.Sprintf("Limited to latest %d of %d committed checkpoints; use --limit or --all to change scope.", report.CheckpointsAnalyzed, report.CheckpointsAvailable)) + } + if report.CheckpointsAnalyzed == 0 { + limitations = append(limitations, "No committed checkpoints found.") + } + if report.MissingTokenData > 0 { + limitations = append(limitations, fmt.Sprintf("%d checkpoint%s did not include token usage.", report.MissingTokenData, tokenPluralSuffix(report.MissingTokenData))) + } + if report.MetadataReadWarnings > 0 { + limitations = append(limitations, fmt.Sprintf("%d checkpoint%s had incomplete session metadata; profile used root token summaries or readable sessions where available.", report.MetadataReadWarnings, tokenPluralSuffix(report.MetadataReadWarnings))) + } + if report.Tokens != nil { + limitations = append(limitations, "Token totals are summed from analyzed checkpoints and may include overlapping checkpoint history; treat them as checkpoint-observed volume, not guaranteed unique session spend.") + } + if report.CheckpointsAnalyzed > 0 { + limitations = append(limitations, "Tool-level search/read spend is not captured yet; this profile infers patterns from token totals, cache/context replay, API call counts, and subagent totals.") + } + return limitations +} + +func writeTokensProfileJSON(w io.Writer, report tokensProfileReport) error { + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + if err := enc.Encode(report); err != nil { + return fmt.Errorf("failed to encode token profile report: %w", err) + } + return nil +} + +func writeTokensProfileText(w io.Writer, report tokensProfileReport) { + fmt.Fprintln(w, "Token profile") + fmt.Fprintln(w) + fmt.Fprintf(w, "Source: %s\n", report.Source) + fmt.Fprintf(w, "Checkpoints available: %d\n", report.CheckpointsAvailable) + fmt.Fprintf(w, "Checkpoints analyzed: %d\n", report.CheckpointsAnalyzed) + fmt.Fprintf(w, "With token data: %d\n", report.CheckpointsWithTokenData) + fmt.Fprintf(w, "Missing token data: %d\n", report.MissingTokenData) + if report.MetadataReadWarnings > 0 { + fmt.Fprintf(w, "Metadata warnings: %d\n", report.MetadataReadWarnings) + } + + writeTokenUsageSectionWithTitle(w, "Checkpoint-observed token usage", report.Tokens) + writeTokensProfileSignals(w, report.Signals) + if len(report.Recommendations) > 0 { + writeTokenRecommendations(w, report.Recommendations) + } + writeTokenLimitations(w, report.Limitations) +} + +func writeTokensProfileSignals(w io.Writer, signals []tokensProfileSignal) { + if len(signals) == 0 { + return + } + + fmt.Fprintln(w) + fmt.Fprintln(w, "Repeated signals") + for _, signal := range signals { + fmt.Fprintf(w, "- %s: %d checkpoint%s", signal.Label, signal.Count, tokenPluralSuffix(signal.Count)) + if signal.Percent > 0 { + fmt.Fprintf(w, " (%d%%)", signal.Percent) + } + fmt.Fprintln(w) + } +} diff --git a/cmd/entire/cli/tokens_profile_test.go b/cmd/entire/cli/tokens_profile_test.go new file mode 100644 index 0000000000..83d29e45e6 --- /dev/null +++ b/cmd/entire/cli/tokens_profile_test.go @@ -0,0 +1,290 @@ +package cli + +import ( + "bytes" + "context" + "encoding/json" + "strings" + "testing" + + "github.com/entireio/cli/cmd/entire/cli/agent" + "github.com/entireio/cli/cmd/entire/cli/checkpoint" + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/strategy" + "github.com/entireio/cli/redact" +) + +func TestTokensProfileCmd_TextOutputAggregatesCommittedCheckpoints(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) + + writeProfileTokenCheckpoint(ctx, t, store, "100aaa000001", "profile-cache-hotspot", &agent.TokenUsage{ + InputTokens: 100, + CacheCreationTokens: 100, + CacheReadTokens: 800, + APICallCount: 5, + }) + writeProfileTokenCheckpoint(ctx, t, store, "100aaa000002", "profile-api-heavy", &agent.TokenUsage{ + InputTokens: 400, + OutputTokens: 100, + APICallCount: 25, + }) + writeProfileTokenCheckpoint(ctx, t, store, "100aaa000003", "profile-subagent-heavy", &agent.TokenUsage{ + InputTokens: 500, + OutputTokens: 500, + APICallCount: 3, + SubagentTokens: &agent.TokenUsage{ + InputTokens: 1_000, + }, + }) + writeProfileTokenCheckpoint(ctx, t, store, "100aaa000004", "profile-missing", nil) + + cmd := newTokensGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"profile"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Token profile", + "Checkpoints analyzed: 4", + "With token data: 3", + "Missing token data: 1", + "Checkpoint-observed token usage", + "Total: 3.5k tokens", + "Cache read: 800", + "API calls: 33", + "Repeated signals", + "Cache/context replay hotspot: 1 checkpoint", + "API call amplification: 1 checkpoint", + "Subagent-heavy sessions: 1 checkpoint", + "Missing token data: 1 checkpoint", + "Recommendations", + "Use `entire search` for prior decisions/checkpoints before broad re-investigation.", + "Token totals are summed from analyzed checkpoints and may include overlapping checkpoint history", + "Tool-level search/read spend is not captured yet", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } + + tokenUsageIndex := strings.Index(out, "Checkpoint-observed token usage") + recommendationsIndex := strings.Index(out, "Recommendations") + if tokenUsageIndex == -1 || recommendationsIndex == -1 { + t.Fatalf("expected token usage and recommendations sections, got:\n%s", out) + } + if tokenUsageIndex > recommendationsIndex { + t.Fatalf("expected token usage before recommendations, got:\n%s", out) + } +} + +func TestTokensProfileCmd_JSONOutput(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) + + writeProfileTokenCheckpoint(ctx, t, store, "200bbb000001", "profile-json-cache", &agent.TokenUsage{ + InputTokens: 100, + CacheReadTokens: 900, + APICallCount: 2, + }) + writeProfileTokenCheckpoint(ctx, t, store, "200bbb000002", "profile-json-api", &agent.TokenUsage{ + InputTokens: 200, + OutputTokens: 100, + APICallCount: 22, + }) + + cmd := newTokensGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"profile", "--json"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + var result tokensProfileReport + if err := json.Unmarshal(stdout.Bytes(), &result); err != nil { + t.Fatalf("expected valid JSON, got parse error: %v\noutput: %s", err, stdout.String()) + } + var raw map[string]interface{} + if err := json.Unmarshal(stdout.Bytes(), &raw); err != nil { + t.Fatalf("expected valid JSON object, got parse error: %v\noutput: %s", err, stdout.String()) + } + if raw["usage_scope"] != "checkpoint_observed" { + t.Fatalf("usage_scope = %v, want checkpoint_observed", raw["usage_scope"]) + } + if result.CheckpointsAnalyzed != 2 { + t.Fatalf("checkpoints_analyzed = %d, want 2", result.CheckpointsAnalyzed) + } + if result.CheckpointsWithTokenData != 2 { + t.Fatalf("checkpoints_with_token_data = %d, want 2", result.CheckpointsWithTokenData) + } + if result.Tokens == nil || result.Tokens.Total != 1300 { + t.Fatalf("unexpected token total: %+v", result.Tokens) + } + if got := signalCount(result.Signals, "context-replay-hotspot"); got != 1 { + t.Fatalf("context-replay-hotspot signal count = %d, want 1", got) + } + if got := signalCount(result.Signals, "api-call-amplification"); got != 1 { + t.Fatalf("api-call-amplification signal count = %d, want 1", got) + } + if len(result.Recommendations) == 0 { + t.Fatalf("expected recommendations, got none") + } +} + +func TestTokensProfileCmd_JSONOutputReportsAPICallOnlyCheckpoints(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) + + writeProfileTokenCheckpoint(ctx, t, store, "250bbb000001", "profile-json-api-only", &agent.TokenUsage{ + APICallCount: 25, + }) + + cmd := newTokensGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"profile", "--json"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + var result tokensProfileReport + if err := json.Unmarshal(stdout.Bytes(), &result); err != nil { + t.Fatalf("expected valid JSON, got parse error: %v\noutput: %s", err, stdout.String()) + } + if result.CheckpointsWithTokenData != 1 { + t.Fatalf("checkpoints_with_token_data = %d, want 1", result.CheckpointsWithTokenData) + } + if result.MissingTokenData != 0 { + t.Fatalf("missing_token_data = %d, want 0", result.MissingTokenData) + } + if result.Tokens == nil || result.Tokens.Total != 0 || result.Tokens.APICalls != 25 { + t.Fatalf("unexpected token usage: %+v", result.Tokens) + } + if got := signalCount(result.Signals, "api-call-amplification"); got != 1 { + t.Fatalf("api-call-amplification signal count = %d, want 1", got) + } +} + +func TestTokensProfileCmd_LimitScopesAnalyzedCheckpoints(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) + + writeProfileTokenCheckpoint(ctx, t, store, "300ccc000001", "profile-limit-one", &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 100, + APICallCount: 1, + }) + writeProfileTokenCheckpoint(ctx, t, store, "300ccc000002", "profile-limit-two", &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 100, + APICallCount: 1, + }) + writeProfileTokenCheckpoint(ctx, t, store, "300ccc000003", "profile-limit-three", &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 100, + APICallCount: 1, + }) + + cmd := newTokensGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"profile", "--limit", "2"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Checkpoints available: 3", + "Checkpoints analyzed: 2", + "Total: 400 tokens", + "Limited to latest 2 of 3 committed checkpoints", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + +func TestTokensProfileCmd_LimitAndAllAreMutuallyExclusive(t *testing.T) { + runExplainAutoTestRepo(t) + + cmd := newTokensGroupCmd() + cmd.SetArgs([]string{"profile", "--limit", "2", "--all"}) + + err := cmd.ExecuteContext(context.Background()) + if err == nil { + t.Fatal("expected error for --limit with --all") + } + if !strings.Contains(err.Error(), "limit") || !strings.Contains(err.Error(), "all") { + t.Fatalf("expected error to mention limit and all, got: %v", err) + } +} + +func TestTokensProfileCmd_EmptyHistory(t *testing.T) { + runExplainAutoTestRepo(t) + + cmd := newTokensGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"profile"}) + + if err := cmd.ExecuteContext(context.Background()); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Token profile", + "Checkpoints analyzed: 0", + "Token data: unavailable", + "No committed checkpoints found.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + +func signalCount(signals []tokensProfileSignal, id string) int { + for _, signal := range signals { + if signal.ID == id { + return signal.Count + } + } + return 0 +} + +func writeProfileTokenCheckpoint(ctx context.Context, t *testing.T, store *checkpoint.GitStore, checkpointID string, sessionID string, usage *agent.TokenUsage) { + t.Helper() + + if err := store.WriteCommitted(ctx, checkpoint.WriteCommittedOptions{ + CheckpointID: id.MustCheckpointID(checkpointID), + SessionID: sessionID, + Strategy: strategy.StrategyNameManualCommit, + Branch: "tokens-profile", + Agent: testAgentClaude, + Transcript: redact.AlreadyRedacted([]byte(`{"type":"user","message":{"content":[{"type":"text","text":"profile"}]}}` + "\n")), + AuthorName: "Test", + AuthorEmail: "test@example.com", + TokenUsage: usage, + }); err != nil { + t.Fatalf("WriteCommitted(%s) error = %v", checkpointID, err) + } +}