Skip to content

Commit 2febb57

Browse files
committed
fix(cli): wire zai+minimax into init menu, extract _env_bool, add quota warning, clean dead branch
Apologies for the oversight — these provider dict entries were mostly in place during development but got lost while assembling the PR stack.

- Add zai and minimax to _PROVIDER_DEFAULTS, _PROVIDER_ENV, and _PROVIDER_SIGNUP so they appear in interactive init
- Extract an _env_bool(name, default=False) helper that accepts 1/yes/on/true, and reuse it for MINIMAX_REASONING_SPLIT parsing in both code paths
- Add session_request_warn to RateLimitConfig: logs a warning when cumulative session requests exceed a threshold, giving users advance notice before hitting long-window provider quotas (e.g. MiniMax's 1500 req/5 hr)
- Remove the unreachable litellm local-proxy branch (L488): _detect_provider_status already marks litellm as detected when LITELLM_BASE_URL is set, so the guard at L483 made that branch unreachable
- Add a note about MiniMax's 1500 req/5 hr limit vs. our 60-second window approximation

Addresses review feedback from @swati510 on #84.
1 parent 9c16b5e commit 2febb57

3 files changed

Lines changed: 63 additions & 20 deletions

File tree

packages/cli/src/repowise/cli/helpers.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,18 @@
1818
console = Console()
1919
err_console = Console(stderr=True)
2020

21+
22+
def _env_bool(name: str, *, default: bool = False) -> bool:
23+
"""Read a boolean from an environment variable.
24+
25+
Accepts ``1``, ``yes``, ``on``, and ``true`` (case-insensitive) as truthy.
26+
Returns *default* if the variable is unset or empty.
27+
"""
28+
value = os.environ.get(name, "").strip().lower()
29+
if not value:
30+
return default
31+
return value in ("1", "yes", "on", "true")
32+
2133
STATE_FILENAME = "state.json"
2234
REPOWISE_DIR = ".repowise"
2335

@@ -285,7 +297,7 @@ def resolve_provider(
285297
if os.environ.get("MINIMAX_BASE_URL"):
286298
kwargs["base_url"] = os.environ["MINIMAX_BASE_URL"]
287299
if os.environ.get("MINIMAX_REASONING_SPLIT"):
288-
kwargs["reasoning_split"] = os.environ["MINIMAX_REASONING_SPLIT"].lower() == "true"
300+
kwargs["reasoning_split"] = _env_bool("MINIMAX_REASONING_SPLIT")
289301
if os.environ.get("MINIMAX_TIER"):
290302
kwargs["tier"] = os.environ["MINIMAX_TIER"]
291303

@@ -354,7 +366,7 @@ def resolve_provider(
354366
if os.environ.get("MINIMAX_BASE_URL"):
355367
kwargs["base_url"] = os.environ["MINIMAX_BASE_URL"]
356368
if os.environ.get("MINIMAX_REASONING_SPLIT"):
357-
kwargs["reasoning_split"] = os.environ["MINIMAX_REASONING_SPLIT"].lower() == "true"
369+
kwargs["reasoning_split"] = _env_bool("MINIMAX_REASONING_SPLIT")
358370
if os.environ.get("MINIMAX_TIER"):
359371
kwargs["tier"] = os.environ["MINIMAX_TIER"]
360372
return get_provider("minimax", **kwargs)

packages/cli/src/repowise/cli/ui.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,8 @@ def print_phase_header(
266266
"anthropic": "claude-sonnet-4-6",
267267
"ollama": "llama3.2",
268268
"litellm": "groq/llama-3.1-70b-versatile",
269+
"zai": "glm-5.1",
270+
"minimax": "MiniMax-M1",
269271
}
270272

271273
# For most providers, a single env var indicates configuration.
@@ -276,6 +278,8 @@ def print_phase_header(
276278
"anthropic": "ANTHROPIC_API_KEY",
277279
"ollama": "OLLAMA_BASE_URL",
278280
"litellm": "LITELLM_API_KEY", # Also checks LITELLM_BASE_URL in _detect_provider_status
281+
"zai": "ZAI_API_KEY",
282+
"minimax": "MINIMAX_API_KEY",
279283
}
280284

281285
_PROVIDER_SIGNUP: dict[str, str] = {
@@ -284,6 +288,8 @@ def print_phase_header(
284288
"anthropic": "https://console.anthropic.com/settings/keys",
285289
"ollama": "https://ollama.com/download",
286290
"litellm": "https://docs.litellm.ai/docs/proxy/proxy",
291+
"zai": "https://open.bigmodel.cn/usercenter/apikeys",
292+
"minimax": "https://platform.minimaxi.com/document/key%20management",
287293
}
288294

289295

@@ -484,22 +490,14 @@ def interactive_provider_select(
484490
env_var = _PROVIDER_ENV[chosen]
485491
signup_url = _PROVIDER_SIGNUP.get(chosen, "")
486492
console.print()
487-
# Special case: litellm local proxy doesn't need an API key
488-
if chosen == "litellm" and os.environ.get("LITELLM_BASE_URL"):
489-
console.print(
490-
f" [{OK}]✓ Using LiteLLM proxy at[/] [{BRAND}]{os.environ['LITELLM_BASE_URL']}[/]"
491-
)
492-
console.print(" [dim]No API key required for local proxy.[/dim]")
493-
console.print()
494-
else:
495-
console.print(f" [bold]{chosen}[/bold] requires [cyan]{env_var}[/cyan].")
496-
if signup_url:
497-
console.print(f" Get your API key here: [{BRAND}]{signup_url}[/]")
498-
console.print()
499-
key = _prompt_api_key(console, chosen, env_var, repo_path=repo_path)
500-
if not key:
501-
console.print(f" [{WARN}]Skipped. Please select another provider.[/]")
502-
return interactive_provider_select(console, model_flag, repo_path=repo_path)
493+
console.print(f" [bold]{chosen}[/bold] requires [cyan]{env_var}[/cyan].")
494+
if signup_url:
495+
console.print(f" Get your API key here: [{BRAND}]{signup_url}[/]")
496+
console.print()
497+
key = _prompt_api_key(console, chosen, env_var, repo_path=repo_path)
498+
if not key:
499+
console.print(f" [{WARN}]Skipped. Please select another provider.[/]")
500+
return interactive_provider_select(console, model_flag, repo_path=repo_path)
503501

504502
# --- model ---
505503
default_model = _PROVIDER_DEFAULTS.get(chosen, "")

packages/core/src/repowise/core/rate_limiter.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
import time
2727
from dataclasses import dataclass
2828

29+
import structlog
30+
31+
log = structlog.get_logger(__name__)
32+
2933

3034
@dataclass(frozen=True)
3135
class RateLimitConfig:
@@ -34,10 +38,14 @@ class RateLimitConfig:
3438
Attributes:
3539
requests_per_minute: Maximum API requests within any 60-second window.
3640
tokens_per_minute: Maximum tokens (input + output) within any 60-second window.
41+
session_request_warn: If set, log a warning when cumulative session requests
42+
exceed this threshold. Useful for providers whose real
43+
limits span a longer window than our 60-second limiter.
3744
"""
3845

3946
requests_per_minute: int
4047
tokens_per_minute: int
48+
session_request_warn: int | None = None
4149

4250

4351
# Default rate limit configs for known providers.
@@ -50,8 +58,16 @@ class RateLimitConfig:
5058
"ollama": RateLimitConfig(requests_per_minute=1_000, tokens_per_minute=10_000_000),
5159
"litellm": RateLimitConfig(requests_per_minute=60, tokens_per_minute=150_000),
5260
"zai": RateLimitConfig(requests_per_minute=60, tokens_per_minute=150_000),
53-
# MiniMax: conservative default (Starter tier). Set MINIMAX_TIER for plan-specific limits.
54-
"minimax": RateLimitConfig(requests_per_minute=5, tokens_per_minute=25_000),
61+
# MiniMax: published limits are 1500 requests / 5 hours. Our limiter uses a
62+
# 60-second sliding window, so ~5 RPM is a steady-state approximation. Bursts
63+
# may trigger local 429s, and slow paces may exceed real quota without tripping.
64+
# session_request_warn fires once at ~73% of the 5-hour budget to alert the user.
65+
# See: https://platform.minimaxi.com/document/rate-limit
66+
"minimax": RateLimitConfig(
67+
requests_per_minute=5,
68+
tokens_per_minute=25_000,
69+
session_request_warn=1100,
70+
),
5571
}
5672

5773

@@ -69,6 +85,9 @@ def __init__(self, config: RateLimitConfig) -> None:
6985
# (timestamp, token_count) pairs within the last 60 seconds
7086
self._token_records: list[tuple[float, int]] = []
7187
self._lock = asyncio.Lock()
88+
# Cumulative session counter for long-window quota warnings
89+
self._session_requests: int = 0
90+
self._session_warned: bool = False
7291

7392
async def acquire(self, estimated_tokens: int = 1_000) -> None:
7493
"""Block until rate limits permit the next request.
@@ -92,6 +111,20 @@ async def acquire(self, estimated_tokens: int = 1_000) -> None:
92111
if rpm_ok and tpm_ok:
93112
self._request_times.append(now)
94113
self._token_records.append((now, estimated_tokens))
114+
self._session_requests += 1
115+
# Warn when approaching long-window provider quota
116+
if (
117+
self._config.session_request_warn
118+
and not self._session_warned
119+
and self._session_requests >= self._config.session_request_warn
120+
):
121+
self._session_warned = True
122+
log.warning(
123+
"rate_limiter.session_quota_approaching",
124+
session_requests=self._session_requests,
125+
warn_threshold=self._config.session_request_warn,
126+
hint="Provider may 429 on sustained usage. Consider reducing concurrency or pausing.",
127+
)
95128
return
96129

97130
# Compute how long to sleep before re-checking

0 commit comments

Comments (0)