Skip to content

Commit 2febb57

Browse files
committed
fix(cli): wire zai+minimax into init menu, extract _env_bool, add quota warning, clean dead branch
Apologies for the oversight — these provider dict entries were mostly in place during development but got lost while assembling the PR stack.

- Add zai and minimax to _PROVIDER_DEFAULTS, _PROVIDER_ENV, and _PROVIDER_SIGNUP so they appear in interactive init
- Extract an _env_bool(name, default=False) helper that accepts 1/yes/on/true, and reuse it for MINIMAX_REASONING_SPLIT parsing in both code paths
- Add session_request_warn to RateLimitConfig: logs a warning when cumulative session requests exceed a threshold, giving users advance notice before hitting long-window provider quotas (e.g. MiniMax's 1500 req/5 hr)
- Remove the unreachable litellm local-proxy branch (L488): _detect_provider_status already marks litellm as detected when LITELLM_BASE_URL is set, so the guard at L483 made that branch unreachable
- Add a note about MiniMax's 1500 req/5 hr limit vs. our 60-second window approximation

Addresses review feedback from @swati510 on #84.
1 parent 9c16b5e commit 2febb57

3 files changed

Lines changed: 63 additions & 20 deletions

File tree

packages/cli/src/repowise/cli/helpers.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,18 @@
1818
console = Console()
1919
err_console = Console(stderr=True)
2020

21+
22+
def _env_bool(name: str, *, default: bool = False) -> bool:
23+
"""Read a boolean from an environment variable.
24+
25+
Accepts ``1``, ``yes``, ``on``, and ``true`` (case-insensitive) as truthy.
26+
Returns *default* if the variable is unset or empty.
27+
"""
28+
value = os.environ.get(name, "").strip().lower()
29+
if not value:
30+
return default
31+
return value in ("1", "yes", "on", "true")
32+
2133
STATE_FILENAME = "state.json"
2234
REPOWISE_DIR = ".repowise"
2335

@@ -285,7 +297,7 @@ def resolve_provider(
285297
if os.environ.get("MINIMAX_BASE_URL"):
286298
kwargs["base_url"] = os.environ["MINIMAX_BASE_URL"]
287299
if os.environ.get("MINIMAX_REASONING_SPLIT"):
288-
kwargs["reasoning_split"] = os.environ["MINIMAX_REASONING_SPLIT"].lower() == "true"
300+
kwargs["reasoning_split"] = _env_bool("MINIMAX_REASONING_SPLIT")
289301
if os.environ.get("MINIMAX_TIER"):
290302
kwargs["tier"] = os.environ["MINIMAX_TIER"]
291303

@@ -354,7 +366,7 @@ def resolve_provider(
354366
if os.environ.get("MINIMAX_BASE_URL"):
355367
kwargs["base_url"] = os.environ["MINIMAX_BASE_URL"]
356368
if os.environ.get("MINIMAX_REASONING_SPLIT"):
357-
kwargs["reasoning_split"] = os.environ["MINIMAX_REASONING_SPLIT"].lower() == "true"
369+
kwargs["reasoning_split"] = _env_bool("MINIMAX_REASONING_SPLIT")
358370
if os.environ.get("MINIMAX_TIER"):
359371
kwargs["tier"] = os.environ["MINIMAX_TIER"]
360372
return get_provider("minimax", **kwargs)

packages/cli/src/repowise/cli/ui.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,8 @@ def print_phase_header(
266266
"anthropic": "claude-sonnet-4-6",
267267
"ollama": "llama3.2",
268268
"litellm": "groq/llama-3.1-70b-versatile",
269+
"zai": "glm-5.1",
270+
"minimax": "MiniMax-M1",
269271
}
270272

271273
# For most providers, a single env var indicates configuration.
@@ -276,6 +278,8 @@ def print_phase_header(
276278
"anthropic": "ANTHROPIC_API_KEY",
277279
"ollama": "OLLAMA_BASE_URL",
278280
"litellm": "LITELLM_API_KEY", # Also checks LITELLM_BASE_URL in _detect_provider_status
281+
"zai": "ZAI_API_KEY",
282+
"minimax": "MINIMAX_API_KEY",
279283
}
280284

281285
_PROVIDER_SIGNUP: dict[str, str] = {
@@ -284,6 +288,8 @@ def print_phase_header(
284288
"anthropic": "https://console.anthropic.com/settings/keys",
285289
"ollama": "https://ollama.com/download",
286290
"litellm": "https://docs.litellm.ai/docs/proxy/proxy",
291+
"zai": "https://open.bigmodel.cn/usercenter/apikeys",
292+
"minimax": "https://platform.minimaxi.com/document/key%20management",
287293
}
288294

289295

@@ -484,22 +490,14 @@ def interactive_provider_select(
484490
env_var = _PROVIDER_ENV[chosen]
485491
signup_url = _PROVIDER_SIGNUP.get(chosen, "")
486492
console.print()
487-
# Special case: litellm local proxy doesn't need an API key
488-
if chosen == "litellm" and os.environ.get("LITELLM_BASE_URL"):
489-
console.print(
490-
f" [{OK}]✓ Using LiteLLM proxy at[/] [{BRAND}]{os.environ['LITELLM_BASE_URL']}[/]"
491-
)
492-
console.print(" [dim]No API key required for local proxy.[/dim]")
493-
console.print()
494-
else:
495-
console.print(f" [bold]{chosen}[/bold] requires [cyan]{env_var}[/cyan].")
496-
if signup_url:
497-
console.print(f" Get your API key here: [{BRAND}]{signup_url}[/]")
498-
console.print()
499-
key = _prompt_api_key(console, chosen, env_var, repo_path=repo_path)
500-
if not key:
501-
console.print(f" [{WARN}]Skipped. Please select another provider.[/]")
502-
return interactive_provider_select(console, model_flag, repo_path=repo_path)
493+
console.print(f" [bold]{chosen}[/bold] requires [cyan]{env_var}[/cyan].")
494+
if signup_url:
495+
console.print(f" Get your API key here: [{BRAND}]{signup_url}[/]")
496+
console.print()
497+
key = _prompt_api_key(console, chosen, env_var, repo_path=repo_path)
498+
if not key:
499+
console.print(f" [{WARN}]Skipped. Please select another provider.[/]")
500+
return interactive_provider_select(console, model_flag, repo_path=repo_path)
503501

504502
# --- model ---
505503
default_model = _PROVIDER_DEFAULTS.get(chosen, "")

packages/core/src/repowise/core/rate_limiter.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
import time
2727
from dataclasses import dataclass
2828

29+
import structlog
30+
31+
log = structlog.get_logger(__name__)
32+
2933

3034
@dataclass(frozen=True)
3135
class RateLimitConfig:
@@ -34,10 +38,14 @@ class RateLimitConfig:
3438
Attributes:
3539
requests_per_minute: Maximum API requests within any 60-second window.
3640
tokens_per_minute: Maximum tokens (input + output) within any 60-second window.
41+
session_request_warn: If set, log a warning when cumulative session requests
42+
exceed this threshold. Useful for providers whose real
43+
limits span a longer window than our 60-second limiter.
3744
"""
3845

3946
requests_per_minute: int
4047
tokens_per_minute: int
48+
session_request_warn: int | None = None
4149

4250

4351
# Default rate limit configs for known providers.
@@ -50,8 +58,16 @@ class RateLimitConfig:
5058
"ollama": RateLimitConfig(requests_per_minute=1_000, tokens_per_minute=10_000_000),
5159
"litellm": RateLimitConfig(requests_per_minute=60, tokens_per_minute=150_000),
5260
"zai": RateLimitConfig(requests_per_minute=60, tokens_per_minute=150_000),
53-
# MiniMax: conservative default (Starter tier). Set MINIMAX_TIER for plan-specific limits.
54-
"minimax": RateLimitConfig(requests_per_minute=5, tokens_per_minute=25_000),
61+
# MiniMax: published limits are 1500 requests / 5 hours. Our limiter uses a
62+
# 60-second sliding window, so ~5 RPM is a steady-state approximation. Bursts
63+
# may trigger local 429s, and slow paces may exceed real quota without tripping.
64+
# session_request_warn fires once at ~73% of the 5-hour budget to alert the user.
65+
# See: https://platform.minimaxi.com/document/rate-limit
66+
"minimax": RateLimitConfig(
67+
requests_per_minute=5,
68+
tokens_per_minute=25_000,
69+
session_request_warn=1100,
70+
),
5571
}
5672

5773

@@ -69,6 +85,9 @@ def __init__(self, config: RateLimitConfig) -> None:
6985
# (timestamp, token_count) pairs within the last 60 seconds
7086
self._token_records: list[tuple[float, int]] = []
7187
self._lock = asyncio.Lock()
88+
# Cumulative session counter for long-window quota warnings
89+
self._session_requests: int = 0
90+
self._session_warned: bool = False
7291

7392
async def acquire(self, estimated_tokens: int = 1_000) -> None:
7493
"""Block until rate limits permit the next request.
@@ -92,6 +111,20 @@ async def acquire(self, estimated_tokens: int = 1_000) -> None:
92111
if rpm_ok and tpm_ok:
93112
self._request_times.append(now)
94113
self._token_records.append((now, estimated_tokens))
114+
self._session_requests += 1
115+
# Warn when approaching long-window provider quota
116+
if (
117+
self._config.session_request_warn
118+
and not self._session_warned
119+
and self._session_requests >= self._config.session_request_warn
120+
):
121+
self._session_warned = True
122+
log.warning(
123+
"rate_limiter.session_quota_approaching",
124+
session_requests=self._session_requests,
125+
warn_threshold=self._config.session_request_warn,
126+
hint="Provider may 429 on sustained usage. Consider reducing concurrency or pausing.",
127+
)
95128
return
96129

97130
# Compute how long to sleep before re-checking

0 commit comments

Comments (0)