diff --git a/packages/core/src/repowise/core/providers/llm/__init__.py b/packages/core/src/repowise/core/providers/llm/__init__.py
index e23502e..47cd23b 100644
--- a/packages/core/src/repowise/core/providers/llm/__init__.py
+++ b/packages/core/src/repowise/core/providers/llm/__init__.py
@@ -10,6 +10,7 @@
 Built-in providers:
     anthropic — claude-opus-4-6, claude-sonnet-4-6, claude-haiku-4-5
+    minimax — MiniMax-M2.7, MiniMax-M2.7-highspeed (Anthropic-compatible API)
     openai — gpt-5.4-nano, gpt-5.4-mini, gpt-5.4
     gemini — gemini-3.1-flash-lite-preview, gemini-3-flash-preview, gemini-3.1-pro-preview
     ollama — local inference (llama3.2, codellama, etc.)
diff --git a/packages/core/src/repowise/core/providers/llm/minimax.py b/packages/core/src/repowise/core/providers/llm/minimax.py
new file mode 100644
index 0000000..1978752
--- /dev/null
+++ b/packages/core/src/repowise/core/providers/llm/minimax.py
@@ -0,0 +1,344 @@
+"""MiniMax provider for repowise.
+
+Uses MiniMax's Anthropic-compatible API endpoint, which mirrors the Anthropic
+Messages API. Supports both document generation (generate) and streaming chat
+with tool use (stream_chat).
+
+Supported models:
+    - MiniMax-M2.7 — flagship model (default)
+    - MiniMax-M2.7-highspeed — same quality at lower latency
+
+API documentation:
+    https://platform.minimax.io/docs/api-reference/text-anthropic-api
+"""
+
+from __future__ import annotations
+
+import os
+from typing import TYPE_CHECKING, Any, AsyncIterator
+
+import structlog
+from anthropic import APIStatusError as _AnthropicAPIStatusError
+from anthropic import AsyncAnthropic
+from anthropic import RateLimitError as _AnthropicRateLimitError
+from tenacity import (
+    RetryError,
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential_jitter,
+)
+
+from repowise.core.providers.llm.base import (
+    BaseProvider,
+    ChatStreamEvent,
+    ChatToolCall,
+    GeneratedResponse,
+    ProviderError,
+    RateLimitError,
+)
+from repowise.core.rate_limiter import RateLimiter
+
+if TYPE_CHECKING:
+    from repowise.core.generation.cost_tracker import CostTracker
+
+log = structlog.get_logger(__name__)
+
+_MAX_RETRIES = 3
+_MIN_WAIT = 1.0
+_MAX_WAIT = 4.0
+
+_DEFAULT_BASE_URL = "https://api.minimax.io/anthropic"
+
+
+class MiniMaxProvider(BaseProvider):
+    """MiniMax chat provider using the Anthropic-compatible API.
+
+    MiniMax's Anthropic-compatible endpoint mirrors the Anthropic Messages API,
+    so this provider is a drop-in replacement for AnthropicProvider that uses
+    MiniMax models instead of Claude models.
+
+    Args:
+        api_key: MiniMax API key. Falls back to the MINIMAX_API_KEY env var.
+        model: Model identifier. Defaults to MiniMax-M2.7.
+        base_url: Base URL for the API. Falls back to the MINIMAX_BASE_URL env
+            var, then to https://api.minimax.io/anthropic.
+        rate_limiter: Optional pre-configured RateLimiter.
+        cost_tracker: Optional CostTracker for recording usage.
+
+    Notes:
+        - temperature must be in (0.0, 1.0]. Values <= 0 are clamped to 0.01.
+        - Values > 1.0 are clamped to 1.0.
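+
+    Example (illustrative; the clamping applied before every request):
+
+        provider._clamp_temperature(0.0)   # -> 0.01
+        provider._clamp_temperature(0.7)   # -> 0.7 (unchanged)
+        provider._clamp_temperature(1.5)   # -> 1.0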
+    """
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model: str = "MiniMax-M2.7",
+        base_url: str | None = None,
+        rate_limiter: RateLimiter | None = None,
+        cost_tracker: "CostTracker | None" = None,
+    ) -> None:
+        resolved_key = api_key or os.environ.get("MINIMAX_API_KEY")
+        if not resolved_key:
+            raise ProviderError(
+                "minimax",
+                "No API key provided. Pass api_key= or set MINIMAX_API_KEY.",
+            )
+        resolved_base_url = base_url or os.environ.get("MINIMAX_BASE_URL", _DEFAULT_BASE_URL)
+        self._client = AsyncAnthropic(api_key=resolved_key, base_url=resolved_base_url)
+        self._model = model
+        self._rate_limiter = rate_limiter
+        self._cost_tracker = cost_tracker
+
+    @property
+    def provider_name(self) -> str:
+        return "minimax"
+
+    @property
+    def model_name(self) -> str:
+        return self._model
+
+    def _clamp_temperature(self, temperature: float) -> float:
+        """Clamp temperature to MiniMax's valid range (0.0, 1.0]."""
+        if temperature <= 0.0:
+            return 0.01
+        if temperature > 1.0:
+            return 1.0
+        return temperature
+
+    async def generate(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        max_tokens: int = 4096,
+        temperature: float = 1.0,
+        request_id: str | None = None,
+    ) -> GeneratedResponse:
+        if self._rate_limiter:
+            await self._rate_limiter.acquire(estimated_tokens=max_tokens)
+
+        log.debug(
+            "minimax.generate.start",
+            model=self._model,
+            max_tokens=max_tokens,
+            request_id=request_id,
+        )
+
+        try:
+            return await self._generate_with_retry(
+                system_prompt=system_prompt,
+                user_prompt=user_prompt,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                request_id=request_id,
+            )
+        except RetryError as exc:
+            raise ProviderError(
+                "minimax",
+                f"All {_MAX_RETRIES} retries exhausted: {exc}",
+            ) from exc
+
+    @retry(
+        retry=retry_if_exception_type(ProviderError),
+        stop=stop_after_attempt(_MAX_RETRIES),
+        wait=wait_exponential_jitter(initial=_MIN_WAIT, max=_MAX_WAIT),
+        reraise=True,
+    )
+    async def _generate_with_retry(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        max_tokens: int,
+        temperature: float,
+        request_id: str | None,
+    ) -> GeneratedResponse:
+        try:
+            response = await self._client.messages.create(
+                model=self._model,
+                max_tokens=max_tokens,
+                temperature=self._clamp_temperature(temperature),
+                system=system_prompt,
+                messages=[{"role": "user", "content": user_prompt}],
+            )
+        except _AnthropicRateLimitError as exc:
+            raise RateLimitError("minimax", str(exc), status_code=429) from exc
+        except _AnthropicAPIStatusError as exc:
+            raise ProviderError(
+                "minimax", str(exc), status_code=exc.status_code
+            ) from exc
+
+        result = GeneratedResponse(
+            content=next(
+                (block.text for block in response.content if getattr(block, "type", None) == "text"),
+                "",
+            ),
+            input_tokens=response.usage.input_tokens,
+            output_tokens=response.usage.output_tokens,
+            cached_tokens=0,
+            usage={
+                "input_tokens": response.usage.input_tokens,
+                "output_tokens": response.usage.output_tokens,
+            },
+        )
+        log.debug(
+            "minimax.generate.done",
+            input_tokens=result.input_tokens,
+            output_tokens=result.output_tokens,
+            request_id=request_id,
+        )
+
+        if self._cost_tracker is not None:
+            import asyncio
+
+            try:
+                # Fire-and-forget: record usage without blocking the response.
+                asyncio.get_running_loop().create_task(
+                    self._cost_tracker.record(
+                        model=self._model,
+                        input_tokens=result.input_tokens,
+                        output_tokens=result.output_tokens,
+                        operation="doc_generation",
+                        file_path=None,
+                    )
+                )
+            except RuntimeError:
+                pass  # No running event loop — skip async record
+
+        return result
+
+    # --- ChatProvider protocol implementation ---
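+    #
+    # Typical consumption loop (illustrative; the event types match those
+    # stream_chat yields below):
+    #
+    #     async for ev in provider.stream_chat(messages, tools, system_prompt):
+    #         if ev.type == "text_delta":
+    #             print(ev.text, end="")
+    #         elif ev.type == "tool_start":
+    #             ...  # dispatch ev.tool_call to a tool executor
+    #         elif ev.type == "stop":
+    #             break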
+
+    async def stream_chat(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        system_prompt: str,
+        max_tokens: int = 8192,
+        temperature: float = 0.7,
+        request_id: str | None = None,
+        tool_executor: Any | None = None,
+    ) -> AsyncIterator[ChatStreamEvent]:
+        import json as _json
+
+        # Convert OpenAI-format tools to Anthropic format
+        anthropic_tools = []
+        for t in tools:
+            fn = t.get("function", t)
+            anthropic_tools.append({
+                "name": fn["name"],
+                "description": fn.get("description", ""),
+                "input_schema": fn.get("parameters", {}),
+            })
+
+        # Convert OpenAI-format messages to Anthropic format
+        anthropic_messages = _to_anthropic_messages(messages)
+
+        kwargs: dict[str, Any] = {
+            "model": self._model,
+            "max_tokens": max_tokens,
+            "temperature": self._clamp_temperature(temperature),
+            "system": system_prompt,
+            "messages": anthropic_messages,
+        }
+        if anthropic_tools:
+            kwargs["tools"] = anthropic_tools
+
+        try:
+            async with self._client.messages.stream(**kwargs) as stream:
+                # Accumulate one tool call at a time: the API streams the tool
+                # name first, then its JSON arguments in incremental deltas.
+                current_tool_id: str | None = None
+                current_tool_name: str | None = None
+                current_tool_input_json = ""
+
+                async for event in stream:
+                    if event.type == "content_block_start":
+                        block = event.content_block
+                        if hasattr(block, "type") and block.type == "tool_use":
+                            current_tool_id = block.id
+                            current_tool_name = block.name
+                            current_tool_input_json = ""
+                    elif event.type == "content_block_delta":
+                        delta = event.delta
+                        if hasattr(delta, "type"):
+                            if delta.type == "text_delta":
+                                yield ChatStreamEvent(type="text_delta", text=delta.text)
+                            elif delta.type == "input_json_delta":
+                                current_tool_input_json += delta.partial_json
+                    elif event.type == "content_block_stop":
+                        if current_tool_name:
+                            try:
+                                args = _json.loads(current_tool_input_json) if current_tool_input_json else {}
+                            except Exception:
+                                args = {}
+                            yield ChatStreamEvent(
+                                type="tool_start",
+                                tool_call=ChatToolCall(
+                                    id=current_tool_id or "",
+                                    name=current_tool_name,
+                                    arguments=args,
+                                ),
+                            )
+                            current_tool_id = None
+                            current_tool_name = None
+                            current_tool_input_json = ""
+                    elif event.type == "message_delta":
+                        stop = getattr(event.delta, "stop_reason", None)
+                        usage = getattr(event, "usage", None)
+                        if usage:
+                            yield ChatStreamEvent(
+                                type="usage",
+                                input_tokens=getattr(usage, "input_tokens", 0) or 0,
+                                output_tokens=getattr(usage, "output_tokens", 0) or 0,
+                            )
+                        if stop:
+                            yield ChatStreamEvent(type="stop", stop_reason=stop)
+                    elif event.type == "message_stop":
+                        pass  # Final cleanup; stop already yielded via message_delta
+        except _AnthropicRateLimitError as exc:
+            raise RateLimitError("minimax", str(exc), status_code=429) from exc
+        except _AnthropicAPIStatusError as exc:
+            raise ProviderError("minimax", str(exc), status_code=exc.status_code) from exc
+
+
+def _to_anthropic_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Convert OpenAI-format messages to Anthropic format for the MiniMax API."""
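+    # Illustrative mapping (example values, not from the API):
+    #   {"role": "tool", "tool_call_id": "t1", "content": "42"}
+    # becomes
+    #   {"role": "user", "content": [{"type": "tool_result",
+    #                                 "tool_use_id": "t1", "content": "42"}]}
+    # An assistant message with tool_calls becomes text + tool_use blocks.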
msg.get("content", "")}) + + return result diff --git a/packages/core/src/repowise/core/providers/llm/registry.py b/packages/core/src/repowise/core/providers/llm/registry.py index 48e07a7..0fc4ff4 100644 --- a/packages/core/src/repowise/core/providers/llm/registry.py +++ b/packages/core/src/repowise/core/providers/llm/registry.py @@ -35,6 +35,7 @@ # you just can't use the anthropic provider. _BUILTIN_PROVIDERS: dict[str, tuple[str, str]] = { "anthropic": ("repowise.core.providers.llm.anthropic", "AnthropicProvider"), + "minimax": ("repowise.core.providers.llm.minimax", "MiniMaxProvider"), "openai": ("repowise.core.providers.llm.openai", "OpenAIProvider"), "gemini": ("repowise.core.providers.llm.gemini", "GeminiProvider"), "ollama": ("repowise.core.providers.llm.ollama", "OllamaProvider"), @@ -131,6 +132,7 @@ def get_provider( # Give a helpful error message naming the missing package _missing = { "anthropic": "anthropic", + "minimax": "anthropic", # minimax uses the anthropic package for Anthropic-compatible API "openai": "openai", "gemini": "google-genai", "ollama": "openai", # ollama uses the openai package diff --git a/packages/core/src/repowise/core/rate_limiter.py b/packages/core/src/repowise/core/rate_limiter.py index 3612624..4f10a6b 100644 --- a/packages/core/src/repowise/core/rate_limiter.py +++ b/packages/core/src/repowise/core/rate_limiter.py @@ -44,6 +44,7 @@ class RateLimitConfig: # These are conservative defaults; operators can override via config. PROVIDER_DEFAULTS: dict[str, RateLimitConfig] = { "anthropic": RateLimitConfig(requests_per_minute=50, tokens_per_minute=100_000), + "minimax": RateLimitConfig(requests_per_minute=60, tokens_per_minute=200_000), "openai": RateLimitConfig(requests_per_minute=60, tokens_per_minute=150_000), "gemini": RateLimitConfig(requests_per_minute=60, tokens_per_minute=1_000_000), # Ollama runs locally — effectively unlimited, but we cap to avoid OOM diff --git a/packages/server/src/repowise/server/provider_config.py b/packages/server/src/repowise/server/provider_config.py index 60f6c60..d319f5f 100644 --- a/packages/server/src/repowise/server/provider_config.py +++ b/packages/server/src/repowise/server/provider_config.py @@ -39,6 +39,14 @@ "env_keys": ["ANTHROPIC_API_KEY"], "requires_key": True, }, + { + "id": "minimax", + "name": "MiniMax", + "default_model": "MiniMax-M2.7", + "models": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"], + "env_keys": ["MINIMAX_API_KEY"], + "requires_key": True, + }, { "id": "openai", "name": "OpenAI", diff --git a/tests/integration/test_provider_live.py b/tests/integration/test_provider_live.py index 2be4fb0..e9018a4 100644 --- a/tests/integration/test_provider_live.py +++ b/tests/integration/test_provider_live.py @@ -7,6 +7,7 @@ pytest tests/integration/test_provider_live.py -k openai -v pytest tests/integration/test_provider_live.py -k gemini -v pytest tests/integration/test_provider_live.py -k anthropic -v + pytest tests/integration/test_provider_live.py -k minimax -v """ from __future__ import annotations @@ -108,3 +109,30 @@ async def test_anthropic_live(model): print( f"\n[{model}] tokens: {result.input_tokens}in / {result.output_tokens}out | content: {result.content!r}" ) + + +# --------------------------------------------------------------------------- +# MiniMax +# --------------------------------------------------------------------------- + +MINIMAX_KEY = os.environ.get("MINIMAX_API_KEY", "") + + +@pytest.mark.skipif(not MINIMAX_KEY, reason="MINIMAX_API_KEY not set") +@pytest.mark.parametrize("model", 
["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]) +async def test_minimax_live(model): + from repowise.core.providers.llm.minimax import MiniMaxProvider + + provider = MiniMaxProvider(api_key=MINIMAX_KEY, model=model) + result = await provider.generate( + system_prompt="You are a concise assistant.", + user_prompt="Reply with exactly: OK", + max_tokens=512, + ) + assert isinstance(result, GeneratedResponse) + assert result.content.strip() + assert result.input_tokens > 0 + assert result.output_tokens > 0 + print( + f"\n[{model}] tokens: {result.input_tokens}in / {result.output_tokens}out | content: {result.content!r}" + ) diff --git a/tests/providers/test_minimax_provider.py b/tests/providers/test_minimax_provider.py new file mode 100644 index 0000000..776e66d --- /dev/null +++ b/tests/providers/test_minimax_provider.py @@ -0,0 +1,191 @@ +"""Unit tests for MiniMaxProvider. + +All tests use mocked API calls — no MINIMAX_API_KEY required. +""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from repowise.core.providers.llm.base import BaseProvider, GeneratedResponse, ProviderError +from repowise.core.providers.llm.minimax import MiniMaxProvider + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_provider(api_key: str = "test-minimax-key", model: str = "MiniMax-M2.7") -> MiniMaxProvider: + return MiniMaxProvider(api_key=api_key, model=model) + + +def _make_mock_response(content: str = "Test response", input_tokens: int = 100, output_tokens: int = 50): + block = MagicMock() + block.type = "text" + block.text = content + response = MagicMock() + response.content = [block] + response.usage = MagicMock(input_tokens=input_tokens, output_tokens=output_tokens) + return response + + +# --------------------------------------------------------------------------- +# Interface contract +# --------------------------------------------------------------------------- + + +class TestMiniMaxProviderInterface: + def test_is_base_provider_subclass(self) -> None: + assert issubclass(MiniMaxProvider, BaseProvider) + + def test_provider_name(self) -> None: + provider = _make_provider() + assert provider.provider_name == "minimax" + + def test_model_name_default(self) -> None: + provider = _make_provider() + assert provider.model_name == "MiniMax-M2.7" + + def test_model_name_highspeed(self) -> None: + provider = _make_provider(model="MiniMax-M2.7-highspeed") + assert provider.model_name == "MiniMax-M2.7-highspeed" + + def test_no_api_key_raises_provider_error(self) -> None: + with patch.dict("os.environ", {}, clear=False): + import os + original = os.environ.pop("MINIMAX_API_KEY", None) + try: + with pytest.raises(ProviderError, match="No API key"): + MiniMaxProvider(api_key=None) + finally: + if original is not None: + os.environ["MINIMAX_API_KEY"] = original + + def test_api_key_from_env(self, monkeypatch) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "env-key-123") + provider = MiniMaxProvider() + assert provider is not None + + +# --------------------------------------------------------------------------- +# Temperature clamping +# --------------------------------------------------------------------------- + + +class TestTemperatureClamping: + def test_temperature_above_zero_unchanged(self) -> None: + provider = _make_provider() + assert provider._clamp_temperature(0.7) == 0.7 + + def 
+    def test_temperature_one_unchanged(self) -> None:
+        provider = _make_provider()
+        assert provider._clamp_temperature(1.0) == 1.0
+
+    def test_temperature_zero_clamped_to_minimum(self) -> None:
+        provider = _make_provider()
+        assert provider._clamp_temperature(0.0) == 0.01
+
+    def test_temperature_negative_clamped(self) -> None:
+        provider = _make_provider()
+        assert provider._clamp_temperature(-0.5) == 0.01
+
+    def test_temperature_above_one_clamped(self) -> None:
+        provider = _make_provider()
+        assert provider._clamp_temperature(1.5) == 1.0
+
+
+# ---------------------------------------------------------------------------
+# generate()
+# ---------------------------------------------------------------------------
+
+
+class TestGenerate:
+    async def test_generate_returns_generated_response(self) -> None:
+        provider = _make_provider()
+        mock_response = _make_mock_response("Hello from MiniMax")
+
+        with patch.object(provider._client.messages, "create", new=AsyncMock(return_value=mock_response)):
+            result = await provider.generate(
+                system_prompt="You are helpful.",
+                user_prompt="Say hello.",
+            )
+
+        assert isinstance(result, GeneratedResponse)
+        assert result.content == "Hello from MiniMax"
+
+    async def test_generate_token_counts(self) -> None:
+        provider = _make_provider()
+        mock_response = _make_mock_response(input_tokens=200, output_tokens=80)
+
+        with patch.object(provider._client.messages, "create", new=AsyncMock(return_value=mock_response)):
+            result = await provider.generate(system_prompt="sys", user_prompt="user")
+
+        assert result.input_tokens == 200
+        assert result.output_tokens == 80
+        assert result.cached_tokens == 0
+
+    async def test_generate_passes_correct_model(self) -> None:
+        provider = _make_provider(model="MiniMax-M2.7-highspeed")
+        mock_response = _make_mock_response()
+        create_mock = AsyncMock(return_value=mock_response)
+
+        with patch.object(provider._client.messages, "create", new=create_mock):
+            await provider.generate(system_prompt="sys", user_prompt="user")
+
+        call_kwargs = create_mock.call_args.kwargs
+        assert call_kwargs["model"] == "MiniMax-M2.7-highspeed"
+
+    async def test_generate_clamps_zero_temperature(self) -> None:
+        provider = _make_provider()
+        mock_response = _make_mock_response()
+        create_mock = AsyncMock(return_value=mock_response)
+
+        with patch.object(provider._client.messages, "create", new=create_mock):
+            await provider.generate(system_prompt="sys", user_prompt="user", temperature=0.0)
+
+        call_kwargs = create_mock.call_args.kwargs
+        assert call_kwargs["temperature"] > 0.0
+
+    async def test_generate_system_prompt_passed(self) -> None:
+        provider = _make_provider()
+        mock_response = _make_mock_response()
+        create_mock = AsyncMock(return_value=mock_response)
+
+        with patch.object(provider._client.messages, "create", new=create_mock):
+            await provider.generate(
+                system_prompt="Be concise.",
+                user_prompt="Say OK.",
+            )
+
+        call_kwargs = create_mock.call_args.kwargs
+        assert call_kwargs["system"] == "Be concise."
+ + +# --------------------------------------------------------------------------- +# Registry integration +# --------------------------------------------------------------------------- + + +class TestRegistryIntegration: + def test_minimax_in_list_providers(self) -> None: + from repowise.core.providers.llm.registry import list_providers + assert "minimax" in list_providers() + + def test_get_provider_minimax(self) -> None: + from repowise.core.providers.llm.registry import get_provider + provider = get_provider("minimax", api_key="test-key", with_rate_limiter=False) + assert isinstance(provider, MiniMaxProvider) + assert provider.provider_name == "minimax" + + def test_get_provider_minimax_with_model(self) -> None: + from repowise.core.providers.llm.registry import get_provider + provider = get_provider( + "minimax", + api_key="test-key", + model="MiniMax-M2.7-highspeed", + with_rate_limiter=False, + ) + assert provider.model_name == "MiniMax-M2.7-highspeed" diff --git a/tests/providers/test_registry.py b/tests/providers/test_registry.py index 92a71f4..dcde3bb 100644 --- a/tests/providers/test_registry.py +++ b/tests/providers/test_registry.py @@ -22,6 +22,7 @@ class TestListProviders: def test_includes_all_builtin_providers(self) -> None: providers = list_providers() assert "anthropic" in providers + assert "minimax" in providers assert "openai" in providers assert "ollama" in providers assert "litellm" in providers @@ -114,5 +115,5 @@ def factory(**kw: object) -> MockProvider: assert received.get("api_key") == "key-123" def test_builtin_count(self) -> None: - """Sanity check: we have exactly 6 built-in providers.""" - assert len(_BUILTIN_PROVIDERS) == 6 + """Sanity check: we have exactly 7 built-in providers.""" + assert len(_BUILTIN_PROVIDERS) == 7