Skip to content

Commit 1f9f67a

Browse files
committed
fix: address OpenRouter PR review comments
- Embedder: raise ValueError in __init__ for unknown models instead of silently falling back to 768 dims, which would mis-size stored vectors against the model's real output and corrupt the vector store. - Provider: drop **_kwargs catchall and accept cost_tracker explicitly so unknown kwargs from future registry changes fail loudly instead of vanishing. - Provider: switch max_completion_tokens → max_tokens in generate() and stream_chat(). Per OpenRouter API docs, max_tokens is the universal parameter across the 200+ proxied models; max_completion_tokens is an OpenAI-specific newer name not all proxied models accept.
1 parent 57d40c7 commit 1f9f67a

4 files changed

Lines changed: 39 additions & 14 deletions

File tree

packages/core/src/repowise/core/providers/embedding/openrouter.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,21 @@ def __init__(
4646
raise ValueError(
4747
"OpenRouter API key required. Pass api_key= or set OPENROUTER_API_KEY env var."
4848
)
49+
if model not in self._DIMS:
50+
known = ", ".join(sorted(self._DIMS))
51+
raise ValueError(
52+
f"Unknown embedding model {model!r}. Stored vectors would be mis-sized "
53+
f"against the model's real output, silently corrupting the vector store. "
54+
f"Add {model!r} to OpenRouterEmbedder._DIMS with its correct dimension count, "
55+
f"or pick a known model: {known}."
56+
)
4957
self._model = model
5058
self._timeout = timeout
5159
self._client: object | None = None
5260

5361
@property
5462
def dimensions(self) -> int:
55-
return self._DIMS.get(self._model, 768)
63+
return self._DIMS[self._model]
5664

5765
async def embed(self, texts: list[str]) -> list[list[float]]:
5866
"""Embed a batch of texts using OpenRouter.

packages/core/src/repowise/core/providers/llm/openrouter.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,12 @@
3636
RateLimitError,
3737
)
3838

39-
from typing import Any, AsyncIterator
39+
from typing import TYPE_CHECKING, Any, AsyncIterator
4040
from repowise.core.rate_limiter import RateLimiter
4141

42+
if TYPE_CHECKING:
43+
from repowise.core.generation.cost_tracker import CostTracker
44+
4245
log = structlog.get_logger(__name__)
4346

4447
_MAX_RETRIES = 3
@@ -58,11 +61,9 @@ class OpenRouterProvider(BaseProvider):
5861
rate_limiter: Optional RateLimiter instance.
5962
http_referer: Optional site URL for OpenRouter rankings/leaderboards.
6063
app_title: App name shown on OpenRouter dashboard. Defaults to "repowise".
61-
62-
Note:
63-
Cost tracking is not supported for OpenRouter. It proxies 200+ models
64-
with varying prices, and repowise's fallback pricing would show inflated
65-
numbers. Check the OpenRouter dashboard for actual costs.
64+
cost_tracker: Accepted for registry compatibility but not used — OpenRouter
65+
proxies 200+ models with varying prices, so repowise's fallback
66+
pricing would be misleading. Check the OpenRouter dashboard.
6667
"""
6768

6869
def __init__(
@@ -73,7 +74,7 @@ def __init__(
7374
rate_limiter: RateLimiter | None = None,
7475
http_referer: str | None = None,
7576
app_title: str = "repowise",
76-
**_kwargs: Any,
77+
cost_tracker: "CostTracker | None" = None,
7778
) -> None:
7879
resolved_key = api_key or os.environ.get("OPENROUTER_API_KEY")
7980
if not resolved_key:
@@ -153,7 +154,7 @@ async def _generate_with_retry(
153154
try:
154155
response = await self._client.chat.completions.create(
155156
model=self._model,
156-
max_completion_tokens=max_tokens,
157+
max_tokens=max_tokens,
157158
temperature=temperature,
158159
messages=[
159160
{"role": "system", "content": system_prompt},
@@ -205,7 +206,7 @@ async def stream_chat(
205206
full_messages = [{"role": "system", "content": system_prompt}, *messages]
206207
kwargs: dict[str, Any] = {
207208
"model": self._model,
208-
"max_completion_tokens": max_tokens,
209+
"max_tokens": max_tokens,
209210
"temperature": temperature,
210211
"messages": full_messages,
211212
"stream": True,

tests/unit/test_persistence/test_openrouter_embedder.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,10 @@ def test_dimensions_openai_large():
5151
assert emb.dimensions == 3072
5252

5353

54-
def test_dimensions_unknown_model_defaults_to_768():
55-
emb = OpenRouterEmbedder(api_key="k", model="some/future-model")
56-
assert emb.dimensions == 768
54+
def test_unknown_model_raises_at_construction():
55+
"""Unknown models must fail fast — a silent dim fallback would corrupt the vector store."""
56+
with pytest.raises(ValueError, match="Unknown embedding model"):
57+
OpenRouterEmbedder(api_key="k", model="some/future-model")
5758

5859

5960
# ---------------------------------------------------------------------------

tests/unit/test_providers/test_openrouter_provider.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,21 @@ def test_no_headers_when_empty():
6969
assert not headers.get("X-Title")
7070

7171

72+
def test_accepts_cost_tracker_kwarg():
73+
"""cost_tracker is accepted for registry parity but ignored (OpenRouter proxies
74+
200+ models with varying prices; repowise's fallback pricing would be misleading)."""
75+
sentinel = object()
76+
p = OpenRouterProvider(api_key="sk-or-test", cost_tracker=sentinel)
77+
assert p.provider_name == "openrouter"
78+
79+
80+
def test_rejects_unknown_kwargs():
81+
"""Unknown kwargs must fail loud — silently swallowing them would hide future
82+
registry changes (e.g. new tier=, budget= params passed through)."""
83+
with pytest.raises(TypeError):
84+
OpenRouterProvider(api_key="sk-or-test", future_param="oops")
85+
86+
7287
# ---------------------------------------------------------------------------
7388
# Successful generation
7489
# ---------------------------------------------------------------------------
@@ -132,7 +147,7 @@ async def fake_create(**kwargs):
132147

133148
kw = captured_kwargs[0]
134149
assert kw["model"] == "google/gemini-3.1-flash-lite-preview"
135-
assert kw["max_completion_tokens"] == 2048
150+
assert kw["max_tokens"] == 2048
136151
assert kw["temperature"] == 0.5
137152
messages = kw["messages"]
138153
assert messages[0] == {"role": "system", "content": "system msg"}

0 commit comments

Comments
 (0)