1515 - qwen2.5-coder — excellent multilingual code model
1616
1717Usage:
18- provider = OllamaProvider(model="codellama", base_url="http://localhost:11434/v1 ")
18+ provider = OllamaProvider(model="codellama", base_url="http://localhost:11434")
1919"""
2020
2121from __future__ import annotations
4848_MIN_WAIT = 1.0
4949_MAX_WAIT = 8.0 # Ollama can be slow on first load, allow more wait time
5050
51- _DEFAULT_BASE_URL = "http://localhost:11434/v1"
51+ _DEFAULT_BASE_URL = "http://localhost:11434"
52+
53+
54+ def _normalize_base_url (url : str ) -> str :
55+ """Ensure base_url ends with /v1 for OpenAI SDK compatibility."""
56+ url = url .rstrip ("/" )
57+ if not url .endswith ("/v1" ):
58+ url += "/v1"
59+ return url
5260
5361
5462class OllamaProvider (BaseProvider ):
@@ -59,7 +67,8 @@ class OllamaProvider(BaseProvider):
5967 Args:
6068 model: Ollama model name (e.g., 'llama3.2', 'codellama').
6169 Must be pulled first: `ollama pull <model>`
62- base_url: Ollama API base URL. Defaults to http://localhost:11434/v1
70+ base_url: Ollama server URL. Defaults to http://localhost:11434.
71+ The /v1 suffix is appended automatically if missing.
6372 rate_limiter: Optional RateLimiter (useful when running multiple
6473 concurrent requests against a resource-constrained machine).
6574 """
@@ -70,8 +79,7 @@ def __init__(
7079 base_url : str = _DEFAULT_BASE_URL ,
7180 rate_limiter : RateLimiter | None = None ,
7281 ) -> None :
73- # Ollama's OpenAI-compatible endpoint accepts any non-empty api_key
74- self ._client = AsyncOpenAI (api_key = "ollama" , base_url = base_url )
82+ self ._client = AsyncOpenAI (api_key = "ollama" , base_url = _normalize_base_url (base_url ))
7583 self ._model = model
7684 self ._rate_limiter = rate_limiter
7785
0 commit comments