diff --git a/README.md b/README.md
index db10fe57581..ce1336724f9 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ Stop drowning in noise. Connect any LLM (OpenAI, Gemini, Anthropic, Ollama and m
**AI change summaries** — instead of staring at a raw diff, your notification reads _"Price dropped from $89.99 to $67.00"_ or _"3 new products added to the listing"_. Works globally or per-watch, with full control over the prompt.
-Works with any model you already pay for — GPT-4o-mini and Gemini Flash handle this well at fractions of a cent per check. Or run it entirely locally with Ollama. Powered by [LiteLLM](https://github.com/BerriAI/litellm), giving you seamless access to [100+ supported providers and models](https://docs.litellm.ai/docs/providers).
+Works with any model you already pay for — GPT-4o-mini and Gemini Flash handle this well at fractions of a cent per check. Or run it entirely locally with **Ollama**, **vLLM**, **LM Studio**, or any **OpenAI-compatible self-hosted endpoint** — pick the *OpenAI-compatible (vLLM, LM Studio, llama.cpp)* option in the provider dropdown and point it at your server's `/v1` URL. Powered by [LiteLLM](https://github.com/BerriAI/litellm), giving you seamless access to [100+ supported providers and models](https://docs.litellm.ai/docs/providers).
[
](https://changedetection.io?src=github)
diff --git a/changedetectionio/blueprint/settings/__init__.py b/changedetectionio/blueprint/settings/__init__.py
index 7e4e57507be..74af6b71287 100644
--- a/changedetectionio/blueprint/settings/__init__.py
+++ b/changedetectionio/blueprint/settings/__init__.py
@@ -36,6 +36,8 @@ def settings_page():
default['llm'] = {
'llm_model': _stored_llm.get('model', ''),
'llm_api_base': _stored_llm.get('api_base', ''),
+ 'llm_provider_kind': _stored_llm.get('provider_kind', ''),
+ 'llm_local_token_multiplier': _stored_llm.get('local_token_multiplier', 5),
'llm_change_summary_default': datastore.data['settings']['application'].get('llm_change_summary_default', ''),
'llm_override_diff_with_summary': datastore.data['settings']['application'].get('llm_override_diff_with_summary', True),
'llm_restock_use_fallback_extract': datastore.data['settings']['application'].get('llm_restock_use_fallback_extract', True),
@@ -148,6 +150,10 @@ def settings_page():
'model': (llm_data.get('llm_model') or '').strip(),
'api_key': effective_api_key,
'api_base': (llm_data.get('llm_api_base') or '').strip(),
+ # Identifies a self-hosted OpenAI-compatible endpoint so reasoning-friendly
+ # token caps can be applied conditionally (cloud-LLM defaults stay tight).
+ 'provider_kind': (llm_data.get('llm_provider_kind') or '').strip(),
+ 'local_token_multiplier': int(llm_data.get('llm_local_token_multiplier') or 5),
'token_budget_month': existing_llm.get('token_budget_month', 0),
'max_input_chars': existing_llm.get('max_input_chars', 0),
**preserved_counters,
diff --git a/changedetectionio/blueprint/settings/llm.py b/changedetectionio/blueprint/settings/llm.py
index 2658633ebf0..690711d011e 100644
--- a/changedetectionio/blueprint/settings/llm.py
+++ b/changedetectionio/blueprint/settings/llm.py
@@ -30,15 +30,20 @@ def llm_get_models():
api_key = (datastore.data['settings']['application'].get('llm') or {}).get('api_key', '')
logger.debug("LLM model list: no api_key in request, using stored key")
- _PREFIXES = {'gemini': 'gemini/', 'ollama': 'ollama/', 'openrouter': 'openrouter/'}
+ _PREFIXES = {'gemini': 'gemini/', 'ollama': 'ollama/', 'openrouter': 'openrouter/',
+ 'openai_compatible': 'openai/'}
+ # vLLM / LM Studio / llama.cpp speak OpenAI's wire format — route through litellm's
+ # 'openai' provider but keep the UI-level name distinct from cloud OpenAI.
+ _LITELLM_PROVIDER = {'openai_compatible': 'openai'}
prefix = _PREFIXES.get(provider, '')
+ litellm_provider = _LITELLM_PROVIDER.get(provider, provider)
try:
import litellm
- logger.debug(f"LLM model list: calling litellm.get_valid_models provider={provider!r} api_base={api_base!r}")
+ logger.debug(f"LLM model list: calling litellm.get_valid_models provider={provider!r} (litellm={litellm_provider!r}) api_base={api_base!r}")
raw = litellm.get_valid_models(
check_provider_endpoint=True,
- custom_llm_provider=provider,
+ custom_llm_provider=litellm_provider,
api_key=api_key or None,
api_base=api_base or None,
) or []
@@ -67,14 +72,18 @@ def llm_test():
try:
logger.debug(f"LLM connection test: sending test prompt to model={model!r}")
+ # Use the same token-multiplier helper as the production calls, so cloud providers
+ # keep the small base cap (200 tokens, as before this change) and only
+ # 'openai_compatible' endpoints opt into the reasoning-friendly headroom.
+ from changedetectionio.llm.evaluator import apply_local_token_multiplier
text, total_tokens, input_tokens, output_tokens = completion(
model=model,
messages=[{'role': 'user', 'content':
- 'Reply with exactly five words confirming you are ready.'}],
+ 'Respond with just the word: ready'}],
api_key=llm_cfg.get('api_key') or None,
api_base=api_base or None,
- timeout=20,
- max_tokens=200,
+ timeout=30,
+ max_tokens=apply_local_token_multiplier(200, llm_cfg),
)
reply = text.strip()
if not reply:
diff --git a/changedetectionio/blueprint/settings/templates/settings_llm_tab.html b/changedetectionio/blueprint/settings/templates/settings_llm_tab.html
index 636b348d66a..18233e7f6f5 100644
--- a/changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+++ b/changedetectionio/blueprint/settings/templates/settings_llm_tab.html
@@ -111,6 +111,7 @@