From 4a5af6da9930230f1ac3b76d87f8ed949901f833 Mon Sep 17 00:00:00 2001
From: tekgnosis-net <6506223+tekgnosis-net@users.noreply.github.com>
Date: Sat, 2 May 2026 21:41:34 +1000
Subject: [PATCH 1/2] LLM - Add OpenAI-compatible provider (vLLM, LM Studio,
llama.cpp) with token multiplier for reasoning models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a new "OpenAI-compatible (vLLM, LM Studio, llama.cpp)" option in the
Settings → AI provider dropdown for self-hosted endpoints that speak
OpenAI's wire format. The form schema and litellm.completion() plumbing
already supported custom api_base + api_key — the wiring is purely UI
plus a small mapping in the model-list endpoint.

Reasoning-model token multiplier (opt-in, scoped to the new option):
Models like Qwen3 and DeepSeek-R1 emit their chain-of-thought into
message.reasoning_content before the answer lands in message.content.
The original tight max_tokens caps truncate generation mid-thought
(finish_reason='length'), so the answer never arrives. A new IntegerField
llm_local_token_multiplier (default 5x, range 1-20) appears only when the
new provider is selected; the helper apply_local_token_multiplier() wraps
every completion() call site (setup, summary, preview, intent eval,
restock fallback) and is a no-op for every other provider kind. Users of
other providers (OpenAI/Anthropic/Gemini/OpenRouter/Ollama) see no
behavioral or cost change; their original caps are preserved unchanged.
Local self-hosted models incur no per-token cost, so the extra headroom
is cheap.
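
A minimal sketch of the idea behind the helper (the real signature and
placement in llm/evaluator.py may differ):

    def apply_local_token_multiplier(max_tokens, llm_cfg):
        # No-op unless the self-hosted OpenAI-compatible provider kind is active.
        if (llm_cfg.get('provider_kind') or '') != 'openai_compatible':
            return max_tokens
        try:
            multiplier = int(llm_cfg.get('local_token_multiplier') or 5)
        except (TypeError, ValueError):
            multiplier = 5
        multiplier = max(1, min(multiplier, 20))  # clamp to the form's 1-20 range
        return max_tokens * multiplier
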
UI / form
- New option under the existing Local / Self-hosted optgroup
- Hidden field llm_provider_kind (set by dropdown JS) +
llm_local_token_multiplier IntegerField (rendered only when
openai_compatible); a rough sketch of both fields follows this list
- LIVE_PROVIDERS, KEY_HINTS, api_base visibility, and detectCurrentProvider
updated to recognize the new option
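
A rough sketch of the form-side fields described above (class name, label
text, and validators here are assumptions, not the actual forms.py code):

    from wtforms import Form, HiddenField, IntegerField
    from wtforms.validators import NumberRange, Optional

    class LLMSettingsForm(Form):  # illustrative container name only
        # Set by the provider dropdown's JS so the backend can tell the
        # self-hosted OpenAI-compatible option apart from cloud OpenAI.
        llm_provider_kind = HiddenField(default='')
        # Only rendered in the template when provider_kind == 'openai_compatible'.
        llm_local_token_multiplier = IntegerField(
            'Token multiplier for reasoning models',
            default=5,
            validators=[Optional(), NumberRange(min=1, max=20)],
        )
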
Backend
- llm_get_models maps openai_compatible -> openai for
litellm.get_valid_models so vLLM's /v1/models is hit with the right
provider semantics; results get an openai/ prefix so saved values route
correctly through litellm.completion() later (illustrated after this list)
- Test-connection: simpler prompt, max_tokens 200 -> 4000, timeout 20 -> 30
to give reasoning models room
- Form persistence stores provider_kind + local_token_multiplier in
datastore['settings']['application']['llm'] with round-trip
pre-population
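
For illustration, a saved model value then routes through litellm against
the self-hosted base URL roughly like this (the model name, URL, and key
are placeholders, not values from this change):

    import litellm

    response = litellm.completion(
        model='openai/qwen3-30b-a3b',         # 'openai/' prefix added by llm_get_models
        api_base='http://localhost:8000/v1',  # vLLM / LM Studio / llama.cpp server
        api_key='sk-local',                   # many local servers accept any non-empty key
        messages=[{'role': 'user', 'content': 'Respond with just the word: ready'}],
        max_tokens=4000,
        timeout=30,
    )
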
i18n: 3 new English msgids extracted to messages.pot and propagated to
all 14 .po catalogs via setup.py update_catalog.
README: mention vLLM / LM Studio / OpenAI-compatible alongside Ollama.
---
README.md | 2 +-
.../blueprint/settings/__init__.py | 6 +++
changedetectionio/blueprint/settings/llm.py | 20 ++++---
.../settings/templates/settings_llm_tab.html | 54 ++++++++++++++-----
changedetectionio/forms.py | 19 +++++++
changedetectionio/llm/evaluator.py | 43 +++++++++++++--
.../restock_diff/plugins/llm_restock.py | 6 ++-
.../translations/cs/LC_MESSAGES/messages.po | 21 ++++++++
.../translations/de/LC_MESSAGES/messages.po | 21 ++++++++
.../en_GB/LC_MESSAGES/messages.po | 21 ++++++++
.../en_US/LC_MESSAGES/messages.po | 21 ++++++++
.../translations/es/LC_MESSAGES/messages.po | 21 ++++++++
.../translations/fr/LC_MESSAGES/messages.po | 21 ++++++++
.../translations/it/LC_MESSAGES/messages.po | 21 ++++++++
.../translations/ja/LC_MESSAGES/messages.po | 21 ++++++++
.../translations/ko/LC_MESSAGES/messages.po | 21 ++++++++
changedetectionio/translations/messages.pot | 23 +++++++-
.../pt_BR/LC_MESSAGES/messages.po | 21 ++++++++
.../translations/tr/LC_MESSAGES/messages.po | 21 ++++++++
.../translations/uk/LC_MESSAGES/messages.po | 21 ++++++++
.../translations/zh/LC_MESSAGES/messages.po | 21 ++++++++
.../zh_Hant_TW/LC_MESSAGES/messages.po | 21 ++++++++
22 files changed, 443 insertions(+), 24 deletions(-)
diff --git a/README.md b/README.md
index db10fe57581..ce1336724f9 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ Stop drowning in noise. Connect any LLM (OpenAI, Gemini, Anthropic, Ollama and m
**AI change summaries** — instead of staring at a raw diff, your notification reads _"Price dropped from $89.99 to $67.00"_ or _"3 new products added to the listing"_. Works globally or per-watch, with full control over the prompt.
-Works with any model you already pay for — GPT-4o-mini and Gemini Flash handle this well at fractions of a cent per check. Or run it entirely locally with Ollama. Powered by [LiteLLM](https://github.com/BerriAI/litellm), giving you seamless access to [100+ supported providers and models](https://docs.litellm.ai/docs/providers).
+Works with any model you already pay for — GPT-4o-mini and Gemini Flash handle this well at fractions of a cent per check. Or run it entirely locally with **Ollama**, **vLLM**, **LM Studio**, or any **OpenAI-compatible self-hosted endpoint** — pick the *OpenAI-compatible (vLLM, LM Studio, llama.cpp)* option in the provider dropdown and point it at your server's `/v1` URL. Powered by [LiteLLM](https://github.com/BerriAI/litellm), giving you seamless access to [100+ supported providers and models](https://docs.litellm.ai/docs/providers).
[
](https://changedetection.io?src=github)
diff --git a/changedetectionio/blueprint/settings/__init__.py b/changedetectionio/blueprint/settings/__init__.py
index 7e4e57507be..74af6b71287 100644
--- a/changedetectionio/blueprint/settings/__init__.py
+++ b/changedetectionio/blueprint/settings/__init__.py
@@ -36,6 +36,8 @@ def settings_page():
default['llm'] = {
'llm_model': _stored_llm.get('model', ''),
'llm_api_base': _stored_llm.get('api_base', ''),
+ 'llm_provider_kind': _stored_llm.get('provider_kind', ''),
+ 'llm_local_token_multiplier': _stored_llm.get('local_token_multiplier', 5),
'llm_change_summary_default': datastore.data['settings']['application'].get('llm_change_summary_default', ''),
'llm_override_diff_with_summary': datastore.data['settings']['application'].get('llm_override_diff_with_summary', True),
'llm_restock_use_fallback_extract': datastore.data['settings']['application'].get('llm_restock_use_fallback_extract', True),
@@ -148,6 +150,10 @@ def settings_page():
'model': (llm_data.get('llm_model') or '').strip(),
'api_key': effective_api_key,
'api_base': (llm_data.get('llm_api_base') or '').strip(),
+ # Identifies a self-hosted OpenAI-compatible endpoint so reasoning-friendly
+ # token caps can be applied conditionally (cloud-LLM defaults stay tight).
+ 'provider_kind': (llm_data.get('llm_provider_kind') or '').strip(),
+ 'local_token_multiplier': int(llm_data.get('llm_local_token_multiplier') or 5),
'token_budget_month': existing_llm.get('token_budget_month', 0),
'max_input_chars': existing_llm.get('max_input_chars', 0),
**preserved_counters,
diff --git a/changedetectionio/blueprint/settings/llm.py b/changedetectionio/blueprint/settings/llm.py
index 2658633ebf0..8be993c9944 100644
--- a/changedetectionio/blueprint/settings/llm.py
+++ b/changedetectionio/blueprint/settings/llm.py
@@ -30,15 +30,20 @@ def llm_get_models():
api_key = (datastore.data['settings']['application'].get('llm') or {}).get('api_key', '')
logger.debug("LLM model list: no api_key in request, using stored key")
- _PREFIXES = {'gemini': 'gemini/', 'ollama': 'ollama/', 'openrouter': 'openrouter/'}
+ _PREFIXES = {'gemini': 'gemini/', 'ollama': 'ollama/', 'openrouter': 'openrouter/',
+ 'openai_compatible': 'openai/'}
+ # vLLM / LM Studio / llama.cpp speak OpenAI's wire format — route through litellm's
+ # 'openai' provider but keep the UI-level name distinct from cloud OpenAI.
+ _LITELLM_PROVIDER = {'openai_compatible': 'openai'}
prefix = _PREFIXES.get(provider, '')
+ litellm_provider = _LITELLM_PROVIDER.get(provider, provider)
try:
import litellm
- logger.debug(f"LLM model list: calling litellm.get_valid_models provider={provider!r} api_base={api_base!r}")
+ logger.debug(f"LLM model list: calling litellm.get_valid_models provider={provider!r} (litellm={litellm_provider!r}) api_base={api_base!r}")
raw = litellm.get_valid_models(
check_provider_endpoint=True,
- custom_llm_provider=provider,
+ custom_llm_provider=litellm_provider,
api_key=api_key or None,
api_base=api_base or None,
) or []
@@ -70,11 +75,14 @@ def llm_test():
text, total_tokens, input_tokens, output_tokens = completion(
model=model,
messages=[{'role': 'user', 'content':
- 'Reply with exactly five words confirming you are ready.'}],
+ 'Respond with just the word: ready'}],
api_key=llm_cfg.get('api_key') or None,
api_base=api_base or None,
- timeout=20,
- max_tokens=200,
+ timeout=30,
+ # Sized for reasoning models (Qwen3, DeepSeek-R1, o1/o3, Gemini 2.5 thinking)
+ # which emit chain-of-thought into message.reasoning_content before the answer
+ # lands in message.content — a small cap truncates mid-thought and yields no answer.
+ max_tokens=4000,
)
reply = text.strip()
if not reply:
diff --git a/changedetectionio/blueprint/settings/templates/settings_llm_tab.html b/changedetectionio/blueprint/settings/templates/settings_llm_tab.html
index 636b348d66a..18233e7f6f5 100644
--- a/changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+++ b/changedetectionio/blueprint/settings/templates/settings_llm_tab.html
@@ -111,6 +111,7 @@