From 4a5af6da9930230f1ac3b76d87f8ed949901f833 Mon Sep 17 00:00:00 2001
From: tekgnosis-net <6506223+tekgnosis-net@users.noreply.github.com>
Date: Sat, 2 May 2026 21:41:34 +1000
Subject: [PATCH 1/2] LLM - Add OpenAI-compatible provider (vLLM, LM Studio,
 llama.cpp) with token multiplier for reasoning models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a new "OpenAI-compatible (vLLM, LM Studio, llama.cpp)" option in the
Settings → AI provider dropdown for self-hosted endpoints that speak OpenAI's
wire format. The form schema and litellm.completion() plumbing already
supported custom api_base + api_key — the wiring is purely UI plus a small
mapping in the model-list endpoint.

Reasoning-model token multiplier (opt-in, scoped to the new option):
models like Qwen3 / DeepSeek-R1 / Gemma 3 emit chain-of-thought into
message.reasoning_content before the answer lands in message.content. The
original tight max_tokens caps truncate mid-thought (finish_reason='length')
and the answer never arrives. A new IntegerField llm_local_token_multiplier
(default 5x, range 1-20) appears only when the new provider is selected; the
helper apply_local_token_multiplier() wraps every completion() call site
(setup, summary, preview, intent eval, restock fallback) and is a no-op for
any other provider kind. Cloud users (OpenAI / Anthropic / Gemini /
OpenRouter / Ollama) see no behavioral or cost change — original caps are
preserved unchanged. Local self-hosted models cost no per-token money, so
headroom is cheap.

UI / form
- New option under the existing Local / Self-hosted optgroup
- Hidden field llm_provider_kind (set by dropdown JS) +
  llm_local_token_multiplier IntegerField (rendered only when
  openai_compatible is selected)
- LIVE_PROVIDERS, KEY_HINTS, api_base visibility, and detectCurrentProvider
  updated to recognize the new option

Backend
- llm_get_models maps openai_compatible -> openai for litellm.get_valid_models
  so vLLM's /v1/models is hit with the right provider semantics; results get
  an openai/ prefix so saved values route correctly through
  litellm.completion() later
- Test-connection: simpler prompt, max_tokens 200 -> 4000, timeout 20 -> 30
  to give reasoning models room
- Form persistence stores provider_kind + local_token_multiplier in
  datastore['settings']['application']['llm'] with round-trip pre-population

i18n: 3 new English msgids extracted to messages.pot and propagated to all
14 .po catalogs via setup.py update_catalog.

README: mention vLLM / LM Studio / OpenAI-compatible alongside Ollama.
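For reviewers, a minimal sketch of the helper described above. The name comes
from this commit message, but the exact signature, clamping, and module
location are assumptions (evaluator.py's hunks are not excerpted here):

    def apply_local_token_multiplier(llm_cfg: dict, max_tokens: int) -> int:
        """Scale a call site's max_tokens cap, but only when the saved
        provider kind is the self-hosted OpenAI-compatible option;
        a no-op for every other provider."""
        if (llm_cfg or {}).get('provider_kind') != 'openai_compatible':
            return max_tokens  # cloud caps (and costs) stay exactly as before
        multiplier = int(llm_cfg.get('local_token_multiplier') or 5)
        return max_tokens * max(1, min(multiplier, 20))  # clamp to the form's 1-20 range

With the default 5x, a call site capped at 800 tokens would request 4000,
leaving room for reasoning_content plus the final answer.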
---
 README.md                                       |  2 +-
 .../blueprint/settings/__init__.py              |  6 +++
 changedetectionio/blueprint/settings/llm.py     | 20 ++++---
 .../settings/templates/settings_llm_tab.html    | 54 ++++++++++++++-----
 changedetectionio/forms.py                      | 19 +++++++
 changedetectionio/llm/evaluator.py              | 43 +++++++++++++--
 .../restock_diff/plugins/llm_restock.py         |  6 ++-
 .../translations/cs/LC_MESSAGES/messages.po     | 21 ++++++++
 .../translations/de/LC_MESSAGES/messages.po     | 21 ++++++++
 .../en_GB/LC_MESSAGES/messages.po               | 21 ++++++++
 .../en_US/LC_MESSAGES/messages.po               | 21 ++++++++
 .../translations/es/LC_MESSAGES/messages.po     | 21 ++++++++
 .../translations/fr/LC_MESSAGES/messages.po     | 21 ++++++++
 .../translations/it/LC_MESSAGES/messages.po     | 21 ++++++++
 .../translations/ja/LC_MESSAGES/messages.po     | 21 ++++++++
 .../translations/ko/LC_MESSAGES/messages.po     | 21 ++++++++
 changedetectionio/translations/messages.pot     | 23 +++++++-
 .../pt_BR/LC_MESSAGES/messages.po               | 21 ++++++++
 .../translations/tr/LC_MESSAGES/messages.po     | 21 ++++++++
 .../translations/uk/LC_MESSAGES/messages.po     | 21 ++++++++
 .../translations/zh/LC_MESSAGES/messages.po     | 21 ++++++++
 .../zh_Hant_TW/LC_MESSAGES/messages.po          | 21 ++++++++
 22 files changed, 443 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index db10fe57581..ce1336724f9 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ Stop drowning in noise. Connect any LLM (OpenAI, Gemini, Anthropic, Ollama and m
 
 **AI change summaries** — instead of staring at a raw diff, your notification reads _"Price dropped from $89.99 to $67.00"_ or _"3 new products added to the listing"_. Works globally or per-watch, with full control over the prompt.
 
-Works with any model you already pay for — GPT-4o-mini and Gemini Flash handle this well at fractions of a cent per check. Or run it entirely locally with Ollama. Powered by [LiteLLM](https://github.com/BerriAI/litellm), giving you seamless access to [100+ supported providers and models](https://docs.litellm.ai/docs/providers).
+Works with any model you already pay for — GPT-4o-mini and Gemini Flash handle this well at fractions of a cent per check. Or run it entirely locally with **Ollama**, **vLLM**, **LM Studio**, or any **OpenAI-compatible self-hosted endpoint** — pick the *OpenAI-compatible (vLLM, LM Studio, llama.cpp)* option in the provider dropdown and point it at your server's `/v1` URL. Powered by [LiteLLM](https://github.com/BerriAI/litellm), giving you seamless access to [100+ supported providers and models](https://docs.litellm.ai/docs/providers).
 
 [AI-powered website change detection — plain language change summaries and smart alert rules](https://changedetection.io?src=github)
 

diff --git a/changedetectionio/blueprint/settings/__init__.py b/changedetectionio/blueprint/settings/__init__.py
index 7e4e57507be..74af6b71287 100644
--- a/changedetectionio/blueprint/settings/__init__.py
+++ b/changedetectionio/blueprint/settings/__init__.py
@@ -36,6 +36,8 @@ def settings_page():
     default['llm'] = {
         'llm_model': _stored_llm.get('model', ''),
         'llm_api_base': _stored_llm.get('api_base', ''),
+        'llm_provider_kind': _stored_llm.get('provider_kind', ''),
+        'llm_local_token_multiplier': _stored_llm.get('local_token_multiplier', 5),
         'llm_change_summary_default': datastore.data['settings']['application'].get('llm_change_summary_default', ''),
         'llm_override_diff_with_summary': datastore.data['settings']['application'].get('llm_override_diff_with_summary', True),
         'llm_restock_use_fallback_extract': datastore.data['settings']['application'].get('llm_restock_use_fallback_extract', True),
@@ -148,6 +150,10 @@ def settings_page():
         'model': (llm_data.get('llm_model') or '').strip(),
         'api_key': effective_api_key,
         'api_base': (llm_data.get('llm_api_base') or '').strip(),
+        # Identifies a self-hosted OpenAI-compatible endpoint so reasoning-friendly
+        # token caps can be applied conditionally (cloud-LLM defaults stay tight).
+        'provider_kind': (llm_data.get('llm_provider_kind') or '').strip(),
+        'local_token_multiplier': int(llm_data.get('llm_local_token_multiplier') or 5),
         'token_budget_month': existing_llm.get('token_budget_month', 0),
         'max_input_chars': existing_llm.get('max_input_chars', 0),
         **preserved_counters,
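For orientation, the persisted shape these keys round-trip through, per the
hunk above (the model and api_base values are illustrative placeholders):

    # Illustrative only: datastore['settings']['application']['llm'] after a save;
    # existing keys (api_key, token_budget_month, counters) omitted.
    llm_settings = {
        'model': 'openai/qwen3-30b-a3b',         # hypothetical id; note the openai/ prefix
        'api_base': 'http://localhost:8000/v1',  # the self-hosted endpoint's /v1 URL
        'provider_kind': 'openai_compatible',    # written by the hidden dropdown field
        'local_token_multiplier': 5,             # IntegerField default, range 1-20
    }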
diff --git a/changedetectionio/blueprint/settings/llm.py b/changedetectionio/blueprint/settings/llm.py
index 2658633ebf0..8be993c9944 100644
--- a/changedetectionio/blueprint/settings/llm.py
+++ b/changedetectionio/blueprint/settings/llm.py
@@ -30,15 +30,20 @@ def llm_get_models():
         api_key = (datastore.data['settings']['application'].get('llm') or {}).get('api_key', '')
         logger.debug("LLM model list: no api_key in request, using stored key")
 
-    _PREFIXES = {'gemini': 'gemini/', 'ollama': 'ollama/', 'openrouter': 'openrouter/'}
+    _PREFIXES = {'gemini': 'gemini/', 'ollama': 'ollama/', 'openrouter': 'openrouter/',
+                 'openai_compatible': 'openai/'}
+    # vLLM / LM Studio / llama.cpp speak OpenAI's wire format — route through litellm's
+    # 'openai' provider but keep the UI-level name distinct from cloud OpenAI.
+    _LITELLM_PROVIDER = {'openai_compatible': 'openai'}
     prefix = _PREFIXES.get(provider, '')
+    litellm_provider = _LITELLM_PROVIDER.get(provider, provider)
 
     try:
         import litellm
-        logger.debug(f"LLM model list: calling litellm.get_valid_models provider={provider!r} api_base={api_base!r}")
+        logger.debug(f"LLM model list: calling litellm.get_valid_models provider={provider!r} (litellm={litellm_provider!r}) api_base={api_base!r}")
         raw = litellm.get_valid_models(
             check_provider_endpoint=True,
-            custom_llm_provider=provider,
+            custom_llm_provider=litellm_provider,
             api_key=api_key or None,
             api_base=api_base or None,
         ) or []
@@ -70,11 +75,14 @@ def llm_test():
     text, total_tokens, input_tokens, output_tokens = completion(
         model=model,
         messages=[{'role': 'user', 'content':
-                   'Reply with exactly five words confirming you are ready.'}],
+                   'Respond with just the word: ready'}],
         api_key=llm_cfg.get('api_key') or None,
         api_base=api_base or None,
-        timeout=20,
-        max_tokens=200,
+        timeout=30,
+        # Sized for reasoning models (Qwen3, DeepSeek-R1, o1/o3, Gemini 2.5 thinking)
+        # which emit chain-of-thought into message.reasoning_content before the answer
+        # lands in message.content — a small cap truncates mid-thought and yields no answer.
+        max_tokens=4000,
     )
     reply = text.strip()
     if not reply:
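To illustrate why the openai/ prefix matters: a saved model id later routes
through litellm's OpenAI wire-format code path, but against the custom
api_base. A hedged usage sketch (the endpoint URL and model id are made up;
the kwargs are litellm's public completion() parameters):

    import litellm

    response = litellm.completion(
        model='openai/qwen3-30b-a3b',          # 'openai/' selects the OpenAI-compatible route
        messages=[{'role': 'user', 'content': 'Respond with just the word: ready'}],
        api_key='not-needed',                  # many local servers accept any token
        api_base='http://localhost:8000/v1',   # e.g. vLLM's OpenAI-compatible server
        max_tokens=4000,                       # reasoning headroom, as in llm_test above
        timeout=30,
    )
    print(response.choices[0].message.content)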

diff --git a/changedetectionio/blueprint/settings/templates/settings_llm_tab.html b/changedetectionio/blueprint/settings/templates/settings_llm_tab.html
index 636b348d66a..18233e7f6f5 100644
--- a/changedetectionio/blueprint/settings/templates/settings_llm_tab.html
+++ b/changedetectionio/blueprint/settings/templates/settings_llm_tab.html
@@ -111,6 +111,7 @@ {{ _('AI-powered change monitor
[this hunk adds one line: the new "OpenAI-compatible (vLLM, LM Studio, llama.cpp)" dropdown option; its markup did not survive extraction]
@@ -127,6 +128,18 @@ {{ _('AI-powered change monitor
 {{ _('Only needed for Ollama or custom/self-hosted endpoints. Leave blank for cloud providers.') }}
+{# Hidden field carrying the dropdown selection so the backend knows when to apply
+   reasoning-friendly token caps (only for self-hosted OpenAI-compatible endpoints). #}
+{{ form.llm.form.llm_provider_kind() }}
[the remaining added lines, the llm_local_token_multiplier field markup, did not survive extraction]
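The diffstat shows forms.py gaining 19 lines for the new field; that hunk is
not excerpted, but from the commit message (IntegerField, default 5, range
1-20) it plausibly resembles this sketch (label text, validator choices, and
the form class name are assumptions):

    from wtforms import Form, IntegerField, validators

    class LLMSettingsForm(Form):  # hypothetical container; the real form class differs
        llm_local_token_multiplier = IntegerField(
            'Token multiplier for reasoning models',
            default=5,
            validators=[validators.Optional(), validators.NumberRange(min=1, max=20)],
        )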