2 changes: 1 addition & 1 deletion README.md
@@ -30,7 +30,7 @@ Stop drowning in noise. Connect any LLM (OpenAI, Gemini, Anthropic, Ollama and m

**AI change summaries** — instead of staring at a raw diff, your notification reads _"Price dropped from $89.99 to $67.00"_ or _"3 new products added to the listing"_. Works globally or per-watch, with full control over the prompt.

Works with any model you already pay for — GPT-4o-mini and Gemini Flash handle this well at fractions of a cent per check. Or run it entirely locally with Ollama. Powered by [LiteLLM](https://github.com/BerriAI/litellm), giving you seamless access to [100+ supported providers and models](https://docs.litellm.ai/docs/providers).
Works with any model you already pay for — GPT-4o-mini and Gemini Flash handle this well at fractions of a cent per check. Or run it entirely locally with **Ollama**, **vLLM**, **LM Studio**, or any **OpenAI-compatible self-hosted endpoint** — pick the *OpenAI-compatible (vLLM, LM Studio, llama.cpp)* option in the provider dropdown and point it at your server's `/v1` URL. Powered by [LiteLLM](https://github.com/BerriAI/litellm), giving you seamless access to [100+ supported providers and models](https://docs.litellm.ai/docs/providers).
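For orientation, a minimal sketch of what pointing a client at the server's `/v1` URL looks like in practice; the port, model name, and API key below are assumptions, not part of this change:

```python
# Any server that speaks the OpenAI wire format (vLLM, LM Studio, llama.cpp server)
# can be reached by pointing the standard OpenAI client at its /v1 base URL.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed-locally")
resp = client.chat.completions.create(
    model="qwen3-8b",  # whichever model your local server is serving
    messages=[{"role": "user", "content": "Summarise: price dropped from $89.99 to $67.00"}],
)
print(resp.choices[0].message.content)
```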

[<img src="./docs/LLM-change-summary.jpeg" style="max-width:100%;" alt="AI-powered website change detection — plain language change summaries and smart alert rules" title="AI website change detection with LLM change summaries and intelligent alert filtering" />](https://changedetection.io?src=github)

6 changes: 6 additions & 0 deletions changedetectionio/blueprint/settings/__init__.py
@@ -36,6 +36,8 @@ def settings_page():
default['llm'] = {
'llm_model': _stored_llm.get('model', ''),
'llm_api_base': _stored_llm.get('api_base', ''),
'llm_provider_kind': _stored_llm.get('provider_kind', ''),
'llm_local_token_multiplier': _stored_llm.get('local_token_multiplier', 5),
'llm_change_summary_default': datastore.data['settings']['application'].get('llm_change_summary_default', ''),
'llm_override_diff_with_summary': datastore.data['settings']['application'].get('llm_override_diff_with_summary', True),
'llm_restock_use_fallback_extract': datastore.data['settings']['application'].get('llm_restock_use_fallback_extract', True),
@@ -148,6 +150,10 @@ def settings_page():
'model': (llm_data.get('llm_model') or '').strip(),
'api_key': effective_api_key,
'api_base': (llm_data.get('llm_api_base') or '').strip(),
# Identifies a self-hosted OpenAI-compatible endpoint so reasoning-friendly
# token caps can be applied conditionally (cloud-LLM defaults stay tight).
'provider_kind': (llm_data.get('llm_provider_kind') or '').strip(),
'local_token_multiplier': int(llm_data.get('llm_local_token_multiplier') or 5),
'token_budget_month': existing_llm.get('token_budget_month', 0),
'max_input_chars': existing_llm.get('max_input_chars', 0),
**preserved_counters,
20 changes: 14 additions & 6 deletions changedetectionio/blueprint/settings/llm.py
@@ -30,15 +30,20 @@ def llm_get_models():
api_key = (datastore.data['settings']['application'].get('llm') or {}).get('api_key', '')
logger.debug("LLM model list: no api_key in request, using stored key")

_PREFIXES = {'gemini': 'gemini/', 'ollama': 'ollama/', 'openrouter': 'openrouter/'}
_PREFIXES = {'gemini': 'gemini/', 'ollama': 'ollama/', 'openrouter': 'openrouter/',
'openai_compatible': 'openai/'}
# vLLM / LM Studio / llama.cpp speak OpenAI's wire format — route through litellm's
# 'openai' provider but keep the UI-level name distinct from cloud OpenAI.
_LITELLM_PROVIDER = {'openai_compatible': 'openai'}
prefix = _PREFIXES.get(provider, '')
litellm_provider = _LITELLM_PROVIDER.get(provider, provider)

try:
import litellm
logger.debug(f"LLM model list: calling litellm.get_valid_models provider={provider!r} api_base={api_base!r}")
logger.debug(f"LLM model list: calling litellm.get_valid_models provider={provider!r} (litellm={litellm_provider!r}) api_base={api_base!r}")
raw = litellm.get_valid_models(
check_provider_endpoint=True,
custom_llm_provider=provider,
custom_llm_provider=litellm_provider,
api_key=api_key or None,
api_base=api_base or None,
) or []
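For context, a hedged sketch of the call this mapping produces: the UI-level `openai_compatible` choice is routed through litellm's `openai` provider, and the stored model carries the `openai/` prefix so requests use the OpenAI wire format against the custom `api_base`. The endpoint URL, model name, and token below are assumptions:

```python
import litellm

resp = litellm.completion(
    model="openai/qwen3-8b",              # 'openai/' prefix + the model your server reports
    api_base="http://localhost:8000/v1",  # self-hosted vLLM / LM Studio / llama.cpp endpoint
    api_key="sk-local",                   # many local servers accept any placeholder token
    messages=[{"role": "user", "content": "ping"}],
)
print(resp.choices[0].message.content)
```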
@@ -70,11 +75,14 @@ def llm_test():
text, total_tokens, input_tokens, output_tokens = completion(
model=model,
messages=[{'role': 'user', 'content':
'Reply with exactly five words confirming you are ready.'}],
'Respond with just the word: ready'}],
api_key=llm_cfg.get('api_key') or None,
api_base=api_base or None,
timeout=20,
max_tokens=200,
timeout=30,
# Sized for reasoning models (Qwen3, DeepSeek-R1, o1/o3, Gemini 2.5 thinking)
# which emit chain-of-thought into message.reasoning_content before the answer
# lands in message.content — a small cap truncates mid-thought and yields no answer.
max_tokens=4000,
)
reply = text.strip()
if not reply:
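A hedged illustration of the failure mode the larger cap guards against; the model name and URL are assumptions, and `reasoning_content` is the field litellm surfaces for reasoning-capable models:

```python
import litellm

resp = litellm.completion(
    model="openai/deepseek-r1",           # assumed reasoning model behind a local endpoint
    api_base="http://localhost:8000/v1",
    messages=[{"role": "user", "content": "Respond with just the word: ready"}],
    max_tokens=200,                       # deliberately small: may be spent entirely on "thinking"
)
choice = resp.choices[0]
print(choice.finish_reason)                                  # 'length' when cut off mid-thought
print(getattr(choice.message, "reasoning_content", None))    # partial chain-of-thought, if any
print(choice.message.content)                                # often empty/None in that case
```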
@@ -111,6 +111,7 @@ <h3><span class="stab-overview-glyph">✦</span> {{ _('AI-powered change monitor
</optgroup>
<optgroup label="{{ _('Local / Self-hosted') }}">
<option value="ollama">Ollama (local)</option>
<option value="openai_compatible">{{ _('OpenAI-compatible (vLLM, LM Studio, llama.cpp)') }}</option>
</optgroup>
<optgroup label="OpenRouter">
<option value="openrouter">OpenRouter (200+ models)</option>
@@ -127,6 +128,18 @@ <h3><span class="stab-overview-glyph">✦</span> {{ _('AI-powered change monitor
<span class="pure-form-message-inline">{{ _('Only needed for Ollama or custom/self-hosted endpoints. Leave blank for cloud providers.') }}</span>
</div>

{# Hidden field carrying the dropdown selection so the backend knows when to apply
reasoning-friendly token caps (only for self-hosted OpenAI-compatible endpoints). #}
{{ form.llm.form.llm_provider_kind() }}

<div class="pure-control-group" id="llm-local-advanced-group" style="display:none">
<label for="{{ form.llm.form.llm_local_token_multiplier.id }}">{{ form.llm.form.llm_local_token_multiplier.label.text }}</label>
{{ form.llm.form.llm_local_token_multiplier() }}
<span class="pure-form-message-inline">
{{ _('Local reasoning models (Qwen3, DeepSeek-R1, Gemma 3, etc.) emit chain-of-thought before the final answer. This multiplier scales every <code>max_tokens</code> cap for this endpoint to leave reasoning room. Defaults to %(default)s; raise it if responses come back truncated, lower it if you want tighter limits. Only applied to self-hosted OpenAI-compatible endpoints — cloud providers (OpenAI, Anthropic, Gemini) keep their original tight caps.', default='5x') | safe }}
</span>
</div>

<div class="pure-control-group" id="llm-fetch-group" style="display:none">
<label></label>
<button type="button" id="llm-fetch-btn" class="pure-button button-xsmall" onclick="llmFetchModels()"
@@ -377,14 +390,15 @@ <h3><span class="stab-overview-glyph">✦</span> {{ _('AI-powered change monitor

<script>
(function () {
const LIVE_PROVIDERS = ['openai', 'anthropic', 'gemini', 'ollama', 'openrouter'];
const LIVE_PROVIDERS = ['openai', 'anthropic', 'gemini', 'ollama', 'openai_compatible', 'openrouter'];
const BASE_DEFAULTS = { ollama: 'http://localhost:11434' };
const KEY_HINTS = {
openai: '{{ _("platform.openai.com → API keys") }}',
anthropic: '{{ _("console.anthropic.com → API keys") }}',
gemini: '{{ _("aistudio.google.com → Get API key") }}',
ollama: '{{ _("No API key needed for local Ollama") }}',
openrouter: '{{ _("openrouter.ai → Keys") }}',
openai: '{{ _("platform.openai.com → API keys") }}',
anthropic: '{{ _("console.anthropic.com → API keys") }}',
gemini: '{{ _("aistudio.google.com → Get API key") }}',
ollama: '{{ _("No API key needed for local Ollama") }}',
openai_compatible: '{{ _("Bearer token for your self-hosted server (vLLM, LM Studio, etc.)") }}',
openrouter: '{{ _("openrouter.ai → Keys") }}',
};

window.llmDisclaimerToggle = function (cb) {
@@ -393,20 +407,31 @@ <h3><span class="stab-overview-glyph">✦</span> {{ _('AI-powered change monitor
};

window.llmOnProviderChange = function (provider) {
const fetchGroup = document.getElementById('llm-fetch-group');
const baseGroup = document.getElementById('llm-base-group');
const modelSelGrp = document.getElementById('llm-model-select-group');
const baseField = document.querySelector('[name="llm-llm_api_base"]');
const hint = document.getElementById('llm-key-hint');
const fetchGroup = document.getElementById('llm-fetch-group');
const baseGroup = document.getElementById('llm-base-group');
const modelSelGrp = document.getElementById('llm-model-select-group');
const localAdvGrp = document.getElementById('llm-local-advanced-group');
const baseField = document.querySelector('[name="llm-llm_api_base"]');
const kindField = document.querySelector('[name="llm-llm_provider_kind"]');
const hint = document.getElementById('llm-key-hint');

fetchGroup.style.display = LIVE_PROVIDERS.includes(provider) ? '' : 'none';

const needsBase = provider === 'ollama';
const needsBase = provider === 'ollama' || provider === 'openai_compatible';
baseGroup.style.display = needsBase ? '' : 'none';
if (BASE_DEFAULTS[provider] !== undefined) {
if (!baseField.value) baseField.value = BASE_DEFAULTS[provider];
}

// Persist the dropdown selection so the backend can branch on provider kind
// (currently only 'openai_compatible' triggers the local-multiplier code path).
if (kindField) kindField.value = provider || '';

// Show the local-endpoint advanced settings (token multiplier) only for the
// OpenAI-compatible self-hosted option. Cloud providers and Ollama get the
// original tight caps and don't see this section at all.
if (localAdvGrp) localAdvGrp.style.display = (provider === 'openai_compatible') ? '' : 'none';

hint.textContent = KEY_HINTS[provider] || '';
modelSelGrp.style.display = 'none';
document.getElementById('llm-fetch-status').textContent = '';
@@ -516,6 +541,11 @@ <h3><span class="stab-overview-glyph">✦</span> {{ _('AI-powered change monitor
if (m.startsWith('gemini/')) guessed = 'gemini';
else if (m.startsWith('ollama/')) guessed = 'ollama';
else if (m.startsWith('openrouter/')) guessed = 'openrouter';
else if (m.startsWith('openai/')) {
// openai/<model> + custom api_base = self-hosted OpenAI-compatible (vLLM etc.)
const baseField = document.querySelector('[name="llm-llm_api_base"]');
guessed = (baseField && baseField.value.trim()) ? 'openai_compatible' : 'openai';
}
else if (m.startsWith('claude')) guessed = 'anthropic';
else if (m.startsWith('gpt') || m.startsWith('o1') || m.startsWith('o3')) guessed = 'openai';

19 changes: 19 additions & 0 deletions changedetectionio/forms.py
@@ -17,6 +17,7 @@
Form,
Field,
FloatField,
HiddenField,
IntegerField,
PasswordField,
RadioField,
@@ -1084,6 +1085,24 @@ class globalSettingsLLMForm(Form):
"style": "width: 24em;",
},
)
# Persisted by the Provider dropdown JS — lets the backend distinguish a self-hosted
# OpenAI-compatible endpoint (vLLM, LM Studio, llama.cpp) from cloud OpenAI, so we can
# apply reasoning-friendly token caps only when the user opted in.
llm_provider_kind = HiddenField(
validators=[validators.Optional()],
default='',
)
# Multiplier applied to LLM max_tokens caps when provider_kind == 'openai_compatible'.
# Reasoning models (Qwen3, DeepSeek-R1, Gemma 3, etc.) emit chain-of-thought into
# message.reasoning_content before the final answer lands in message.content.
# Local self-hosted models cost no per-token money, so giving them headroom is cheap;
# cloud providers stay on the original tight caps so existing users see no cost change.
llm_local_token_multiplier = IntegerField(
_l('Token multiplier for local reasoning models'),
validators=[validators.Optional(), validators.NumberRange(min=1, max=20)],
default=5,
render_kw={"placeholder": "5", "style": "width: 6em;"},
)
llm_change_summary_default = TextAreaField(
_l('Default AI Change Summary prompt'),
validators=[validators.Optional(), validators.Length(max=2000)],
43 changes: 40 additions & 3 deletions changedetectionio/llm/evaluator.py
@@ -81,6 +81,11 @@ def _cached_system(text: str, model: str = '') -> dict:

LLM_DEFAULT_MAX_SUMMARY_TOKENS = 3000

# Output-token cap for the JSON-returning calls (intent eval, preview, setup/prefilter).
# Mirrors client.py's _MAX_COMPLETION_TOKENS so the multiplier helper has a base value
# to scale; cloud-LLM users hit this default unmodified, preserving prior cost defaults.
JSON_RESPONSE_MAX_TOKENS = 400

# Default prompt used when the user hasn't configured llm_change_summary
DEFAULT_CHANGE_SUMMARY_PROMPT = "Describe in plain English what changed — list what was added or removed as bullet points, including key details for each item. Be careful of content that merely just moved around, you should mention that it moved but dont report that it was added/removed etc. Be considerate of the style content you are summarising the change of, adjust your report accordingly. Do not quote non-English text verbatim; translate and summarise all content into English. Your entire response must be in English."

Expand All @@ -90,6 +95,32 @@ def _summary_max_tokens(diff: str, max_cap: int = LLM_DEFAULT_MAX_SUMMARY_TOKENS
return max(400, min(len(diff) // 4, max_cap))


def apply_local_token_multiplier(base_max_tokens: int, llm_cfg: dict) -> int:
"""
Scale max_tokens for self-hosted OpenAI-compatible endpoints (vLLM, LM Studio, llama.cpp).

Reasoning models (Qwen3, DeepSeek-R1, Gemma 3, etc.) emit chain-of-thought into
`message.reasoning_content` BEFORE the final answer lands in `message.content`.
Without enough headroom the request truncates mid-thought (`finish_reason='length'`)
and the answer never lands — callers see an empty string and silently fall through
to safe defaults, hiding the problem.

Local self-hosted models cost no per-token money, so headroom is cheap; cloud
providers (OpenAI, Anthropic, Gemini, OpenRouter) keep their original tight caps
so existing users see no cost change.

Activated only when `llm_cfg['provider_kind'] == 'openai_compatible'`.
Multiplier defaults to 5x and is user-configurable in Settings → AI → Provider.
"""
if (llm_cfg or {}).get('provider_kind') != 'openai_compatible':
return base_max_tokens
try:
multiplier = int(llm_cfg.get('local_token_multiplier') or 5)
except (TypeError, ValueError):
multiplier = 5
return base_max_tokens * max(1, multiplier)
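
A brief usage sketch of the helper with illustrative values:

```python
from changedetectionio.llm.evaluator import apply_local_token_multiplier

apply_local_token_multiplier(400, {'provider_kind': 'openai_compatible', 'local_token_multiplier': 5})  # -> 2000
apply_local_token_multiplier(400, {'provider_kind': 'gemini'})  # cloud provider, unchanged -> 400
apply_local_token_multiplier(400, {})                           # no provider kind set, unchanged -> 400
```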


# ---------------------------------------------------------------------------
# Intent resolution
# ---------------------------------------------------------------------------
@@ -338,6 +369,7 @@ def run_setup(watch, datastore, snapshot_text: str) -> None:
],
api_key=cfg.get('api_key'),
api_base=cfg.get('api_base'),
max_tokens=apply_local_token_multiplier(JSON_RESPONSE_MAX_TOKENS, cfg),
extra_body=_thinking_extra_body(cfg['model'], int(datastore.data['settings']['application'].get('llm_thinking_budget', LLM_DEFAULT_THINKING_BUDGET) or 0)),
)
_check_token_budget(watch, cfg, tokens)
@@ -431,9 +463,12 @@ def summarise_change(watch, datastore, diff: str, current_snapshot: str = '') ->
],
api_key=cfg.get('api_key'),
api_base=cfg.get('api_base'),
max_tokens=_summary_max_tokens(
diff,
max_cap=int(datastore.data['settings']['application'].get('llm_max_summary_tokens', LLM_DEFAULT_MAX_SUMMARY_TOKENS) or LLM_DEFAULT_MAX_SUMMARY_TOKENS),
max_tokens=apply_local_token_multiplier(
_summary_max_tokens(
diff,
max_cap=int(datastore.data['settings']['application'].get('llm_max_summary_tokens', LLM_DEFAULT_MAX_SUMMARY_TOKENS) or LLM_DEFAULT_MAX_SUMMARY_TOKENS),
),
cfg,
),
extra_body=_extra_body,
)
@@ -496,6 +531,7 @@ def preview_extract(watch, datastore, content: str) -> dict | None:
],
api_key=cfg.get('api_key'),
api_base=cfg.get('api_base'),
max_tokens=apply_local_token_multiplier(JSON_RESPONSE_MAX_TOKENS, cfg),
extra_body=_thinking_extra_body(cfg['model'], int(datastore.data['settings']['application'].get('llm_thinking_budget', LLM_DEFAULT_THINKING_BUDGET) or 0)),
)
accumulate_global_tokens(datastore, tokens, model=cfg['model'])
@@ -579,6 +615,7 @@ def evaluate_change(watch, datastore, diff: str, current_snapshot: str = '') ->
],
api_key=cfg.get('api_key'),
api_base=cfg.get('api_base'),
max_tokens=apply_local_token_multiplier(JSON_RESPONSE_MAX_TOKENS, cfg),
extra_body=_thinking_extra_body(cfg['model'], int(datastore.data['settings']['application'].get('llm_thinking_budget', LLM_DEFAULT_THINKING_BUDGET) or 0)),
)
raw, tokens = _resp[0], _resp[1]
@@ -13,6 +13,7 @@
import re
from loguru import logger
from changedetectionio.pluggy_interface import hookimpl
from changedetectionio.llm.evaluator import apply_local_token_multiplier

# Injected at startup by inject_datastore_into_plugins()
datastore = None
@@ -234,7 +235,10 @@ def get_itemprop_availability_override(content, fetcher_name, fetcher_instance,
],
api_key=llm_cfg.get('api_key'),
api_base=llm_cfg.get('api_base'),
max_tokens=80,
# 80 fits a {price, currency, availability} JSON answer comfortably for cloud
# models. Local reasoning models burn most of that on chain-of-thought before
# the JSON lands — the multiplier scales it up only when provider_kind says so.
max_tokens=apply_local_token_multiplier(80, llm_cfg),
)

accumulate_global_tokens(