diff --git a/README.md b/README.md index db10fe57581..ce1336724f9 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Stop drowning in noise. Connect any LLM (OpenAI, Gemini, Anthropic, Ollama and m **AI change summaries** — instead of staring at a raw diff, your notification reads _"Price dropped from $89.99 to $67.00"_ or _"3 new products added to the listing"_. Works globally or per-watch, with full control over the prompt. -Works with any model you already pay for — GPT-4o-mini and Gemini Flash handle this well at fractions of a cent per check. Or run it entirely locally with Ollama. Powered by [LiteLLM](https://github.com/BerriAI/litellm), giving you seamless access to [100+ supported providers and models](https://docs.litellm.ai/docs/providers). +Works with any model you already pay for — GPT-4o-mini and Gemini Flash handle this well at fractions of a cent per check. Or run it entirely locally with **Ollama**, **vLLM**, **LM Studio**, or any **OpenAI-compatible self-hosted endpoint**: select the *OpenAI-compatible (vLLM, LM Studio, llama.cpp)* option in the provider dropdown and point it at your server's `/v1` URL. Powered by [LiteLLM](https://github.com/BerriAI/litellm), giving you seamless access to [100+ supported providers and models](https://docs.litellm.ai/docs/providers). 
[AI-powered website change detection — plain language change summaries and smart alert rules](https://changedetection.io?src=github) diff --git a/changedetectionio/blueprint/settings/__init__.py b/changedetectionio/blueprint/settings/__init__.py index 7e4e57507be..74af6b71287 100644 --- a/changedetectionio/blueprint/settings/__init__.py +++ b/changedetectionio/blueprint/settings/__init__.py @@ -36,6 +36,8 @@ def settings_page(): default['llm'] = { 'llm_model': _stored_llm.get('model', ''), 'llm_api_base': _stored_llm.get('api_base', ''), + 'llm_provider_kind': _stored_llm.get('provider_kind', ''), + 'llm_local_token_multiplier': _stored_llm.get('local_token_multiplier', 5), 'llm_change_summary_default': datastore.data['settings']['application'].get('llm_change_summary_default', ''), 'llm_override_diff_with_summary': datastore.data['settings']['application'].get('llm_override_diff_with_summary', True), 'llm_restock_use_fallback_extract': datastore.data['settings']['application'].get('llm_restock_use_fallback_extract', True), @@ -148,6 +150,10 @@ def settings_page(): 'model': (llm_data.get('llm_model') or '').strip(), 'api_key': effective_api_key, 'api_base': (llm_data.get('llm_api_base') or '').strip(), + # Identifies a self-hosted OpenAI-compatible endpoint so reasoning-friendly + # token caps can be applied conditionally (cloud-LLM defaults stay tight). 
+ 'provider_kind': (llm_data.get('llm_provider_kind') or '').strip(), + 'local_token_multiplier': int(llm_data.get('llm_local_token_multiplier') or 5), 'token_budget_month': existing_llm.get('token_budget_month', 0), 'max_input_chars': existing_llm.get('max_input_chars', 0), **preserved_counters, diff --git a/changedetectionio/blueprint/settings/llm.py b/changedetectionio/blueprint/settings/llm.py index 2658633ebf0..690711d011e 100644 --- a/changedetectionio/blueprint/settings/llm.py +++ b/changedetectionio/blueprint/settings/llm.py @@ -30,15 +30,20 @@ def llm_get_models(): api_key = (datastore.data['settings']['application'].get('llm') or {}).get('api_key', '') logger.debug("LLM model list: no api_key in request, using stored key") - _PREFIXES = {'gemini': 'gemini/', 'ollama': 'ollama/', 'openrouter': 'openrouter/'} + _PREFIXES = {'gemini': 'gemini/', 'ollama': 'ollama/', 'openrouter': 'openrouter/', + 'openai_compatible': 'openai/'} + # vLLM / LM Studio / llama.cpp speak OpenAI's wire format — route through litellm's + # 'openai' provider but keep the UI-level name distinct from cloud OpenAI. 
+ _LITELLM_PROVIDER = {'openai_compatible': 'openai'} prefix = _PREFIXES.get(provider, '') + litellm_provider = _LITELLM_PROVIDER.get(provider, provider) try: import litellm - logger.debug(f"LLM model list: calling litellm.get_valid_models provider={provider!r} api_base={api_base!r}") + logger.debug(f"LLM model list: calling litellm.get_valid_models provider={provider!r} (litellm={litellm_provider!r}) api_base={api_base!r}") raw = litellm.get_valid_models( check_provider_endpoint=True, - custom_llm_provider=provider, + custom_llm_provider=litellm_provider, api_key=api_key or None, api_base=api_base or None, ) or [] @@ -67,14 +72,18 @@ def llm_test(): try: logger.debug(f"LLM connection test: sending test prompt to model={model!r}") + # Reuse the same multiplier path the production calls use, so cloud providers + # stay on a small base cap (matching upstream's pre-existing behavior) and only + # 'openai_compatible' endpoints opt into the reasoning-friendly headroom. + from changedetectionio.llm.evaluator import apply_local_token_multiplier text, total_tokens, input_tokens, output_tokens = completion( model=model, messages=[{'role': 'user', 'content': - 'Reply with exactly five words confirming you are ready.'}], + 'Respond with just the word: ready'}], api_key=llm_cfg.get('api_key') or None, api_base=api_base or None, - timeout=20, - max_tokens=200, + timeout=30, + max_tokens=apply_local_token_multiplier(200, llm_cfg), ) reply = text.strip() if not reply: diff --git a/changedetectionio/blueprint/settings/templates/settings_llm_tab.html b/changedetectionio/blueprint/settings/templates/settings_llm_tab.html index 636b348d66a..18233e7f6f5 100644 --- a/changedetectionio/blueprint/settings/templates/settings_llm_tab.html +++ b/changedetectionio/blueprint/settings/templates/settings_llm_tab.html @@ -111,6 +111,7 @@

{{ _('AI-powered change monitor + @@ -127,6 +128,18 @@

{{ _('AI-powered change monitor {{ _('Only needed for Ollama or custom/self-hosted endpoints. Leave blank for cloud providers.') }} + {# Hidden field carrying the dropdown selection so the backend knows when to apply + reasoning-friendly token caps (only for self-hosted OpenAI-compatible endpoints). #} + {{ form.llm.form.llm_provider_kind() }} + + +