diff --git a/crates/goose/src/providers/declarative/auxen.json b/crates/goose/src/providers/declarative/auxen.json new file mode 100644 index 000000000000..5354d6d3c302 --- /dev/null +++ b/crates/goose/src/providers/declarative/auxen.json @@ -0,0 +1,39 @@ +{ + "name": "auxen", + "engine": "openai", + "display_name": "Auxen", + "description": "Per-customer dedicated LLM endpoints (Llama, Qwen, Mistral, Gemma, Mixtral, Phi, Command R) with an OpenAI-compatible API", + "api_key_env": "AUXEN_API_KEY", + "base_url": "${AUXEN_API_BASE}/chat/completions", + "env_vars": [ + { + "name": "AUXEN_API_BASE", + "required": true, + "secret": false, + "primary": true, + "description": "Auxen instance base URL issued by the Auxen dashboard, without a trailing slash, e.g. https://api.auxen.ai/v1/inst_xxx/v1" + }, + { + "name": "AUXEN_API_KEY", + "required": true, + "secret": true, + "description": "Auxen instance API key (auxk_*) issued by the Auxen dashboard" + } + ], + "models": [ + { "name": "llama-3.1-8b", "context_limit": 131072, "max_tokens": 8192 }, + { "name": "llama-3.1-70b", "context_limit": 131072, "max_tokens": 8192 }, + { "name": "llama-3.2-3b", "context_limit": 131072, "max_tokens": 8192 }, + { "name": "qwen2.5-7b", "context_limit": 131072, "max_tokens": 8192 }, + { "name": "qwen2.5-14b", "context_limit": 131072, "max_tokens": 8192 }, + { "name": "qwen2.5-32b", "context_limit": 131072, "max_tokens": 8192 }, + { "name": "mistral-7b", "context_limit": 32768, "max_tokens": 8192 }, + { "name": "mistral-nemo-12b", "context_limit": 131072, "max_tokens": 8192 }, + { "name": "mixtral-8x7b", "context_limit": 32768, "max_tokens": 8192 }, + { "name": "gemma2-9b", "context_limit": 8192, "max_tokens": 8192 }, + { "name": "phi-3-mini", "context_limit": 131072, "max_tokens": 4096 }, + { "name": "command-r-7b", "context_limit": 131072, "max_tokens": 4096 } + ], + "supports_streaming": true, + "requires_auth": true +} diff --git a/documentation/docs/getting-started/providers.md 
b/documentation/docs/getting-started/providers.md index 90367355c743..80220a035f2d 100644 --- a/documentation/docs/getting-started/providers.md +++ b/documentation/docs/getting-started/providers.md @@ -26,6 +26,7 @@ goose is compatible with a wide range of LLM providers, allowing you to choose a | [Amazon SageMaker TGI](https://docs.aws.amazon.com/sagemaker/latest/dg/realtime-endpoints.html) | Run Text Generation Inference models through Amazon SageMaker endpoints. **AWS credentials must be configured in advance.** | `SAGEMAKER_ENDPOINT_NAME`, `AWS_REGION` (optional), `AWS_PROFILE` (optional) | | [Anthropic](https://www.anthropic.com/) | Offers Claude, an advanced AI model for natural language tasks. | `ANTHROPIC_API_KEY`, `ANTHROPIC_HOST` (optional) | | [Atomic Chat](https://github.com/AtomicBot-ai/Atomic-Chat) | Run local models with Atomic Chat's OpenAI-compatible server. **Because this provider runs locally, you must first [download a model](#local-llms).** | None required. Connects to local server at `localhost:1337` by default. | +| [Auxen](https://auxen.ai/) | Per-customer **dedicated** LLM endpoints (Llama, Qwen, Mistral, Gemma, Mixtral, Phi, Command R) with an OpenAI-compatible API. Each instance is a dedicated GPU billed per minute of runtime, not per token. | `AUXEN_API_BASE`, `AUXEN_API_KEY` | | [Avian](https://avian.io/) | Cost-effective inference API with DeepSeek, Kimi, GLM, and MiniMax models. OpenAI-compatible with streaming and function calling support. | `AVIAN_API_KEY`, `AVIAN_HOST` (optional) | | [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/) | Access Azure-hosted OpenAI models, including GPT-4 and GPT-3.5. Supports both API key and Azure credential chain authentication. | `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_DEPLOYMENT_NAME`, `AZURE_OPENAI_API_KEY` (optional) | | [ChatGPT Codex](https://chatgpt.com/codex) | Access GPT-5 Codex models optimized for code generation and understanding. 
**Requires a ChatGPT Plus/Pro subscription.** | No manual key. Uses browser-based OAuth authentication for both CLI and Desktop. |