diff --git a/llms/openai/internal/openaiclient/chat.go b/llms/openai/internal/openaiclient/chat.go index 991c236ab..3be7028ae 100644 --- a/llms/openai/internal/openaiclient/chat.go +++ b/llms/openai/internal/openaiclient/chat.go @@ -103,13 +103,15 @@ func (r ChatRequest) MarshalJSON() ([]byte, error) { Alias: (*Alias)(&r), } - // Handle temperature for reasoning models - if isReasoningModel(r.Model) { - // Reasoning models (GPT-5, o1, o3) only accept temperature=1 (default) - // Omit temperature field to let API use its default value + // Handle temperature for reasoning models. + // When reasoning is enabled (reasoning_effort != "none"), this API only + // accepts the default temperature behavior, so omit the temperature field. + // When reasoning_effort == "none", temperature may be provided explicitly, + // including temperature: 0. + // https://developers.openai.com/api/docs/guides/latest-model#gpt-54-parameter-compatibility + if isReasoningModel(r.Model) && r.ReasoningEffort != "none" { aux.Temperature = nil } else { - // For regular models, always send temperature aux.Temperature = &r.Temperature } @@ -131,17 +133,17 @@ func (r ChatRequest) MarshalJSON() ([]byte, error) { } // isReasoningModel returns true if the model is a reasoning model that has temperature constraints. -// Reasoning models (GPT-5, o1, o3) only accept temperature=1 and reject other values. +// Reasoning models only accept temperature=1 and reject other values unless reasoning is disabled. 
func isReasoningModel(model string) bool { - // o1 series: o1-preview, o1-mini - if strings.HasPrefix(model, "o1-") { + // o1 series: o1, o1-mini, o1-preview, … + if model == "o1" || strings.HasPrefix(model, "o1-") { return true } - // o3 series: o3, o3-mini (note: "o3" without suffix is also valid) + // o3 series: o3, o3-mini, … if model == "o3" || strings.HasPrefix(model, "o3-") { return true } - // GPT-5 series (when released) + // GPT-5 series if strings.HasPrefix(model, "gpt-5") { return true } diff --git a/llms/openai/openaillm.go b/llms/openai/openaillm.go index 84690072a..1777b37d9 100644 --- a/llms/openai/openaillm.go +++ b/llms/openai/openaillm.go @@ -27,16 +27,20 @@ const ( RoleTool = "tool" ) -// ModelCapability defines what a model supports +// ModelCapability defines what a model supports. type ModelCapability struct { - Pattern string // Regex pattern to match model names - SupportsSystem bool // If true, supports system messages - SupportsThinking bool // If true, supports reasoning/thinking - SupportsCaching bool // If true, supports prompt caching - // Add more capabilities as needed + Pattern string // Regex pattern to match model names + SupportsSystem bool // If true, supports system messages + SupportsThinking bool // If true, is a reasoning/thinking model + SupportsCaching bool // If true, supports prompt caching + SupportedReasoningEfforts []string // Valid reasoning_effort values for this model variant. Support and allowed values differ across model families (e.g. gpt-5.4+ accepts none/low/medium/high/xhigh, earlier o-series do not accept the parameter at all) } -// modelCapabilities defines capabilities for different model patterns +// reasoningEffortsGPT54Plus is the set of valid reasoning_effort values for gpt-5.4 and later. +var reasoningEffortsGPT54Plus = []string{"none", "low", "medium", "high", "xhigh"} + +// modelCapabilities defines capabilities for different model patterns. 
+// Patterns are evaluated in order; more specific patterns must appear first. var modelCapabilities = []ModelCapability{ // OpenAI reasoning models (o1, o3 series) - no system message support { @@ -45,6 +49,14 @@ var modelCapabilities = []ModelCapability{ SupportsThinking: true, SupportsCaching: false, }, + // gpt-5.4 and later: supports reasoning_effort + { + Pattern: `(?i)^gpt-5\.[4-9]`, + SupportsSystem: true, + SupportsThinking: true, + SupportsCaching: false, + SupportedReasoningEfforts: reasoningEffortsGPT54Plus, + }, // GPT-4 models { Pattern: `(?i)^gpt-4`, // Matches gpt-4, gpt-4-turbo, etc. @@ -59,7 +71,16 @@ var modelCapabilities = []ModelCapability{ SupportsThinking: false, SupportsCaching: false, }, - // Future models can be added here +} + +// supportsReasoningEffort reports whether the given effort value is valid for the model. +func supportsReasoningEffort(caps ModelCapability, effort string) bool { + for _, e := range caps.SupportedReasoningEfforts { + if e == effort { + return true + } + } + return false } // getModelCapabilities returns the capabilities for a given model @@ -210,40 +231,41 @@ func (o *LLM) GenerateContent(ctx context.Context, messages []llms.MessageConten } } - // Extract reasoning effort for thinking models - // Note: OpenAI o1/o3 models have built-in reasoning and don't support reasoning_effort parameter - // This is kept for future models that might support it (like GPT-5) + // Set reasoning_effort for GPT-5 models. The valid set varies by sub-model: + // gpt-5.1: none, low, medium, high (NOTE: no modelCapabilities entry sets SupportedReasoningEfforts for gpt-5.1, so nothing is sent for it) + // gpt-5.4: none, low, medium, high, xhigh + // Unsupported values are silently omitted rather than sent to the API. 
var reasoningEffort string - // Commented out for now since current o1 models don't support this parameter - /* - if opts.Metadata != nil { - if config, ok := opts.Metadata["thinking_config"].(*llms.ThinkingConfig); ok { - // Map thinking mode to reasoning effort - switch config.Mode { - case llms.ThinkingModeLow: - reasoningEffort = "low" - case llms.ThinkingModeMedium: - reasoningEffort = "medium" - case llms.ThinkingModeHigh: - reasoningEffort = "high" - } + if len(modelCaps.SupportedReasoningEfforts) > 0 { + if config := llms.GetThinkingConfig(&opts); config != nil { + var candidate string + switch config.Mode { + case llms.ThinkingModeNone: + candidate = "none" + case llms.ThinkingModeLow: + candidate = "low" + case llms.ThinkingModeMedium: + candidate = "medium" + case llms.ThinkingModeHigh: + candidate = "high" + case llms.ThinkingModeXHigh: + candidate = "xhigh" + } + if supportsReasoningEffort(modelCaps, candidate) { + reasoningEffort = candidate + } - // Handle streaming for thinking - if config.StreamThinking && opts.StreamingReasoningFunc == nil && opts.StreamingFunc != nil { - // Set up default reasoning streaming if requested but not provided - // Wrap the single-param streaming func into a reasoning func - opts.StreamingReasoningFunc = func(ctx context.Context, reasoningChunk []byte, chunk []byte) error { - // For default behavior, we might want to stream both or just the main content - // Here we'll just stream the main content chunk - if len(chunk) > 0 { - return opts.StreamingFunc(ctx, chunk) - } - return nil + if config.StreamThinking && opts.StreamingReasoningFunc == nil && opts.StreamingFunc != nil { + streamFn := opts.StreamingFunc + opts.StreamingReasoningFunc = func(ctx context.Context, _ []byte, chunk []byte) error { + if len(chunk) > 0 { + return streamFn(ctx, chunk) } + return nil } } } - */ + } // Filter out internal metadata that shouldn't be sent to API apiMetadata := make(map[string]any) diff --git a/llms/reasoning.go 
b/llms/reasoning.go index 3be416c78..40e16c1eb 100644 --- a/llms/reasoning.go +++ b/llms/reasoning.go @@ -18,6 +18,10 @@ const ( // ThinkingModeHigh allocates maximum tokens for thinking (~80% of max tokens). ThinkingModeHigh ThinkingMode = "high" + // ThinkingModeXHigh allocates the maximum reasoning budget. + // Only supported by gpt-5.1-codex-max and later. + ThinkingModeXHigh ThinkingMode = "xhigh" + // ThinkingModeAuto lets the model decide how much thinking is needed. ThinkingModeAuto ThinkingMode = "auto" )