From b796b9d7edaefa3c8c1f5b934444698b351d3a00 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Thu, 14 May 2026 04:30:19 +0800 Subject: [PATCH 1/7] refactor(ai-proxy): extract apply_instance_overrides into a pure helper Move the three-step instance-override application (options flat overwrite, override.llm_options capability hook, override.request_body deep merge) out of the inline block in ai-providers/base.lua build_request and into a new pure helper in apisix/plugins/ai-proxy/base.lua. build_request calls the helper at the same point the inline code lived (post-converter), so the body sent upstream is unchanged. extra_opts no longer carries the four override-derived fields; it passes the picked ai_instance through and the helper reads from it directly. No behavior change for the ai-proxy / ai-proxy-multi path; any other build_request caller that still passes opts.model_options (e.g. ai-request-rewrite) must pass opts.ai_instance instead, because build_request no longer reads the old fields. Motivation: ai-cache (planned follow-up plugin) needs to compute its cache key from the post-override effective body without going through build_request, which performs the upstream HTTP call, signing, and keepalive. 
--- apisix/plugins/ai-providers/base.lua | 34 +++------------- apisix/plugins/ai-proxy/base.lua | 58 ++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 35 deletions(-) diff --git a/apisix/plugins/ai-providers/base.lua b/apisix/plugins/ai-providers/base.lua index 944be263296a..2607f710f684 100644 --- a/apisix/plugins/ai-providers/base.lua +++ b/apisix/plugins/ai-providers/base.lua @@ -34,7 +34,7 @@ local transport_http = require("apisix.plugins.ai-transport.http") local transport_auth = require("apisix.plugins.ai-transport.auth") local log_sanitize = require("apisix.utils.log-sanitize") local protocols = require("apisix.plugins.ai-protocols") -local deep_merge = require("apisix.plugins.ai-proxy.merge").deep_merge +local ai_proxy_base = require("apisix.plugins.ai-proxy.base") local ngx = ngx local ngx_now = ngx.now local tonumber = tonumber @@ -198,33 +198,11 @@ function _M.build_request(self, ctx, conf, request_body, opts) or opts.target_host or self.host, } - -- Inject model options (flat overwrite) - if opts.model_options then - for opt, val in pairs(opts.model_options) do - if request_body[opt] ~= nil then - core.log.info("model_options overwriting request field '", opt, "'") - end - request_body[opt] = val - end - end - - -- Apply llm_options via provider capability hook (always force-overwrites) - if opts.override_llm_options then - local cap = self.capabilities and self.capabilities[ctx.ai_target_protocol] - if cap and cap.rewrite_request_body then - cap.rewrite_request_body(request_body, opts.override_llm_options, true) - end - end - - -- Apply per-target-protocol request body override (deep merge) - if opts.request_body_override_map then - local patch = opts.request_body_override_map[ctx.ai_target_protocol] - if patch then - core.log.info("applying request_body override for target protocol '", - ctx.ai_target_protocol, "'") - request_body = deep_merge(request_body, patch, opts.request_body_force_override) - end - end + -- Apply instance-level 
overrides (options + override.{llm_options, request_body}). + -- Runs after the converter so request_body is in target-protocol shape, and the + -- request_body[target_protocol] patch applies to the post-conversion body. + request_body = ai_proxy_base.apply_instance_overrides( + request_body, opts.ai_instance, self, ctx.ai_target_protocol) params.body = request_body if self.remove_model then diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua index 745f3b7a5c42..0ade8ac2ba07 100644 --- a/apisix/plugins/ai-proxy/base.lua +++ b/apisix/plugins/ai-proxy/base.lua @@ -28,6 +28,7 @@ local protocols = require("apisix.plugins.ai-protocols") local transport_http = require("apisix.plugins.ai-transport.http") local log_sanitize = require("apisix.utils.log-sanitize") local apisix_upstream = require("resty.apisix.upstream") +local deep_merge = require("apisix.plugins.ai-proxy.merge").deep_merge local _M = {} @@ -99,6 +100,55 @@ function _M.detect_request_type(ctx) end +-- Apply instance-level overrides to the request body, returning the body +-- that would be sent upstream. Encapsulates the precedence rules used by the +-- ai-proxy / ai-proxy-multi instance config: +-- 1. ai_instance.options - flat overwrite onto request_body. +-- 2. override.llm_options - applied via the provider capability hook +-- rewrite_request_body for target_protocol (force-overwrites). +-- 3. override.request_body[target_protocol] - deep-merged; force controlled +-- by override.request_body_force_override. +-- May mutate request_body in place; returns the same (mutated) table or the +-- table produced by the final deep merge. +-- Pure relative to its arguments: no ctx access, no I/O. 
+function _M.apply_instance_overrides(request_body, ai_instance, ai_provider, target_protocol) + local model_options = ai_instance and ai_instance.options + if model_options then + for opt, val in pairs(model_options) do + if request_body[opt] ~= nil then + core.log.info("model_options overwriting request field '", opt, "'") + end + request_body[opt] = val + end + end + + local override_llm_options = + core.table.try_read_attr(ai_instance, "override", "llm_options") + if override_llm_options then + local caps = ai_provider and ai_provider.capabilities + local cap = caps and caps[target_protocol] + if cap and cap.rewrite_request_body then + cap.rewrite_request_body(request_body, override_llm_options, true) + end + end + + local request_body_override_map = + core.table.try_read_attr(ai_instance, "override", "request_body") + if request_body_override_map then + local patch = request_body_override_map[target_protocol] + if patch then + core.log.info("applying request_body override for target protocol '", + target_protocol, "'") + local force = core.table.try_read_attr(ai_instance, "override", + "request_body_force_override") + request_body = deep_merge(request_body, patch, force) + end + end + + return request_body +end + + -- Execute the AI proxy pipeline: -- 1. Validate request -- 2. 
Route client protocol to driver capability (passthrough / convert / error) @@ -124,15 +174,9 @@ function _M.before_proxy(conf, ctx, on_error) local extra_opts = { name = ai_instance.name, endpoint = core.table.try_read_attr(ai_instance, "override", "endpoint"), - model_options = ai_instance.options, conf = ai_instance.provider_conf or {}, auth = ai_instance.auth, - override_llm_options = - core.table.try_read_attr(ai_instance, "override", "llm_options"), - request_body_override_map = - core.table.try_read_attr(ai_instance, "override", "request_body"), - request_body_force_override = - core.table.try_read_attr(ai_instance, "override", "request_body_force_override"), + ai_instance = ai_instance, } -- Step 1: Route client protocol to driver capability local client_protocol = ctx.ai_client_protocol From c871e982afdecad2a5aa129aad77a79b94ee0281 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Thu, 14 May 2026 05:08:45 +0800 Subject: [PATCH 2/7] feat(ai-proxy): add effective_model and effective_request_for_cache helpers Two pure helpers on top of apply_instance_overrides (introduced in the preceding refactor), both in apisix/plugins/ai-proxy/base.lua: - effective_model(ctx) returns ai_instance.options.model when the operator forces a model on the instance, falling back to ctx.var.request_llm_model (the client-supplied model that detect_request_type mirrors). - effective_request_for_cache(ctx) returns the request body as it would be sent upstream: reads the parsed body, resolves the target protocol from ctx.ai_client_protocol against the provider's capabilities (so peer plugins running in access phase before before_proxy can still get the post-override view), and applies apply_instance_overrides. A small internal resolve_target_protocol helper mirrors the routing logic in before_proxy so callers don't have to wait for ctx.ai_target_protocol to be populated. 
These helpers exist for ai-cache (planned follow-up) to compute a cache key over the effective body without invoking build_request (which would make the upstream HTTP call). The signatures are pure and ctx-driven. Test: t/plugin/ai-proxy-request-body-override.t TEST 17 drives a real request through ai-proxy with options + override.request_body, then uses serverless-post-function (priority -2000, runs after ai-proxy access at 1040) to invoke both helpers and log their output. Asserts both the upstream-received body AND the helper outputs reflect the same post-override view. --- apisix/plugins/ai-proxy/base.lua | 69 +++++++++++++++++++++++ t/plugin/ai-proxy-request-body-override.t | 61 ++++++++++++++++++++ 2 files changed, 130 insertions(+) diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua index 0ade8ac2ba07..754b00e9665a 100644 --- a/apisix/plugins/ai-proxy/base.lua +++ b/apisix/plugins/ai-proxy/base.lua @@ -149,6 +149,75 @@ function _M.apply_instance_overrides(request_body, ai_instance, ai_provider, tar end +-- Effective model that would be sent upstream for the picked AI instance. +-- Returns the operator-forced model (ai_instance.options.model) when set; +-- otherwise the client-supplied model that detect_request_type mirrored to +-- ctx.var.request_llm_model. Returns nil when neither is available. +function _M.effective_model(ctx) + local ai_instance = ctx and ctx.picked_ai_instance + local options = ai_instance and ai_instance.options + if options and options.model then + return options.model + end + return ctx and ctx.var and ctx.var.request_llm_model +end + + +-- Resolve the target protocol the upstream LLM speaks for the picked instance, +-- given the detected client protocol. Mirrors the routing in before_proxy so +-- callers that run before before_proxy (peer plugins in access phase) can still +-- compute it. 
Returns ctx.ai_target_protocol when already set; otherwise picks +-- a passthrough target if the provider speaks the client protocol natively, the +-- "passthrough" sentinel for the catch-all client protocol, or the converter's +-- target when a converter bridges client to provider. +local function resolve_target_protocol(ctx, ai_provider) + if ctx.ai_target_protocol then + return ctx.ai_target_protocol + end + local client_protocol = ctx.ai_client_protocol + if not client_protocol then + return nil + end + local caps = ai_provider and ai_provider.capabilities or {} + if caps[client_protocol] then + return client_protocol + end + if client_protocol == "passthrough" then + return "passthrough" + end + local _, target = protocols.find_converter(client_protocol, caps) + return target +end + + +-- Effective request body that would be sent upstream for the current request. +-- Reads the parsed request body and applies apply_instance_overrides against +-- ctx.picked_ai_instance and the resolved target protocol. Pure: no HTTP, no +-- signing, no upstream call. Intended for ai-cache (and similar peer plugins) +-- to compute a cache key over the post-override view of the body. +-- Requires ctx.picked_ai_instance and ctx.ai_client_protocol to be populated +-- (both set by ai-proxy / ai-proxy-multi access phase before any peer plugin +-- with priority lower than 1040 runs). +function _M.effective_request_for_cache(ctx) + local request_body, err = core.request.get_json_request_body_table() + if not request_body then + return nil, err + end + local ai_instance = ctx and ctx.picked_ai_instance + if not ai_instance then + return nil, "no picked_ai_instance on ctx" + end + local ok, ai_provider = pcall(require, + "apisix.plugins.ai-providers." .. ai_instance.provider) + if not ok then + return nil, "failed to load provider: " .. 
tostring(ai_instance.provider) + end + local target_protocol = resolve_target_protocol(ctx, ai_provider) + return _M.apply_instance_overrides( + request_body, ai_instance, ai_provider, target_protocol) +end + + -- Execute the AI proxy pipeline: -- 1. Validate request -- 2. Route client protocol to driver capability (passthrough / convert / error) diff --git a/t/plugin/ai-proxy-request-body-override.t b/t/plugin/ai-proxy-request-body-override.t index 088123bebac2..d1220e167957 100644 --- a/t/plugin/ai-proxy-request-body-override.t +++ b/t/plugin/ai-proxy-request-body-override.t @@ -819,3 +819,64 @@ max_tokens=321 } --- response_body max_completion_tokens=200 temperature=0.5 + + + +=== TEST 17: effective_model + effective_request_for_cache reflect post-override view +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + -- ai-proxy applies overrides; serverless-post-function (priority -2000) + -- runs after ai-proxy access (priority 1040) in the access phase, invokes + -- the helpers, and logs their output. The test then asserts BOTH the + -- upstream-received body AND the helper outputs reflect the same + -- post-override view. 
+ local code = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/chat", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer t" } }, + "options": { "model": "options-model" }, + "override": { + "endpoint": "http://localhost:6732", + "request_body": { + "openai-chat": { "temperature": 0.42 } + } + }, + "ssl_verify": false + }, + "serverless-post-function": { + "functions": [ + "return function(_, ctx) local b = require('apisix.plugins.ai-proxy.base'); local cjson = require('cjson.safe'); local m = b.effective_model(ctx); local body, err = b.effective_request_for_cache(ctx); ngx.log(ngx.WARN, 'EFFECTIVE_MODEL=', m or 'nil'); ngx.log(ngx.WARN, 'EFFECTIVE_BODY=', body and cjson.encode(body) or ('ERR:'..tostring(err))) end" + ] + } + } + }]] + ) + if code >= 300 then ngx.status = code; return end + + local http = require("resty.http").new() + local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", { + method = "POST", + body = '{"messages":[{"role":"user","content":"hi"}],"model":"client-model"}', + headers = { ["Content-Type"] = "application/json" }, + })) + local cjson = require("cjson.safe") + local body = cjson.decode(res.body) + local echoed = cjson.decode(body.choices[1].message.content) + ngx.say("upstream model=", echoed.model, + " upstream temperature=", echoed.temperature) + } + } +--- response_body +upstream model=options-model upstream temperature=0.42 +--- error_log eval +[ + qr/EFFECTIVE_MODEL=options-model/, + qr/EFFECTIVE_BODY=.*"model":"options-model"/, + qr/EFFECTIVE_BODY=.*"temperature":0\.42/, +] From 6e25627c55e441c81b60f10e59ac57d8f5dbe678 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Thu, 14 May 2026 05:36:14 +0800 Subject: [PATCH 3/7] refactor(ai-proxy): drop effective_model helper effective_model duplicates information already present on the body that effective_request_for_cache returns (ai_instance.options.model is written onto the body during 
apply_instance_overrides step 1). Callers that need the model can read it off the effective body. A cheap ctx-only model lookup can be added later if a concrete consumer needs it without parsing the body. Updates TEST 17 to drop the EFFECTIVE_MODEL assertion; the EFFECTIVE_BODY assertions still prove the helper produces the same body the upstream receives. --- apisix/plugins/ai-proxy/base.lua | 14 -------------- t/plugin/ai-proxy-request-body-override.t | 9 ++++----- 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua index 754b00e9665a..55ed08034ddc 100644 --- a/apisix/plugins/ai-proxy/base.lua +++ b/apisix/plugins/ai-proxy/base.lua @@ -149,20 +149,6 @@ function _M.apply_instance_overrides(request_body, ai_instance, ai_provider, tar end --- Effective model that would be sent upstream for the picked AI instance. --- Returns the operator-forced model (ai_instance.options.model) when set; --- otherwise the client-supplied model that detect_request_type mirrored to --- ctx.var.request_llm_model. Returns nil when neither is available. -function _M.effective_model(ctx) - local ai_instance = ctx and ctx.picked_ai_instance - local options = ai_instance and ai_instance.options - if options and options.model then - return options.model - end - return ctx and ctx.var and ctx.var.request_llm_model -end - - -- Resolve the target protocol the upstream LLM speaks for the picked instance, -- given the detected client protocol. 
Mirrors the routing in before_proxy so -- callers that run before before_proxy (peer plugins in access phase) can still diff --git a/t/plugin/ai-proxy-request-body-override.t b/t/plugin/ai-proxy-request-body-override.t index d1220e167957..a9396c38b019 100644 --- a/t/plugin/ai-proxy-request-body-override.t +++ b/t/plugin/ai-proxy-request-body-override.t @@ -822,15 +822,15 @@ max_completion_tokens=200 temperature=0.5 -=== TEST 17: effective_model + effective_request_for_cache reflect post-override view +=== TEST 17: effective_request_for_cache returns post-override body --- config location /t { content_by_lua_block { local t = require("lib.test_admin").test -- ai-proxy applies overrides; serverless-post-function (priority -2000) -- runs after ai-proxy access (priority 1040) in the access phase, invokes - -- the helpers, and logs their output. The test then asserts BOTH the - -- upstream-received body AND the helper outputs reflect the same + -- the helper, and logs its output. The test asserts BOTH the + -- upstream-received body AND the helper output reflect the same -- post-override view. 
local code = t('/apisix/admin/routes/1', ngx.HTTP_PUT, @@ -851,7 +851,7 @@ max_completion_tokens=200 temperature=0.5 }, "serverless-post-function": { "functions": [ - "return function(_, ctx) local b = require('apisix.plugins.ai-proxy.base'); local cjson = require('cjson.safe'); local m = b.effective_model(ctx); local body, err = b.effective_request_for_cache(ctx); ngx.log(ngx.WARN, 'EFFECTIVE_MODEL=', m or 'nil'); ngx.log(ngx.WARN, 'EFFECTIVE_BODY=', body and cjson.encode(body) or ('ERR:'..tostring(err))) end" + "return function(_, ctx) local b = require('apisix.plugins.ai-proxy.base'); local cjson = require('cjson.safe'); local body, err = b.effective_request_for_cache(ctx); ngx.log(ngx.WARN, 'EFFECTIVE_BODY=', body and cjson.encode(body) or ('ERR:'..tostring(err))) end" ] } } @@ -876,7 +876,6 @@ max_completion_tokens=200 temperature=0.5 upstream model=options-model upstream temperature=0.42 --- error_log eval [ - qr/EFFECTIVE_MODEL=options-model/, qr/EFFECTIVE_BODY=.*"model":"options-model"/, qr/EFFECTIVE_BODY=.*"temperature":0\.42/, ] From 72cc8d773a794f885b425932e5da288f691617d1 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Thu, 14 May 2026 06:21:23 +0800 Subject: [PATCH 4/7] feat(ai-proxy): apply converter in effective_request_for_cache The cache key produced via effective_request_for_cache should reflect what would actually be sent upstream. Previously the helper only applied apply_instance_overrides, so if a converter was in the chain (e.g. anthropic-messages client routed to an openai-chat provider) the helper returned the pre-converter body while build_request sent the converted body - the cache key would diverge from the upstream request shape. Now the helper: 1. Reads the request body 2. Resolves (target_protocol, converter) via resolve_target_protocol 3. Applies the converter when present 4. 
Applies apply_instance_overrides resolve_target_protocol's return signature widens from `target_protocol` to `(target_protocol, converter)`; the fast-path (ctx.ai_target_protocol already set) returns ctx.ai_converter alongside. Tests: - TEST 17 (no-converter path) reformatted - the inline serverless-post- function was a single 297-char line; broken into a readable multi-line body to match the style used elsewhere in the file. - TEST 18 added covering the converter path: anthropic-messages client to an openai provider. Asserts EFFECTIVE_BODY contains max_completion_tokens (post-converter rename of max_tokens) and temperature 0.42 (post-override), but NOT the original max_tokens field - proving the converter ran inside the helper. Drive-by: comment on apply_instance_overrides shortened from 11 lines to 5 (precedence rules + "mutates in place"). Other two helpers keep their longer docs. --- apisix/plugins/ai-proxy/base.lua | 62 +++++++++--------- t/plugin/ai-proxy-request-body-override.t | 76 ++++++++++++++++++++++- 2 files changed, 102 insertions(+), 36 deletions(-) diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua index 55ed08034ddc..7954cf83cd33 100644 --- a/apisix/plugins/ai-proxy/base.lua +++ b/apisix/plugins/ai-proxy/base.lua @@ -100,17 +100,11 @@ function _M.detect_request_type(ctx) end --- Apply instance-level overrides to the request body, returning the body --- that would be sent upstream. Encapsulates the precedence rules used by the --- ai-proxy / ai-proxy-multi instance config: --- 1. ai_instance.options - flat overwrite onto request_body. --- 2. override.llm_options - applied via the provider capability hook --- rewrite_request_body for target_protocol (force-overwrites). --- 3. override.request_body[target_protocol] - deep-merged; force controlled --- by override.request_body_force_override. --- May mutate request_body in place; returns the same (mutated) table or the --- table produced by the final deep merge. 
--- Pure relative to its arguments: no ctx access, no I/O. +-- Apply ai_instance overrides to request_body and return the effective body +-- that would be sent upstream. Precedence: options (flat overwrite) -> +-- override.llm_options (provider capability rewrite) -> +-- override.request_body[target_protocol] (deep merge). Mutates request_body +-- in place. function _M.apply_instance_overrides(request_body, ai_instance, ai_provider, target_protocol) local model_options = ai_instance and ai_instance.options if model_options then @@ -149,41 +143,36 @@ function _M.apply_instance_overrides(request_body, ai_instance, ai_provider, tar end --- Resolve the target protocol the upstream LLM speaks for the picked instance, --- given the detected client protocol. Mirrors the routing in before_proxy so --- callers that run before before_proxy (peer plugins in access phase) can still --- compute it. Returns ctx.ai_target_protocol when already set; otherwise picks --- a passthrough target if the provider speaks the client protocol natively, the --- "passthrough" sentinel for the catch-all client protocol, or the converter's --- target when a converter bridges client to provider. +-- Resolve (target_protocol, converter) from ctx.ai_client_protocol + provider +-- capabilities. Mirrors before_proxy's routing so peer plugins running in +-- access phase (before before_proxy sets ctx.ai_target_protocol / +-- ctx.ai_converter) can compute them themselves. 
local function resolve_target_protocol(ctx, ai_provider) if ctx.ai_target_protocol then - return ctx.ai_target_protocol + return ctx.ai_target_protocol, ctx.ai_converter end local client_protocol = ctx.ai_client_protocol if not client_protocol then - return nil + return nil, nil end local caps = ai_provider and ai_provider.capabilities or {} if caps[client_protocol] then - return client_protocol + return client_protocol, nil end if client_protocol == "passthrough" then - return "passthrough" + return "passthrough", nil end - local _, target = protocols.find_converter(client_protocol, caps) - return target + local converter, target = protocols.find_converter(client_protocol, caps) + return target, converter end --- Effective request body that would be sent upstream for the current request. --- Reads the parsed request body and applies apply_instance_overrides against --- ctx.picked_ai_instance and the resolved target protocol. Pure: no HTTP, no --- signing, no upstream call. Intended for ai-cache (and similar peer plugins) --- to compute a cache key over the post-override view of the body. --- Requires ctx.picked_ai_instance and ctx.ai_client_protocol to be populated --- (both set by ai-proxy / ai-proxy-multi access phase before any peer plugin --- with priority lower than 1040 runs). +-- Return the request body as it would be sent upstream for the current ctx. +-- Reads the parsed body, applies the converter (if the client protocol differs +-- from the provider's target protocol), then applies apply_instance_overrides. +-- The result matches what build_request would send upstream. Pure: no HTTP, +-- no signing, no upstream call. Requires ctx.picked_ai_instance and +-- ctx.ai_client_protocol (both set by ai-proxy access phase). 
function _M.effective_request_for_cache(ctx) local request_body, err = core.request.get_json_request_body_table() if not request_body then @@ -198,7 +187,14 @@ function _M.effective_request_for_cache(ctx) if not ok then return nil, "failed to load provider: " .. tostring(ai_instance.provider) end - local target_protocol = resolve_target_protocol(ctx, ai_provider) + local target_protocol, converter = resolve_target_protocol(ctx, ai_provider) + if converter and converter.convert_request then + local converted, conv_err = converter.convert_request(request_body, ctx) + if not converted then + return nil, conv_err or "converter failed" + end + request_body = converted + end return _M.apply_instance_overrides( request_body, ai_instance, ai_provider, target_protocol) end diff --git a/t/plugin/ai-proxy-request-body-override.t b/t/plugin/ai-proxy-request-body-override.t index a9396c38b019..0c8608aff751 100644 --- a/t/plugin/ai-proxy-request-body-override.t +++ b/t/plugin/ai-proxy-request-body-override.t @@ -850,9 +850,14 @@ max_completion_tokens=200 temperature=0.5 "ssl_verify": false }, "serverless-post-function": { - "functions": [ - "return function(_, ctx) local b = require('apisix.plugins.ai-proxy.base'); local cjson = require('cjson.safe'); local body, err = b.effective_request_for_cache(ctx); ngx.log(ngx.WARN, 'EFFECTIVE_BODY=', body and cjson.encode(body) or ('ERR:'..tostring(err))) end" - ] + "functions": ["return function(_, ctx) + local b = require('apisix.plugins.ai-proxy.base') + local cjson = require('cjson.safe') + local body, err = b.effective_request_for_cache(ctx) + ngx.log(ngx.WARN, 'EFFECTIVE_BODY=', + body and cjson.encode(body) + or ('ERR:' .. 
tostring(err))) + end"] } } }]] @@ -879,3 +884,68 @@ upstream model=options-model upstream temperature=0.42 qr/EFFECTIVE_BODY=.*"model":"options-model"/, qr/EFFECTIVE_BODY=.*"temperature":0\.42/, ] + + + +=== TEST 18: effective_request_for_cache applies the converter (anthropic-messages -> openai-chat) +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + -- Client sends anthropic-messages format to an openai provider, which + -- speaks openai-chat natively. The converter translates the body and + -- override.request_body.openai-chat then applies. The helper should + -- mirror this: convert first, then apply overrides. Distinctive + -- post-converter marker: max_tokens (anthropic) becomes + -- max_completion_tokens (openai-chat) and the original max_tokens + -- is stripped by the converter ("never forward max_tokens"). + local code = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/v1/messages", + "plugins": { + "ai-proxy": { + "provider": "openai", + "auth": { "header": { "Authorization": "Bearer t" } }, + "override": { + "endpoint": "http://localhost:6732", + "request_body": { + "openai-chat": { "temperature": 0.42 } + } + }, + "ssl_verify": false + }, + "serverless-post-function": { + "functions": ["return function(_, ctx) + local b = require('apisix.plugins.ai-proxy.base') + local cjson = require('cjson.safe') + local body, err = b.effective_request_for_cache(ctx) + ngx.log(ngx.WARN, 'EFFECTIVE_BODY=', + body and cjson.encode(body) + or ('ERR:' .. tostring(err))) + end"] + } + } + }]] + ) + if code >= 300 then ngx.status = code; return end + + local http = require("resty.http").new() + local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. 
"/v1/messages", { + method = "POST", + body = '{"model":"claude-3","max_tokens":10,"messages":[{"role":"user","content":"hi"}]}', + headers = { ["Content-Type"] = "application/json" }, + })) + ngx.status = res.status + ngx.say("status=", res.status) + } + } +--- response_body +status=200 +--- error_log eval +[ + qr/EFFECTIVE_BODY=.*"max_completion_tokens":10/, + qr/EFFECTIVE_BODY=.*"temperature":0\.42/, +] +--- no_error_log eval +qr/EFFECTIVE_BODY=.*"max_tokens":10/ From 0b0c8609d40a96fed5df86827e5351f5a35590fc Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Thu, 14 May 2026 08:21:33 +0800 Subject: [PATCH 5/7] fix(ai-request-rewrite): pass ai_instance to build_request The b796b9d7 refactor changed build_request to read overrides from opts.ai_instance, but the ai-request-rewrite sidecar caller was missed and kept passing the now-dead opts.model_options. Result: conf.options silently stopped propagating to the LLM sidecar request body. Fix: pass ai_instance = conf. conf has the same .options / .override shape apply_instance_overrides reads; the override.llm_options / request_body branches are no-ops since the rewrite schema only defines override.endpoint. t/plugin/ai-request-rewrite2.t TEST 1, which validates extra_option in the LLM-stub request body, now passes (was failing with status 400 "LLM service returned error status: 400" once httpbin is reachable). 
--- apisix/plugins/ai-request-rewrite.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apisix/plugins/ai-request-rewrite.lua b/apisix/plugins/ai-request-rewrite.lua index 900f700836e9..31712caa5771 100644 --- a/apisix/plugins/ai-request-rewrite.lua +++ b/apisix/plugins/ai-request-rewrite.lua @@ -122,7 +122,7 @@ local function request_to_llm(conf, request_table, ctx, target_path) local extra_opts = { endpoint = core.table.try_read_attr(conf, "override", "endpoint"), auth = conf.auth, - model_options = conf.options, + ai_instance = conf, target_path = target_path, } ctx.llm_request_start_time = ngx.now() From 1d8a9a2c854d8cb4c69e84730c6944fd2fbe2663 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Thu, 14 May 2026 08:21:33 +0800 Subject: [PATCH 6/7] test(ai-proxy): assert TEST 18 helper output matches upstream body MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously only checked status=200 plus the EFFECTIVE_BODY error_log regex. The cache-key correctness contract requires the helper's output to match what build_request actually sends upstream — but with no upstream-side assertion, the test would have passed even if the helper diverged from build_request as long as the helper's own log contained the expected fields. Decode body.content[1].text (the openai-chat body echoed by the /v1/chat/completions stub, surfaced through the converter's response transform) and assert max_completion_tokens=10, temperature=0.42, max_tokens=nil. Combined with the existing EFFECTIVE_BODY regex on the same fields, this pins down helper == upstream for the converter's distinctive markers. Mirrors TEST 17's structure. 
--- t/plugin/ai-proxy-request-body-override.t | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/t/plugin/ai-proxy-request-body-override.t b/t/plugin/ai-proxy-request-body-override.t index 0c8608aff751..4ca3aae2b535 100644 --- a/t/plugin/ai-proxy-request-body-override.t +++ b/t/plugin/ai-proxy-request-body-override.t @@ -937,11 +937,20 @@ upstream model=options-model upstream temperature=0.42 headers = { ["Content-Type"] = "application/json" }, })) ngx.status = res.status - ngx.say("status=", res.status) + -- The upstream /v1/chat/completions stub echoes the raw upstream body as the message + -- text; ai-proxy converts the openai-chat response back to + -- anthropic-messages, so body.content[1].text is the post-converter + -- post-override body the upstream actually received. + local cjson = require("cjson.safe") + local body = cjson.decode(res.body) + local echoed = cjson.decode(body.content[1].text) + ngx.say("upstream max_completion_tokens=", echoed.max_completion_tokens, + " upstream temperature=", echoed.temperature, + " upstream max_tokens=", tostring(echoed.max_tokens)) } } --- response_body -status=200 +upstream max_completion_tokens=10 upstream temperature=0.42 upstream max_tokens=nil --- error_log eval [ qr/EFFECTIVE_BODY=.*"max_completion_tokens":10/, From b7562c0065a3bc37222d758b6714dc9266f1b96c Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Thu, 14 May 2026 10:10:25 +0800 Subject: [PATCH 7/7] chore: fix lint --- apisix/plugins/ai-proxy/base.lua | 1 + 1 file changed, 1 insertion(+) diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua index 7954cf83cd33..d13f89ff252a 100644 --- a/apisix/plugins/ai-proxy/base.lua +++ b/apisix/plugins/ai-proxy/base.lua @@ -23,6 +23,7 @@ local pcall = pcall local pairs = pairs local type = type local table = table +local tostring = tostring local exporter = require("apisix.plugins.prometheus.exporter") local protocols = require("apisix.plugins.ai-protocols") local transport_http = 
require("apisix.plugins.ai-transport.http")