From b796b9d7edaefa3c8c1f5b934444698b351d3a00 Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Thu, 14 May 2026 04:30:19 +0800
Subject: [PATCH 1/7] refactor(ai-proxy): extract apply_instance_overrides into
 a pure helper

Move the three-step instance-override application (options flat overwrite,
override.llm_options capability hook, override.request_body deep merge) out
of the inline block in ai-providers/base.lua build_request and into a new
pure helper in apisix/plugins/ai-proxy/base.lua. build_request calls the
helper at the same point the inline code lived (post-converter), so the
body sent upstream is unchanged.

extra_opts no longer carries the four instance-derived fields
(model_options, override_llm_options, request_body_override_map,
request_body_force_override); it passes the picked ai_instance through
and the helper reads from it directly.

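Zero behavior change for requests routed through before_proxy, which
now passes ai_instance. Other build_request callers must pass
opts.ai_instance too, because the opts fields removed here are no
longer read. Motivation: ai-cache (planned follow-up plugin) needs to
compute its cache key from the post-override effective body without
going through build_request, which performs the upstream HTTP call,
signing, and keepalive.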
---
 apisix/plugins/ai-providers/base.lua | 34 +++-------------
 apisix/plugins/ai-proxy/base.lua     | 58 ++++++++++++++++++++++++----
 2 files changed, 57 insertions(+), 35 deletions(-)

diff --git a/apisix/plugins/ai-providers/base.lua b/apisix/plugins/ai-providers/base.lua
index 944be263296a..2607f710f684 100644
--- a/apisix/plugins/ai-providers/base.lua
+++ b/apisix/plugins/ai-providers/base.lua
@@ -34,7 +34,7 @@ local transport_http = require("apisix.plugins.ai-transport.http")
 local transport_auth = require("apisix.plugins.ai-transport.auth")
 local log_sanitize = require("apisix.utils.log-sanitize")
 local protocols = require("apisix.plugins.ai-protocols")
-local deep_merge = require("apisix.plugins.ai-proxy.merge").deep_merge
+local ai_proxy_base = require("apisix.plugins.ai-proxy.base")
 local ngx = ngx
 local ngx_now = ngx.now
 local tonumber = tonumber
@@ -198,33 +198,11 @@ function _M.build_request(self, ctx, conf, request_body, opts)
                           or opts.target_host or self.host,
     }
 
-    -- Inject model options (flat overwrite)
-    if opts.model_options then
-        for opt, val in pairs(opts.model_options) do
-            if request_body[opt] ~= nil then
-                core.log.info("model_options overwriting request field '", opt, "'")
-            end
-            request_body[opt] = val
-        end
-    end
-
-    -- Apply llm_options via provider capability hook (always force-overwrites)
-    if opts.override_llm_options then
-        local cap = self.capabilities and self.capabilities[ctx.ai_target_protocol]
-        if cap and cap.rewrite_request_body then
-            cap.rewrite_request_body(request_body, opts.override_llm_options, true)
-        end
-    end
-
-    -- Apply per-target-protocol request body override (deep merge)
-    if opts.request_body_override_map then
-        local patch = opts.request_body_override_map[ctx.ai_target_protocol]
-        if patch then
-            core.log.info("applying request_body override for target protocol '",
-                          ctx.ai_target_protocol, "'")
-            request_body = deep_merge(request_body, patch, opts.request_body_force_override)
-        end
-    end
+    -- Apply instance-level overrides (options + override.{llm_options, request_body}).
+    -- Runs after the converter so request_body is in target-protocol shape, and the
+    -- request_body[target_protocol] patch applies to the post-conversion body.
+    request_body = ai_proxy_base.apply_instance_overrides(
+        request_body, opts.ai_instance, self, ctx.ai_target_protocol)
     params.body = request_body
 
     if self.remove_model then
diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 745f3b7a5c42..0ade8ac2ba07 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -28,6 +28,7 @@ local protocols = require("apisix.plugins.ai-protocols")
 local transport_http = require("apisix.plugins.ai-transport.http")
 local log_sanitize = require("apisix.utils.log-sanitize")
 local apisix_upstream = require("resty.apisix.upstream")
+local deep_merge = require("apisix.plugins.ai-proxy.merge").deep_merge
 
 local _M = {}
 
@@ -99,6 +100,55 @@ function _M.detect_request_type(ctx)
 end
 
 
+-- Apply instance-level overrides to the request body, returning the body
+-- that would be sent upstream. Encapsulates the precedence rules used by the
+-- ai-proxy / ai-proxy-multi instance config:
+--   1. ai_instance.options - flat overwrite onto request_body.
+--   2. override.llm_options - applied via the provider capability hook
+--      rewrite_request_body for target_protocol (force-overwrites).
+--   3. override.request_body[target_protocol] - deep-merged; force controlled
+--      by override.request_body_force_override.
+-- May mutate request_body in place; returns the same (mutated) table or the
+-- table produced by the final deep merge.
+-- Pure relative to its arguments: no ctx access, no I/O.
+function _M.apply_instance_overrides(request_body, ai_instance, ai_provider, target_protocol)
+    local model_options = ai_instance and ai_instance.options
+    if model_options then
+        for opt, val in pairs(model_options) do
+            if request_body[opt] ~= nil then
+                core.log.info("model_options overwriting request field '", opt, "'")
+            end
+            request_body[opt] = val
+        end
+    end
+
+    local override_llm_options =
+        core.table.try_read_attr(ai_instance, "override", "llm_options")
+    if override_llm_options then
+        local caps = ai_provider and ai_provider.capabilities
+        local cap = caps and caps[target_protocol]
+        if cap and cap.rewrite_request_body then
+            cap.rewrite_request_body(request_body, override_llm_options, true)
+        end
+    end
+
+    local request_body_override_map =
+        core.table.try_read_attr(ai_instance, "override", "request_body")
+    if request_body_override_map then
+        local patch = request_body_override_map[target_protocol]
+        if patch then
+            core.log.info("applying request_body override for target protocol '",
+                          target_protocol, "'")
+            local force = core.table.try_read_attr(ai_instance, "override",
+                                                   "request_body_force_override")
+            request_body = deep_merge(request_body, patch, force)
+        end
+    end
+
+    return request_body
+end
+
+
 -- Execute the AI proxy pipeline:
 --   1. Validate request
 --   2. Route client protocol to driver capability (passthrough / convert / error)
@@ -124,15 +174,9 @@ function _M.before_proxy(conf, ctx, on_error)
         local extra_opts = {
             name = ai_instance.name,
             endpoint = core.table.try_read_attr(ai_instance, "override", "endpoint"),
-            model_options = ai_instance.options,
             conf = ai_instance.provider_conf or {},
             auth = ai_instance.auth,
-            override_llm_options =
-                core.table.try_read_attr(ai_instance, "override", "llm_options"),
-            request_body_override_map =
-                core.table.try_read_attr(ai_instance, "override", "request_body"),
-            request_body_force_override =
-                core.table.try_read_attr(ai_instance, "override", "request_body_force_override"),
+            ai_instance = ai_instance,
         }
         -- Step 1: Route client protocol to driver capability
         local client_protocol = ctx.ai_client_protocol

From c871e982afdecad2a5aa129aad77a79b94ee0281 Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Thu, 14 May 2026 05:08:45 +0800
Subject: [PATCH 2/7] feat(ai-proxy): add effective_model and
 effective_request_for_cache helpers

Two pure helpers on top of apply_instance_overrides (introduced in the
preceding refactor), both in apisix/plugins/ai-proxy/base.lua:

- effective_model(ctx) returns ai_instance.options.model when the operator
  forces a model on the instance, falling back to ctx.var.request_llm_model
  (the client-supplied model that detect_request_type mirrors).

- effective_request_for_cache(ctx) returns the request body as it would be
  sent upstream: reads the parsed body, resolves the target protocol from
  ctx.ai_client_protocol against the provider's capabilities (so peer
  plugins running in access phase before before_proxy can still get the
  post-override view), and applies apply_instance_overrides.

A small internal resolve_target_protocol helper mirrors the routing logic
in before_proxy so callers don't have to wait for ctx.ai_target_protocol
to be populated.

These helpers exist for ai-cache (planned follow-up) to compute a cache
key over the effective body without invoking build_request (which would
make the upstream HTTP call). Both helpers take only ctx and perform no
HTTP call or signing.

Test: t/plugin/ai-proxy-request-body-override.t TEST 17 drives a real
request through ai-proxy with options + override.request_body, then uses
serverless-post-function (priority -2000, runs after ai-proxy access at
1040) to invoke both helpers and log their output. Asserts both the
upstream-received body AND the helper outputs reflect the same
post-override view.
---
 apisix/plugins/ai-proxy/base.lua          | 69 +++++++++++++++++++++++
 t/plugin/ai-proxy-request-body-override.t | 61 ++++++++++++++++++++
 2 files changed, 130 insertions(+)

diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 0ade8ac2ba07..754b00e9665a 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -149,6 +149,75 @@ function _M.apply_instance_overrides(request_body, ai_instance, ai_provider, tar
 end
 
 
+-- Effective model that would be sent upstream for the picked AI instance.
+-- Returns the operator-forced model (ai_instance.options.model) when set;
+-- otherwise the client-supplied model that detect_request_type mirrored to
+-- ctx.var.request_llm_model. Returns nil when neither is available.
+function _M.effective_model(ctx)
+    local ai_instance = ctx and ctx.picked_ai_instance
+    local options = ai_instance and ai_instance.options
+    if options and options.model then
+        return options.model
+    end
+    return ctx and ctx.var and ctx.var.request_llm_model
+end
+
+
+-- Resolve the target protocol the upstream LLM speaks for the picked instance,
+-- given the detected client protocol. Mirrors the routing in before_proxy so
+-- callers that run before before_proxy (peer plugins in access phase) can still
+-- compute it. Returns ctx.ai_target_protocol when already set; otherwise picks
+-- a passthrough target if the provider speaks the client protocol natively, the
+-- "passthrough" sentinel for the catch-all client protocol, or the converter's
+-- target when a converter bridges client to provider.
+local function resolve_target_protocol(ctx, ai_provider)
+    if ctx.ai_target_protocol then
+        return ctx.ai_target_protocol
+    end
+    local client_protocol = ctx.ai_client_protocol
+    if not client_protocol then
+        return nil
+    end
+    local caps = ai_provider and ai_provider.capabilities or {}
+    if caps[client_protocol] then
+        return client_protocol
+    end
+    if client_protocol == "passthrough" then
+        return "passthrough"
+    end
+    local _, target = protocols.find_converter(client_protocol, caps)
+    return target
+end
+
+
+-- Effective request body that would be sent upstream for the current request.
+-- Reads the parsed request body and applies apply_instance_overrides against
+-- ctx.picked_ai_instance and the resolved target protocol. Pure: no HTTP, no
+-- signing, no upstream call. Intended for ai-cache (and similar peer plugins)
+-- to compute a cache key over the post-override view of the body.
+-- Requires ctx.picked_ai_instance and ctx.ai_client_protocol to be populated
+-- (both set by ai-proxy / ai-proxy-multi access phase before any peer plugin
+-- with priority lower than 1040 runs).
+function _M.effective_request_for_cache(ctx)
+    local request_body, err = core.request.get_json_request_body_table()
+    if not request_body then
+        return nil, err
+    end
+    local ai_instance = ctx and ctx.picked_ai_instance
+    if not ai_instance then
+        return nil, "no picked_ai_instance on ctx"
+    end
+    local ok, ai_provider = pcall(require,
+        "apisix.plugins.ai-providers." .. ai_instance.provider)
+    if not ok then
+        return nil, "failed to load provider: " .. tostring(ai_instance.provider)
+    end
+    local target_protocol = resolve_target_protocol(ctx, ai_provider)
+    return _M.apply_instance_overrides(
+        request_body, ai_instance, ai_provider, target_protocol)
+end
+
+
 -- Execute the AI proxy pipeline:
 --   1. Validate request
 --   2. Route client protocol to driver capability (passthrough / convert / error)
diff --git a/t/plugin/ai-proxy-request-body-override.t b/t/plugin/ai-proxy-request-body-override.t
index 088123bebac2..d1220e167957 100644
--- a/t/plugin/ai-proxy-request-body-override.t
+++ b/t/plugin/ai-proxy-request-body-override.t
@@ -819,3 +819,64 @@ max_tokens=321
     }
 --- response_body
 max_completion_tokens=200 temperature=0.5
+
+
+
+=== TEST 17: effective_model + effective_request_for_cache reflect post-override view
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- ai-proxy applies overrides; serverless-post-function (priority -2000)
+            -- runs after ai-proxy access (priority 1040) in the access phase, invokes
+            -- the helpers, and logs their output. The test then asserts BOTH the
+            -- upstream-received body AND the helper outputs reflect the same
+            -- post-override view.
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/chat",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer t" } },
+                            "options": { "model": "options-model" },
+                            "override": {
+                                "endpoint": "http://localhost:6732",
+                                "request_body": {
+                                    "openai-chat": { "temperature": 0.42 }
+                                }
+                            },
+                            "ssl_verify": false
+                        },
+                        "serverless-post-function": {
+                            "functions": [
+                                "return function(_, ctx) local b = require('apisix.plugins.ai-proxy.base'); local cjson = require('cjson.safe'); local m = b.effective_model(ctx); local body, err = b.effective_request_for_cache(ctx); ngx.log(ngx.WARN, 'EFFECTIVE_MODEL=', m or 'nil'); ngx.log(ngx.WARN, 'EFFECTIVE_BODY=', body and cjson.encode(body) or ('ERR:'..tostring(err))) end"
+                            ]
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local http = require("resty.http").new()
+            local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+                method = "POST",
+                body = '{"messages":[{"role":"user","content":"hi"}],"model":"client-model"}',
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            local cjson = require("cjson.safe")
+            local body = cjson.decode(res.body)
+            local echoed = cjson.decode(body.choices[1].message.content)
+            ngx.say("upstream model=", echoed.model,
+                    " upstream temperature=", echoed.temperature)
+        }
+    }
+--- response_body
+upstream model=options-model upstream temperature=0.42
+--- error_log eval
+[
+    qr/EFFECTIVE_MODEL=options-model/,
+    qr/EFFECTIVE_BODY=.*"model":"options-model"/,
+    qr/EFFECTIVE_BODY=.*"temperature":0\.42/,
+]

From 6e25627c55e441c81b60f10e59ac57d8f5dbe678 Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Thu, 14 May 2026 05:36:14 +0800
Subject: [PATCH 3/7] refactor(ai-proxy): drop effective_model helper

effective_model duplicates information already present on the body that
effective_request_for_cache returns (ai_instance.options.model is written
onto the body during apply_instance_overrides step 1). Callers that need
the model can read it off the effective body. A cheap ctx-only model
lookup can be added later if a concrete consumer needs it without parsing
the body.

Updates TEST 17 to drop the EFFECTIVE_MODEL assertion; the
EFFECTIVE_BODY assertions still prove the helper produces the same body
the upstream receives.
---
 apisix/plugins/ai-proxy/base.lua          | 14 --------------
 t/plugin/ai-proxy-request-body-override.t |  9 ++++-----
 2 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 754b00e9665a..55ed08034ddc 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -149,20 +149,6 @@ function _M.apply_instance_overrides(request_body, ai_instance, ai_provider, tar
 end
 
 
--- Effective model that would be sent upstream for the picked AI instance.
--- Returns the operator-forced model (ai_instance.options.model) when set;
--- otherwise the client-supplied model that detect_request_type mirrored to
--- ctx.var.request_llm_model. Returns nil when neither is available.
-function _M.effective_model(ctx)
-    local ai_instance = ctx and ctx.picked_ai_instance
-    local options = ai_instance and ai_instance.options
-    if options and options.model then
-        return options.model
-    end
-    return ctx and ctx.var and ctx.var.request_llm_model
-end
-
-
 -- Resolve the target protocol the upstream LLM speaks for the picked instance,
 -- given the detected client protocol. Mirrors the routing in before_proxy so
 -- callers that run before before_proxy (peer plugins in access phase) can still
diff --git a/t/plugin/ai-proxy-request-body-override.t b/t/plugin/ai-proxy-request-body-override.t
index d1220e167957..a9396c38b019 100644
--- a/t/plugin/ai-proxy-request-body-override.t
+++ b/t/plugin/ai-proxy-request-body-override.t
@@ -822,15 +822,15 @@ max_completion_tokens=200 temperature=0.5
 
 
 
-=== TEST 17: effective_model + effective_request_for_cache reflect post-override view
+=== TEST 17: effective_request_for_cache returns post-override body
 --- config
     location /t {
         content_by_lua_block {
             local t = require("lib.test_admin").test
             -- ai-proxy applies overrides; serverless-post-function (priority -2000)
             -- runs after ai-proxy access (priority 1040) in the access phase, invokes
-            -- the helpers, and logs their output. The test then asserts BOTH the
-            -- upstream-received body AND the helper outputs reflect the same
+            -- the helper, and logs its output. The test asserts BOTH the
+            -- upstream-received body AND the helper output reflect the same
             -- post-override view.
             local code = t('/apisix/admin/routes/1',
                  ngx.HTTP_PUT,
@@ -851,7 +851,7 @@ max_completion_tokens=200 temperature=0.5
                         },
                         "serverless-post-function": {
                             "functions": [
-                                "return function(_, ctx) local b = require('apisix.plugins.ai-proxy.base'); local cjson = require('cjson.safe'); local m = b.effective_model(ctx); local body, err = b.effective_request_for_cache(ctx); ngx.log(ngx.WARN, 'EFFECTIVE_MODEL=', m or 'nil'); ngx.log(ngx.WARN, 'EFFECTIVE_BODY=', body and cjson.encode(body) or ('ERR:'..tostring(err))) end"
+                                "return function(_, ctx) local b = require('apisix.plugins.ai-proxy.base'); local cjson = require('cjson.safe'); local body, err = b.effective_request_for_cache(ctx); ngx.log(ngx.WARN, 'EFFECTIVE_BODY=', body and cjson.encode(body) or ('ERR:'..tostring(err))) end"
                             ]
                         }
                     }
@@ -876,7 +876,6 @@ max_completion_tokens=200 temperature=0.5
 upstream model=options-model upstream temperature=0.42
 --- error_log eval
 [
-    qr/EFFECTIVE_MODEL=options-model/,
     qr/EFFECTIVE_BODY=.*"model":"options-model"/,
     qr/EFFECTIVE_BODY=.*"temperature":0\.42/,
 ]

From 72cc8d773a794f885b425932e5da288f691617d1 Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Thu, 14 May 2026 06:21:23 +0800
Subject: [PATCH 4/7] feat(ai-proxy): apply converter in
 effective_request_for_cache

The cache key produced via effective_request_for_cache should reflect
what would actually be sent upstream. Previously the helper only applied
apply_instance_overrides, so if a converter was in the chain (e.g.
anthropic-messages client routed to an openai-chat provider) the helper
returned the pre-converter body while build_request sent the converted
body - the cache key would diverge from the upstream request shape.

Now the helper:
  1. Reads the request body
  2. Resolves (target_protocol, converter) via resolve_target_protocol
  3. Applies the converter when present
  4. Applies apply_instance_overrides

resolve_target_protocol's return signature widens from `target_protocol`
to `(target_protocol, converter)`; the fast-path (ctx.ai_target_protocol
already set) returns ctx.ai_converter alongside.

Tests:
- TEST 17 (no-converter path) reformatted - the inline serverless-post-
  function was a single 297-char line; broken into a readable multi-line
  body to match the style used elsewhere in the file.
- TEST 18 added covering the converter path: anthropic-messages client
  to an openai provider. Asserts EFFECTIVE_BODY contains
  max_completion_tokens (post-converter rename of max_tokens) and
  temperature 0.42 (post-override), but NOT the original max_tokens
  field - proving the converter ran inside the helper.

Drive-by: comment on apply_instance_overrides shortened from 11 lines
to 5 (precedence rules + "mutates in place"). Other two helpers keep
their longer docs.
---
 apisix/plugins/ai-proxy/base.lua          | 62 +++++++++---------
 t/plugin/ai-proxy-request-body-override.t | 76 ++++++++++++++++++++++-
 2 files changed, 102 insertions(+), 36 deletions(-)

diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 55ed08034ddc..7954cf83cd33 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -100,17 +100,11 @@ function _M.detect_request_type(ctx)
 end
 
 
--- Apply instance-level overrides to the request body, returning the body
--- that would be sent upstream. Encapsulates the precedence rules used by the
--- ai-proxy / ai-proxy-multi instance config:
---   1. ai_instance.options - flat overwrite onto request_body.
---   2. override.llm_options - applied via the provider capability hook
---      rewrite_request_body for target_protocol (force-overwrites).
---   3. override.request_body[target_protocol] - deep-merged; force controlled
---      by override.request_body_force_override.
--- May mutate request_body in place; returns the same (mutated) table or the
--- table produced by the final deep merge.
--- Pure relative to its arguments: no ctx access, no I/O.
+-- Apply ai_instance overrides to request_body and return the effective body
+-- that would be sent upstream. Precedence: options (flat overwrite) ->
+-- override.llm_options (provider capability rewrite) ->
+-- override.request_body[target_protocol] (deep merge). Mutates request_body
+-- in place.
 function _M.apply_instance_overrides(request_body, ai_instance, ai_provider, target_protocol)
     local model_options = ai_instance and ai_instance.options
     if model_options then
@@ -149,41 +143,36 @@ function _M.apply_instance_overrides(request_body, ai_instance, ai_provider, tar
 end
 
 
--- Resolve the target protocol the upstream LLM speaks for the picked instance,
--- given the detected client protocol. Mirrors the routing in before_proxy so
--- callers that run before before_proxy (peer plugins in access phase) can still
--- compute it. Returns ctx.ai_target_protocol when already set; otherwise picks
--- a passthrough target if the provider speaks the client protocol natively, the
--- "passthrough" sentinel for the catch-all client protocol, or the converter's
--- target when a converter bridges client to provider.
+-- Resolve (target_protocol, converter) from ctx.ai_client_protocol + provider
+-- capabilities. Mirrors before_proxy's routing so peer plugins running in
+-- access phase (before before_proxy sets ctx.ai_target_protocol /
+-- ctx.ai_converter) can compute them themselves.
 local function resolve_target_protocol(ctx, ai_provider)
     if ctx.ai_target_protocol then
-        return ctx.ai_target_protocol
+        return ctx.ai_target_protocol, ctx.ai_converter
     end
     local client_protocol = ctx.ai_client_protocol
     if not client_protocol then
-        return nil
+        return nil, nil
     end
     local caps = ai_provider and ai_provider.capabilities or {}
     if caps[client_protocol] then
-        return client_protocol
+        return client_protocol, nil
     end
     if client_protocol == "passthrough" then
-        return "passthrough"
+        return "passthrough", nil
     end
-    local _, target = protocols.find_converter(client_protocol, caps)
-    return target
+    local converter, target = protocols.find_converter(client_protocol, caps)
+    return target, converter
 end
 
 
--- Effective request body that would be sent upstream for the current request.
--- Reads the parsed request body and applies apply_instance_overrides against
--- ctx.picked_ai_instance and the resolved target protocol. Pure: no HTTP, no
--- signing, no upstream call. Intended for ai-cache (and similar peer plugins)
--- to compute a cache key over the post-override view of the body.
--- Requires ctx.picked_ai_instance and ctx.ai_client_protocol to be populated
--- (both set by ai-proxy / ai-proxy-multi access phase before any peer plugin
--- with priority lower than 1040 runs).
+-- Return the request body as it would be sent upstream for the current ctx.
+-- Reads the parsed body, applies the converter (if the client protocol differs
+-- from the provider's target protocol), then applies apply_instance_overrides.
+-- The result matches what build_request would send upstream. Pure: no HTTP,
+-- no signing, no upstream call. Requires ctx.picked_ai_instance and
+-- ctx.ai_client_protocol (both set by ai-proxy access phase).
 function _M.effective_request_for_cache(ctx)
     local request_body, err = core.request.get_json_request_body_table()
     if not request_body then
@@ -198,7 +187,14 @@ function _M.effective_request_for_cache(ctx)
     if not ok then
         return nil, "failed to load provider: " .. tostring(ai_instance.provider)
     end
-    local target_protocol = resolve_target_protocol(ctx, ai_provider)
+    local target_protocol, converter = resolve_target_protocol(ctx, ai_provider)
+    if converter and converter.convert_request then
+        local converted, conv_err = converter.convert_request(request_body, ctx)
+        if not converted then
+            return nil, conv_err or "converter failed"
+        end
+        request_body = converted
+    end
     return _M.apply_instance_overrides(
         request_body, ai_instance, ai_provider, target_protocol)
 end
diff --git a/t/plugin/ai-proxy-request-body-override.t b/t/plugin/ai-proxy-request-body-override.t
index a9396c38b019..0c8608aff751 100644
--- a/t/plugin/ai-proxy-request-body-override.t
+++ b/t/plugin/ai-proxy-request-body-override.t
@@ -850,9 +850,14 @@ max_completion_tokens=200 temperature=0.5
                             "ssl_verify": false
                         },
                         "serverless-post-function": {
-                            "functions": [
-                                "return function(_, ctx) local b = require('apisix.plugins.ai-proxy.base'); local cjson = require('cjson.safe'); local body, err = b.effective_request_for_cache(ctx); ngx.log(ngx.WARN, 'EFFECTIVE_BODY=', body and cjson.encode(body) or ('ERR:'..tostring(err))) end"
-                            ]
+                            "functions": ["return function(_, ctx)
+                                local b = require('apisix.plugins.ai-proxy.base')
+                                local cjson = require('cjson.safe')
+                                local body, err = b.effective_request_for_cache(ctx)
+                                ngx.log(ngx.WARN, 'EFFECTIVE_BODY=',
+                                    body and cjson.encode(body)
+                                          or ('ERR:' .. tostring(err)))
+                            end"]
                         }
                     }
                 }]]
@@ -879,3 +884,68 @@ upstream model=options-model upstream temperature=0.42
     qr/EFFECTIVE_BODY=.*"model":"options-model"/,
     qr/EFFECTIVE_BODY=.*"temperature":0\.42/,
 ]
+
+
+
+=== TEST 18: effective_request_for_cache applies the converter (anthropic-messages -> openai-chat)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- Client sends anthropic-messages format to an openai provider, which
+            -- speaks openai-chat natively. The converter translates the body and
+            -- override.request_body.openai-chat then applies. The helper should
+            -- mirror this: convert first, then apply overrides. Distinctive
+            -- post-converter marker: max_tokens (anthropic) becomes
+            -- max_completion_tokens (openai-chat) and the original max_tokens
+            -- is stripped by the converter ("never forward max_tokens").
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer t" } },
+                            "override": {
+                                "endpoint": "http://localhost:6732",
+                                "request_body": {
+                                    "openai-chat": { "temperature": 0.42 }
+                                }
+                            },
+                            "ssl_verify": false
+                        },
+                        "serverless-post-function": {
+                            "functions": ["return function(_, ctx)
+                                local b = require('apisix.plugins.ai-proxy.base')
+                                local cjson = require('cjson.safe')
+                                local body, err = b.effective_request_for_cache(ctx)
+                                ngx.log(ngx.WARN, 'EFFECTIVE_BODY=',
+                                    body and cjson.encode(body)
+                                          or ('ERR:' .. tostring(err)))
+                            end"]
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local http = require("resty.http").new()
+            local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/v1/messages", {
+                method = "POST",
+                body = '{"model":"claude-3","max_tokens":10,"messages":[{"role":"user","content":"hi"}]}',
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            ngx.status = res.status
+            ngx.say("status=", res.status)
+        }
+    }
+--- response_body
+status=200
+--- error_log eval
+[
+    qr/EFFECTIVE_BODY=.*"max_completion_tokens":10/,
+    qr/EFFECTIVE_BODY=.*"temperature":0\.42/,
+]
+--- no_error_log eval
+qr/EFFECTIVE_BODY=.*"max_tokens":10/

From 0b0c8609d40a96fed5df86827e5351f5a35590fc Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Thu, 14 May 2026 08:21:33 +0800
Subject: [PATCH 5/7] fix(ai-request-rewrite): pass ai_instance to
 build_request

The b796b9d7 refactor changed build_request to read overrides from
opts.ai_instance, but the ai-request-rewrite sidecar caller was missed
and kept passing the now-dead opts.model_options. Result: conf.options
silently stopped propagating to the LLM sidecar request body.

Fix: pass ai_instance = conf. conf has the same .options / .override
shape apply_instance_overrides reads; the override.llm_options /
request_body branches are no-ops since the rewrite schema only defines
override.endpoint.

t/plugin/ai-request-rewrite2.t TEST 1, which validates extra_option in
the LLM-stub request body, now passes (was failing with status 400
"LLM service returned error status: 400" once httpbin is reachable).
---
 apisix/plugins/ai-request-rewrite.lua | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apisix/plugins/ai-request-rewrite.lua b/apisix/plugins/ai-request-rewrite.lua
index 900f700836e9..31712caa5771 100644
--- a/apisix/plugins/ai-request-rewrite.lua
+++ b/apisix/plugins/ai-request-rewrite.lua
@@ -122,7 +122,7 @@ local function request_to_llm(conf, request_table, ctx, target_path)
     local extra_opts = {
         endpoint = core.table.try_read_attr(conf, "override", "endpoint"),
         auth = conf.auth,
-        model_options = conf.options,
+        ai_instance = conf,
         target_path = target_path,
     }
     ctx.llm_request_start_time = ngx.now()

From 1d8a9a2c854d8cb4c69e84730c6944fd2fbe2663 Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Thu, 14 May 2026 08:21:33 +0800
Subject: [PATCH 6/7] test(ai-proxy): assert TEST 18 helper output matches
 upstream body
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously TEST 18 only checked status=200 plus the EFFECTIVE_BODY
error_log regex. The cache-key correctness contract requires the
helper's output to match what build_request actually sends upstream —
but with no upstream-side assertion, the test would have passed even if
the helper diverged from build_request, as long as the helper's own log
contained the expected fields.

Decode body.content[1].text (the openai-chat body echoed by the
/v1/chat/completions stub, surfaced through the converter's response
transform) and assert max_completion_tokens=10, temperature=0.42,
max_tokens=nil. Combined with the existing EFFECTIVE_BODY regex on the
same fields, this pins down helper == upstream for the converter's
distinctive markers. Mirrors TEST 17's structure.
---
 t/plugin/ai-proxy-request-body-override.t | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/t/plugin/ai-proxy-request-body-override.t b/t/plugin/ai-proxy-request-body-override.t
index 0c8608aff751..4ca3aae2b535 100644
--- a/t/plugin/ai-proxy-request-body-override.t
+++ b/t/plugin/ai-proxy-request-body-override.t
@@ -937,11 +937,20 @@ upstream model=options-model upstream temperature=0.42
                 headers = { ["Content-Type"] = "application/json" },
             }))
             ngx.status = res.status
-            ngx.say("status=", res.status)
+            -- The upstream /v1/chat/completions stub echoes the body it received
+            -- as the message text; ai-proxy converts that openai-chat response
+            -- back to anthropic-messages, so body.content[1].text is the
+            -- post-converter, post-override body the upstream actually received.
+            local cjson = require("cjson.safe")
+            local body = cjson.decode(res.body)
+            local echoed = cjson.decode(body.content[1].text)
+            ngx.say("upstream max_completion_tokens=", echoed.max_completion_tokens,
+                    " upstream temperature=", echoed.temperature,
+                    " upstream max_tokens=", tostring(echoed.max_tokens))
         }
     }
 --- response_body
-status=200
+upstream max_completion_tokens=10 upstream temperature=0.42 upstream max_tokens=nil
 --- error_log eval
 [
     qr/EFFECTIVE_BODY=.*"max_completion_tokens":10/,

From b7562c0065a3bc37222d758b6714dc9266f1b96c Mon Sep 17 00:00:00 2001
From: janiussyafiq <izzraff.js@gmail.com>
Date: Thu, 14 May 2026 10:10:25 +0800
Subject: [PATCH 7/7] chore: fix lint

---
 apisix/plugins/ai-proxy/base.lua | 1 +
 1 file changed, 1 insertion(+)

diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 7954cf83cd33..d13f89ff252a 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -23,6 +23,7 @@ local pcall   = pcall
 local pairs   = pairs
 local type    = type
 local table   = table
+local tostring = tostring
 local exporter = require("apisix.plugins.prometheus.exporter")
 local protocols = require("apisix.plugins.ai-protocols")
 local transport_http = require("apisix.plugins.ai-transport.http")