truefoundry · models-bot · Jul 2, 2026 · cursor · Jul 2, 2026 · cursor
diff --git a/providers/google-vertex/anthropic/claude-opus-4-1.yaml b/providers/google-vertex/anthropic/claude-opus-4-1.yaml
@@ -1,20 +1,21 @@
 costs:
-    - cache_creation_input_token_cost: 0.00001875
-      cache_creation_input_token_cost_per_hour: 0.00003
-      cache_read_input_token_cost: 0.0000015
-      input_cost_per_token: 0.000015
-      input_cost_per_token_batches: 0.0000075
-      output_cost_per_token: 0.000075
-      output_cost_per_token_batches: 0.0000375
+    - cache_creation_input_token_cost: 1.875e-5
+      cache_creation_input_token_cost_per_hour: 3e-5
+      cache_read_input_token_cost: 1.5e-6
+      input_cost_per_token: 1.5e-5
+      input_cost_per_token_batches: 7.5e-6
+      output_cost_per_token: 7.5e-5
+      output_cost_per_token_batches: 3.75e-5
       region: us-east5
-    - cache_creation_input_token_cost: 0.00001875
-      cache_creation_input_token_cost_per_hour: 0.00003
-      cache_read_input_token_cost: 0.0000015
-      input_cost_per_token: 0.000015
-      input_cost_per_token_batches: 0.0000075
-      output_cost_per_token: 0.000075
-      output_cost_per_token_batches: 0.0000375
+    - cache_creation_input_token_cost: 1.875e-5
+      cache_creation_input_token_cost_per_hour: 3e-5
+      cache_read_input_token_cost: 1.5e-6
+      input_cost_per_token: 1.5e-5
+      input_cost_per_token_batches: 7.5e-6
+      output_cost_per_token: 7.5e-5
+      output_cost_per_token_batches: 3.75e-5
       region: global
+deprecationDate: "2026-06-05"
 features:
     - function_calling
     - parallel_function_calling
@@ -24,6 +25,7 @@ features:
     - cache_control
     - system_messages
     - structured_output
+isDeprecated: true
 limits:
     context_window: 200000
     max_input_tokens: 200000
@@ -51,7 +53,7 @@ sources:
     - https://platform.claude.com/docs/en/docs/build-with-claude/claude-on-vertex-ai
     - https://docs.cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude
     - https://platform.claude.com/docs/en/about-claude/model-deprecations
-status: active
+status: deprecated
 supportedModes:
     - chat
 thinking: true
diff --git a/providers/google-vertex/anthropic/claude-opus-4-1@20250805.yaml b/providers/google-vertex/anthropic/claude-opus-4-1@20250805.yaml
@@ -15,13 +15,15 @@ costs:
       output_cost_per_token: 7.5e-5
       output_cost_per_token_batches: 3.75e-5
       region: global
+deprecationDate: "2026-06-05"
 features:
     - function_calling
     - tool_choice
     - assistant_prefill
     - prompt_caching
     - cache_control
     - system_messages
+isDeprecated: true
 limits:
     context_window: 200000
     max_input_tokens: 200000
@@ -47,7 +49,7 @@ sources:
     - https://platform.claude.com/docs/en/about-claude/pricing
     - https://platform.claude.com/docs/en/about-claude/model-deprecations
     - https://docs.cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/opus-4-1
-status: active
+status: deprecated
 supportedModes:
     - chat
 thinking: true
diff --git a/providers/google-vertex/anthropic/claude-opus-4-5.yaml b/providers/google-vertex/anthropic/claude-opus-4-5.yaml
@@ -7,6 +7,14 @@ costs:
       output_cost_per_token: 0.0000275
       output_cost_per_token_batches: 0.00001375
       region: us-east5
+    - cache_creation_input_token_cost: 0.000006875
+      cache_creation_input_token_cost_per_hour: 0.000011
+      cache_read_input_token_cost: 5.5e-7
+      input_cost_per_token: 0.0000055
+      input_cost_per_token_batches: 0.00000275
+      output_cost_per_token: 0.0000275
+      output_cost_per_token_batches: 0.00001375
+      region: us
     - cache_creation_input_token_cost: 0.00000625
       cache_creation_input_token_cost_per_hour: 0.00001
       cache_read_input_token_cost: 5e-7
@@ -23,6 +31,14 @@ costs:
       output_cost_per_token: 0.0000275
       output_cost_per_token_batches: 0.00001375
       region: europe-west1
+    - cache_creation_input_token_cost: 0.000006875
+      cache_creation_input_token_cost_per_hour: 0.000011
+      cache_read_input_token_cost: 5.5e-7
+      input_cost_per_token: 0.0000055
+      input_cost_per_token_batches: 0.00000275
+      output_cost_per_token: 0.0000275
+      output_cost_per_token_batches: 0.00001375
+      region: eu
     - cache_creation_input_token_cost: 0.000006875
       cache_creation_input_token_cost_per_hour: 0.000011
       cache_read_input_token_cost: 5.5e-7

diff --git a/providers/google-vertex/anthropic/claude-opus-4-5@20251101.yaml b/providers/google-vertex/anthropic/claude-opus-4-5@20251101.yaml
@@ -3,37 +3,49 @@ costs:
       cache_creation_input_token_cost_per_hour: 0.000011
       cache_read_input_token_cost: 5.5e-7
       input_cost_per_token: 0.0000055
+      input_cost_per_token_batches: 0.00000275
       output_cost_per_token: 0.0000275
+      output_cost_per_token_batches: 0.00001375
       region: us-east5
     - cache_creation_input_token_cost: 0.000006875
       cache_creation_input_token_cost_per_hour: 0.000011
       cache_read_input_token_cost: 5.5e-7
       input_cost_per_token: 0.0000055
+      input_cost_per_token_batches: 0.00000275
       output_cost_per_token: 0.0000275
+      output_cost_per_token_batches: 0.00001375
       region: us
     - cache_creation_input_token_cost: 0.00000625
       cache_creation_input_token_cost_per_hour: 0.00001
       cache_read_input_token_cost: 5e-7
       input_cost_per_token: 0.000005
+      input_cost_per_token_batches: 0.0000025
       output_cost_per_token: 0.000025
+      output_cost_per_token_batches: 0.0000125
       region: global
     - cache_creation_input_token_cost: 0.000006875
       cache_creation_input_token_cost_per_hour: 0.000011
       cache_read_input_token_cost: 5.5e-7
       input_cost_per_token: 0.0000055
+      input_cost_per_token_batches: 0.00000275
       output_cost_per_token: 0.0000275
+      output_cost_per_token_batches: 0.00001375
       region: europe-west1
     - cache_creation_input_token_cost: 0.000006875
       cache_creation_input_token_cost_per_hour: 0.000011
       cache_read_input_token_cost: 5.5e-7
       input_cost_per_token: 0.0000055
+      input_cost_per_token_batches: 0.00000275
       output_cost_per_token: 0.0000275
+      output_cost_per_token_batches: 0.00001375
       region: eu
     - cache_creation_input_token_cost: 0.000006875
       cache_creation_input_token_cost_per_hour: 0.000011
       cache_read_input_token_cost: 5.5e-7
       input_cost_per_token: 0.0000055
+      input_cost_per_token_batches: 0.00000275
       output_cost_per_token: 0.0000275
+      output_cost_per_token_batches: 0.00001375
       region: asia-southeast1
 features:
     - function_calling

diff --git a/providers/google-vertex/anthropic/claude-sonnet-4-5@20250929.yaml b/providers/google-vertex/anthropic/claude-sonnet-4-5@20250929.yaml
@@ -134,7 +134,7 @@ limits:
     max_input_tokens: 200000
     max_output_tokens: 64000
     max_tokens: 64000
-    tool_use_system_prompt_tokens: 346
+    tool_use_system_prompt_tokens: 496
 modalities:
     input:
         - text

diff --git a/providers/google-vertex/anthropic/claude-sonnet-4-6@default.yaml b/providers/google-vertex/anthropic/claude-sonnet-4-6@default.yaml
@@ -59,8 +59,8 @@ features:
 limits:
     context_window: 1000000
     max_input_tokens: 1000000
-    max_output_tokens: 64000
-    max_tokens: 64000
+    max_output_tokens: 128000
+    max_tokens: 128000
     tool_use_system_prompt_tokens: 497
 modalities:
     input:
@@ -73,7 +73,7 @@ mode: chat
 model: anthropic/claude-sonnet-4-6@default
 params:
     - key: max_tokens
-      maxValue: 64000
+      maxValue: 128000
     - key: temperature
       maxValue: 1
 provisioning: serverless

diff --git a/providers/google-vertex/deepseek-ai/deepseek-ocr-maas.yaml b/providers/google-vertex/deepseek-ai/deepseek-ocr-maas.yaml
@@ -1,3 +1,4 @@
+# CUE validation failed — see errors
 costs:
     - input_cost_per_token: 3e-7
       output_cost_per_token: 0.0000012
@@ -26,4 +27,4 @@ sources:
     - https://docs.cloud.google.com/vertex-ai/generative-ai/docs/maas/deepseek
 status: active
 supportedModes:
-    - chat
+    - ocr
diff --git a/providers/google-vertex/deepseek-ai/deepseek-v3.1-maas.yaml b/providers/google-vertex/deepseek-ai/deepseek-v3.1-maas.yaml
@@ -1,5 +1,6 @@
 costs:
-    - input_cost_per_token: 6e-7
+    - cache_read_input_token_cost: 6e-8
+      input_cost_per_token: 6e-7
       input_cost_per_token_batches: 3e-7
       output_cost_per_token: 0.0000017
       output_cost_per_token_batches: 8.5e-7

diff --git a/providers/google-vertex/deepseek-ai/deepseek-v3.2-maas.yaml b/providers/google-vertex/deepseek-ai/deepseek-v3.2-maas.yaml
@@ -7,7 +7,6 @@ costs:
       region: global
 features:
     - function_calling
-    - structured_output
     - system_messages
     - tool_choice
     - prompt_caching

diff --git a/providers/google-vertex/gemini-2.5-flash-image.yaml b/providers/google-vertex/gemini-2.5-flash-image.yaml
@@ -83,12 +83,13 @@ costs:
       output_cost_per_token: 0.0000025
       output_cost_per_token_batches: 0.00000125
       region: europe-central2
-deprecationDate: "2026-10-02"
 features:
     - system_messages
+    - structured_output
+    - prompt_caching
 limits:
-    context_window: 32768
-    max_input_tokens: 32768
+    context_window: 65536
+    max_input_tokens: 65536
     max_output_tokens: 32768
     max_tokens: 32768
 modalities: