diff --git a/public/index.html b/public/index.html
index 6969d30c9..056d8e7bb 100644
--- a/public/index.html
+++ b/public/index.html
@@ -2182,6 +2182,9 @@
Constrains the verbosity of the model's response.
+
+ On Opus 4.6 / Sonnet 4.6, a non-automatic Reasoning Effort takes precedence over Verbosity.
+
diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js
index 56a83eeca..30372627e 100644
--- a/src/endpoints/backends/chat-completions.js
+++ b/src/endpoints/backends/chat-completions.js
@@ -228,6 +228,7 @@ async function sendClaudeRequest(request, response) {
const isLimitedSampling = /^claude-(opus-4-1|sonnet-4-5|haiku-4-5|opus-4-5|opus-4-6|sonnet-4-6)/.test(request.body.model);
const useVerbosity = /^claude-(opus-4-5|opus-4-6|sonnet-4-6)/.test(request.body.model);
const noPrefillModel = /^claude-(opus-4-6|sonnet-4-6)/.test(request.body.model);
+ const isAdaptiveModel = /^claude-(opus-4-6|sonnet-4-6)/.test(request.body.model);
let fixThinkingPrefill = false;
// Add custom stop sequences
const stopSequences = [];
@@ -305,10 +306,18 @@ async function sendClaudeRequest(request, response) {
}
const reasoningEffort = request.body.reasoning_effort;
- const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
+ const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream, isAdaptiveModel);
- if (useThinking && Number.isInteger(budgetTokens)) {
- // No prefill when thinking
+ // Adaptive thinking: returns a string effort level (like Gemini 3)
+ if (useThinking && typeof budgetTokens === 'string') {
+ fixThinkingPrefill = true;
+ requestBody.thinking = { type: 'adaptive' };
+ requestBody.output_config ??= {};
+ requestBody.output_config.effort = budgetTokens;
+ // top_k is not allowed in adaptive mode
+ delete requestBody.top_k;
+ } else if (useThinking && Number.isInteger(budgetTokens)) {
+ // Traditional thinking: returns a numeric budget
fixThinkingPrefill = true;
const minThinkTokens = 1024;
if (requestBody.max_tokens <= minThinkTokens) {
@@ -332,8 +341,8 @@ async function sendClaudeRequest(request, response) {
convertedPrompt.messages[convertedPrompt.messages.length - 1].role = 'user';
}
- // Verbosity = 'effort' (same values as OpenAI)
- if (useVerbosity && request.body.verbosity) {
+ // Verbosity = 'effort' (same values as OpenAI) - only if not already set by adaptive thinking
+ if (useVerbosity && request.body.verbosity && !requestBody.output_config?.effort) {
betaHeaders.push('effort-2025-11-24');
requestBody.output_config ??= {};
requestBody.output_config.effort = request.body.verbosity;
diff --git a/src/prompt-converters.js b/src/prompt-converters.js
index f151a6b30..5941569c7 100644
--- a/src/prompt-converters.js
+++ b/src/prompt-converters.js
@@ -1110,12 +1110,33 @@ export function cachingSystemPromptForOpenRouter(messages, ttl = undefined) {
/**
* Calculate the Claude budget tokens for a given reasoning effort.
+ * Returns a string effort level for adaptive-thinking models (currently Opus 4.6 / Sonnet 4.6), a numeric token budget for traditional thinking, or null for auto.
* @param {number} maxTokens Maximum tokens
* @param {string} reasoningEffort Reasoning effort
* @param {boolean} stream If streaming is enabled
- * @returns {number?} Budget tokens
+ * @param {boolean} isAdaptiveModel If the model uses adaptive thinking (currently Opus 4.6 / Sonnet 4.6)
+ * @returns {number|string|null} Budget tokens, effort string, or null
*/
-export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream) {
+export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream, isAdaptiveModel) {
+ // Adaptive thinking for Opus 4.6+: return effort string (like Gemini 3)
+ if (isAdaptiveModel) {
+ switch (reasoningEffort) {
+ case REASONING_EFFORT.auto:
+ return null;
+ case REASONING_EFFORT.min:
+ return 'low';
+ case REASONING_EFFORT.low:
+ return 'low';
+ case REASONING_EFFORT.medium:
+ return 'medium';
+ case REASONING_EFFORT.high:
+ return 'high';
+ case REASONING_EFFORT.max:
+ return 'max';
+ }
+ return null;
+ }
+
let budgetTokens = 0;
switch (reasoningEffort) {