diff --git a/public/index.html b/public/index.html index 6969d30c9..056d8e7bb 100644 --- a/public/index.html +++ b/public/index.html @@ -2182,6 +2182,9 @@
Constrains the verbosity of the model's response.
+ + On Opus 4.6 / Sonnet 4.6, a non-automatic Reasoning Effort takes precedence over Verbosity. +
diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index 56a83eeca..30372627e 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -228,6 +228,7 @@ async function sendClaudeRequest(request, response) { const isLimitedSampling = /^claude-(opus-4-1|sonnet-4-5|haiku-4-5|opus-4-5|opus-4-6|sonnet-4-6)/.test(request.body.model); const useVerbosity = /^claude-(opus-4-5|opus-4-6|sonnet-4-6)/.test(request.body.model); const noPrefillModel = /^claude-(opus-4-6|sonnet-4-6)/.test(request.body.model); + const isAdaptiveModel = /^claude-(opus-4-6|sonnet-4-6)/.test(request.body.model); let fixThinkingPrefill = false; // Add custom stop sequences const stopSequences = []; @@ -305,10 +306,18 @@ async function sendClaudeRequest(request, response) { } const reasoningEffort = request.body.reasoning_effort; - const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream); + const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream, isAdaptiveModel); - if (useThinking && Number.isInteger(budgetTokens)) { - // No prefill when thinking + // Adaptive thinking: returns a string effort level (like Gemini 3) + if (useThinking && typeof budgetTokens === 'string') { + fixThinkingPrefill = true; + requestBody.thinking = { type: 'adaptive' }; + requestBody.output_config ??= {}; + requestBody.output_config.effort = budgetTokens; + // top_k is not allowed in adaptive mode + delete requestBody.top_k; + } else if (useThinking && Number.isInteger(budgetTokens)) { + // Traditional thinking: returns a numeric budget fixThinkingPrefill = true; const minThinkTokens = 1024; if (requestBody.max_tokens <= minThinkTokens) { @@ -332,8 +341,8 @@ async function sendClaudeRequest(request, response) { convertedPrompt.messages[convertedPrompt.messages.length - 1].role = 'user'; } - // Verbosity = 'effort' (same values as OpenAI) - if (useVerbosity && request.body.verbosity) { + // Verbosity = 'effort' (same values as OpenAI) - only if not already set by adaptive thinking + if (useVerbosity && request.body.verbosity && !requestBody.output_config?.effort) { betaHeaders.push('effort-2025-11-24'); requestBody.output_config ??= {}; requestBody.output_config.effort = request.body.verbosity; diff --git a/src/prompt-converters.js b/src/prompt-converters.js index f151a6b30..5941569c7 100644 --- a/src/prompt-converters.js +++ b/src/prompt-converters.js @@ -1110,12 +1110,33 @@ export function cachingSystemPromptForOpenRouter(messages, ttl = undefined) { /** * Calculate the Claude budget tokens for a given reasoning effort. + * Returns a string effort level for adaptive thinking (Opus 4.6+), a number for traditional thinking, or null for auto. * @param {number} maxTokens Maximum tokens * @param {string} reasoningEffort Reasoning effort * @param {boolean} stream If streaming is enabled - * @returns {number?} Budget tokens + * @param {boolean} isAdaptiveModel If the model supports adaptive thinking (Opus 4.6+) + * @returns {number|string|null} Budget tokens, effort string, or null */ -export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream) { +export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream, isAdaptiveModel) { + // Adaptive thinking for Opus 4.6+: return effort string (like Gemini 3) + if (isAdaptiveModel) { + switch (reasoningEffort) { + case REASONING_EFFORT.auto: + return null; + case REASONING_EFFORT.min: + return 'low'; + case REASONING_EFFORT.low: + return 'low'; + case REASONING_EFFORT.medium: + return 'medium'; + case REASONING_EFFORT.high: + return 'high'; + case REASONING_EFFORT.max: + return 'max'; + } + return null; + } + let budgetTokens = 0; switch (reasoningEffort) {