Chutes integration (#4844)

* Chutes integration

* Fix eslint

* Fix key saving

* Fix logo coloration

* Fix tool checks

* Unhide image inlining controls

* Fix order of options

* Fix type use in TTS extension script

* Add Chutes as a vector storage source

* Change log levels to debug

* Fix streamed reasoning parsing

* Skip remote models update

* TTS: Fix API key highlight

* Sort image models A-Z

* TTS: Fixes

* Remove unused SD endpoint

* Skip setting context size if models list is not yet loaded

* Remove Chutes quota / balance

* Fix: streamed tool calling

* Hide reasoning effort control

* Add image request debug log

* Fix: scroll down on media load in extensions

* Unhide some samplers

* Bring back reasoning effort

* This code will never execute

* Reformat else if cases

* Add stop strings to request

* Remove conditional from reasoning_effort body param

* Preserve original pricing fields

* Unhide logit bias setting

* Pass repetition penalty and logit bias to backend

* Swap llama tokenizer for llama3

* Pass min_p, remove supported_sampling_parameters checks

* Enable logprobs

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
Ben
2025-11-30 17:17:49 -05:00
committed by GitHub
parent dc06abb364
commit 55a07d445d
28 changed files with 1049 additions and 33 deletions
+21
View File
@@ -699,6 +699,26 @@ export function getTokenizerModel() {
}
}
if (oai_settings.chat_completion_source == chat_completion_sources.CHUTES && oai_settings.chutes_model) {
const model = oai_settings.chutes_model.toLowerCase();
if (model.includes('deepseek') || model.includes('mai-ds')) {
return deepseekTokenizer;
} else if (model.includes('qwen') || model.includes('qwq') || model.includes('tongyi') || model.includes('kimi')) {
return qwen2Tokenizer;
} else if (model.includes('llama') || model.includes('longcat') || model.includes('hermes')) {
return llama3Tokenizer;
} else if (model.includes('gemma')) {
return gemmaTokenizer;
} else if (model.includes('nemo')) {
return nemoTokenizer;
} else if (model.includes('mistral')) {
return mistralTokenizer;
} else if (model.includes('gpt-oss')) {
return gpt4oTokenizer;
}
}
if (oai_settings.chat_completion_source == chat_completion_sources.COHERE) {
if (oai_settings.cohere_model.includes('command-a')) {
return commandATokenizer;
@@ -1206,3 +1226,4 @@ export async function initTokenizers() {
await loadTokenCache();
registerDebugFunction('resetTokenCache', 'Reset token cache', 'Purges the calculated token counts. Use this if you want to force a full re-tokenization of all chats or suspect the token counts are wrong.', resetTokenCache);
}