Chutes integration (#4844)
* Chutes integration
* Fix eslint
* Fix key saving
* Fix logo coloration
* Fix tool checks
* Unhide image inlining controls
* Fix order of options
* Fix type use in TTS extension script
* Add Chutes as a vector storage source
* Change log levels to debug
* Fix streamed reasoning parsing
* Skip remote models update
* TTS: Fix API key highlight
* Sort image models A-Z
* TTS: Fixes
* Remove unused SD endpoint
* Skip setting context size if models list is not yet loaded
* Remove Chutes quota / balance
* Fix: streamed tool calling
* Hide reasoning effort control
* Add image request debug log
* Fix: scroll down on media load in extensions
* Unhide some samplers
* Bring back reasoning effort
* This code will never execute
* Reformat else if cases
* Add stop strings to request
* Remove conditional from reasoning_effort body param
* Preserve original pricing fields
* Unhide logit bias setting
* Pass repetition penalty and logit bias to backend
* Swap llama tokenizer for llama3
* Pass min_p, remove supported_sampling_parameters checks
* Enable logprobs

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
@@ -699,6 +699,26 @@ export function getTokenizerModel() {
|
||||
}
|
||||
}
|
||||
|
||||
if (oai_settings.chat_completion_source == chat_completion_sources.CHUTES && oai_settings.chutes_model) {
|
||||
const model = oai_settings.chutes_model.toLowerCase();
|
||||
|
||||
if (model.includes('deepseek') || model.includes('mai-ds')) {
|
||||
return deepseekTokenizer;
|
||||
} else if (model.includes('qwen') || model.includes('qwq') || model.includes('tongyi') || model.includes('kimi')) {
|
||||
return qwen2Tokenizer;
|
||||
} else if (model.includes('llama') || model.includes('longcat') || model.includes('hermes')) {
|
||||
return llama3Tokenizer;
|
||||
} else if (model.includes('gemma')) {
|
||||
return gemmaTokenizer;
|
||||
} else if (model.includes('nemo')) {
|
||||
return nemoTokenizer;
|
||||
} else if (model.includes('mistral')) {
|
||||
return mistralTokenizer;
|
||||
} else if (model.includes('gpt-oss')) {
|
||||
return gpt4oTokenizer;
|
||||
}
|
||||
}
|
||||
|
||||
if (oai_settings.chat_completion_source == chat_completion_sources.COHERE) {
|
||||
if (oai_settings.cohere_model.includes('command-a')) {
|
||||
return commandATokenizer;
|
||||
@@ -1206,3 +1226,4 @@ export async function initTokenizers() {
|
||||
await loadTokenCache();
|
||||
registerDebugFunction('resetTokenCache', 'Reset token cache', 'Purges the calculated token counts. Use this if you want to force a full re-tokenization of all chats or suspect the token counts are wrong.', resetTokenCache);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user