* Fix missing model name in tokenize requests for llama.cpp (fixes #4962) The new router mode of llama.cpp allows switching models on the fly, which is already supported by SillyTavern. The call to the `/tokenize` endpoint did not contain the model name and therefore failed in router mode. This patch adds the `model` parameter, similar to the implementation for other backends. * fix: migrate vllm and aphrodite to new payload field --------- Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
@@ -954,8 +954,7 @@ function getTextgenAPITokenizationParams(str) {
|
||||
text: str,
|
||||
api_type: textgen_settings.type,
|
||||
url: getTextGenServer(),
|
||||
vllm_model: textgen_settings.vllm_model,
|
||||
aphrodite_model: textgen_settings.aphrodite_model,
|
||||
model: getTextGenModel(),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1077,8 +1077,7 @@ router.post('/remote/textgenerationwebui/encode', async function (request, respo
|
||||
}
|
||||
const text = String(request.body.text) || '';
|
||||
const baseUrl = String(request.body.url);
|
||||
const vllmModel = String(request.body.vllm_model) || '';
|
||||
const aphroditeModel = String(request.body.aphrodite_model) || '';
|
||||
const model = String(request.body.model) || '';
|
||||
|
||||
try {
|
||||
const args = {
|
||||
@@ -1102,15 +1101,15 @@ router.post('/remote/textgenerationwebui/encode', async function (request, respo
|
||||
break;
|
||||
case TEXTGEN_TYPES.LLAMACPP:
|
||||
url += '/tokenize';
|
||||
args.body = JSON.stringify({ 'content': text });
|
||||
args.body = JSON.stringify({ 'model': model, 'content': text });
|
||||
break;
|
||||
case TEXTGEN_TYPES.VLLM:
|
||||
url += '/tokenize';
|
||||
args.body = JSON.stringify({ 'model': vllmModel, 'prompt': text });
|
||||
args.body = JSON.stringify({ 'model': model, 'prompt': text });
|
||||
break;
|
||||
case TEXTGEN_TYPES.APHRODITE:
|
||||
url += '/v1/tokenize';
|
||||
args.body = JSON.stringify({ 'model': aphroditeModel, 'prompt': text });
|
||||
args.body = JSON.stringify({ 'model': model, 'prompt': text });
|
||||
break;
|
||||
default:
|
||||
url += '/v1/internal/encode';
|
||||
|
||||
Reference in New Issue
Block a user