"N" support for llama.cpp (#4869)

* llama.cpp now supports the 'n' parameter (multiple swipes per generation)

* Fix response parsing

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
Beinsezii
2025-12-08 12:50:34 -08:00
committed by GitHub
parent 9aff57c9c4
commit b5e20f2ff9
4 changed files with 43 additions and 14 deletions
+1 -1
View File
@@ -1249,7 +1249,7 @@
<div class="fa-solid fa-circle-info opacity50p" title="Customize displayed samplers or add custom samplers." data-i18n="[title]Customize displayed samplers or add custom samplers."></div>
</small>
</div>
<div data-tg-type="mancer, vllm, aphrodite, tabby, infermaticai" data-tg-samplers="n" class="flex-container flexFlowColumn alignitemscenter flexBasis100p flexGrow flexShrink gap0">
<div data-tg-type="mancer, vllm, aphrodite, tabby, infermaticai, llamacpp" data-tg-samplers="n" class="flex-container flexFlowColumn alignitemscenter flexBasis100p flexGrow flexShrink gap0">
<small data-i18n="Multiple swipes per generation">Multiple swipes per generation</small>
<input type="number" id="n_textgenerationwebui" class="text_pole textAlignCenter" min="1" value="1" step="1" />
</div>
+26 -10
View File
@@ -5872,7 +5872,7 @@ export function extractMessageFromData(data, activeApi = null) {
case 'koboldhorde':
return data.text;
case 'textgenerationwebui':
return data.choices?.[0]?.text ?? data.choices?.[0]?.message?.content ?? data.content ?? data.response ?? '';
return data.choices?.[0]?.text ?? data.choices?.[0]?.message?.content ?? data.content ?? data.response ?? data[0]?.content ?? '';
case 'novel':
return data.output;
case 'openai':
@@ -5959,6 +5959,22 @@ function extractMultiSwipes(data, type) {
return swipes;
}
if (main_api === 'textgenerationwebui' && textgen_settings.type === textgen_types.LLAMACPP) {
if (!Array.isArray(data)) {
return swipes;
}
const multiSwipeCount = data.length - 1;
if (multiSwipeCount <= 0) {
return swipes;
}
for (let i = 1; i < data.length; i++) {
const text = data?.[i]?.content ?? '';
swipes.push(text);
}
}
if (main_api === 'openai' || (main_api === 'textgenerationwebui' && [textgen_types.MANCER, textgen_types.VLLM, textgen_types.APHRODITE, textgen_types.TABBY, textgen_types.INFERMATICAI].includes(textgen_settings.type))) {
if (!Array.isArray(data.choices)) {
return swipes;
@@ -5972,18 +5988,18 @@ function extractMultiSwipes(data, type) {
for (let i = 1; i < data.choices.length; i++) {
const text = data?.choices[i]?.message?.content ?? data?.choices[i]?.text ?? '';
const cleanedText = cleanUpMessage({
getMessage: text,
isImpersonate: false,
isContinue: false,
displayIncompleteSentences: false,
});
swipes.push(cleanedText);
swipes.push(text);
}
}
return swipes;
const cleanedSwipes = swipes.map(text => cleanUpMessage({
getMessage: text,
isImpersonate: false,
isContinue: false,
displayIncompleteSentences: false,
}));
return cleanedSwipes;
}
/**
+11 -2
View File
@@ -1,6 +1,9 @@
import { power_user } from './power-user.js';
import { delay } from './utils.js';
// Sentinel used as the `cause` of "Not a primary swipe" errors so they can be told apart from real parsing errors
const NOT_PRIMARY = Symbol('not_primary_swipe');
/**
* A stream which handles Server-Sent Events from a binary ReadableStream like you get from the fetch API.
*/
@@ -198,6 +201,10 @@ async function* parseStreamData(json) {
}
// llama.cpp?
else if (typeof json.content === 'string' && json.content.length > 0 && json.object !== 'chat.completion.chunk') {
const isNotPrimary = json?.index > 0;
if (isNotPrimary) {
throw new Error('Not a primary swipe', { cause: NOT_PRIMARY });
}
for (let i = 0; i < json.content.length; i++) {
const str = json.content[i];
yield {
@@ -211,7 +218,7 @@ async function* parseStreamData(json) {
else if (Array.isArray(json.choices)) {
const isNotPrimary = json?.choices?.[0]?.index > 0;
if (isNotPrimary || json.choices.length === 0) {
throw new Error('Not a primary swipe');
throw new Error('Not a primary swipe', { cause: NOT_PRIMARY });
}
if (typeof json.choices[0].text === 'string' && json.choices[0].text.length > 0) {
@@ -357,7 +364,9 @@ export class SmoothEventSourceStream extends EventSourceStream {
lastStr = parsed.chunk;
}
} catch (error) {
console.debug('Smooth Streaming parsing error', error);
if (error instanceof Error && error.cause !== NOT_PRIMARY) {
console.debug('Smooth Streaming parsing error', error);
}
controller.enqueue(event);
}
},
+5 -1
View File
@@ -1284,6 +1284,10 @@ export async function generateTextGenWithStreaming(generate_data, signal) {
if (data?.choices?.[0]?.index > 0) {
const swipeIndex = data.choices[0].index - 1;
swipes[swipeIndex] = (swipes[swipeIndex] || '') + data.choices[0].text;
} else if (data?.index > 0) {
// llama.cpp streaming swipe
const swipeIndex = data.index - 1;
swipes[swipeIndex] = (swipes[swipeIndex] || '') + data.content;
} else {
const newText = data?.choices?.[0]?.text || data?.content || '';
text += newText;
@@ -1724,7 +1728,7 @@ export function createTextGenGenerationData(settings, model, finalPrompt = null,
params.dry_sequence_breakers = params.parseSequenceBreakers();
}
if (settings.type === TABBY) {
if (settings.type === TABBY || settings.type === LLAMACPP) {
params.n = canMultiSwipe ? settings.n : 1;
}