"N" support for llama.cpp (#4869)

* llama.cpp supports 'n' now
* Fix response parsing

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
2025-12-08 12:50:34 -08:00
parent 9aff57c9c4
commit b5e20f2ff9
4 changed files with 43 additions and 14 deletions
@@ -1249,7 +1249,7 @@
                                        <div class="fa-solid fa-circle-info opacity50p" title="Customize displayed samplers or add custom samplers." data-i18n="[title]Customize displayed samplers or add custom samplers."></div>
                                    </small>
                                </div>
-                                <div data-tg-type="mancer, vllm, aphrodite, tabby, infermaticai" data-tg-samplers="n" class="flex-container flexFlowColumn alignitemscenter flexBasis100p flexGrow flexShrink gap0">
+                                <div data-tg-type="mancer, vllm, aphrodite, tabby, infermaticai, llamacpp" data-tg-samplers="n" class="flex-container flexFlowColumn alignitemscenter flexBasis100p flexGrow flexShrink gap0">
                                    <small data-i18n="Multiple swipes per generation">Multiple swipes per generation</small>
                                    <input type="number" id="n_textgenerationwebui" class="text_pole textAlignCenter" min="1" value="1" step="1" />
                                </div>
@@ -5872,7 +5872,7 @@ export function extractMessageFromData(data, activeApi = null) {
            case 'koboldhorde':
                return data.text;
            case 'textgenerationwebui':
-                return data.choices?.[0]?.text ?? data.choices?.[0]?.message?.content ?? data.content ?? data.response ?? '';
+                return data.choices?.[0]?.text ?? data.choices?.[0]?.message?.content ?? data.content ?? data.response ?? data[0]?.content ?? '';
            case 'novel':
                return data.output;
            case 'openai':
@@ -5959,6 +5959,22 @@ function extractMultiSwipes(data, type) {
        return swipes;
    }

+    if (main_api === 'textgenerationwebui' && textgen_settings.type === textgen_types.LLAMACPP) {
+        if (!Array.isArray(data)) {
+            return swipes;
+        }
+
+        const multiSwipeCount = data.length - 1;
+        if (multiSwipeCount <= 0) {
+            return swipes;
+        }
+
+        for (let i = 1; i < data.length; i++) {
+            const text = data?.[i]?.content ?? '';
+            swipes.push(text);
+        }
+    }
+
    if (main_api === 'openai' || (main_api === 'textgenerationwebui' && [textgen_types.MANCER, textgen_types.VLLM, textgen_types.APHRODITE, textgen_types.TABBY, textgen_types.INFERMATICAI].includes(textgen_settings.type))) {
        if (!Array.isArray(data.choices)) {
            return swipes;
@@ -5972,18 +5988,18 @@ function extractMultiSwipes(data, type) {

        for (let i = 1; i < data.choices.length; i++) {
            const text = data?.choices[i]?.message?.content ?? data?.choices[i]?.text ?? '';
-            const cleanedText = cleanUpMessage({
-                getMessage: text,
-                isImpersonate: false,
-                isContinue: false,
-                displayIncompleteSentences: false,
-            });
-
-            swipes.push(cleanedText);
+            swipes.push(text);
        }
    }

-    return swipes;
+    const cleanedSwipes = swipes.map(text => cleanUpMessage({
+        getMessage: text,
+        isImpersonate: false,
+        isContinue: false,
+        displayIncompleteSentences: false,
+    }));
+
+    return cleanedSwipes;
 }

 /**
@@ -1,6 +1,9 @@
 import { power_user } from './power-user.js';
 import { delay } from './utils.js';

+// Symbol attached as the `cause` of the 'Not a primary swipe' error so handlers can recognize it
+const NOT_PRIMARY = Symbol('not_primary_swipe');
+
 /**
 * A stream which handles Server-Sent Events from a binary ReadableStream like you get from the fetch API.
 */
@@ -198,6 +201,10 @@ async function* parseStreamData(json) {
    }
    // llama.cpp?
    else if (typeof json.content === 'string' && json.content.length > 0 && json.object !== 'chat.completion.chunk') {
+        const isNotPrimary = json?.index > 0;
+        if (isNotPrimary) {
+            throw new Error('Not a primary swipe', { cause: NOT_PRIMARY });
+        }
        for (let i = 0; i < json.content.length; i++) {
            const str = json.content[i];
            yield {
@@ -211,7 +218,7 @@ async function* parseStreamData(json) {
    else if (Array.isArray(json.choices)) {
        const isNotPrimary = json?.choices?.[0]?.index > 0;
        if (isNotPrimary || json.choices.length === 0) {
-            throw new Error('Not a primary swipe');
+            throw new Error('Not a primary swipe', { cause: NOT_PRIMARY });
        }

        if (typeof json.choices[0].text === 'string' && json.choices[0].text.length > 0) {
@@ -357,7 +364,9 @@ export class SmoothEventSourceStream extends EventSourceStream {
                        lastStr = parsed.chunk;
                    }
                } catch (error) {
-                    console.debug('Smooth Streaming parsing error', error);
+                    if (error instanceof Error && error.cause !== NOT_PRIMARY) {
+                        console.debug('Smooth Streaming parsing error', error);
+                    }
                    controller.enqueue(event);
                }
            },
@@ -1284,6 +1284,10 @@ export async function generateTextGenWithStreaming(generate_data, signal) {
            if (data?.choices?.[0]?.index > 0) {
                const swipeIndex = data.choices[0].index - 1;
                swipes[swipeIndex] = (swipes[swipeIndex] || '') + data.choices[0].text;
+            } else if (data?.index > 0) {
+                // llama.cpp streaming swipe
+                const swipeIndex = data.index - 1;
+                swipes[swipeIndex] = (swipes[swipeIndex] || '') + data.content;
            } else {
                const newText = data?.choices?.[0]?.text || data?.content || '';
                text += newText;
@@ -1724,7 +1728,7 @@ export function createTextGenGenerationData(settings, model, finalPrompt = null,
        params.dry_sequence_breakers = params.parseSequenceBreakers();
    }

-    if (settings.type === TABBY) {
+    if (settings.type === TABBY || settings.type === LLAMACPP) {
        params.n = canMultiSwipe ? settings.n : 1;
    }