Add model selection support for llama.cpp router mode (#4910)

* Add model selection support for llama.cpp router mode

- Add llamacpp_model setting to textgen-settings.js
- Implement loadLlamaCppModels() function to fetch and populate models
- Add onLlamaCppModelSelect() handler for model selection
- Update status check to load llama.cpp models when connecting
- Update getTextGenModel() to return selected llama.cpp model
- Add model dropdown to HTML UI in llama.cpp section
- Initialize event handlers and Select2 for better UX
- Add llamacpp_model to preset manager for save/load support
- Add llamacpp_model to slash commands support

This implements model selection for llama.cpp router mode, allowing users to select from multiple models without restarting the server. Follows the same pattern as Ollama, Tabby, and vLLM implementations.

* Correct spelling

* Fix clear selection position

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
2025-12-22 04:51:44 +08:00
parent 39cc1b9039
commit 7f98e3e213
6 changed files with 66 additions and 1 deletions
@@ -16,6 +16,11 @@
    user-select: none;
 }

+.select2-container .select2-selection.select2-selection--single.select2-selection--clearable .select2-selection__clear {
+    top: 0; /* align the clear control with the top edge of the single-selection box */
+    right: 25px; /* shift it left of the right edge; presumably leaves room for the dropdown arrow, confirm against the theme */
+}
+
 .select2-container .select2-selection .select2-selection__clear {
    color: var(--SmartThemeBodyColor);
    font-size: 20px;
@@ -2679,6 +2679,16 @@
                                    <small data-i18n="Example: http://127.0.0.1:8080">Example: http://127.0.0.1:8080</small>
                                    <input id="llamacpp_api_url_text" class="text_pole wide100p" value="" autocomplete="off" data-server-history="llamacpp">
                                </div>
+                                <div>
+                                    <h4>
+                                        <span data-i18n="llama.cpp Model">llama.cpp Model</span>
+                                    </h4>
+                                    <select id="llamacpp_model">
+                                        <option value="" data-i18n="-- Connect to the API --">
+                                            -- Connect to the API --
+                                        </option>
+                                    </select>
+                                </div>
                            </div>
                            <div data-tg-type="ollama">
                                <div class="flex-container flexFlowColumn">
@@ -701,6 +701,7 @@ class PresetManager {
            'ollama_model',
            'vllm_model',
            'aphrodite_model',
+            'llamacpp_model',
            'server_urls',
            'type',
            'custom_model',
@@ -4834,6 +4834,7 @@ function getModelOptions(quiet) {
        { id: 'aphrodite_model', api: 'textgenerationwebui', type: textgen_types.APHRODITE },
        { id: 'ollama_model', api: 'textgenerationwebui', type: textgen_types.OLLAMA },
        { id: 'tabby_model', api: 'textgenerationwebui', type: textgen_types.TABBY },
+        { id: 'llamacpp_model', api: 'textgenerationwebui', type: textgen_types.LLAMACPP },
        { id: 'featherless_model', api: 'textgenerationwebui', type: textgen_types.FEATHERLESS },
        { id: 'model_openai_select', api: 'openai', type: chat_completion_sources.OPENAI },
        { id: 'model_claude_select', api: 'openai', type: chat_completion_sources.CLAUDE },
@@ -17,6 +17,7 @@ let vllmModels = [];
 let aphroditeModels = [];
 let featherlessModels = [];
 let tabbyModels = [];
+let llamacppModels = [];
 export let openRouterModels = [];

 /**
@@ -139,6 +140,30 @@ export async function loadTabbyModels(data) {
    }
 }

+export async function loadLlamaCppModels(data) {
+    if (!Array.isArray(data)) {
+        console.error('Invalid llama.cpp models data', data);
+        return;
+    }
+
+    llamacppModels = data;
+    llamacppModels.sort((a, b) => a.id.localeCompare(b.id));
+    llamacppModels.unshift({ id: '' });
+
+    if (!llamacppModels.find(x => x.id === textgen_settings.llamacpp_model)) {
+        textgen_settings.llamacpp_model = llamacppModels[0]?.id || '';
+    }
+
+    $('#llamacpp_model').empty();
+    for (const model of llamacppModels) {
+        const option = document.createElement('option');
+        option.value = model.id;
+        option.text = model.id;
+        option.selected = model.id === textgen_settings.llamacpp_model;
+        $('#llamacpp_model').append(option);
+    }
+}
+
 export async function loadTogetherAIModels(data) {
    if (!Array.isArray(data)) {
        console.error('Invalid Together AI models data', data);
@@ -637,6 +662,12 @@ function onTabbyModelSelect() {
    $('#api_button_textgenerationwebui').trigger('click');
 }

+function onLlamaCppModelSelect() {
+    const modelId = String($('#llamacpp_model').val());
+    textgen_settings.llamacpp_model = modelId;
+    $('#api_button_textgenerationwebui').trigger('click');
+}
+
 function onOpenRouterModelSelect() {
    const modelId = String($('#openrouter_model').val());
    textgen_settings.openrouter_model = modelId;
@@ -952,6 +983,7 @@ export function initTextGenModels() {
    $('#aphrodite_model').on('change', onAphroditeModelSelect);
    $('#tabby_download_model').on('click', downloadTabbyModel);
    $('#tabby_model').on('change', onTabbyModelSelect);
+    $('#llamacpp_model').on('change', onLlamaCppModelSelect);
    $('#featherless_model').on('change', () => onFeatherlessModelSelect(String($('#featherless_model').val())));

    const providersSelect = $('.openrouter_providers');
@@ -990,6 +1022,13 @@ export function initTextGenModels() {
            width: '100%',
            allowClear: true,
        });
+        $('#llamacpp_model').select2({
+            placeholder: t`[Currently loaded]`,
+            searchInputPlaceholder: t`Search models...`,
+            searchInputCssClass: 'text_pole',
+            width: '100%',
+            allowClear: true,
+        });
        $('#model_infermaticai_select').select2({
            placeholder: t`Select a model`,
            searchInputPlaceholder: t`Search models...`,
@@ -23,7 +23,7 @@ import { power_user, registerDebugFunction } from './power-user.js';
 import { getActiveManualApiSamplers, loadApiSelectedSamplers, isSamplerManualPriorityEnabled } from './samplerSelect.js';
 import { SECRET_KEYS, writeSecret } from './secrets.js';
 import { getEventSourceStream } from './sse-stream.js';
-import { getCurrentDreamGenModelTokenizer, getCurrentOpenRouterModelTokenizer, loadAphroditeModels, loadDreamGenModels, loadFeatherlessModels, loadGenericModels, loadInfermaticAIModels, loadMancerModels, loadOllamaModels, loadOpenRouterModels, loadTabbyModels, loadTogetherAIModels, loadVllmModels } from './textgen-models.js';
+import { getCurrentDreamGenModelTokenizer, getCurrentOpenRouterModelTokenizer, loadAphroditeModels, loadDreamGenModels, loadFeatherlessModels, loadGenericModels, loadInfermaticAIModels, loadLlamaCppModels, loadMancerModels, loadOllamaModels, loadOpenRouterModels, loadTabbyModels, loadTogetherAIModels, loadVllmModels } from './textgen-models.js';
 import { ENCODE_TOKENIZERS, TEXTGEN_TOKENIZERS, TOKENIZER_SUPPORTED_KEY, getTextTokens, tokenizers } from './tokenizers.js';
 import { AbortReason } from './util/AbortReason.js';
 import { getSortableDelay, onlyUnique, arraysEqual, isObject } from './utils.js';
@@ -214,6 +214,7 @@ export const textgenerationwebui_settings = {
    aphrodite_model: '',
    dreamgen_model: 'lucid-v1-extra-large/text',
    tabby_model: '',
+    llamacpp_model: '',
    sampler_order: KOBOLDCPP_ORDER,
    logit_bias: [],
    n: 1,
@@ -701,6 +702,9 @@ async function getStatusTextgen() {
        } else if (textgenerationwebui_settings.type === textgen_types.TABBY) {
            loadTabbyModels(data?.data);
            setOnlineStatus(textgenerationwebui_settings.tabby_model || data?.result);
+        } else if (textgenerationwebui_settings.type === textgen_types.LLAMACPP) {
+            loadLlamaCppModels(data?.data);
+            setOnlineStatus(textgenerationwebui_settings.llamacpp_model || data?.result || t`Connected`);
        } else if (textgenerationwebui_settings.type === textgen_types.GENERIC) {
            loadGenericModels(data?.data);
            setOnlineStatus(textgenerationwebui_settings.generic_model || data?.result || t`Connected`);
@@ -1462,6 +1466,11 @@ export function getTextGenModel(settings = null) {
                return settings.tabby_model;
            }
            break;
+        case LLAMACPP:
+            if (settings.llamacpp_model) {
+                return settings.llamacpp_model;
+            }
+            break;
        default:
            return undefined;
    }