From 7f98e3e21305ea41a917a2f4d10bc0d69a78b7c5 Mon Sep 17 00:00:00 2001
From: my-alt <250851737+my-alt-acct@users.noreply.github.com>
Date: Mon, 22 Dec 2025 04:51:44 +0800
Subject: [PATCH] Add model selection support for llama.cpp router mode (#4910)

* Add model selection support for llama.cpp router mode

- Add llamacpp_model setting to textgen-settings.js
- Implement loadLlamaCppModels() function to fetch and populate models
- Add onLlamaCppModelSelect() handler for model selection
- Update status check to load llama.cpp models when connecting
- Update getTextGenModel() to return selected llama.cpp model
- Add model dropdown to HTML UI in llama.cpp section
- Initialize event handlers and Select2 for better UX
- Add llamacpp_model to preset manager for save/load support
- Add llamacpp_model to slash commands support

This implements model selection for llama.cpp router mode, allowing
users to select from multiple models without restarting the server.
Follows the same pattern as Ollama, Tabby, and vLLM implementations.

* Correct spelling

* Fix clear selection position

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
---
 public/css/select2-overrides.css   |  5 ++++
 public/index.html                  | 10 ++++++++
 public/scripts/preset-manager.js   |  1 +
 public/scripts/slash-commands.js   |  1 +
 public/scripts/textgen-models.js   | 39 ++++++++++++++++++++++++++++++
 public/scripts/textgen-settings.js | 11 ++++++++-
 6 files changed, 66 insertions(+), 1 deletion(-)
diff --git a/public/css/select2-overrides.css b/public/css/select2-overrides.css
index 8b11fbac6..ab202d761 100644
--- a/public/css/select2-overrides.css
+++ b/public/css/select2-overrides.css
@@ -16,6 +16,11 @@
     user-select: none;
 }
 
+.select2-container .select2-selection.select2-selection--single.select2-selection--clearable .select2-selection__clear { /* clear control of clearable single-select boxes */
+    top: 0; /* NOTE(review): offsets only apply if this element is positioned — confirm against the base select2 styles */
+    right: 25px; /* presumably leaves room for the dropdown arrow — confirm visually */
+}
+
 .select2-container .select2-selection .select2-selection__clear {
     color: var(--SmartThemeBodyColor);
     font-size: 20px;
diff --git a/public/index.html b/public/index.html
index c832b0cc6..57246df3e 100644
--- a/public/index.html
+++ b/public/index.html
@@ -2679,6 +2679,16 @@
                                     <small data-i18n="Example: http://127.0.0.1:8080">Example: http://127.0.0.1:8080</small>
                                     <input id="llamacpp_api_url_text" class="text_pole wide100p" value="" autocomplete="off" data-server-history="llamacpp">
                                 </div>
+                                <div>
+                                    <h4>
+                                        <span data-i18n="llama.cpp Model">llama.cpp Model</span>
+                                    </h4>
+                                    <select id="llamacpp_model">
+                                        <option value="" data-i18n="-- Connect to the API --">
+                                            -- Connect to the API --
+                                        </option>
+                                    </select>
+                                </div>
                             </div>
                             <div data-tg-type="ollama">
                                 <div class="flex-container flexFlowColumn">
diff --git a/public/scripts/preset-manager.js b/public/scripts/preset-manager.js
index 3608e40d3..8769ab094 100644
--- a/public/scripts/preset-manager.js
+++ b/public/scripts/preset-manager.js
@@ -701,6 +701,7 @@ class PresetManager {
             'ollama_model',
             'vllm_model',
             'aphrodite_model',
+            'llamacpp_model',
             'server_urls',
             'type',
             'custom_model',
diff --git a/public/scripts/slash-commands.js b/public/scripts/slash-commands.js
index e038b19cc..3514e717a 100644
--- a/public/scripts/slash-commands.js
+++ b/public/scripts/slash-commands.js
@@ -4834,6 +4834,7 @@ function getModelOptions(quiet) {
         { id: 'aphrodite_model', api: 'textgenerationwebui', type: textgen_types.APHRODITE },
         { id: 'ollama_model', api: 'textgenerationwebui', type: textgen_types.OLLAMA },
         { id: 'tabby_model', api: 'textgenerationwebui', type: textgen_types.TABBY },
+        { id: 'llamacpp_model', api: 'textgenerationwebui', type: textgen_types.LLAMACPP },
         { id: 'featherless_model', api: 'textgenerationwebui', type: textgen_types.FEATHERLESS },
         { id: 'model_openai_select', api: 'openai', type: chat_completion_sources.OPENAI },
         { id: 'model_claude_select', api: 'openai', type: chat_completion_sources.CLAUDE },
diff --git a/public/scripts/textgen-models.js b/public/scripts/textgen-models.js
index fe16efa62..0d1358870 100644
--- a/public/scripts/textgen-models.js
+++ b/public/scripts/textgen-models.js
@@ -17,6 +17,7 @@ let vllmModels = [];
 let aphroditeModels = [];
 let featherlessModels = [];
 let tabbyModels = [];
+let llamacppModels = [];
 export let openRouterModels = [];
 
 /**
@@ -139,6 +140,30 @@ export async function loadTabbyModels(data) {
     }
 }
 
+/** Rebuilds the llama.cpp model list and the #llamacpp_model dropdown from `data`, an array of `{ id }` entries. */
+export async function loadLlamaCppModels(data) {
+    if (!Array.isArray(data)) {
+        console.error('Invalid llama.cpp models data', data);
+        return;
+    }
+
+    // Sort a copy so the caller's array is neither reordered nor extended; an empty-id entry leads the list.
+    llamacppModels = [{ id: '' }, ...[...data].sort((a, b) => a.id.localeCompare(b.id))];
+
+    if (!llamacppModels.some(x => x.id === textgen_settings.llamacpp_model)) {
+        textgen_settings.llamacpp_model = '';
+    }
+
+    const select = $('#llamacpp_model').empty();
+    for (const { id } of llamacppModels) {
+        const option = document.createElement('option');
+        option.value = id;
+        option.text = id;
+        option.selected = id === textgen_settings.llamacpp_model;
+        select.append(option);
+    }
+}
+
 export async function loadTogetherAIModels(data) {
     if (!Array.isArray(data)) {
         console.error('Invalid Together AI models data', data);
@@ -637,6 +662,12 @@ function onTabbyModelSelect() {
     $('#api_button_textgenerationwebui').trigger('click');
 }
 
+// Stores the dropdown value ('' when nothing is selected, never the string "null"), then clicks the API button.
+function onLlamaCppModelSelect() {
+    textgen_settings.llamacpp_model = String($('#llamacpp_model').val() ?? '');
+    $('#api_button_textgenerationwebui').trigger('click');
+}
+
 function onOpenRouterModelSelect() {
     const modelId = String($('#openrouter_model').val());
     textgen_settings.openrouter_model = modelId;
@@ -952,6 +983,7 @@ export function initTextGenModels() {
     $('#aphrodite_model').on('change', onAphroditeModelSelect);
     $('#tabby_download_model').on('click', downloadTabbyModel);
     $('#tabby_model').on('change', onTabbyModelSelect);
+    $('#llamacpp_model').on('change', onLlamaCppModelSelect);
     $('#featherless_model').on('change', () => onFeatherlessModelSelect(String($('#featherless_model').val())));
 
     const providersSelect = $('.openrouter_providers');
@@ -990,6 +1022,13 @@ export function initTextGenModels() {
             width: '100%',
             allowClear: true,
         });
+        $('#llamacpp_model').select2({
+            placeholder: t`[Currently loaded]`,
+            searchInputPlaceholder: t`Search models...`,
+            searchInputCssClass: 'text_pole',
+            width: '100%',
+            allowClear: true,
+        });
         $('#model_infermaticai_select').select2({
             placeholder: t`Select a model`,
             searchInputPlaceholder: t`Search models...`,
diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index d9ae3e73a..4d9be2207 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -23,7 +23,7 @@ import { power_user, registerDebugFunction } from './power-user.js';
 import { getActiveManualApiSamplers, loadApiSelectedSamplers, isSamplerManualPriorityEnabled } from './samplerSelect.js';
 import { SECRET_KEYS, writeSecret } from './secrets.js';
 import { getEventSourceStream } from './sse-stream.js';
-import { getCurrentDreamGenModelTokenizer, getCurrentOpenRouterModelTokenizer, loadAphroditeModels, loadDreamGenModels, loadFeatherlessModels, loadGenericModels, loadInfermaticAIModels, loadMancerModels, loadOllamaModels, loadOpenRouterModels, loadTabbyModels, loadTogetherAIModels, loadVllmModels } from './textgen-models.js';
+import { getCurrentDreamGenModelTokenizer, getCurrentOpenRouterModelTokenizer, loadAphroditeModels, loadDreamGenModels, loadFeatherlessModels, loadGenericModels, loadInfermaticAIModels, loadLlamaCppModels, loadMancerModels, loadOllamaModels, loadOpenRouterModels, loadTabbyModels, loadTogetherAIModels, loadVllmModels } from './textgen-models.js';
 import { ENCODE_TOKENIZERS, TEXTGEN_TOKENIZERS, TOKENIZER_SUPPORTED_KEY, getTextTokens, tokenizers } from './tokenizers.js';
 import { AbortReason } from './util/AbortReason.js';
 import { getSortableDelay, onlyUnique, arraysEqual, isObject } from './utils.js';
@@ -214,6 +214,7 @@ export const textgenerationwebui_settings = {
     aphrodite_model: '',
     dreamgen_model: 'lucid-v1-extra-large/text',
     tabby_model: '',
+    llamacpp_model: '',
     sampler_order: KOBOLDCPP_ORDER,
     logit_bias: [],
     n: 1,
@@ -701,6 +702,9 @@ async function getStatusTextgen() {
         } else if (textgenerationwebui_settings.type === textgen_types.TABBY) {
             loadTabbyModels(data?.data);
             setOnlineStatus(textgenerationwebui_settings.tabby_model || data?.result);
+        } else if (textgenerationwebui_settings.type === textgen_types.LLAMACPP) {
+            loadLlamaCppModels(data?.data);
+            setOnlineStatus(textgenerationwebui_settings.llamacpp_model || data?.result || t`Connected`);
         } else if (textgenerationwebui_settings.type === textgen_types.GENERIC) {
             loadGenericModels(data?.data);
             setOnlineStatus(textgenerationwebui_settings.generic_model || data?.result || t`Connected`);
@@ -1462,6 +1466,11 @@ export function getTextGenModel(settings = null) {
                 return settings.tabby_model;
             }
             break;
+        case LLAMACPP:
+            if (settings.llamacpp_model) {
+                return settings.llamacpp_model;
+            }
+            break;
         default:
             return undefined;
     }