Add model selection support for llama.cpp router mode (#4910)

* Add model selection support for llama.cpp router mode

- Add llamacpp_model setting to textgen-settings.js
- Implement loadLlamaCppModels() function to fetch and populate models
- Add onLlamaCppModelSelect() handler for model selection
- Update status check to load llama.cpp models when connecting
- Update getTextGenModel() to return selected llama.cpp model
- Add model dropdown to HTML UI in llama.cpp section
- Initialize event handlers and Select2 for better UX
- Add llamacpp_model to preset manager for save/load support
- Add llamacpp_model support to slash commands

This implements model selection for llama.cpp router mode, allowing
users to select from multiple models without restarting the server.
Follows the same pattern as Ollama, Tabby, and vLLM implementations.

* Correct spelling

* Fix position of the clear-selection button

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
my-alt
2025-12-22 04:51:44 +08:00
committed by GitHub
parent 39cc1b9039
commit 7f98e3e213
6 changed files with 66 additions and 1 deletions
+5
View File
@@ -16,6 +16,11 @@
user-select: none;
}
/* Clear control of a clearable single-selection Select2 box: offset it with top: 0 and right: 25px. */
/* NOTE(review): presumably this keeps the control from overlapping the dropdown arrow; confirm visually. */
.select2-container .select2-selection.select2-selection--single.select2-selection--clearable .select2-selection__clear {
top: 0;
right: 25px;
}
.select2-container .select2-selection .select2-selection__clear {
color: var(--SmartThemeBodyColor);
font-size: 20px;
+10
View File
@@ -2679,6 +2679,16 @@
<small data-i18n="Example: http://127.0.0.1:8080">Example: http://127.0.0.1:8080</small>
<input id="llamacpp_api_url_text" class="text_pole wide100p" value="" autocomplete="off" data-server-history="llamacpp">
</div>
<div>
<h4>
<span data-i18n="llama.cpp Model">llama.cpp Model</span>
</h4>
<select id="llamacpp_model">
<option value="" data-i18n="-- Connect to the API --">
-- Connect to the API --
</option>
</select>
</div>
</div>
<div data-tg-type="ollama">
<div class="flex-container flexFlowColumn">
+1
View File
@@ -701,6 +701,7 @@ class PresetManager {
'ollama_model',
'vllm_model',
'aphrodite_model',
'llamacpp_model',
'server_urls',
'type',
'custom_model',
+1
View File
@@ -4834,6 +4834,7 @@ function getModelOptions(quiet) {
{ id: 'aphrodite_model', api: 'textgenerationwebui', type: textgen_types.APHRODITE },
{ id: 'ollama_model', api: 'textgenerationwebui', type: textgen_types.OLLAMA },
{ id: 'tabby_model', api: 'textgenerationwebui', type: textgen_types.TABBY },
{ id: 'llamacpp_model', api: 'textgenerationwebui', type: textgen_types.LLAMACPP },
{ id: 'featherless_model', api: 'textgenerationwebui', type: textgen_types.FEATHERLESS },
{ id: 'model_openai_select', api: 'openai', type: chat_completion_sources.OPENAI },
{ id: 'model_claude_select', api: 'openai', type: chat_completion_sources.CLAUDE },
+39
View File
@@ -17,6 +17,7 @@ let vllmModels = [];
let aphroditeModels = [];
let featherlessModels = [];
let tabbyModels = [];
let llamacppModels = [];
export let openRouterModels = [];
/**
@@ -139,6 +140,30 @@ export async function loadTabbyModels(data) {
}
}
/**
 * Rebuilds the llama.cpp model dropdown from a list of model entries.
 *
 * The entries are sorted by `id` and prefixed with an empty-id entry. When the model stored in
 * `textgen_settings.llamacpp_model` is not present in the resulting list, the stored value is
 * reset to the empty string. One `<option>` per entry is then written into `#llamacpp_model`.
 *
 * @param {{id: string}[]} data Model entries; each entry is expected to carry a string `id`.
 * @returns {Promise<void>} Resolves once the dropdown has been repopulated, or immediately when
 * `data` is not an array (an error is logged in that case).
 */
export async function loadLlamaCppModels(data) {
    if (!Array.isArray(data)) {
        console.error('Invalid llama.cpp models data', data);
        return;
    }

    // Sort a copy: `data` is owned by the caller and must not be reordered or grown here.
    const sortedModels = [...data].sort((a, b) => a.id.localeCompare(b.id));
    // The leading empty-id entry gives the dropdown an empty option.
    llamacppModels = [{ id: '' }, ...sortedModels];

    // Drop a stored selection that the server no longer offers.
    const isKnownModel = llamacppModels.some(x => x.id === textgen_settings.llamacpp_model);
    if (!isKnownModel) {
        textgen_settings.llamacpp_model = '';
    }

    const select = $('#llamacpp_model');
    select.empty();
    for (const { id } of llamacppModels) {
        const option = document.createElement('option');
        option.value = id;
        option.text = id;
        option.selected = id === textgen_settings.llamacpp_model;
        select.append(option);
    }
}
export async function loadTogetherAIModels(data) {
if (!Array.isArray(data)) {
console.error('Invalid Together AI models data', data);
@@ -637,6 +662,12 @@ function onTabbyModelSelect() {
$('#api_button_textgenerationwebui').trigger('click');
}
/**
 * Change handler for the llama.cpp model dropdown.
 *
 * Stores the dropdown's current value (coerced to a string) in
 * `textgen_settings.llamacpp_model`, then triggers a click on the text generation
 * API button.
 */
function onLlamaCppModelSelect() {
    const selectedValue = $('#llamacpp_model').val();
    textgen_settings.llamacpp_model = String(selectedValue);

    const apiButton = $('#api_button_textgenerationwebui');
    apiButton.trigger('click');
}
function onOpenRouterModelSelect() {
const modelId = String($('#openrouter_model').val());
textgen_settings.openrouter_model = modelId;
@@ -952,6 +983,7 @@ export function initTextGenModels() {
$('#aphrodite_model').on('change', onAphroditeModelSelect);
$('#tabby_download_model').on('click', downloadTabbyModel);
$('#tabby_model').on('change', onTabbyModelSelect);
$('#llamacpp_model').on('change', onLlamaCppModelSelect);
$('#featherless_model').on('change', () => onFeatherlessModelSelect(String($('#featherless_model').val())));
const providersSelect = $('.openrouter_providers');
@@ -990,6 +1022,13 @@ export function initTextGenModels() {
width: '100%',
allowClear: true,
});
$('#llamacpp_model').select2({
placeholder: t`[Currently loaded]`,
searchInputPlaceholder: t`Search models...`,
searchInputCssClass: 'text_pole',
width: '100%',
allowClear: true,
});
$('#model_infermaticai_select').select2({
placeholder: t`Select a model`,
searchInputPlaceholder: t`Search models...`,
+10 -1
View File
@@ -23,7 +23,7 @@ import { power_user, registerDebugFunction } from './power-user.js';
import { getActiveManualApiSamplers, loadApiSelectedSamplers, isSamplerManualPriorityEnabled } from './samplerSelect.js';
import { SECRET_KEYS, writeSecret } from './secrets.js';
import { getEventSourceStream } from './sse-stream.js';
import { getCurrentDreamGenModelTokenizer, getCurrentOpenRouterModelTokenizer, loadAphroditeModels, loadDreamGenModels, loadFeatherlessModels, loadGenericModels, loadInfermaticAIModels, loadMancerModels, loadOllamaModels, loadOpenRouterModels, loadTabbyModels, loadTogetherAIModels, loadVllmModels } from './textgen-models.js';
import { getCurrentDreamGenModelTokenizer, getCurrentOpenRouterModelTokenizer, loadAphroditeModels, loadDreamGenModels, loadFeatherlessModels, loadGenericModels, loadInfermaticAIModels, loadLlamaCppModels, loadMancerModels, loadOllamaModels, loadOpenRouterModels, loadTabbyModels, loadTogetherAIModels, loadVllmModels } from './textgen-models.js';
import { ENCODE_TOKENIZERS, TEXTGEN_TOKENIZERS, TOKENIZER_SUPPORTED_KEY, getTextTokens, tokenizers } from './tokenizers.js';
import { AbortReason } from './util/AbortReason.js';
import { getSortableDelay, onlyUnique, arraysEqual, isObject } from './utils.js';
@@ -214,6 +214,7 @@ export const textgenerationwebui_settings = {
aphrodite_model: '',
dreamgen_model: 'lucid-v1-extra-large/text',
tabby_model: '',
llamacpp_model: '',
sampler_order: KOBOLDCPP_ORDER,
logit_bias: [],
n: 1,
@@ -701,6 +702,9 @@ async function getStatusTextgen() {
} else if (textgenerationwebui_settings.type === textgen_types.TABBY) {
loadTabbyModels(data?.data);
setOnlineStatus(textgenerationwebui_settings.tabby_model || data?.result);
} else if (textgenerationwebui_settings.type === textgen_types.LLAMACPP) {
loadLlamaCppModels(data?.data);
setOnlineStatus(textgenerationwebui_settings.llamacpp_model || data?.result || t`Connected`);
} else if (textgenerationwebui_settings.type === textgen_types.GENERIC) {
loadGenericModels(data?.data);
setOnlineStatus(textgenerationwebui_settings.generic_model || data?.result || t`Connected`);
@@ -1462,6 +1466,11 @@ export function getTextGenModel(settings = null) {
return settings.tabby_model;
}
break;
case LLAMACPP:
if (settings.llamacpp_model) {
return settings.llamacpp_model;
}
break;
default:
return undefined;
}