feat(openrouter): add model quantizations setting (#5080)
* feat(openrouter): add model quantizations setting
* Remove bogus setting
* Simplify nullish coalescing assignment

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
@@ -2441,6 +2441,20 @@
|
||||
<span data-i18n="Allow fallback providers">Allow fallback providers</span>
|
||||
</label>
|
||||
</div>
|
||||
<div>
|
||||
<h4 data-i18n="Model Quantizations">Model Quantizations</h4>
|
||||
<select id="openrouter_quantizations_text" class="openrouter_quantizations" multiple>
|
||||
<option data-i18n="Integer (4 bit)" value="int4">Integer (4 bit)</option>
|
||||
<option data-i18n="Integer (8 bit)" value="int8">Integer (8 bit)</option>
|
||||
<option data-i18n="Floating point (4 bit)" value="fp4">Floating point (4 bit)</option>
|
||||
<option data-i18n="Floating point (6 bit)" value="fp6">Floating point (6 bit)</option>
|
||||
<option data-i18n="Floating point (8 bit)" value="fp8">Floating point (8 bit)</option>
|
||||
<option data-i18n="Floating point (16 bit)" value="fp16">Floating point (16 bit)</option>
|
||||
<option data-i18n="Brain floating point (16 bit)" value="bf16">Brain floating point (16 bit)</option>
|
||||
<option data-i18n="Floating point (32 bit)" value="fp32">Floating point (32 bit)</option>
|
||||
<option data-i18n="Unknown" value="unknown">Unknown</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div data-tg-type="infermaticai" class="flex-container flexFlowColumn">
|
||||
<h4 data-i18n="InfermaticAI API Key">InfermaticAI API Key</h4>
|
||||
@@ -3183,6 +3197,20 @@
|
||||
<i class="fa-solid fa-lightbulb"></i>
|
||||
<span data-i18n="To use instruct formatting, switch to OpenRouter under Text Completion API.">To use instruct formatting, switch to OpenRouter under Text Completion API.</span>
|
||||
</small>
|
||||
<div>
|
||||
<h4 data-i18n="Model Quantizations">Model Quantizations</h4>
|
||||
<select id="openrouter_quantizations_chat" class="openrouter_quantizations" multiple>
|
||||
<option data-i18n="Integer (4 bit)" value="int4">Integer (4 bit)</option>
|
||||
<option data-i18n="Integer (8 bit)" value="int8">Integer (8 bit)</option>
|
||||
<option data-i18n="Floating point (4 bit)" value="fp4">Floating point (4 bit)</option>
|
||||
<option data-i18n="Floating point (6 bit)" value="fp6">Floating point (6 bit)</option>
|
||||
<option data-i18n="Floating point (8 bit)" value="fp8">Floating point (8 bit)</option>
|
||||
<option data-i18n="Floating point (16 bit)" value="fp16">Floating point (16 bit)</option>
|
||||
<option data-i18n="Brain floating point (16 bit)" value="bf16">Brain floating point (16 bit)</option>
|
||||
<option data-i18n="Floating point (32 bit)" value="fp32">Floating point (32 bit)</option>
|
||||
<option data-i18n="Unknown" value="unknown">Unknown</option>
|
||||
</select>
|
||||
</div>
|
||||
</form>
|
||||
<form id="ai21_form" data-source="ai21" action="javascript:void(null);" method="post" enctype="multipart/form-data">
|
||||
<h4 data-i18n="AI21 API Key">AI21 API Key</h4>
|
||||
|
||||
@@ -290,6 +290,8 @@
|
||||
"View Remaining Credits": "Afficher les crédits restants",
|
||||
"OpenRouter Model": "Modèle OpenRouter",
|
||||
"Model Providers": "Fournisseurs de modèles",
|
||||
"Model Quantizations": "Quantifications du modèle",
|
||||
"Select quantizations. No selection = all quantizations.": "Sélectionnez les quantifications. Aucune sélection = toutes les quantifications.",
|
||||
"InfermaticAI API Key": "Clé API InfermaticAI",
|
||||
"InfermaticAI Model": "Modèle InfermaticAI",
|
||||
"DreamGen API key": "Clé API DreamGen",
|
||||
|
||||
@@ -288,6 +288,7 @@ export const settingsToUpdate = {
|
||||
openrouter_group_models: ['#openrouter_group_models', 'openrouter_group_models', false, true],
|
||||
openrouter_sort_models: ['#openrouter_sort_models', 'openrouter_sort_models', false, true],
|
||||
openrouter_providers: ['#openrouter_providers_chat', 'openrouter_providers', false, true],
|
||||
openrouter_quantizations: ['#openrouter_quantizations_chat', 'openrouter_quantizations', false, true],
|
||||
openrouter_allow_fallbacks: ['#openrouter_allow_fallbacks', 'openrouter_allow_fallbacks', true, true],
|
||||
openrouter_middleout: ['#openrouter_middleout', 'openrouter_middleout', false, true],
|
||||
ai21_model: ['#model_ai21_select', 'ai21_model', false, true],
|
||||
@@ -434,6 +435,7 @@ const default_settings = {
|
||||
openrouter_group_models: false,
|
||||
openrouter_sort_models: 'alphabetically',
|
||||
openrouter_providers: [],
|
||||
openrouter_quantizations: [],
|
||||
openrouter_allow_fallbacks: true,
|
||||
openrouter_middleout: openrouter_middleout_types.ON,
|
||||
reverse_proxy: '',
|
||||
@@ -2628,6 +2630,7 @@ export async function createGenerationParameters(settings, model, type, messages
|
||||
generate_data.top_a = Number(settings.top_a_openai);
|
||||
generate_data.use_fallback = settings.openrouter_use_fallback;
|
||||
generate_data.provider = settings.openrouter_providers;
|
||||
generate_data.quantizations = settings.openrouter_quantizations;
|
||||
generate_data.allow_fallbacks = settings.openrouter_allow_fallbacks;
|
||||
generate_data.middleout = settings.openrouter_middleout;
|
||||
}
|
||||
@@ -4048,6 +4051,7 @@ function loadOpenAISettings(data, settings) {
|
||||
setContinuePostfixControls();
|
||||
|
||||
$('#openrouter_providers_chat').trigger('change');
|
||||
$('#openrouter_quantizations_chat').trigger('change');
|
||||
$('#chat_completion_source').trigger('change');
|
||||
}
|
||||
|
||||
@@ -4676,6 +4680,7 @@ function onSettingsPresetChange() {
|
||||
if (oai_settings.bind_preset_to_connection) {
|
||||
$('#chat_completion_source').trigger('change');
|
||||
$('#openrouter_providers_chat').trigger('change');
|
||||
$('#openrouter_quantizations_chat').trigger('change');
|
||||
}
|
||||
|
||||
$('#openai_logit_bias_preset').trigger('change');
|
||||
@@ -6805,6 +6810,19 @@ export function initOpenAI() {
|
||||
saveSettingsDebounced();
|
||||
});
|
||||
|
||||
// Keep oai_settings.openrouter_quantizations in sync with the chat-side quantization picker.
$('#openrouter_quantizations_chat').on('change', function () {
    const quantizationChoice = $(this).val();

    // Only an array value is stored; any other value is ignored and nothing is saved.
    if (Array.isArray(quantizationChoice)) {
        oai_settings.openrouter_quantizations = quantizationChoice;
        saveSettingsDebounced();
    }
});
|
||||
|
||||
$('#bind_preset_to_connection').on('input', function () {
|
||||
oai_settings.bind_preset_to_connection = !!$(this).prop('checked');
|
||||
saveSettingsDebounced();
|
||||
|
||||
@@ -712,6 +712,7 @@ class PresetManager {
|
||||
'featherless_model',
|
||||
'max_tokens_second',
|
||||
'openrouter_providers',
|
||||
'openrouter_quantizations',
|
||||
'openrouter_allow_fallbacks',
|
||||
'tabby_model',
|
||||
'derived',
|
||||
|
||||
@@ -1065,6 +1065,13 @@ export function initTextGenModels() {
|
||||
width: '100%',
|
||||
templateResult: getAphroditeModelTemplate,
|
||||
});
|
||||
// select2 options applied to every element carrying the `openrouter_quantizations` class.
const quantizationPickerOptions = {
    closeOnSelect: false,
    placeholder: t`Select quantizations. No selection = all quantizations.`,
    searchInputCssClass: 'text_pole',
    searchInputPlaceholder: t`Search quantizations...`,
    width: '100%',
};
$('.openrouter_quantizations').select2(quantizationPickerOptions);
|
||||
providersSelect.select2({
|
||||
sorter: data => data.sort((a, b) => a.text.localeCompare(b.text)),
|
||||
placeholder: t`Select providers. No selection = all providers.`,
|
||||
|
||||
@@ -211,6 +211,7 @@ export const textgenerationwebui_settings = {
|
||||
ollama_model: '',
|
||||
openrouter_model: 'openrouter/auto',
|
||||
openrouter_providers: [],
|
||||
openrouter_quantizations: [],
|
||||
vllm_model: '',
|
||||
aphrodite_model: '',
|
||||
dreamgen_model: 'lucid-v1-extra-large/text',
|
||||
@@ -590,6 +591,7 @@ export async function loadTextGenSettings(data, loadedSettings) {
|
||||
|
||||
$('#textgen_type').val(textgenerationwebui_settings.type);
|
||||
$('#openrouter_providers_text').val(textgenerationwebui_settings.openrouter_providers).trigger('change');
|
||||
$('#openrouter_quantizations_text').val(textgenerationwebui_settings.openrouter_quantizations).trigger('change');
|
||||
showSamplerControls(textgenerationwebui_settings.type);
|
||||
BIAS_CACHE.delete(BIAS_KEY);
|
||||
displayLogitBias(textgenerationwebui_settings.logit_bias, BIAS_KEY);
|
||||
@@ -1073,6 +1075,19 @@ export function initTextGenSettings() {
|
||||
saveSettingsDebounced();
|
||||
});
|
||||
|
||||
// Keep textgenerationwebui_settings.openrouter_quantizations in sync with the text-side quantization picker.
$('#openrouter_quantizations_text').on('change', function () {
    const quantizationChoice = $(this).val();

    // Only an array value is stored; any other value is ignored and nothing is saved.
    if (Array.isArray(quantizationChoice)) {
        textgenerationwebui_settings.openrouter_quantizations = quantizationChoice;
        saveSettingsDebounced();
    }
});
|
||||
|
||||
$('#api_button_textgenerationwebui').on('click', async function (e) {
|
||||
const keys = [
|
||||
{ id: 'api_key_mancer', secret: SECRET_KEYS.MANCER },
|
||||
@@ -1735,6 +1750,7 @@ export function createTextGenGenerationData(settings, model, finalPrompt = null,
|
||||
|
||||
if (settings.type === OPENROUTER) {
|
||||
params.provider = settings.openrouter_providers;
|
||||
params.quantizations = settings.openrouter_quantizations;
|
||||
params.allow_fallbacks = settings.openrouter_allow_fallbacks;
|
||||
}
|
||||
|
||||
|
||||
@@ -2097,6 +2097,11 @@ router.post('/generate', async function (request, response) {
|
||||
};
|
||||
}
|
||||
|
||||
// Copy a non-empty `quantizations` array from the request into bodyParams.provider,
// creating the provider object first when it is not already set.
if (Array.isArray(request.body.quantizations) && request.body.quantizations.length !== 0) {
    bodyParams.provider = bodyParams.provider ?? {};
    bodyParams.provider.quantizations = request.body.quantizations;
}
|
||||
|
||||
if (request.body.use_fallback) {
|
||||
bodyParams['route'] = 'fallback';
|
||||
}
|
||||
|
||||
@@ -367,6 +367,12 @@ router.post('/generate', async function (request, response) {
|
||||
} else {
|
||||
delete request.body.provider;
|
||||
}
|
||||
|
||||
// Copy a non-empty `quantizations` array into request.body.provider,
// creating the provider object first when it is not already set.
if (Array.isArray(request.body.quantizations) && request.body.quantizations.length !== 0) {
    request.body.provider = request.body.provider ?? {};
    request.body.provider.quantizations = request.body.quantizations;
}
|
||||
|
||||
request.body = _.pickBy(request.body, (_, key) => OPENROUTER_KEYS.includes(key));
|
||||
args.body = JSON.stringify(request.body);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user