Use Ollama /api/embed endpoint for vector embeddings (#5221)

* Use Ollama /api/embed endpoint for vector embeddings

The deprecated /api/embeddings endpoint does not properly support the
truncate parameter, causing "input length exceeds context length" errors
when vectorizing files. Migrate to /api/embed which correctly handles
truncation and supports native batch input.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Wrap single Ollama vector calculation into batch
Fixes https://github.com/SillyTavern/SillyTavern/pull/5221/changes#r2850052729

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
shifusen329
2026-02-25 15:44:12 -06:00
committed by GitHub
parent 4fa37e52f7
commit d789efba07
+34 -38
View File
@@ -12,12 +12,39 @@ import { TEXTGEN_TYPES } from '../constants.js';
* @returns {Promise<number[][]>} - The array of vectors for the texts
*/
export async function getOllamaBatchVector(texts, apiUrl, model, keep, directories) {
const result = [];
for (const text of texts) {
const vector = await getOllamaVector(text, apiUrl, model, keep, directories);
result.push(vector);
const url = new URL(apiUrl);
url.pathname = '/api/embed';
const headers = {};
setAdditionalHeadersByType(headers, TEXTGEN_TYPES.OLLAMA, apiUrl, directories);
const response = await fetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...headers,
},
body: JSON.stringify({
input: texts,
model: model,
keep_alive: keep ? -1 : undefined,
truncate: true,
}),
});
if (!response.ok) {
const responseText = await response.text();
throw new Error(`Ollama: Failed to get batch vectors: ${response.statusText} ${responseText}`);
}
return result;
/** @type {any} */
const data = await response.json();
if (!Array.isArray(data?.embeddings)) {
throw new Error('API response was not an array');
}
return data.embeddings;
}
/**
@@ -30,37 +57,6 @@ export async function getOllamaBatchVector(texts, apiUrl, model, keep, directori
* @returns {Promise<number[]>} - The vector for the text
*/
export async function getOllamaVector(text, apiUrl, model, keep, directories) {
const url = new URL(apiUrl);
url.pathname = '/api/embeddings';
const headers = {};
setAdditionalHeadersByType(headers, TEXTGEN_TYPES.OLLAMA, apiUrl, directories);
const response = await fetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...headers,
},
body: JSON.stringify({
prompt: text,
model: model,
keep_alive: keep ? -1 : undefined,
truncate: true,
}),
});
if (!response.ok) {
const responseText = await response.text();
throw new Error(`Ollama: Failed to get vector for text: ${response.statusText} ${responseText}`);
}
/** @type {any} */
const data = await response.json();
if (!Array.isArray(data?.embedding)) {
throw new Error('API response was not an array');
}
return data.embedding;
const vectors = await getOllamaBatchVector([text], apiUrl, model, keep, directories);
return vectors[0];
}