/**
 * Gets the vectors for the given texts from Ollama using one batched request.
 *
 * The request goes to `/api/embed` rather than the deprecated `/api/embeddings`:
 * the older endpoint does not properly honor `truncate`, which caused
 * "input length exceeds context length" errors when vectorizing files, and
 * `/api/embed` accepts an array of inputs natively.
 *
 * @param {string[]} texts - The texts to embed; sent as the `input` field
 * @param {string} apiUrl - Base URL of the Ollama server; its path is replaced with `/api/embed`
 * @param {string} model - Name of the embedding model; sent as the `model` field
 * @param {boolean} keep - When truthy, `keep_alive: -1` is sent; otherwise the field is omitted
 * @param {any} directories - Forwarded to `setAdditionalHeadersByType` (presumably the user's directories object; confirm against callers)
 * @returns {Promise<number[][]>} - The array of vectors for the texts, one per input
 * @throws {Error} If the response is not OK, has no `embeddings` array, or has the wrong number of vectors
 */
export async function getOllamaBatchVector(texts, apiUrl, model, keep, directories) {
    // Nothing to embed: answer without a network round trip, as the previous
    // per-text loop implementation did for an empty list.
    if (Array.isArray(texts) && texts.length === 0) {
        return [];
    }

    const url = new URL(apiUrl);
    url.pathname = '/api/embed';

    const headers = {};
    setAdditionalHeadersByType(headers, TEXTGEN_TYPES.OLLAMA, apiUrl, directories);

    const response = await fetch(url, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            ...headers,
        },
        body: JSON.stringify({
            input: texts,
            model: model,
            // `undefined` is dropped by JSON.stringify, so the field is only sent when `keep` is set.
            keep_alive: keep ? -1 : undefined,
            truncate: true,
        }),
    });

    if (!response.ok) {
        const responseText = await response.text();
        throw new Error(`Ollama: Failed to get batch vectors: ${response.statusText} ${responseText}`);
    }

    /** @type {any} */
    const data = await response.json();

    if (!Array.isArray(data?.embeddings)) {
        throw new Error('API response was not an array');
    }

    // Callers pair vectors with texts by index, so a short or long response
    // would silently misalign them. Fail loudly instead.
    if (Array.isArray(texts) && data.embeddings.length !== texts.length) {
        throw new Error(`Ollama: Expected ${texts.length} vectors, but got ${data.embeddings.length}`);
    }

    return data.embeddings;
}

/**
 * Gets the vector for a single text from Ollama.
 *
 * Implemented as a one-element batch so that both entry points share the same
 * request and validation logic.
 *
 * @param {string} text - The text to embed
 * @param {string} apiUrl - Base URL of the Ollama server
 * @param {string} model - Name of the embedding model
 * @param {boolean} keep - When truthy, asks Ollama to keep the model loaded (`keep_alive: -1`)
 * @param {any} directories - Forwarded to `getOllamaBatchVector`
 * @returns {Promise<number[]>} - The vector for the text
 * @throws {Error} If the request fails or the response does not contain a vector for the text
 */
export async function getOllamaVector(text, apiUrl, model, keep, directories) {
    const [vector] = await getOllamaBatchVector([text], apiUrl, model, keep, directories);

    // Never hand `undefined` (or a non-vector) back to the caller.
    if (!Array.isArray(vector)) {
        throw new Error('API response was not an array');
    }

    return vector;
}