Use Ollama /api/embed endpoint for vector embeddings (#5221)

* Use Ollama /api/embed endpoint for vector embeddings

  The deprecated /api/embeddings endpoint does not properly support the truncate parameter, causing "input length exceeds context length" errors when vectorizing files. Migrate to /api/embed, which correctly handles truncation and supports native batch input.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Wrap single Ollama vector calculation into batch

  Fixes https://github.com/SillyTavern/SillyTavern/pull/5221/changes#r2850052729

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
2026-02-25 15:44:12 -06:00
parent 4fa37e52f7
commit d789efba07
1 changed file with 34 additions and 38 deletions
@@ -12,12 +12,39 @@ import { TEXTGEN_TYPES } from '../constants.js';
 * @returns {Promise<number[][]>} - The array of vectors for the texts
 */
 export async function getOllamaBatchVector(texts, apiUrl, model, keep, directories) {
-    const result = [];
-    for (const text of texts) {
-        const vector = await getOllamaVector(text, apiUrl, model, keep, directories);
-        result.push(vector);
+    const url = new URL(apiUrl);
+    url.pathname = '/api/embed';
+
+    const headers = {};
+    setAdditionalHeadersByType(headers, TEXTGEN_TYPES.OLLAMA, apiUrl, directories);
+
+    const response = await fetch(url, {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json',
+            ...headers,
+        },
+        body: JSON.stringify({
+            input: texts,
+            model: model,
+            keep_alive: keep ? -1 : undefined,
+            truncate: true,
+        }),
+    });
+
+    if (!response.ok) {
+        const responseText = await response.text();
+        throw new Error(`Ollama: Failed to get batch vectors: ${response.statusText} ${responseText}`);
    }
-    return result;
+
+    /** @type {any} */
+    const data = await response.json();
+
+    if (!Array.isArray(data?.embeddings)) {
+        throw new Error('API response was not an array');
+    }
+
+    return data.embeddings;
 }

 /**
@@ -30,37 +57,6 @@ export async function getOllamaBatchVector(texts, apiUrl, model, keep, directori
 * @returns {Promise<number[]>} - The vector for the text
 */
 export async function getOllamaVector(text, apiUrl, model, keep, directories) {
-    const url = new URL(apiUrl);
-    url.pathname = '/api/embeddings';
-
-    const headers = {};
-    setAdditionalHeadersByType(headers, TEXTGEN_TYPES.OLLAMA, apiUrl, directories);
-
-    const response = await fetch(url, {
-        method: 'POST',
-        headers: {
-            'Content-Type': 'application/json',
-            ...headers,
-        },
-        body: JSON.stringify({
-            prompt: text,
-            model: model,
-            keep_alive: keep ? -1 : undefined,
-            truncate: true,
-        }),
-    });
-
-    if (!response.ok) {
-        const responseText = await response.text();
-        throw new Error(`Ollama: Failed to get vector for text: ${response.statusText} ${responseText}`);
-    }
-
-    /** @type {any} */
-    const data = await response.json();
-
-    if (!Array.isArray(data?.embedding)) {
-        throw new Error('API response was not an array');
-    }
-
-    return data.embedding;
+    const vectors = await getOllamaBatchVector([text], apiUrl, model, keep, directories);
+    return vectors[0];
 }