From d789efba0708097fb5716b6365ea3fcf830aa6f2 Mon Sep 17 00:00:00 2001
From: shifusen329 <quanhong329@gmail.com>
Date: Wed, 25 Feb 2026 15:44:12 -0600
Subject: [PATCH] Use Ollama /api/embed endpoint for vector embeddings (#5221)

* Use Ollama /api/embed endpoint for vector embeddings

The deprecated /api/embeddings endpoint does not properly support the
truncate parameter, causing "input length exceeds context length" errors
when vectorizing files. Migrate to /api/embed which correctly handles
truncation and supports native batch input.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Wrap single Ollama vector calculation into batch
Fixes https://github.com/SillyTavern/SillyTavern/pull/5221/changes#r2850052729

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
---
 src/vectors/ollama-vectors.js | 72 +++++++++++++++++------------------
 1 file changed, 34 insertions(+), 38 deletions(-)

diff --git a/src/vectors/ollama-vectors.js b/src/vectors/ollama-vectors.js
index cada95cd8..ea7e37340 100644
--- a/src/vectors/ollama-vectors.js
+++ b/src/vectors/ollama-vectors.js
@@ -12,12 +12,39 @@ import { TEXTGEN_TYPES } from '../constants.js';
  * @returns {Promise<number[][]>} - The array of vectors for the texts
  */
 export async function getOllamaBatchVector(texts, apiUrl, model, keep, directories) {
-    const result = [];
-    for (const text of texts) {
-        const vector = await getOllamaVector(text, apiUrl, model, keep, directories);
-        result.push(vector);
+    const url = new URL(apiUrl);
+    url.pathname = '/api/embed'; // overwrite whatever path apiUrl carried with the embed endpoint
+
+    const headers = {};
+    setAdditionalHeadersByType(headers, TEXTGEN_TYPES.OLLAMA, apiUrl, directories); // helper defined outside this diff; presumably fills in backend-specific headers
+
+    const response = await fetch(url, {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json',
+            ...headers, // spread last: a same-named key from the helper wins over the default above
+        },
+        body: JSON.stringify({
+            input: texts, // whole array in one request, replacing the old one-request-per-text loop
+            model: model,
+            keep_alive: keep ? -1 : undefined, // undefined is dropped by JSON.stringify, so the key is only sent when keep is truthy
+            truncate: true, // per the commit message, /api/embed honors this where /api/embeddings did not
+        }),
+    });
+
+    if (!response.ok) { // non-2xx: fail with the status text and the raw response body
+        const responseText = await response.text();
+        throw new Error(`Ollama: Failed to get batch vectors: ${response.statusText} ${responseText}`);
     }
-    return result;
+
+    /** @type {any} */
+    const data = await response.json();
+
+    if (!Array.isArray(data?.embeddings)) { // only the outer array is validated; its length and element shapes are not checked
+        throw new Error('API response was not an array');
+    }
+
+    return data.embeddings;
 }
 
 /**
@@ -30,37 +57,6 @@ export async function getOllamaBatchVector(texts, apiUrl, model, keep, directori
  * @returns {Promise<number[]>} - The vector for the text
  */
 export async function getOllamaVector(text, apiUrl, model, keep, directories) {
-    const url = new URL(apiUrl);
-    url.pathname = '/api/embeddings';
-
-    const headers = {};
-    setAdditionalHeadersByType(headers, TEXTGEN_TYPES.OLLAMA, apiUrl, directories);
-
-    const response = await fetch(url, {
-        method: 'POST',
-        headers: {
-            'Content-Type': 'application/json',
-            ...headers,
-        },
-        body: JSON.stringify({
-            prompt: text,
-            model: model,
-            keep_alive: keep ? -1 : undefined,
-            truncate: true,
-        }),
-    });
-
-    if (!response.ok) {
-        const responseText = await response.text();
-        throw new Error(`Ollama: Failed to get vector for text: ${response.statusText} ${responseText}`);
-    }
-
-    /** @type {any} */
-    const data = await response.json();
-
-    if (!Array.isArray(data?.embedding)) {
-        throw new Error('API response was not an array');
-    }
-
-    return data.embedding;
+    const vectors = await getOllamaBatchVector([text], apiUrl, model, keep, directories); // single text goes out as a one-element batch, so both paths share the /api/embed request code
+    return vectors[0]; // NOTE(review): undefined if the API returns an empty embeddings array; nothing checks for that here
 }