Use Ollama /api/embed endpoint for vector embeddings (#5221)
* Use Ollama /api/embed endpoint for vector embeddings The deprecated /api/embeddings endpoint does not properly support the truncate parameter, causing "input length exceeds context length" errors when vectorizing files. Migrate to /api/embed which correctly handles truncation and supports native batch input. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Wrap single Ollama vector calculation into batch Fixes https://github.com/SillyTavern/SillyTavern/pull/5221/changes#r2850052729 --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
@@ -12,12 +12,39 @@ import { TEXTGEN_TYPES } from '../constants.js';
|
||||
* @returns {Promise<number[][]>} - The array of vectors for the texts
|
||||
*/
|
||||
export async function getOllamaBatchVector(texts, apiUrl, model, keep, directories) {
|
||||
const result = [];
|
||||
for (const text of texts) {
|
||||
const vector = await getOllamaVector(text, apiUrl, model, keep, directories);
|
||||
result.push(vector);
|
||||
const url = new URL(apiUrl);
|
||||
url.pathname = '/api/embed';
|
||||
|
||||
const headers = {};
|
||||
setAdditionalHeadersByType(headers, TEXTGEN_TYPES.OLLAMA, apiUrl, directories);
|
||||
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
...headers,
|
||||
},
|
||||
body: JSON.stringify({
|
||||
input: texts,
|
||||
model: model,
|
||||
keep_alive: keep ? -1 : undefined,
|
||||
truncate: true,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const responseText = await response.text();
|
||||
throw new Error(`Ollama: Failed to get batch vectors: ${response.statusText} ${responseText}`);
|
||||
}
|
||||
return result;
|
||||
|
||||
/** @type {any} */
|
||||
const data = await response.json();
|
||||
|
||||
if (!Array.isArray(data?.embeddings)) {
|
||||
throw new Error('API response was not an array');
|
||||
}
|
||||
|
||||
return data.embeddings;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -30,37 +57,6 @@ export async function getOllamaBatchVector(texts, apiUrl, model, keep, directori
|
||||
* @returns {Promise<number[]>} - The vector for the text
|
||||
*/
|
||||
export async function getOllamaVector(text, apiUrl, model, keep, directories) {
|
||||
const url = new URL(apiUrl);
|
||||
url.pathname = '/api/embeddings';
|
||||
|
||||
const headers = {};
|
||||
setAdditionalHeadersByType(headers, TEXTGEN_TYPES.OLLAMA, apiUrl, directories);
|
||||
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
...headers,
|
||||
},
|
||||
body: JSON.stringify({
|
||||
prompt: text,
|
||||
model: model,
|
||||
keep_alive: keep ? -1 : undefined,
|
||||
truncate: true,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const responseText = await response.text();
|
||||
throw new Error(`Ollama: Failed to get vector for text: ${response.statusText} ${responseText}`);
|
||||
}
|
||||
|
||||
/** @type {any} */
|
||||
const data = await response.json();
|
||||
|
||||
if (!Array.isArray(data?.embedding)) {
|
||||
throw new Error('API response was not an array');
|
||||
}
|
||||
|
||||
return data.embedding;
|
||||
const vectors = await getOllamaBatchVector([text], apiUrl, model, keep, directories);
|
||||
return vectors[0];
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user