Add vectorization via Vertex AI (#4311)

* Add vectorization via Vertex AI

* Enable batching for Google vectors

* Add batch embeddings for Vertex

* Split embed methods for Vertex/AI Studio
This commit is contained in:
Cohee
2025-07-25 23:59:30 +03:00
committed by GitHub
parent c094e5d8ef
commit 82d9fa79e8
5 changed files with 127 additions and 66 deletions
+12 -2
View File
@@ -36,6 +36,7 @@ import { slashCommandReturnHelper } from '../../slash-commands/SlashCommandRetur
import { generateWebLlmChatPrompt, isWebLlmSupported } from '../shared.js';
import { WebLlmVectorProvider } from './webllm.js';
import { removeReasoningFromString } from '../../reasoning.js';
import { oai_settings } from '../../openai.js';
/**
* @typedef {object} HashedMessage
@@ -50,7 +51,7 @@ export const EXTENSION_PROMPT_TAG = '3_vectors';
export const EXTENSION_PROMPT_TAG_DB = '4_vectors_data_bank';
// Force solo chunks for sources that don't support batching.
const getBatchSize = () => ['transformers', 'palm', 'ollama'].includes(settings.source) ? 1 : 5;
const getBatchSize = () => ['transformers', 'ollama'].includes(settings.source) ? 1 : 5;
const settings = {
// For both
@@ -796,6 +797,14 @@ function getVectorsRequestBody(args = {}) {
break;
case 'palm':
body.model = extension_settings.vectors.google_model;
body.api = 'makersuite';
break;
case 'vertexai':
body.model = extension_settings.vectors.google_model;
body.api = 'vertexai';
body.vertexai_auth_mode = oai_settings.vertexai_auth_mode;
body.vertexai_region = oai_settings.vertexai_region;
body.vertexai_express_project_id = oai_settings.vertexai_express_project_id;
break;
default:
break;
@@ -881,6 +890,7 @@ async function insertVectorItems(collectionId, items) {
function throwIfSourceInvalid() {
if (settings.source === 'openai' && !secret_state[SECRET_KEYS.OPENAI] ||
settings.source === 'palm' && !secret_state[SECRET_KEYS.MAKERSUITE] ||
settings.source === 'vertexai' && !secret_state[SECRET_KEYS.VERTEXAI] && !secret_state[SECRET_KEYS.VERTEXAI_SERVICE_ACCOUNT] ||
settings.source === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] ||
settings.source === 'togetherai' && !secret_state[SECRET_KEYS.TOGETHERAI] ||
settings.source === 'nomicai' && !secret_state[SECRET_KEYS.NOMICAI] ||
@@ -1098,7 +1108,7 @@ function toggleSettings() {
$('#nomicai_apiKey').toggle(settings.source === 'nomicai');
$('#webllm_vectorsModel').toggle(settings.source === 'webllm');
$('#koboldcpp_vectorsModel').toggle(settings.source === 'koboldcpp');
$('#google_vectorsModel').toggle(settings.source === 'palm');
$('#google_vectorsModel').toggle(settings.source === 'palm' || settings.source === 'vertexai');
$('#vector_altEndpointUrl').toggle(vectorApiRequiresUrl.includes(settings.source));
if (settings.source === 'webllm') {
loadWebLlmModels();
@@ -13,6 +13,7 @@
<option value="cohere">Cohere</option>
<option value="extras">Extras (deprecated)</option>
<option value="palm">Google AI Studio</option>
<option value="vertexai">Google Vertex AI</option>
<option value="koboldcpp">KoboldCpp</option>
<option value="llamacpp">llama.cpp</option>
<option value="transformers" data-i18n="Local (Transformers)">Local (Transformers)</option>
@@ -136,6 +137,7 @@
Vectorization Model
</label>
<select id="vectors_google_model" class="text_pole">
<option value="gemini-embedding-001">gemini-embedding-001</option>
<option value="gemini-embedding-exp-03-07">gemini-embedding-exp-03-07</option>
<option value="text-embedding-004">text-embedding-004</option>
<option value="embedding-001">embedding-001</option>
+12 -3
View File
@@ -11,7 +11,8 @@ import { getNomicAIBatchVector, getNomicAIVector } from '../vectors/nomicai-vect
import { getOpenAIVector, getOpenAIBatchVector } from '../vectors/openai-vectors.js';
import { getTransformersVector, getTransformersBatchVector } from '../vectors/embedding.js';
import { getExtrasVector, getExtrasBatchVector } from '../vectors/extras-vectors.js';
import { getMakerSuiteVector, getMakerSuiteBatchVector } from '../vectors/makersuite-vectors.js';
import { getMakerSuiteVector, getMakerSuiteBatchVector } from '../vectors/google-vectors.js';
import { getVertexVector, getVertexBatchVector } from '../vectors/google-vectors.js';
import { getCohereVector, getCohereBatchVector } from '../vectors/cohere-vectors.js';
import { getLlamaCppVector, getLlamaCppBatchVector } from '../vectors/llamacpp-vectors.js';
import { getVllmVector, getVllmBatchVector } from '../vectors/vllm-vectors.js';
@@ -32,6 +33,7 @@ const SOURCES = [
'vllm',
'webllm',
'koboldcpp',
'vertexai',
];
/**
@@ -56,7 +58,9 @@ async function getVector(source, sourceSettings, text, isQuery, directories) {
case 'extras':
return getExtrasVector(text, sourceSettings.extrasUrl, sourceSettings.extrasKey);
case 'palm':
return getMakerSuiteVector(text, directories, sourceSettings.model);
return getMakerSuiteVector(text, sourceSettings.model, sourceSettings.request);
case 'vertexai':
return getVertexVector(text, sourceSettings.model, sourceSettings.request);
case 'cohere':
return getCohereVector(text, isQuery, directories, sourceSettings.model);
case 'llamacpp':
@@ -105,7 +109,10 @@ async function getBatchVector(source, sourceSettings, texts, isQuery, directorie
results.push(...await getExtrasBatchVector(batch, sourceSettings.extrasUrl, sourceSettings.extrasKey));
break;
case 'palm':
results.push(...await getMakerSuiteBatchVector(batch, directories, sourceSettings.model));
results.push(...await getMakerSuiteBatchVector(batch, sourceSettings.model, sourceSettings.request));
break;
case 'vertexai':
results.push(...await getVertexBatchVector(batch, sourceSettings.model, sourceSettings.request));
break;
case 'cohere':
results.push(...await getCohereBatchVector(batch, isQuery, directories, sourceSettings.model));
@@ -178,8 +185,10 @@ function getSourceSettings(source, request) {
model: getConfigValue('extensions.models.embedding', ''),
};
case 'palm':
case 'vertexai':
return {
model: String(request.body.model || 'text-embedding-004'),
request: request, // Pass the request object to get API key and URL
};
case 'mistral':
return {
+101
View File
@@ -0,0 +1,101 @@
import fetch from 'node-fetch';
import { getGoogleApiConfig } from '../endpoints/google.js';
/**
 * Gets the vectors for the given texts from Google AI Studio using one batch request.
 * @param {string[]} texts - The array of texts to get the vectors for
 * @param {string} model - The model to use for embedding
 * @param {import('express').Request} request - The request object, passed to getGoogleApiConfig to resolve the URL and headers
 * @returns {Promise<number[][]>} - The array of vectors, one per input text, in input order
 * @throws {Error} If the request fails or the response does not contain one valid vector per text
 */
export async function getMakerSuiteBatchVector(texts, model, request) {
    const { url, headers, apiName } = await getGoogleApiConfig(request, model, 'batchEmbedContents');

    const body = {
        requests: texts.map(text => ({
            model: `models/${model}`,
            content: { parts: [{ text }] },
        })),
    };

    const response = await fetch(url, {
        body: JSON.stringify(body),
        method: 'POST',
        headers: headers,
    });

    if (!response.ok) {
        const text = await response.text();
        console.warn(`${apiName} batch request failed`, response.statusText, text);
        throw new Error(`${apiName} batch request failed`);
    }

    /** @type {any} */
    const data = await response.json();
    if (!Array.isArray(data?.embeddings)) {
        throw new Error(`${apiName} did not return an array`);
    }

    // Results are matched to the inputs by position, so a count mismatch
    // would silently attach vectors to the wrong texts.
    if (data.embeddings.length !== texts.length) {
        throw new Error(`${apiName} returned ${data.embeddings.length} embeddings, expected ${texts.length}`);
    }

    return data.embeddings.map((embedding, index) => {
        const values = embedding?.values;
        // Fail loudly instead of handing an undefined vector to the caller.
        if (!Array.isArray(values)) {
            throw new Error(`${apiName} returned an invalid embedding at index ${index}`);
        }
        return values;
    });
}
/**
 * Gets the vectors for the given texts from Google Vertex AI using one `predict` request.
 * @param {string[]} texts - The array of texts to get the vectors for
 * @param {string} model - The model to use for embedding
 * @param {import('express').Request} request - The request object, passed to getGoogleApiConfig to resolve the URL and headers
 * @returns {Promise<number[][]>} - The array of vectors, one per input text, in input order
 * @throws {Error} If the request fails or the response does not contain one valid vector per text
 */
export async function getVertexBatchVector(texts, model, request) {
    const { url, headers, apiName } = await getGoogleApiConfig(request, model, 'predict');

    // NOTE(review): some Vertex embedding models may cap the number of instances
    // allowed per predict call - confirm against the Vertex AI embeddings API limits.
    const body = {
        instances: texts.map(text => ({ content: text })),
    };

    const response = await fetch(url, {
        body: JSON.stringify(body),
        method: 'POST',
        headers: headers,
    });

    if (!response.ok) {
        const text = await response.text();
        console.warn(`${apiName} batch request failed`, response.statusText, text);
        throw new Error(`${apiName} batch request failed`);
    }

    /** @type {any} */
    const data = await response.json();
    if (!Array.isArray(data?.predictions)) {
        throw new Error(`${apiName} did not return an array`);
    }

    // Results are matched to the inputs by position, so a count mismatch
    // would silently attach vectors to the wrong texts.
    if (data.predictions.length !== texts.length) {
        throw new Error(`${apiName} returned ${data.predictions.length} embeddings, expected ${texts.length}`);
    }

    return data.predictions.map((prediction, index) => {
        const values = prediction?.embeddings?.values;
        // Guard the nested access so a malformed prediction produces a clear error
        // rather than an opaque TypeError or an undefined vector.
        if (!Array.isArray(values)) {
            throw new Error(`${apiName} returned an invalid embedding at index ${index}`);
        }
        return values;
    });
}
/**
 * Embeds a single text with Google AI Studio by sending it as a one-element batch.
 * @param {string} text - The text to get the vector for
 * @param {string} model - The model to use for embedding
 * @param {import('express').Request} request - The request object to get API key and URL
 * @returns {Promise<number[]>} - The vector for the text
 */
export async function getMakerSuiteVector(text, model, request) {
    const vectors = await getMakerSuiteBatchVector([text], model, request);
    // Only one text was submitted, so the first entry is its vector.
    return vectors[0];
}
/**
 * Embeds a single text with Google Vertex AI by sending it as a one-element batch.
 * @param {string} text - The text to get the vector for
 * @param {string} model - The model to use for embedding
 * @param {import('express').Request} request - The request object to get API key and URL
 * @returns {Promise<number[]>} - The vector for the text
 */
export async function getVertexVector(text, model, request) {
    const vectors = await getVertexBatchVector([text], model, request);
    // Only one text was submitted, so the first entry is its vector.
    return vectors[0];
}
-61
View File
@@ -1,61 +0,0 @@
import fetch from 'node-fetch';
import { SECRET_KEYS, readSecret } from '../endpoints/secrets.js';
import { trimTrailingSlash } from '../util.js';
// Base URL used to build Google AI Studio embedding request URLs.
const API_MAKERSUITE = 'https://generativelanguage.googleapis.com';
/**
 * Embeds several texts by issuing one getMakerSuiteVector call per text and awaiting them together.
 * @param {string[]} texts - The array of texts to get the vector for
 * @param {import('../users.js').UserDirectoryList} directories - The directories object for the user
 * @param {string} model - The model to use for embedding
 * @returns {Promise<number[][]>} - The array of vectors for the texts
 */
export async function getMakerSuiteBatchVector(texts, directories, model) {
    // Start every request before awaiting so they run concurrently.
    const pending = texts.map((entry) => getMakerSuiteVector(entry, directories, model));
    const vectors = await Promise.all(pending);
    return vectors;
}
/**
 * Requests an embedding for one text from the Google AI Studio `embedContent` endpoint.
 * @param {string} text - The text to get the vector for
 * @param {import('../users.js').UserDirectoryList} directories - The directories object for the user
 * @param {string} model - The model to use for embedding
 * @returns {Promise<number[]>} - The vector for the text
 * @throws {Error} If no API key is stored or the request is not successful
 */
export async function getMakerSuiteVector(text, directories, model) {
    const apiKey = readSecret(directories, SECRET_KEYS.MAKERSUITE);

    if (!apiKey) {
        const message = 'No Google AI Studio key found';
        console.warn(message);
        throw new Error(message);
    }

    const baseUrl = trimTrailingSlash(API_MAKERSUITE);
    const endpoint = `${baseUrl}/v1beta/models/${model}:embedContent?key=${apiKey}`;
    const payload = { content: { parts: [{ text }] } };

    const reply = await fetch(endpoint, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
    });

    if (!reply.ok) {
        // Read the response body so the failure details appear in the log.
        const details = await reply.text();
        console.warn('Google AI Studio request failed', reply.statusText, details);
        throw new Error('Google AI Studio request failed');
    }

    /** @type {any} */
    const parsed = await reply.json();
    // noinspection JSValidateTypes
    return parsed.embedding.values;
}