Add vectorization via Vertex AI (#4311)

* Add vectorization via Vertex AI
* Enable batching for Google vectors
* Add batch embeddings for Vertex
* Split embed methods for Vertex/AI Studio
2025-07-25 23:59:30 +03:00
parent c094e5d8ef
commit 82d9fa79e8
5 changed files with 127 additions and 66 deletions
@@ -36,6 +36,7 @@ import { slashCommandReturnHelper } from '../../slash-commands/SlashCommandRetur
 import { generateWebLlmChatPrompt, isWebLlmSupported } from '../shared.js';
 import { WebLlmVectorProvider } from './webllm.js';
 import { removeReasoningFromString } from '../../reasoning.js';
+import { oai_settings } from '../../openai.js';

 /**
 * @typedef {object} HashedMessage
@@ -50,7 +51,7 @@ export const EXTENSION_PROMPT_TAG = '3_vectors';
 export const EXTENSION_PROMPT_TAG_DB = '4_vectors_data_bank';

 // Force solo chunks for sources that don't support batching.
-const getBatchSize = () => ['transformers', 'palm', 'ollama'].includes(settings.source) ? 1 : 5;
+const getBatchSize = () => ['transformers', 'ollama'].includes(settings.source) ? 1 : 5;

 const settings = {
    // For both
@@ -796,6 +797,14 @@ function getVectorsRequestBody(args = {}) {
            break;
        case 'palm':
            body.model = extension_settings.vectors.google_model;
+            body.api = 'makersuite';
+            break;
+        case 'vertexai':
+            body.model = extension_settings.vectors.google_model;
+            body.api = 'vertexai';
+            body.vertexai_auth_mode = oai_settings.vertexai_auth_mode;
+            body.vertexai_region = oai_settings.vertexai_region;
+            body.vertexai_express_project_id = oai_settings.vertexai_express_project_id;
            break;
        default:
            break;
@@ -881,6 +890,7 @@ async function insertVectorItems(collectionId, items) {
 function throwIfSourceInvalid() {
    if (settings.source === 'openai' && !secret_state[SECRET_KEYS.OPENAI] ||
        settings.source === 'palm' && !secret_state[SECRET_KEYS.MAKERSUITE] ||
+        settings.source === 'vertexai' && !secret_state[SECRET_KEYS.VERTEXAI] && !secret_state[SECRET_KEYS.VERTEXAI_SERVICE_ACCOUNT] ||
        settings.source === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] ||
        settings.source === 'togetherai' && !secret_state[SECRET_KEYS.TOGETHERAI] ||
        settings.source === 'nomicai' && !secret_state[SECRET_KEYS.NOMICAI] ||
@@ -1098,7 +1108,7 @@ function toggleSettings() {
    $('#nomicai_apiKey').toggle(settings.source === 'nomicai');
    $('#webllm_vectorsModel').toggle(settings.source === 'webllm');
    $('#koboldcpp_vectorsModel').toggle(settings.source === 'koboldcpp');
-    $('#google_vectorsModel').toggle(settings.source === 'palm');
+    $('#google_vectorsModel').toggle(settings.source === 'palm' || settings.source === 'vertexai');
    $('#vector_altEndpointUrl').toggle(vectorApiRequiresUrl.includes(settings.source));
    if (settings.source === 'webllm') {
        loadWebLlmModels();
@@ -13,6 +13,7 @@
                    <option value="cohere">Cohere</option>
                    <option value="extras">Extras (deprecated)</option>
                    <option value="palm">Google AI Studio</option>
+                    <option value="vertexai">Google Vertex AI</option>
                    <option value="koboldcpp">KoboldCpp</option>
                    <option value="llamacpp">llama.cpp</option>
                    <option value="transformers" data-i18n="Local (Transformers)">Local (Transformers)</option>
@@ -136,6 +137,7 @@
                    Vectorization Model
                </label>
                <select id="vectors_google_model" class="text_pole">
+                    <option value="gemini-embedding-001">gemini-embedding-001</option>
                    <option value="gemini-embedding-exp-03-07">gemini-embedding-exp-03-07</option>
                    <option value="text-embedding-004">text-embedding-004</option>
                    <option value="embedding-001">embedding-001</option>
@@ -11,7 +11,8 @@ import { getNomicAIBatchVector, getNomicAIVector } from '../vectors/nomicai-vect
 import { getOpenAIVector, getOpenAIBatchVector } from '../vectors/openai-vectors.js';
 import { getTransformersVector, getTransformersBatchVector } from '../vectors/embedding.js';
 import { getExtrasVector, getExtrasBatchVector } from '../vectors/extras-vectors.js';
-import { getMakerSuiteVector, getMakerSuiteBatchVector } from '../vectors/makersuite-vectors.js';
+import { getMakerSuiteVector, getMakerSuiteBatchVector } from '../vectors/google-vectors.js';
+import { getVertexVector, getVertexBatchVector } from '../vectors/google-vectors.js';
 import { getCohereVector, getCohereBatchVector } from '../vectors/cohere-vectors.js';
 import { getLlamaCppVector, getLlamaCppBatchVector } from '../vectors/llamacpp-vectors.js';
 import { getVllmVector, getVllmBatchVector } from '../vectors/vllm-vectors.js';
@@ -32,6 +33,7 @@ const SOURCES = [
    'vllm',
    'webllm',
    'koboldcpp',
+    'vertexai',
 ];

 /**
@@ -56,7 +58,9 @@ async function getVector(source, sourceSettings, text, isQuery, directories) {
        case 'extras':
            return getExtrasVector(text, sourceSettings.extrasUrl, sourceSettings.extrasKey);
        case 'palm':
-            return getMakerSuiteVector(text, directories, sourceSettings.model);
+            return getMakerSuiteVector(text, sourceSettings.model, sourceSettings.request);
+        case 'vertexai':
+            return getVertexVector(text, sourceSettings.model, sourceSettings.request);
        case 'cohere':
            return getCohereVector(text, isQuery, directories, sourceSettings.model);
        case 'llamacpp':
@@ -105,7 +109,10 @@ async function getBatchVector(source, sourceSettings, texts, isQuery, directorie
                results.push(...await getExtrasBatchVector(batch, sourceSettings.extrasUrl, sourceSettings.extrasKey));
                break;
            case 'palm':
-                results.push(...await getMakerSuiteBatchVector(batch, directories, sourceSettings.model));
+                results.push(...await getMakerSuiteBatchVector(batch, sourceSettings.model, sourceSettings.request));
+                break;
+            case 'vertexai':
+                results.push(...await getVertexBatchVector(batch, sourceSettings.model, sourceSettings.request));
                break;
            case 'cohere':
                results.push(...await getCohereBatchVector(batch, isQuery, directories, sourceSettings.model));
@@ -178,8 +185,10 @@ function getSourceSettings(source, request) {
                model: getConfigValue('extensions.models.embedding', ''),
            };
        case 'palm':
+        case 'vertexai':
            return {
                model: String(request.body.model || 'text-embedding-004'),
+                request: request, // Pass the request object to get API key and URL
            };
        case 'mistral':
            return {
@@ -0,0 +1,101 @@
+import fetch from 'node-fetch';
+import { getGoogleApiConfig } from '../endpoints/google.js';
+
+/**
+ * Gets the vectors for the given texts from Google AI Studio.
+ * All texts are sent together in a single `batchEmbedContents` POST request,
+ * with one entry in the `requests` array per text.
+ * @param {string[]} texts - The array of texts to get the vectors for
+ * @param {string} model - The model to use for embedding (sent as `models/<model>` in each request entry)
+ * @param {import('express').Request} request - The request object, passed to getGoogleApiConfig to resolve the URL and headers
+ * @returns {Promise<number[][]>} - The `values` of each entry of the response's `embeddings` array
+ * @throws {Error} If the response status is not OK, or the response body does not contain an `embeddings` array
+ */
+export async function getMakerSuiteBatchVector(texts, model, request) {
+    const { url, headers, apiName } = await getGoogleApiConfig(request, model, 'batchEmbedContents');
+
+    const body = {
+        requests: texts.map(text => ({
+            model: `models/${model}`,
+            content: { parts: [{ text }] },
+        })),
+    };
+
+    const response = await fetch(url, {
+        body: JSON.stringify(body),
+        method: 'POST',
+        headers: headers,
+    });
+
+    if (!response.ok) {
+        const text = await response.text();
+        console.warn(`${apiName} batch request failed`, response.statusText, text);
+        throw new Error(`${apiName} batch request failed`);
+    }
+
+    /** @type {any} */
+    const data = await response.json();
+    if (!Array.isArray(data?.embeddings)) {
+        throw new Error(`${apiName} did not return an array`);
+    }
+
+    const embeddings = data.embeddings.map(embedding => embedding.values);
+    return embeddings;
+}
+
+/**
+ * Gets the vectors for the given texts from Google Vertex AI.
+ * All texts are sent together in a single `predict` POST request,
+ * with one `{ content }` entry in the `instances` array per text.
+ * @param {string[]} texts - The array of texts to get the vectors for
+ * @param {string} model - The model to use for embedding (only used to resolve the API config; it is not part of the request body)
+ * @param {import('express').Request} request - The request object, passed to getGoogleApiConfig to resolve the URL and headers
+ * @returns {Promise<number[][]>} - The `embeddings.values` of each entry of the response's `predictions` array
+ * @throws {Error} If the response status is not OK, or the response body does not contain a `predictions` array
+ */
+export async function getVertexBatchVector(texts, model, request) {
+    const { url, headers, apiName } = await getGoogleApiConfig(request, model, 'predict');
+
+    const body = {
+        instances: texts.map(text => ({ content: text })),
+    };
+
+    const response = await fetch(url, {
+        body: JSON.stringify(body),
+        method: 'POST',
+        headers: headers,
+    });
+
+    if (!response.ok) {
+        const text = await response.text();
+        console.warn(`${apiName} batch request failed`, response.statusText, text);
+        throw new Error(`${apiName} batch request failed`);
+    }
+
+    /** @type {any} */
+    const data = await response.json();
+    if (!Array.isArray(data?.predictions)) {
+        throw new Error(`${apiName} did not return an array`);
+    }
+
+    const embeddings = data.predictions.map(p => p.embeddings.values);
+    return embeddings;
+}
+
+/**
+ * Gets the vector for a single text from Google AI Studio.
+ * Delegates to getMakerSuiteBatchVector with a one-element array and
+ * returns the first vector of its result.
+ * @param {string} text - The text to get the vector for
+ * @param {string} model - The model to use for embedding
+ * @param {import('express').Request} request - The request object, forwarded unchanged to getMakerSuiteBatchVector
+ * @returns {Promise<number[]>} - The vector for the text (undefined if the batch result is empty)
+ * @throws {Error} Propagates any error thrown by getMakerSuiteBatchVector
+ */
+export async function getMakerSuiteVector(text, model, request) {
+    const [embedding] = await getMakerSuiteBatchVector([text], model, request);
+    return embedding;
+}
+
+/**
+ * Gets the vector for a single text from Google Vertex AI.
+ * Delegates to getVertexBatchVector with a one-element array and
+ * returns the first vector of its result.
+ * @param {string} text - The text to get the vector for
+ * @param {string} model - The model to use for embedding
+ * @param {import('express').Request} request - The request object, forwarded unchanged to getVertexBatchVector
+ * @returns {Promise<number[]>} - The vector for the text (undefined if the batch result is empty)
+ * @throws {Error} Propagates any error thrown by getVertexBatchVector
+ */
+export async function getVertexVector(text, model, request) {
+    const [embedding] = await getVertexBatchVector([text], model, request);
+    return embedding;
+}
@@ -1,61 +0,0 @@
-import fetch from 'node-fetch';
-import { SECRET_KEYS, readSecret } from '../endpoints/secrets.js';
-import { trimTrailingSlash } from '../util.js';
-const API_MAKERSUITE = 'https://generativelanguage.googleapis.com';
-
-/**
- * Gets the vector for the given text from gecko model
- * @param {string[]} texts - The array of texts to get the vector for
- * @param {import('../users.js').UserDirectoryList} directories - The directories object for the user
- * @param {string} model - The model to use for embedding
- * @returns {Promise<number[][]>} - The array of vectors for the texts
- */
-export async function getMakerSuiteBatchVector(texts, directories, model) {
-    const promises = texts.map(text => getMakerSuiteVector(text, directories, model));
-    return await Promise.all(promises);
-}
-
-/**
- * Gets the vector for the given text from Gemini API text-embedding-004 model
- * @param {string} text - The text to get the vector for
- * @param {import('../users.js').UserDirectoryList} directories - The directories object for the user
- * @param {string} model - The model to use for embedding (default is 'text-embedding-004')
- * @returns {Promise<number[]>} - The vector for the text
- */
-export async function getMakerSuiteVector(text, directories, model) {
-    const key = readSecret(directories, SECRET_KEYS.MAKERSUITE);
-
-    if (!key) {
-        console.warn('No Google AI Studio key found');
-        throw new Error('No Google AI Studio key found');
-    }
-
-    const apiUrl = trimTrailingSlash(API_MAKERSUITE);
-    const url = `${apiUrl}/v1beta/models/${model}:embedContent?key=${key}`;
-    const body = {
-        content: {
-            parts: [
-                { text: text },
-            ],
-        },
-    };
-
-    const response = await fetch(url, {
-        body: JSON.stringify(body),
-        method: 'POST',
-        headers: {
-            'Content-Type': 'application/json',
-        },
-    });
-
-    if (!response.ok) {
-        const text = await response.text();
-        console.warn('Google AI Studio request failed', response.statusText, text);
-        throw new Error('Google AI Studio request failed');
-    }
-
-    /** @type {any} */
-    const data = await response.json();
-    // noinspection JSValidateTypes
-    return data['embedding']['values'];
-}