Add vectorization via Vertex AI (#4311)
* Add vectorization via Vertex AI
* Enable batching for Google vectors
* Add batch embeddings for Vertex
* Split embed methods for Vertex/AI Studio
This commit is contained in:
@@ -36,6 +36,7 @@ import { slashCommandReturnHelper } from '../../slash-commands/SlashCommandRetur
|
||||
import { generateWebLlmChatPrompt, isWebLlmSupported } from '../shared.js';
|
||||
import { WebLlmVectorProvider } from './webllm.js';
|
||||
import { removeReasoningFromString } from '../../reasoning.js';
|
||||
import { oai_settings } from '../../openai.js';
|
||||
|
||||
/**
|
||||
* @typedef {object} HashedMessage
|
||||
@@ -50,7 +51,7 @@ export const EXTENSION_PROMPT_TAG = '3_vectors';
|
||||
export const EXTENSION_PROMPT_TAG_DB = '4_vectors_data_bank';
|
||||
|
||||
// Force solo chunks for sources that don't support batching.
|
||||
const getBatchSize = () => {
    // Sources in this list get a batch size of 1; every other source gets 5.
    const soloSources = ['transformers', 'palm', 'ollama'];
    if (soloSources.includes(settings.source)) {
        return 1;
    }
    return 5;
};
|
||||
const getBatchSize = () => {
    // Sources in this list get a batch size of 1; every other source gets 5.
    const soloSources = ['transformers', 'ollama'];
    if (soloSources.includes(settings.source)) {
        return 1;
    }
    return 5;
};
|
||||
|
||||
const settings = {
|
||||
// For both
|
||||
@@ -796,6 +797,14 @@ function getVectorsRequestBody(args = {}) {
|
||||
break;
|
||||
case 'palm':
|
||||
body.model = extension_settings.vectors.google_model;
|
||||
body.api = 'makersuite';
|
||||
break;
|
||||
case 'vertexai':
|
||||
body.model = extension_settings.vectors.google_model;
|
||||
body.api = 'vertexai';
|
||||
body.vertexai_auth_mode = oai_settings.vertexai_auth_mode;
|
||||
body.vertexai_region = oai_settings.vertexai_region;
|
||||
body.vertexai_express_project_id = oai_settings.vertexai_express_project_id;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -881,6 +890,7 @@ async function insertVectorItems(collectionId, items) {
|
||||
function throwIfSourceInvalid() {
|
||||
if (settings.source === 'openai' && !secret_state[SECRET_KEYS.OPENAI] ||
|
||||
settings.source === 'palm' && !secret_state[SECRET_KEYS.MAKERSUITE] ||
|
||||
settings.source === 'vertexai' && !secret_state[SECRET_KEYS.VERTEXAI] && !secret_state[SECRET_KEYS.VERTEXAI_SERVICE_ACCOUNT] ||
|
||||
settings.source === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] ||
|
||||
settings.source === 'togetherai' && !secret_state[SECRET_KEYS.TOGETHERAI] ||
|
||||
settings.source === 'nomicai' && !secret_state[SECRET_KEYS.NOMICAI] ||
|
||||
@@ -1098,7 +1108,7 @@ function toggleSettings() {
|
||||
$('#nomicai_apiKey').toggle(settings.source === 'nomicai');
|
||||
$('#webllm_vectorsModel').toggle(settings.source === 'webllm');
|
||||
$('#koboldcpp_vectorsModel').toggle(settings.source === 'koboldcpp');
|
||||
$('#google_vectorsModel').toggle(settings.source === 'palm');
|
||||
$('#google_vectorsModel').toggle(settings.source === 'palm' || settings.source === 'vertexai');
|
||||
$('#vector_altEndpointUrl').toggle(vectorApiRequiresUrl.includes(settings.source));
|
||||
if (settings.source === 'webllm') {
|
||||
loadWebLlmModels();
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
<option value="cohere">Cohere</option>
|
||||
<option value="extras">Extras (deprecated)</option>
|
||||
<option value="palm">Google AI Studio</option>
|
||||
<option value="vertexai">Google Vertex AI</option>
|
||||
<option value="koboldcpp">KoboldCpp</option>
|
||||
<option value="llamacpp">llama.cpp</option>
|
||||
<option value="transformers" data-i18n="Local (Transformers)">Local (Transformers)</option>
|
||||
@@ -136,6 +137,7 @@
|
||||
Vectorization Model
|
||||
</label>
|
||||
<select id="vectors_google_model" class="text_pole">
|
||||
<option value="gemini-embedding-001">gemini-embedding-001</option>
|
||||
<option value="gemini-embedding-exp-03-07">gemini-embedding-exp-03-07</option>
|
||||
<option value="text-embedding-004">text-embedding-004</option>
|
||||
<option value="embedding-001">embedding-001</option>
|
||||
|
||||
@@ -11,7 +11,8 @@ import { getNomicAIBatchVector, getNomicAIVector } from '../vectors/nomicai-vect
|
||||
import { getOpenAIVector, getOpenAIBatchVector } from '../vectors/openai-vectors.js';
|
||||
import { getTransformersVector, getTransformersBatchVector } from '../vectors/embedding.js';
|
||||
import { getExtrasVector, getExtrasBatchVector } from '../vectors/extras-vectors.js';
|
||||
import { getMakerSuiteVector, getMakerSuiteBatchVector } from '../vectors/makersuite-vectors.js';
|
||||
import { getMakerSuiteVector, getMakerSuiteBatchVector } from '../vectors/google-vectors.js';
|
||||
import { getVertexVector, getVertexBatchVector } from '../vectors/google-vectors.js';
|
||||
import { getCohereVector, getCohereBatchVector } from '../vectors/cohere-vectors.js';
|
||||
import { getLlamaCppVector, getLlamaCppBatchVector } from '../vectors/llamacpp-vectors.js';
|
||||
import { getVllmVector, getVllmBatchVector } from '../vectors/vllm-vectors.js';
|
||||
@@ -32,6 +33,7 @@ const SOURCES = [
|
||||
'vllm',
|
||||
'webllm',
|
||||
'koboldcpp',
|
||||
'vertexai',
|
||||
];
|
||||
|
||||
/**
|
||||
@@ -56,7 +58,9 @@ async function getVector(source, sourceSettings, text, isQuery, directories) {
|
||||
case 'extras':
|
||||
return getExtrasVector(text, sourceSettings.extrasUrl, sourceSettings.extrasKey);
|
||||
case 'palm':
|
||||
return getMakerSuiteVector(text, directories, sourceSettings.model);
|
||||
return getMakerSuiteVector(text, sourceSettings.model, sourceSettings.request);
|
||||
case 'vertexai':
|
||||
return getVertexVector(text, sourceSettings.model, sourceSettings.request);
|
||||
case 'cohere':
|
||||
return getCohereVector(text, isQuery, directories, sourceSettings.model);
|
||||
case 'llamacpp':
|
||||
@@ -105,7 +109,10 @@ async function getBatchVector(source, sourceSettings, texts, isQuery, directorie
|
||||
results.push(...await getExtrasBatchVector(batch, sourceSettings.extrasUrl, sourceSettings.extrasKey));
|
||||
break;
|
||||
case 'palm':
|
||||
results.push(...await getMakerSuiteBatchVector(batch, directories, sourceSettings.model));
|
||||
results.push(...await getMakerSuiteBatchVector(batch, sourceSettings.model, sourceSettings.request));
|
||||
break;
|
||||
case 'vertexai':
|
||||
results.push(...await getVertexBatchVector(batch, sourceSettings.model, sourceSettings.request));
|
||||
break;
|
||||
case 'cohere':
|
||||
results.push(...await getCohereBatchVector(batch, isQuery, directories, sourceSettings.model));
|
||||
@@ -178,8 +185,10 @@ function getSourceSettings(source, request) {
|
||||
model: getConfigValue('extensions.models.embedding', ''),
|
||||
};
|
||||
case 'palm':
|
||||
case 'vertexai':
|
||||
return {
|
||||
model: String(request.body.model || 'text-embedding-004'),
|
||||
request: request, // Pass the request object to get API key and URL
|
||||
};
|
||||
case 'mistral':
|
||||
return {
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
import fetch from 'node-fetch';
|
||||
import { getGoogleApiConfig } from '../endpoints/google.js';
|
||||
|
||||
/**
 * Gets the vectors for the given texts from Google AI Studio using one
 * `batchEmbedContents` request.
 * @param {string[]} texts - The array of texts to get the vectors for
 * @param {string} model - The model to use for embedding
 * @param {import('express').Request} request - The request object to get API key and URL
 * @returns {Promise<number[][]>} - The array of vectors, one per input text
 * @throws {Error} If the request fails or the response does not contain one vector per text
 */
export async function getMakerSuiteBatchVector(texts, model, request) {
    // Nothing to embed: avoid resolving credentials and sending an empty batch.
    if (texts.length === 0) {
        return [];
    }

    const { url, headers, apiName } = await getGoogleApiConfig(request, model, 'batchEmbedContents');

    const body = {
        requests: texts.map(text => ({
            model: `models/${model}`,
            content: { parts: [{ text }] },
        })),
    };

    const response = await fetch(url, {
        body: JSON.stringify(body),
        method: 'POST',
        headers: headers,
    });

    if (!response.ok) {
        const text = await response.text();
        console.warn(`${apiName} batch request failed`, response.statusText, text);
        throw new Error(`${apiName} batch request failed`);
    }

    /** @type {any} */
    const data = await response.json();
    if (!Array.isArray(data?.embeddings)) {
        throw new Error(`${apiName} did not return an array`);
    }

    // Validate every entry so callers never receive `undefined` in place of a vector.
    const embeddings = data.embeddings.map(embedding => embedding?.values);
    const isComplete = embeddings.length === texts.length && embeddings.every(values => Array.isArray(values));
    if (!isComplete) {
        throw new Error(`${apiName} returned malformed embeddings`);
    }

    return embeddings;
}
|
||||
|
||||
/**
 * Gets the vectors for the given texts from Google Vertex AI using one
 * `predict` request.
 * @param {string[]} texts - The array of texts to get the vectors for
 * @param {string} model - The model to use for embedding
 * @param {import('express').Request} request - The request object to get API key and URL
 * @returns {Promise<number[][]>} - The array of vectors, one per input text
 * @throws {Error} If the request fails or the response does not contain one vector per text
 */
export async function getVertexBatchVector(texts, model, request) {
    // Nothing to embed: avoid resolving credentials and sending an empty batch.
    if (texts.length === 0) {
        return [];
    }

    const { url, headers, apiName } = await getGoogleApiConfig(request, model, 'predict');

    // NOTE(review): some Vertex embedding models may cap the number of instances
    // per request — confirm against the documentation for the selected model.
    const body = {
        instances: texts.map(text => ({ content: text })),
    };

    const response = await fetch(url, {
        body: JSON.stringify(body),
        method: 'POST',
        headers: headers,
    });

    if (!response.ok) {
        const text = await response.text();
        console.warn(`${apiName} batch request failed`, response.statusText, text);
        throw new Error(`${apiName} batch request failed`);
    }

    /** @type {any} */
    const data = await response.json();
    if (!Array.isArray(data?.predictions)) {
        throw new Error(`${apiName} did not return an array`);
    }

    // Validate every entry so callers never receive `undefined` in place of a vector
    // and a prediction without `embeddings` does not surface as an opaque TypeError.
    const embeddings = data.predictions.map(prediction => prediction?.embeddings?.values);
    const isComplete = embeddings.length === texts.length && embeddings.every(values => Array.isArray(values));
    if (!isComplete) {
        throw new Error(`${apiName} returned malformed embeddings`);
    }

    return embeddings;
}
|
||||
|
||||
/**
 * Gets the vector for the given text from Google AI Studio.
 * Delegates to getMakerSuiteBatchVector with a single-element batch.
 * @param {string} text - The text to get the vector for
 * @param {string} model - The model to use for embedding
 * @param {import('express').Request} request - The request object to get API key and URL
 * @returns {Promise<number[]>} - The vector for the text
 * @throws {Error} If the batch call fails or yields no vector for the text
 */
export async function getMakerSuiteVector(text, model, request) {
    const [embedding] = await getMakerSuiteBatchVector([text], model, request);

    // An empty or malformed batch result would otherwise leak `undefined` to the caller.
    if (!Array.isArray(embedding)) {
        throw new Error('Google AI Studio did not return an embedding');
    }

    return embedding;
}
|
||||
|
||||
/**
 * Gets the vector for the given text from Google Vertex AI.
 * Delegates to getVertexBatchVector with a single-element batch.
 * @param {string} text - The text to get the vector for
 * @param {string} model - The model to use for embedding
 * @param {import('express').Request} request - The request object to get API key and URL
 * @returns {Promise<number[]>} - The vector for the text
 * @throws {Error} If the batch call fails or yields no vector for the text
 */
export async function getVertexVector(text, model, request) {
    const [embedding] = await getVertexBatchVector([text], model, request);

    // An empty or malformed batch result would otherwise leak `undefined` to the caller.
    if (!Array.isArray(embedding)) {
        throw new Error('Google Vertex AI did not return an embedding');
    }

    return embedding;
}
|
||||
@@ -1,61 +0,0 @@
|
||||
import fetch from 'node-fetch';
|
||||
import { SECRET_KEYS, readSecret } from '../endpoints/secrets.js';
|
||||
import { trimTrailingSlash } from '../util.js';
|
||||
const API_MAKERSUITE = 'https://generativelanguage.googleapis.com';
|
||||
|
||||
/**
 * Gets the vectors for the given texts by calling getMakerSuiteVector once per
 * text and waiting for all of the calls to finish.
 * @param {string[]} texts - The array of texts to get the vectors for
 * @param {import('../users.js').UserDirectoryList} directories - The directories object for the user
 * @param {string} model - The model to use for embedding
 * @returns {Promise<number[][]>} - The array of vectors for the texts
 */
export async function getMakerSuiteBatchVector(texts, directories, model) {
    // All per-text requests are started before any of them is awaited.
    return Promise.all(texts.map((entry) => getMakerSuiteVector(entry, directories, model)));
}
|
||||
|
||||
/**
 * Gets the vector for the given text from the Google AI Studio `embedContent` endpoint.
 * @param {string} text - The text to get the vector for
 * @param {import('../users.js').UserDirectoryList} directories - The directories object for the user
 * @param {string} model - The model to use for embedding
 * @returns {Promise<number[]>} - The vector for the text
 * @throws {Error} If no API key is stored, the request fails, or the response has no embedding values
 */
export async function getMakerSuiteVector(text, directories, model) {
    const key = readSecret(directories, SECRET_KEYS.MAKERSUITE);

    if (!key) {
        console.warn('No Google AI Studio key found');
        throw new Error('No Google AI Studio key found');
    }

    const apiUrl = trimTrailingSlash(API_MAKERSUITE);
    // Build the query string through URLSearchParams so the key is always percent-encoded.
    const url = new URL(`${apiUrl}/v1beta/models/${model}:embedContent`);
    url.searchParams.set('key', key);

    const body = {
        content: {
            parts: [
                { text: text },
            ],
        },
    };

    const response = await fetch(url.href, {
        body: JSON.stringify(body),
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
    });

    if (!response.ok) {
        const text = await response.text();
        console.warn('Google AI Studio request failed', response.statusText, text);
        throw new Error('Google AI Studio request failed');
    }

    /** @type {any} */
    const data = await response.json();
    const values = data?.embedding?.values;

    // Fail with a descriptive error instead of an opaque TypeError on an unexpected payload.
    if (!Array.isArray(values)) {
        throw new Error('Google AI Studio returned a malformed embedding');
    }

    return values;
}
|
||||
Reference in New Issue
Block a user