Enhance Vectorize All process with error handling, retries and minor improvements (#5479)
* fix (vectors): Fixed Vectorize All progress report and ETA issues
* fix (vectors): Added strip reasoning block function for extras/WebLLM summaries
* feat(vectors): Retry failed summaries with configurable attempts
* feat(vectors): Skip summarization for short messages
* feat(vectors): Skip failed messages during Vectorize All instead of aborting all
  Prevents the "Vectorize All" process from stopping on single-message errors. Failed items are now skipped and reported at the end of the session rather than aborting the entire sync.
  Summarization: Implements per-message retries; failures use the original text as a fallback or mark for skipping.
  Vector Insertion: Differentiates fatal configuration errors (abort) from transient batch failures (skip and notify).
* Resolved: 'account_id_missing' is missing
* Resolved: Refactored out summarizeSkipOnFailure() functionality into summarize() via options parameter
* Fix eslint and type checks
* feat(vectors): add types to maps and sets, improve summarize function options
---------
Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
@@ -44,6 +44,7 @@ import { oai_settings } from '../../openai.js';
|
||||
* @property {string} text - The hashed message text
|
||||
* @property {number} hash - The hash used as the vector key
|
||||
* @property {number} index - The index of the message in the chat
|
||||
* @property {boolean} [summaryFailed] - Whether summarization failed for this message (used internally to skip messages that fail summarization)
|
||||
*/
|
||||
|
||||
const MODULE_NAME = 'vectors';
|
||||
@@ -77,6 +78,8 @@ const settings = {
|
||||
summarize_sent: false,
|
||||
summary_source: 'main',
|
||||
summary_prompt: 'Ignore previous instructions. Summarize the most important parts of the message. Limit yourself to 250 words or less. Your response should include nothing but the summary.',
|
||||
summary_retries: 2,
|
||||
summary_threshold: 200,
|
||||
force_chunk_delimiter: '',
|
||||
|
||||
// For chats
|
||||
@@ -118,7 +121,21 @@ const settings = {
|
||||
|
||||
const moduleWorker = new ModuleWorkerWrapper(synchronizeChat);
|
||||
const webllmProvider = new WebLlmVectorProvider();
|
||||
/**
|
||||
* Cache for storing summaries of messages by their hash.
|
||||
* @type {Map<number, string>}
|
||||
*/
|
||||
const cachedSummaries = new Map();
|
||||
/**
|
||||
* Hashes skipped this Vectorize All session (summary or embed failure). Cleared on next Vectorize All click.
|
||||
* @type {Set<number>}
|
||||
*/
|
||||
const skippedHashes = new Set();
|
||||
/**
|
||||
* Error causes treated as fatal — abort Vectorize All rather than skip.
|
||||
* @type {Set<string>}
|
||||
*/
|
||||
const FATAL_CAUSES = new Set(['account_id_missing', 'api_key_missing', 'api_url_missing', 'api_model_missing', 'extras_module_missing', 'webllm_not_supported', 'summary_endpoint_invalid']);
|
||||
const vectorApiRequiresUrl = ['llamacpp', 'vllm', 'ollama', 'koboldcpp'];
|
||||
|
||||
/**
|
||||
@@ -199,10 +216,12 @@ async function onVectorizeAllClick() {
|
||||
// Clear all cached summaries to ensure that new ones are created
|
||||
// upon request of a full vectorise
|
||||
cachedSummaries.clear();
|
||||
skippedHashes.clear();
|
||||
|
||||
const batchSize = getBatchSize();
|
||||
const elapsedLog = [];
|
||||
let finished = false;
|
||||
let initialPending = null; // total items pending at the start of this run — set on first sync return
|
||||
$('#vectorize_progress').show();
|
||||
$('#vectorize_progress_percent').text('0');
|
||||
$('#vectorize_progress_eta').text('...');
|
||||
@@ -216,16 +235,27 @@ async function onVectorizeAllClick() {
|
||||
const startTime = Date.now();
|
||||
const remaining = await synchronizeChat(batchSize);
|
||||
const elapsed = Date.now() - startTime;
|
||||
|
||||
if (remaining === null) {
|
||||
// synchronizeChat already surfaced a toast; bail out of the loop.
|
||||
throw new Error('Vectorization aborted');
|
||||
}
|
||||
|
||||
elapsedLog.push(elapsed);
|
||||
finished = remaining <= 0;
|
||||
|
||||
const total = getContext().chat.length;
|
||||
const processed = total - remaining;
|
||||
const processedPercent = Math.round((processed / total) * 100); // percentage of the work done
|
||||
if (initialPending === null) {
|
||||
initialPending = Math.max(0, remaining + batchSize);
|
||||
}
|
||||
const pending = Math.max(0, remaining);
|
||||
const processed = Math.max(0, initialPending - pending);
|
||||
const processedPercent = initialPending > 0
|
||||
? Math.min(100, Math.round((processed / initialPending) * 100))
|
||||
: 100;
|
||||
const lastElapsed = elapsedLog.slice(-5); // last 5 elapsed times
|
||||
const averageElapsed = lastElapsed.reduce((a, b) => a + b, 0) / lastElapsed.length; // average time needed to process one batch
|
||||
const pace = averageElapsed / batchSize; // time needed to process one item
|
||||
const remainingTime = Math.round(pace * remaining / 1000);
|
||||
const remainingTime = Math.round(pace * pending / 1000);
|
||||
|
||||
$('#vectorize_progress_percent').text(processedPercent);
|
||||
$('#vectorize_progress_eta').text(remainingTime);
|
||||
@@ -234,6 +264,9 @@ async function onVectorizeAllClick() {
|
||||
throw new Error('Chat changed');
|
||||
}
|
||||
}
|
||||
if (skippedHashes.size > 0) {
|
||||
toastr.warning(`${skippedHashes.size} message(s) skipped due to errors. Click Vectorize All again to retry.`, 'Vectorization partial');
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Vectors: Failed to vectorize all', error);
|
||||
} finally {
|
||||
@@ -304,7 +337,7 @@ async function summarizeExtra(element) {
|
||||
|
||||
if (apiResult.ok) {
|
||||
const data = await apiResult.json();
|
||||
element.text = data.summary;
|
||||
element.text = removeReasoningFromString(data.summary);
|
||||
}
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
@@ -336,45 +369,70 @@ async function summarizeWebLLM(element) {
|
||||
}
|
||||
|
||||
const messages = [{ role: 'system', content: settings.summary_prompt }, { role: 'user', content: element.text }];
|
||||
element.text = await generateWebLlmChatPrompt(messages);
|
||||
element.text = removeReasoningFromString(await generateWebLlmChatPrompt(messages));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Summarizes messages using the chosen method.
|
||||
* @param {HashedMessage[]} hashedMessages Array of hashed messages
|
||||
* Runs one summarization attempt for a single element via the chosen endpoint.
|
||||
* @param {HashedMessage} element
|
||||
* @param {string} endpoint
|
||||
* @returns {Promise<boolean>} Whether the attempt succeeded.
|
||||
*/
|
||||
async function summarizeOne(element, endpoint) {
// Dispatch to the summarizer for the requested endpoint and pass its result straight through.
|
||||
switch (endpoint) {
|
||||
case 'main':
|
||||
return await summarizeMain(element);
|
||||
case 'extras':
|
||||
return await summarizeExtra(element);
|
||||
case 'webllm':
|
||||
return await summarizeWebLLM(element);
|
||||
default:
|
||||
// 'summary_endpoint_invalid' is listed in FATAL_CAUSES, so summarize() rethrows this error instead of retrying it.
throw new Error(`Unsupported summary endpoint: ${endpoint}`, { cause: 'summary_endpoint_invalid' });
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Summarizes messages using the chosen method. On success, each element's text is
|
||||
* replaced by its summary (via live call or cache). If an element still fails after
|
||||
* `settings.summary_retries` attempts, throws unless `options.skipOnFailure` is set.
|
||||
* @param {HashedMessage[]} hashedMessages Array of hashed messages (mutated in place)
|
||||
* @param {string} endpoint Type of endpoint to use
|
||||
* @param {Object} [options] Options for summarization behavior
|
||||
* @param {boolean} [options.skipOnFailure=false] If true, tags failed elements with `summaryFailed = true` instead of throwing
|
||||
* @returns {Promise<HashedMessage[]>} Summarized messages
|
||||
*/
|
||||
async function summarize(hashedMessages, endpoint = 'main') {
|
||||
async function summarize(hashedMessages, endpoint = 'main', { skipOnFailure = false } = {}) {
|
||||
const maxAttempts = Math.max(1, Number(settings.summary_retries) || 1);
|
||||
for (const element of hashedMessages) {
|
||||
const cachedSummary = cachedSummaries.get(element.hash);
|
||||
if (!cachedSummary) {
|
||||
let success = true;
|
||||
switch (endpoint) {
|
||||
case 'main':
|
||||
success = await summarizeMain(element);
|
||||
break;
|
||||
case 'extras':
|
||||
success = await summarizeExtra(element);
|
||||
break;
|
||||
case 'webllm':
|
||||
success = await summarizeWebLLM(element);
|
||||
break;
|
||||
default:
|
||||
console.error('Unsupported endpoint', endpoint);
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
if (success) {
|
||||
cachedSummaries.set(element.hash, element.text);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (cachedSummary) {
|
||||
element.text = cachedSummary;
|
||||
continue;
|
||||
}
|
||||
|
||||
let success = false;
|
||||
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
||||
try {
|
||||
success = await summarizeOne(element, endpoint);
|
||||
if (success) break;
|
||||
} catch (error) {
|
||||
if (FATAL_CAUSES.has(error?.cause)) throw error;
|
||||
console.warn(`Vectors: summary attempt ${attempt}/${maxAttempts} threw for hash ${element.hash}`, error);
|
||||
}
|
||||
console.warn(`Vectors: summary attempt ${attempt}/${maxAttempts} failed for hash ${element.hash}`);
|
||||
}
|
||||
if (!success) {
|
||||
if (skipOnFailure) {
|
||||
console.warn(`Vectors: summarization exhausted ${maxAttempts} attempt(s) for hash ${element.hash} — marking for skip`);
|
||||
element.summaryFailed = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
throw new Error(`Summarization failed after ${maxAttempts} attempt(s)`, { cause: 'summary_failed' });
|
||||
}
|
||||
cachedSummaries.set(element.hash, element.text);
|
||||
}
|
||||
return hashedMessages;
|
||||
}
|
||||
@@ -401,21 +459,43 @@ async function synchronizeChat(batchSize = 5) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/** @type {HashedMessage[]} */
|
||||
const hashedMessages = context.chat.filter(x => settings.keep_hidden || !x.is_system).map(x => ({ text: String(substituteParams(x.mes)), hash: getStringHash(substituteParams(x.mes)), index: context.chat.indexOf(x) }));
|
||||
const hashesInCollection = await getSavedHashes(chatId);
|
||||
|
||||
let newVectorItems = hashedMessages.filter(x => !hashesInCollection.includes(x.hash));
|
||||
const newVectorItems = hashedMessages
|
||||
.filter(x => !hashesInCollection.includes(x.hash))
|
||||
.filter(x => !skippedHashes.has(x.hash));
|
||||
const deletedHashes = hashesInCollection.filter(x => !hashedMessages.some(y => y.hash === x));
|
||||
|
||||
let batch = newVectorItems.slice(0, batchSize);
|
||||
|
||||
if (settings.summarize) {
|
||||
newVectorItems = await summarize(newVectorItems, settings.summary_source);
|
||||
const minLength = Math.max(0, Number(settings.summary_threshold) || 0);
|
||||
const toSummarize = minLength > 0 ? batch.filter(x => x.text.length >= minLength) : batch;
|
||||
if (toSummarize.length > 0) {
|
||||
await summarize(toSummarize, settings.summary_source, { skipOnFailure: true });
|
||||
const failed = toSummarize.filter(x => x.summaryFailed);
|
||||
if (failed.length > 0) {
|
||||
for (const item of failed) skippedHashes.add(item.hash);
|
||||
batch = batch.filter(x => !x.summaryFailed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (newVectorItems.length > 0) {
|
||||
const chunkedBatch = splitByChunks(newVectorItems.slice(0, batchSize));
|
||||
if (batch.length > 0) {
|
||||
const chunkedBatch = splitByChunks(batch);
|
||||
|
||||
console.log(`Vectors: Found ${newVectorItems.length} new items. Processing ${batchSize}...`);
|
||||
await insertVectorItems(chatId, chunkedBatch);
|
||||
console.log(`Vectors: Found ${newVectorItems.length} new items. Processing ${batch.length}...`);
|
||||
try {
|
||||
await insertVectorItems(chatId, chunkedBatch);
|
||||
} catch (insertError) {
|
||||
if (FATAL_CAUSES.has(insertError?.cause)) {
|
||||
throw insertError;
|
||||
}
|
||||
console.warn('Vectors: insert failed for batch — marking for skip', insertError);
|
||||
for (const item of batch) skippedHashes.add(item.hash);
|
||||
}
|
||||
}
|
||||
|
||||
if (deletedHashes.length > 0) {
|
||||
@@ -444,6 +524,10 @@ async function synchronizeChat(batchSize = 5) {
|
||||
return 'WebLLM extension is not installed or the model is not set.';
|
||||
case 'account_id_missing':
|
||||
return 'Workers AI account ID is required. Save it in the "API Connections" panel.';
|
||||
case 'summary_endpoint_invalid':
|
||||
return 'Summarization endpoint is not supported.';
|
||||
case 'summary_failed':
|
||||
return 'Summarization failed after the configured number of retries.';
|
||||
default:
|
||||
return 'Check server console for more details';
|
||||
}
|
||||
@@ -453,7 +537,7 @@ async function synchronizeChat(batchSize = 5) {
|
||||
|
||||
const message = getErrorMessage(error.cause);
|
||||
toastr.error(message, 'Vectorization failed', { preventDuplicates: true });
|
||||
return -1;
|
||||
return null;
|
||||
} finally {
|
||||
syncBlocked = false;
|
||||
}
|
||||
@@ -827,7 +911,11 @@ async function getQueryText(chat, initiator) {
|
||||
.slice(0, settings.query);
|
||||
|
||||
if (initiator === 'chat' && settings.enabled_chats && settings.summarize && settings.summarize_sent) {
|
||||
hashedMessages = await summarize(hashedMessages, settings.summary_source);
|
||||
const minLength = Math.max(0, Number(settings.summary_threshold) || 0);
|
||||
const toSummarize = minLength > 0 ? hashedMessages.filter(x => x.text.length >= minLength) : hashedMessages;
|
||||
if (toSummarize.length > 0) {
|
||||
await summarize(toSummarize, settings.summary_source, { skipOnFailure: true });
|
||||
}
|
||||
}
|
||||
|
||||
const queryText = hashedMessages.map(x => x.text).join('\n');
|
||||
@@ -1830,6 +1918,20 @@ export async function init() {
|
||||
saveSettingsDebounced();
|
||||
});
|
||||
|
||||
$('#vectors_summary_retries').val(settings.summary_retries).on('input', () => {
|
||||
const parsed = Number($('#vectors_summary_retries').val());
|
||||
settings.summary_retries = Number.isFinite(parsed) && parsed >= 1 ? Math.floor(parsed) : 1;
|
||||
Object.assign(extension_settings.vectors, settings);
|
||||
saveSettingsDebounced();
|
||||
});
|
||||
|
||||
$('#vectors_summary_threshold').val(settings.summary_threshold).on('input', () => {
|
||||
const parsed = Number($('#vectors_summary_threshold').val());
|
||||
settings.summary_threshold = Number.isFinite(parsed) && parsed >= 0 ? Math.floor(parsed) : 0;
|
||||
Object.assign(extension_settings.vectors, settings);
|
||||
saveSettingsDebounced();
|
||||
});
|
||||
|
||||
$('#vectors_message_chunk_size').val(settings.message_chunk_size).on('input', () => {
|
||||
settings.message_chunk_size = Number($('#vectors_message_chunk_size').val());
|
||||
Object.assign(extension_settings.vectors, settings);
|
||||
|
||||
@@ -493,6 +493,16 @@
|
||||
<label for="vectors_summary_prompt" title="Summary Prompt:">Summary Prompt:</label>
|
||||
<small data-i18n="Only used when Main API or WebLLM Extension is selected.">Only used when Main API or WebLLM Extension is selected.</small>
|
||||
<textarea id="vectors_summary_prompt" class="text_pole textarea_compact" rows="6" placeholder="This prompt will be sent to AI to request the summary generation."></textarea>
|
||||
|
||||
<label for="vectors_summary_retries" title="Number of summarization attempts per message before giving up on that message.">
|
||||
<span data-i18n="Summarization retries per message">Summarization retries per message</span>
|
||||
</label>
|
||||
<input id="vectors_summary_retries" type="number" class="text_pole widthUnset" min="1" max="10" step="1" />
|
||||
|
||||
<label for="vectors_summary_threshold" title="Messages shorter than this (in characters) are embedded as-is without summarization. Set to 0 to always summarize.">
|
||||
<span data-i18n="Summarization min length (chars)">Summarization min length (chars)</span>
|
||||
</label>
|
||||
<input id="vectors_summary_threshold" type="number" class="text_pole widthUnset" min="0" step="1" />
|
||||
</div>
|
||||
</div>
|
||||
<small data-i18n="Old messages are vectorized gradually as you chat. To process all previous messages, click the button below.">
|
||||
|
||||
Reference in New Issue
Block a user