Enhance Vectorize All process with error handling, retries and minor improvements (#5479)

* fix (vectors): Fixed Vectorize All progress report and ETA issues

* fix (vectors): Added strip reasoning block function for extras/WebLLM summaries

* feat(vectors): Retry failed summaries with configurable attempts

* feat(vectors): Skip summarization for short messages

* feat(vectors): Skip failed messages during Vectorize All instead of aborting all

Prevents the "Vectorize All" process from stopping on single-message
errors. Failed items are now skipped and reported at the end of the
session rather than aborting the entire sync.

Summarization: Implements per-message retries; failures use the original
text as a fallback or mark for skipping.

Vector Insertion: Differentiates fatal configuration errors (abort) from
transient batch failures (skip and notify).

* Resolved: 'account_id_missing' is missing

* Resolved: Refactored out summarizeSkipOnFailure() functionality into summarize() via options parameter

* Fix eslint and type checks

* feat(vectors): add types to maps and sets, improve summarize function options

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
TanJeeSchuan
2026-04-20 07:33:12 +08:00
committed by GitHub
parent 8aeda4a101
commit e5d4ff5fae
2 changed files with 152 additions and 40 deletions
+142 -40
View File
@@ -44,6 +44,7 @@ import { oai_settings } from '../../openai.js';
* @property {string} text - The hashed message text
* @property {number} hash - The hash used as the vector key
* @property {number} index - The index of the message in the chat
* @property {boolean} [summaryFailed] - Whether summarization failed for this message (used internally to skip messages that fail summarization)
*/
const MODULE_NAME = 'vectors';
@@ -77,6 +78,8 @@ const settings = {
summarize_sent: false,
summary_source: 'main',
summary_prompt: 'Ignore previous instructions. Summarize the most important parts of the message. Limit yourself to 250 words or less. Your response should include nothing but the summary.',
summary_retries: 2,
summary_threshold: 200,
force_chunk_delimiter: '',
// For chats
@@ -118,7 +121,21 @@ const settings = {
const moduleWorker = new ModuleWorkerWrapper(synchronizeChat);
const webllmProvider = new WebLlmVectorProvider();
/**
* Cache for storing summaries of messages by their hash.
* @type {Map<number, string>}
*/
const cachedSummaries = new Map();
/**
* Hashes skipped this Vectorize All session (summary or embed failure). Cleared on next Vectorize All click.
* @type {Set<number>}
*/
const skippedHashes = new Set();
/**
* Error causes treated as fatal — abort Vectorize All rather than skip.
* @type {Set<string>}
*/
const FATAL_CAUSES = new Set(['account_id_missing', 'api_key_missing', 'api_url_missing', 'api_model_missing', 'extras_module_missing', 'webllm_not_supported', 'summary_endpoint_invalid']);
const vectorApiRequiresUrl = ['llamacpp', 'vllm', 'ollama', 'koboldcpp'];
/**
@@ -199,10 +216,12 @@ async function onVectorizeAllClick() {
// Clear all cached summaries to ensure that new ones are created
// upon request of a full vectorise
cachedSummaries.clear();
skippedHashes.clear();
const batchSize = getBatchSize();
const elapsedLog = [];
let finished = false;
let initialPending = null; // total items pending at the start of this run — set on first sync return
$('#vectorize_progress').show();
$('#vectorize_progress_percent').text('0');
$('#vectorize_progress_eta').text('...');
@@ -216,16 +235,27 @@ async function onVectorizeAllClick() {
const startTime = Date.now();
const remaining = await synchronizeChat(batchSize);
const elapsed = Date.now() - startTime;
if (remaining === null) {
// synchronizeChat already surfaced a toast; bail out of the loop.
throw new Error('Vectorization aborted');
}
elapsedLog.push(elapsed);
finished = remaining <= 0;
const total = getContext().chat.length;
const processed = total - remaining;
const processedPercent = Math.round((processed / total) * 100); // percentage of the work done
if (initialPending === null) {
initialPending = Math.max(0, remaining + batchSize);
}
const pending = Math.max(0, remaining);
const processed = Math.max(0, initialPending - pending);
const processedPercent = initialPending > 0
? Math.min(100, Math.round((processed / initialPending) * 100))
: 100;
const lastElapsed = elapsedLog.slice(-5); // last 5 elapsed times
const averageElapsed = lastElapsed.reduce((a, b) => a + b, 0) / lastElapsed.length; // average time needed to process one item
const pace = averageElapsed / batchSize; // time needed to process one item
const remainingTime = Math.round(pace * remaining / 1000);
const remainingTime = Math.round(pace * pending / 1000);
$('#vectorize_progress_percent').text(processedPercent);
$('#vectorize_progress_eta').text(remainingTime);
@@ -234,6 +264,9 @@ async function onVectorizeAllClick() {
throw new Error('Chat changed');
}
}
if (skippedHashes.size > 0) {
toastr.warning(`${skippedHashes.size} message(s) skipped due to errors. Click Vectorize All again to retry.`, 'Vectorization partial');
}
} catch (error) {
console.error('Vectors: Failed to vectorize all', error);
} finally {
@@ -304,7 +337,7 @@ async function summarizeExtra(element) {
if (apiResult.ok) {
const data = await apiResult.json();
element.text = data.summary;
element.text = removeReasoningFromString(data.summary);
}
} catch (error) {
console.log(error);
@@ -336,45 +369,70 @@ async function summarizeWebLLM(element) {
}
const messages = [{ role: 'system', content: settings.summary_prompt }, { role: 'user', content: element.text }];
element.text = await generateWebLlmChatPrompt(messages);
element.text = removeReasoningFromString(await generateWebLlmChatPrompt(messages));
return true;
}
/**
* Summarizes messages using the chosen method.
* @param {HashedMessage[]} hashedMessages Array of hashed messages
* Runs one summarization attempt for a single element via the chosen endpoint.
* @param {HashedMessage} element
* @param {string} endpoint
* @returns {Promise<boolean>} Whether the attempt succeeded.
*/
async function summarizeOne(element, endpoint) {
    // Route the element to the backend matching the requested summary source.
    if (endpoint === 'main') {
        return await summarizeMain(element);
    }
    if (endpoint === 'extras') {
        return await summarizeExtra(element);
    }
    if (endpoint === 'webllm') {
        return await summarizeWebLLM(element);
    }
    // No backend matched: raise an error tagged with a machine-readable cause.
    const message = `Unsupported summary endpoint: ${endpoint}`;
    throw new Error(message, { cause: 'summary_endpoint_invalid' });
}
/**
* Summarizes messages using the chosen method. Each element's text is replaced
* by its summary (via live call or cache). If an element still fails after
* `settings.summary_retries` attempts, this throws unless `options.skipOnFailure` is set.
* @param {HashedMessage[]} hashedMessages Array of hashed messages (mutated in place)
* @param {string} endpoint Type of endpoint to use
* @param {Object} [options] Options for summarization behavior
* @param {boolean} [options.skipOnFailure=false] If true, tags failed elements with `summaryFailed = true` instead of throwing
* @returns {Promise<HashedMessage[]>} Summarized messages
*/
async function summarize(hashedMessages, endpoint = 'main') {
async function summarize(hashedMessages, endpoint = 'main', { skipOnFailure = false } = {}) {
const maxAttempts = Math.max(1, Number(settings.summary_retries) || 1);
for (const element of hashedMessages) {
const cachedSummary = cachedSummaries.get(element.hash);
if (!cachedSummary) {
let success = true;
switch (endpoint) {
case 'main':
success = await summarizeMain(element);
break;
case 'extras':
success = await summarizeExtra(element);
break;
case 'webllm':
success = await summarizeWebLLM(element);
break;
default:
console.error('Unsupported endpoint', endpoint);
success = false;
break;
}
if (success) {
cachedSummaries.set(element.hash, element.text);
} else {
break;
}
} else {
if (cachedSummary) {
element.text = cachedSummary;
continue;
}
let success = false;
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
try {
success = await summarizeOne(element, endpoint);
if (success) break;
} catch (error) {
if (FATAL_CAUSES.has(error?.cause)) throw error;
console.warn(`Vectors: summary attempt ${attempt}/${maxAttempts} threw for hash ${element.hash}`, error);
}
console.warn(`Vectors: summary attempt ${attempt}/${maxAttempts} failed for hash ${element.hash}`);
}
if (!success) {
if (skipOnFailure) {
console.warn(`Vectors: summarization exhausted ${maxAttempts} attempt(s) for hash ${element.hash} — marking for skip`);
element.summaryFailed = true;
continue;
}
throw new Error(`Summarization failed after ${maxAttempts} attempt(s)`, { cause: 'summary_failed' });
}
cachedSummaries.set(element.hash, element.text);
}
return hashedMessages;
}
@@ -401,21 +459,43 @@ async function synchronizeChat(batchSize = 5) {
return -1;
}
/** @type {HashedMessage[]} */
const hashedMessages = context.chat.filter(x => settings.keep_hidden || !x.is_system).map(x => ({ text: String(substituteParams(x.mes)), hash: getStringHash(substituteParams(x.mes)), index: context.chat.indexOf(x) }));
const hashesInCollection = await getSavedHashes(chatId);
let newVectorItems = hashedMessages.filter(x => !hashesInCollection.includes(x.hash));
const newVectorItems = hashedMessages
.filter(x => !hashesInCollection.includes(x.hash))
.filter(x => !skippedHashes.has(x.hash));
const deletedHashes = hashesInCollection.filter(x => !hashedMessages.some(y => y.hash === x));
let batch = newVectorItems.slice(0, batchSize);
if (settings.summarize) {
newVectorItems = await summarize(newVectorItems, settings.summary_source);
const minLength = Math.max(0, Number(settings.summary_threshold) || 0);
const toSummarize = minLength > 0 ? batch.filter(x => x.text.length >= minLength) : batch;
if (toSummarize.length > 0) {
await summarize(toSummarize, settings.summary_source, { skipOnFailure: true });
const failed = toSummarize.filter(x => x.summaryFailed);
if (failed.length > 0) {
for (const item of failed) skippedHashes.add(item.hash);
batch = batch.filter(x => !x.summaryFailed);
}
}
}
if (newVectorItems.length > 0) {
const chunkedBatch = splitByChunks(newVectorItems.slice(0, batchSize));
if (batch.length > 0) {
const chunkedBatch = splitByChunks(batch);
console.log(`Vectors: Found ${newVectorItems.length} new items. Processing ${batchSize}...`);
await insertVectorItems(chatId, chunkedBatch);
console.log(`Vectors: Found ${newVectorItems.length} new items. Processing ${batch.length}...`);
try {
await insertVectorItems(chatId, chunkedBatch);
} catch (insertError) {
if (FATAL_CAUSES.has(insertError?.cause)) {
throw insertError;
}
console.warn('Vectors: insert failed for batch — marking for skip', insertError);
for (const item of batch) skippedHashes.add(item.hash);
}
}
if (deletedHashes.length > 0) {
@@ -444,6 +524,10 @@ async function synchronizeChat(batchSize = 5) {
return 'WebLLM extension is not installed or the model is not set.';
case 'account_id_missing':
return 'Workers AI account ID is required. Save it in the "API Connections" panel.';
case 'summary_endpoint_invalid':
return 'Summarization endpoint is not supported.';
case 'summary_failed':
return 'Summarization failed after the configured number of retries.';
default:
return 'Check server console for more details';
}
@@ -453,7 +537,7 @@ async function synchronizeChat(batchSize = 5) {
const message = getErrorMessage(error.cause);
toastr.error(message, 'Vectorization failed', { preventDuplicates: true });
return -1;
return null;
} finally {
syncBlocked = false;
}
@@ -827,7 +911,11 @@ async function getQueryText(chat, initiator) {
.slice(0, settings.query);
if (initiator === 'chat' && settings.enabled_chats && settings.summarize && settings.summarize_sent) {
hashedMessages = await summarize(hashedMessages, settings.summary_source);
const minLength = Math.max(0, Number(settings.summary_threshold) || 0);
const toSummarize = minLength > 0 ? hashedMessages.filter(x => x.text.length >= minLength) : hashedMessages;
if (toSummarize.length > 0) {
await summarize(toSummarize, settings.summary_source, { skipOnFailure: true });
}
}
const queryText = hashedMessages.map(x => x.text).join('\n');
@@ -1830,6 +1918,20 @@ export async function init() {
saveSettingsDebounced();
});
$('#vectors_summary_retries').val(settings.summary_retries).on('input', () => {
const parsed = Number($('#vectors_summary_retries').val());
settings.summary_retries = Number.isFinite(parsed) && parsed >= 1 ? Math.floor(parsed) : 1;
Object.assign(extension_settings.vectors, settings);
saveSettingsDebounced();
});
$('#vectors_summary_threshold').val(settings.summary_threshold).on('input', () => {
const parsed = Number($('#vectors_summary_threshold').val());
settings.summary_threshold = Number.isFinite(parsed) && parsed >= 0 ? Math.floor(parsed) : 0;
Object.assign(extension_settings.vectors, settings);
saveSettingsDebounced();
});
$('#vectors_message_chunk_size').val(settings.message_chunk_size).on('input', () => {
settings.message_chunk_size = Number($('#vectors_message_chunk_size').val());
Object.assign(extension_settings.vectors, settings);
@@ -493,6 +493,16 @@
<label for="vectors_summary_prompt" title="Summary Prompt:">Summary Prompt:</label>
<small data-i18n="Only used when Main API or WebLLM Extension is selected.">Only used when Main API or WebLLM Extension is selected.</small>
<textarea id="vectors_summary_prompt" class="text_pole textarea_compact" rows="6" placeholder="This prompt will be sent to AI to request the summary generation."></textarea>
<!-- Retry budget for summarizing a single message. When it is exhausted, only that message is given up on; the run as a whole is not aborted. -->
<label for="vectors_summary_retries" title="Number of summarization attempts per message before giving up on that message.">
<span data-i18n="Summarization retries per message">Summarization retries per message</span>
</label>
<input id="vectors_summary_retries" type="number" class="text_pole widthUnset" min="1" max="10" step="1" />
<label for="vectors_summary_threshold" title="Messages shorter than this (in characters) are embedded as-is without summarization. Set to 0 to always summarize.">
<span data-i18n="Summarization min length (chars)">Summarization min length (chars)</span>
</label>
<input id="vectors_summary_threshold" type="number" class="text_pole widthUnset" min="0" step="1" />
</div>
</div>
<small data-i18n="Old messages are vectorized gradually as you chat. To process all previous messages, click the button below.">