From e5d4ff5fae3c7568512ddd67f1114a5f21c4376c Mon Sep 17 00:00:00 2001
From: TanJeeSchuan <89920999+TanJeeSchuan@users.noreply.github.com>
Date: Mon, 20 Apr 2026 07:33:12 +0800
Subject: [PATCH] Enhance Vectorize All process with error handling, retries
 and minor improvements (#5479)

* fix (vectors): Fixed Vectorize All progress report and ETA issues

* fix (vectors): Strip reasoning blocks from extras/WebLLM summaries

* feat(vectors): Retry failed summaries with configurable attempts

* feat(vectors): Skip summarization for short messages

* feat(vectors): Skip failed messages during Vectorize All instead of aborting all

Prevents the "Vectorize All" process from stopping on single-message
errors. Failed items are now skipped and reported at the end of the
session rather than aborting the entire sync.

Summarization: Implements per-message retries; a message that still fails
either keeps its original text as a fallback or is marked for skipping.

Vector Insertion: Differentiates fatal configuration errors (abort) from
transient batch failures (skip and notify).

* Resolved: 'account_id_missing' was missing from the fatal error causes

* Resolved: Refactored out summarizeSkipOnFailure() functionality into summarize() via options parameter

* Fix eslint and type checks

* feat(vectors): add types to maps and sets, improve summarize function options

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
---
 public/scripts/extensions/vectors/index.js    | 182 ++++++++++++++----
 .../scripts/extensions/vectors/settings.html  |  10 +
 2 files changed, 152 insertions(+), 40 deletions(-)
diff --git a/public/scripts/extensions/vectors/index.js b/public/scripts/extensions/vectors/index.js
index 0b0fcac70..86158f16c 100644
--- a/public/scripts/extensions/vectors/index.js
+++ b/public/scripts/extensions/vectors/index.js
@@ -44,6 +44,7 @@ import { oai_settings } from '../../openai.js';
  * @property {string} text - The hashed message text
  * @property {number} hash - The hash used as the vector key
  * @property {number} index - The index of the message in the chat
+ * @property {boolean} [summaryFailed] - Whether summarization failed for this message (used internally to skip messages that fail summarization)
  */
 
 const MODULE_NAME = 'vectors';
@@ -77,6 +78,8 @@ const settings = {
     summarize_sent: false,
     summary_source: 'main',
     summary_prompt: 'Ignore previous instructions. Summarize the most important parts of the message. Limit yourself to 250 words or less. Your response should include nothing but the summary.',
+    summary_retries: 2,
+    summary_threshold: 200,
     force_chunk_delimiter: '',
 
     // For chats
@@ -118,7 +121,21 @@ const settings = {
 
 const moduleWorker = new ModuleWorkerWrapper(synchronizeChat);
 const webllmProvider = new WebLlmVectorProvider();
+/**
+ * Cache of message summaries keyed by message hash; cleared at the start of every Vectorize All run.
+ * @type {Map<number, string>}
+ */
 const cachedSummaries = new Map();
+/**
+ * Hashes synchronizeChat skipped after a summarization or insert failure; excluded from later syncs until a Vectorize All click clears the set.
+ * @type {Set<number>}
+ */
+const skippedHashes = new Set();
+/**
+ * Error causes treated as fatal: they are rethrown so the sync aborts instead of skipping the message or batch.
+ * @type {Set<string>}
+ */
+const FATAL_CAUSES = new Set(['account_id_missing', 'api_key_missing', 'api_url_missing', 'api_model_missing', 'extras_module_missing', 'webllm_not_supported', 'summary_endpoint_invalid']);
 const vectorApiRequiresUrl = ['llamacpp', 'vllm', 'ollama', 'koboldcpp'];
 
 /**
@@ -199,10 +216,12 @@ async function onVectorizeAllClick() {
         // Clear all cached summaries to ensure that new ones are created
         // upon request of a full vectorise
         cachedSummaries.clear();
+        skippedHashes.clear();
 
         const batchSize = getBatchSize();
         const elapsedLog = [];
         let finished = false;
+        let initialPending = null; // total items pending at the start of this run — set on first sync return
         $('#vectorize_progress').show();
         $('#vectorize_progress_percent').text('0');
         $('#vectorize_progress_eta').text('...');
@@ -216,16 +235,27 @@ async function onVectorizeAllClick() {
             const startTime = Date.now();
             const remaining = await synchronizeChat(batchSize);
             const elapsed = Date.now() - startTime;
+
+            if (remaining === null) {
+                // synchronizeChat already surfaced a toast; bail out of the loop.
+                throw new Error('Vectorization aborted');
+            }
+
             elapsedLog.push(elapsed);
             finished = remaining <= 0;
 
-            const total = getContext().chat.length;
-            const processed = total - remaining;
-            const processedPercent = Math.round((processed / total) * 100); // percentage of the work done
+            if (initialPending === null) {
+                initialPending = Math.max(0, remaining + batchSize);
+            }
+            const pending = Math.max(0, remaining);
+            const processed = Math.max(0, initialPending - pending);
+            const processedPercent = initialPending > 0
+                ? Math.min(100, Math.round((processed / initialPending) * 100))
+                : 100;
             const lastElapsed = elapsedLog.slice(-5); // last 5 elapsed times
             const averageElapsed = lastElapsed.reduce((a, b) => a + b, 0) / lastElapsed.length; // average time needed to process one item
             const pace = averageElapsed / batchSize; // time needed to process one item
-            const remainingTime = Math.round(pace * remaining / 1000);
+            const remainingTime = Math.round(pace * pending / 1000);
 
             $('#vectorize_progress_percent').text(processedPercent);
             $('#vectorize_progress_eta').text(remainingTime);
@@ -234,6 +264,9 @@ async function onVectorizeAllClick() {
                 throw new Error('Chat changed');
             }
         }
+        if (skippedHashes.size > 0) {
+            toastr.warning(`${skippedHashes.size} message(s) skipped due to errors. Click Vectorize All again to retry.`, 'Vectorization partial');
+        }
     } catch (error) {
         console.error('Vectors: Failed to vectorize all', error);
     } finally {
@@ -304,7 +337,7 @@ async function summarizeExtra(element) {
 
         if (apiResult.ok) {
             const data = await apiResult.json();
-            element.text = data.summary;
+            element.text = removeReasoningFromString(data.summary);
         }
     } catch (error) {
         console.log(error);
@@ -336,45 +369,70 @@ async function summarizeWebLLM(element) {
     }
 
     const messages = [{ role: 'system', content: settings.summary_prompt }, { role: 'user', content: element.text }];
-    element.text = await generateWebLlmChatPrompt(messages);
+    element.text = removeReasoningFromString(await generateWebLlmChatPrompt(messages));
 
     return true;
 }
 
 /**
- * Summarizes messages using the chosen method.
- * @param {HashedMessage[]} hashedMessages Array of hashed messages
+ * Runs one summarization attempt for a single element via the chosen endpoint.
+ * @param {HashedMessage} element Hashed message to summarize; passed as-is to the endpoint-specific summarizer.
+ * @param {string} endpoint Summary source: 'main', 'extras' or 'webllm'; any other value throws with cause 'summary_endpoint_invalid'.
+ * @returns {Promise<boolean>} Whether the attempt succeeded.
+ */
+async function summarizeOne(element, endpoint) {
+    switch (endpoint) {
+        case 'main':
+            return await summarizeMain(element);
+        case 'extras':
+            return await summarizeExtra(element);
+        case 'webllm':
+            return await summarizeWebLLM(element);
+        default:
+            throw new Error(`Unsupported summary endpoint: ${endpoint}`, { cause: 'summary_endpoint_invalid' });
+    }
+}
+
+/**
+ * Summarizes messages using the chosen method (live call or cached summary).
+ * An element that still fails after `settings.summary_retries` attempts makes this
+ * throw (cause 'summary_failed') unless `options.skipOnFailure` is set; fatal causes are always rethrown.
+ * @param {HashedMessage[]} hashedMessages Array of hashed messages (mutated in place)
  * @param {string} endpoint Type of endpoint to use
+ * @param {Object} [options] Options for summarization behavior
+ * @param {boolean} [options.skipOnFailure=false] If true, tags failed elements with `summaryFailed = true` and moves on instead of throwing
  * @returns {Promise<HashedMessage[]>} Summarized messages
  */
-async function summarize(hashedMessages, endpoint = 'main') {
+async function summarize(hashedMessages, endpoint = 'main', { skipOnFailure = false } = {}) {
+    const maxAttempts = Math.max(1, Number(settings.summary_retries) || 1); // always at least one attempt
     for (const element of hashedMessages) {
         const cachedSummary = cachedSummaries.get(element.hash);
-        if (!cachedSummary) {
-            let success = true;
-            switch (endpoint) {
-                case 'main':
-                    success = await summarizeMain(element);
-                    break;
-                case 'extras':
-                    success = await summarizeExtra(element);
-                    break;
-                case 'webllm':
-                    success = await summarizeWebLLM(element);
-                    break;
-                default:
-                    console.error('Unsupported endpoint', endpoint);
-                    success = false;
-                    break;
-            }
-            if (success) {
-                cachedSummaries.set(element.hash, element.text);
-            } else {
-                break;
-            }
-        } else {
+        if (cachedSummary) {
             element.text = cachedSummary;
+            continue;
         }
+
+        let success = false;
+        for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+            try {
+                success = await summarizeOne(element, endpoint);
+                if (success) break;
+            } catch (error) {
+                if (FATAL_CAUSES.has(error?.cause)) throw error; // fatal causes are never retried
+                console.warn(`Vectors: summary attempt ${attempt}/${maxAttempts} threw for hash ${element.hash}`, error);
+            }
+            console.warn(`Vectors: summary attempt ${attempt}/${maxAttempts} failed for hash ${element.hash}`); // also reached after a caught non-fatal throw
+        }
+        if (!success) {
+            if (skipOnFailure) {
+                console.warn(`Vectors: summarization exhausted ${maxAttempts} attempt(s) for hash ${element.hash} — marking for skip`);
+                element.summaryFailed = true;
+                continue;
+            }
+
+            throw new Error(`Summarization failed after ${maxAttempts} attempt(s)`, { cause: 'summary_failed' });
+        }
+        cachedSummaries.set(element.hash, element.text); // only successful summaries are cached
     }
     return hashedMessages;
 }
@@ -401,21 +459,43 @@ async function synchronizeChat(batchSize = 5) {
             return -1;
         }
 
+        /** @type {HashedMessage[]} */
         const hashedMessages = context.chat.filter(x => settings.keep_hidden || !x.is_system).map(x => ({ text: String(substituteParams(x.mes)), hash: getStringHash(substituteParams(x.mes)), index: context.chat.indexOf(x) }));
         const hashesInCollection = await getSavedHashes(chatId);
 
-        let newVectorItems = hashedMessages.filter(x => !hashesInCollection.includes(x.hash));
+        const newVectorItems = hashedMessages
+            .filter(x => !hashesInCollection.includes(x.hash))
+            .filter(x => !skippedHashes.has(x.hash));
         const deletedHashes = hashesInCollection.filter(x => !hashedMessages.some(y => y.hash === x));
 
+        let batch = newVectorItems.slice(0, batchSize);
+
         if (settings.summarize) {
-            newVectorItems = await summarize(newVectorItems, settings.summary_source);
+            const minLength = Math.max(0, Number(settings.summary_threshold) || 0);
+            const toSummarize = minLength > 0 ? batch.filter(x => x.text.length >= minLength) : batch;
+            if (toSummarize.length > 0) {
+                await summarize(toSummarize, settings.summary_source, { skipOnFailure: true });
+                const failed = toSummarize.filter(x => x.summaryFailed);
+                if (failed.length > 0) {
+                    for (const item of failed) skippedHashes.add(item.hash);
+                    batch = batch.filter(x => !x.summaryFailed);
+                }
+            }
         }
 
-        if (newVectorItems.length > 0) {
-            const chunkedBatch = splitByChunks(newVectorItems.slice(0, batchSize));
+        if (batch.length > 0) {
+            const chunkedBatch = splitByChunks(batch);
 
-            console.log(`Vectors: Found ${newVectorItems.length} new items. Processing ${batchSize}...`);
-            await insertVectorItems(chatId, chunkedBatch);
+            console.log(`Vectors: Found ${newVectorItems.length} new items. Processing ${batch.length}...`);
+            try {
+                await insertVectorItems(chatId, chunkedBatch);
+            } catch (insertError) {
+                if (FATAL_CAUSES.has(insertError?.cause)) {
+                    throw insertError;
+                }
+                console.warn('Vectors: insert failed for batch — marking for skip', insertError);
+                for (const item of batch) skippedHashes.add(item.hash);
+            }
         }
 
         if (deletedHashes.length > 0) {
@@ -444,6 +524,10 @@ async function synchronizeChat(batchSize = 5) {
                     return 'WebLLM extension is not installed or the model is not set.';
                 case 'account_id_missing':
                     return 'Workers AI account ID is required. Save it in the "API Connections" panel.';
+                case 'summary_endpoint_invalid':
+                    return 'Summarization endpoint is not supported.';
+                case 'summary_failed':
+                    return 'Summarization failed after the configured number of retries.';
                 default:
                     return 'Check server console for more details';
             }
@@ -453,7 +537,7 @@ async function synchronizeChat(batchSize = 5) {
 
         const message = getErrorMessage(error.cause);
         toastr.error(message, 'Vectorization failed', { preventDuplicates: true });
-        return -1;
+        return null;
     } finally {
         syncBlocked = false;
     }
@@ -827,7 +911,11 @@ async function getQueryText(chat, initiator) {
         .slice(0, settings.query);
 
     if (initiator === 'chat' && settings.enabled_chats && settings.summarize && settings.summarize_sent) {
-        hashedMessages = await summarize(hashedMessages, settings.summary_source);
+        const minLength = Math.max(0, Number(settings.summary_threshold) || 0);
+        const toSummarize = minLength > 0 ? hashedMessages.filter(x => x.text.length >= minLength) : hashedMessages;
+        if (toSummarize.length > 0) {
+            await summarize(toSummarize, settings.summary_source, { skipOnFailure: true });
+        }
     }
 
     const queryText = hashedMessages.map(x => x.text).join('\n');
@@ -1830,6 +1918,20 @@ export async function init() {
         saveSettingsDebounced();
     });
 
+    $('#vectors_summary_retries').val(settings.summary_retries).on('input', () => {
+        const parsed = Number($('#vectors_summary_retries').val());
+        settings.summary_retries = Number.isFinite(parsed) && parsed >= 1 ? Math.floor(parsed) : 1;
+        Object.assign(extension_settings.vectors, settings);
+        saveSettingsDebounced();
+    });
+
+    $('#vectors_summary_threshold').val(settings.summary_threshold).on('input', () => {
+        const parsed = Number($('#vectors_summary_threshold').val());
+        settings.summary_threshold = Number.isFinite(parsed) && parsed >= 0 ? Math.floor(parsed) : 0;
+        Object.assign(extension_settings.vectors, settings);
+        saveSettingsDebounced();
+    });
+
     $('#vectors_message_chunk_size').val(settings.message_chunk_size).on('input', () => {
         settings.message_chunk_size = Number($('#vectors_message_chunk_size').val());
         Object.assign(extension_settings.vectors, settings);
diff --git a/public/scripts/extensions/vectors/settings.html b/public/scripts/extensions/vectors/settings.html
index 270ebf2e0..50847efd5 100644
--- a/public/scripts/extensions/vectors/settings.html
+++ b/public/scripts/extensions/vectors/settings.html
@@ -493,6 +493,16 @@
                         <label for="vectors_summary_prompt" title="Summary Prompt:">Summary Prompt:</label>
                         <small data-i18n="Only used when Main API or WebLLM Extension is selected.">Only used when Main API or WebLLM Extension is selected.</small>
                         <textarea id="vectors_summary_prompt" class="text_pole textarea_compact" rows="6" placeholder="This prompt will be sent to AI to request the summary generation."></textarea>
+
+                        <label for="vectors_summary_retries" title="Number of summarization attempts per message before giving up on summarizing it.">
+                            <span data-i18n="Summarization retries per message">Summarization retries per message</span>
+                        </label>
+                        <input id="vectors_summary_retries" type="number" class="text_pole widthUnset" min="1" max="10" step="1" />
+
+                        <label for="vectors_summary_threshold" title="Messages shorter than this (in characters) are embedded as-is without summarization. Set to 0 to always summarize.">
+                            <span data-i18n="Summarization min length (chars)">Summarization min length (chars)</span>
+                        </label>
+                        <input id="vectors_summary_threshold" type="number" class="text_pole widthUnset" min="0" step="1" />
                     </div>
                 </div>
                 <small data-i18n="Old messages are vectorized gradually as you chat. To process all previous messages, click the button below.">