[FEATURE_REQUEST] Sending PDF/HTML files? #1414

This commit is contained in:
Cohee
2023-11-29 17:51:30 +02:00
parent 1ce009b84e
commit e0bf2b8e3e
10 changed files with 74770 additions and 19 deletions
+2 -1
View File
@@ -22,7 +22,8 @@
"droll",
"handlebars",
"highlight.js",
"localforage"
"localforage",
"pdfjs-dist"
]
}
}
+17398
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+10 -5
View File
@@ -195,7 +195,7 @@ import { getBackgrounds, initBackgrounds } from "./scripts/backgrounds.js";
import { hideLoader, showLoader } from "./scripts/loader.js";
import { CharacterContextMenu, BulkEditOverlay } from "./scripts/BulkEditOverlay.js";
import { loadMancerModels } from "./scripts/mancer-settings.js";
import { hasPendingFileAttachment, populateFileAttachment } from "./scripts/chats.js";
import { getFileAttachment, hasPendingFileAttachment, populateFileAttachment } from "./scripts/chats.js";
import { replaceVariableMacros } from "./scripts/variables.js";
//exporting functions and vars for mods
@@ -3019,22 +3019,27 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
coreChat.pop();
}
coreChat = coreChat.map(chatItem => {
coreChat = await Promise.all(coreChat.map(async (chatItem) => {
let message = chatItem.mes;
let regexType = chatItem.is_user ? regex_placement.USER_INPUT : regex_placement.AI_OUTPUT;
let options = { isPrompt: true };
let regexedMessage = getRegexedString(message, regexType, options);
if (chatItem.extra?.file?.text) {
regexedMessage += `\n\n${chatItem.extra.file.text}`;
if (chatItem.extra?.file) {
const fileText = chatItem.extra.file.text || (await getFileAttachment(chatItem.extra.file.url));
if (fileText) {
chatItem.extra.fileStart = regexedMessage.length;
regexedMessage += `\n\n${fileText}`;
}
}
return {
...chatItem,
mes: regexedMessage,
};
});
}));
// Determine token limit
let this_max_context = getMaxContextSize();
+109 -13
View File
@@ -8,14 +8,33 @@ import {
eventSource,
event_types,
getCurrentChatId,
getRequestHeaders,
hideSwipeButtons,
name2,
saveChatDebounced,
showSwipeButtons,
} from "../script.js";
import { getBase64Async, humanFileSize, saveBase64AsFile } from "./utils.js";
import {
extractTextFromHTML,
extractTextFromMarkdown,
extractTextFromPDF,
getBase64Async,
getStringHash,
humanFileSize,
saveBase64AsFile,
} from "./utils.js";
const fileSizeLimit = 1024 * 1024 * 1; // 1 MB
const fileSizeLimit = 1024 * 1024 * 10; // 10 MB
const converters = {
'application/pdf': extractTextFromPDF,
'text/html': extractTextFromHTML,
'text/markdown': extractTextFromMarkdown,
}
function isConvertible(type) {
return Object.keys(converters).includes(type);
}
/**
* Mark message as hidden (system message).
@@ -70,7 +89,7 @@ export async function unhideChatMessage(messageId, messageBlock) {
/**
* Adds a file attachment to the message.
* @param {object} message Message object
* @returns {Promise<void>}
* @returns {Promise<void>} A promise that resolves when file is uploaded.
*/
export async function populateFileAttachment(message, inputId = 'file_form_input') {
try {
@@ -81,18 +100,38 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
const file = fileInput.files[0];
if (!file) return;
const fileBase64 = await getBase64Async(file);
let base64Data = fileBase64.split(',')[1];
// If file is image
if (file.type.startsWith('image/')) {
const base64Img = await getBase64Async(file);
const base64ImgData = base64Img.split(',')[1];
const extension = file.type.split('/')[1];
const imageUrl = await saveBase64AsFile(base64ImgData, name2, file.name, extension);
const imageUrl = await saveBase64AsFile(base64Data, name2, file.name, extension);
message.extra.image = imageUrl;
message.extra.inline_image = true;
} else {
const fileText = await file.text();
const slug = getStringHash(file.name);
const uniqueFileName = `${Date.now()}_${slug}.txt`;
if (isConvertible(file.type)) {
try {
const converter = converters[file.type];
const fileText = await converter(file);
base64Data = window.btoa(unescape(encodeURIComponent(fileText)));
} catch (error) {
toastr.error(error, 'Could not convert file');
console.error('Could not convert file', error);
}
}
const fileUrl = await uploadFileAttachment(uniqueFileName, base64Data);
if (!fileUrl) {
return;
}
message.extra.file = {
text: fileText,
url: fileUrl,
size: file.size,
name: file.name,
};
@@ -105,6 +144,62 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
}
}
/**
* Uploads file to the server.
* @param {string} fileName
* @param {string} base64Data
* @returns {Promise<string>} File URL
*/
export async function uploadFileAttachment(fileName, base64Data) {
try {
const result = await fetch('/api/file/upload', {
method: 'POST',
headers: getRequestHeaders(),
body: JSON.stringify({
name: fileName,
data: base64Data,
}),
});
if (!result.ok) {
const error = await result.text();
throw new Error(error);
}
const responseData = await result.json();
return responseData.path.replace(/\\/g, '/');
} catch (error) {
toastr.error(error, 'Could not upload file');
console.error('Could not upload file', error);
}
}
/**
* Downloads file from the server.
* @param {string} url File URL
* @returns {Promise<string>} File text
*/
export async function getFileAttachment(url) {
try {
const result = await fetch(url, {
method: 'GET',
cache: 'force-cache',
headers: getRequestHeaders(),
});
if (!result.ok) {
const error = await result.text();
throw new Error(error);
}
const text = await result.text();
return text;
} catch (error) {
toastr.error(error, 'Could not download file');
console.error('Could not download file', error);
}
}
/**
* Validates file to make sure it is not binary or not image.
* @param {File} file File object
@@ -121,7 +216,7 @@ async function validateFile(file) {
}
// If file is binary
if (isBinary && !isImage) {
if (isBinary && !isImage && !isConvertible(file.type)) {
toastr.error('Binary files are not supported. Select a text file or image.');
return false;
}
@@ -193,22 +288,23 @@ async function deleteMessageFile(messageId) {
* @param {number} messageId Message ID
*/
async function viewMessageFile(messageId) {
const messageText = chat[messageId]?.extra?.file?.text;
const messageFile = chat[messageId]?.extra?.file;
if (!messageText) {
if (!messageFile) {
console.debug('Message has no file or it is empty');
return;
}
const fileText = messageFile.text || (await getFileAttachment(messageFile.url));
const modalTemplate = $('<div><pre><code></code></pre></div>');
modalTemplate.find('code').addClass('txt').text(messageText);
modalTemplate.find('code').addClass('txt').text(fileText);
modalTemplate.addClass('file_modal');
addCopyToCodeBlocks(modalTemplate);
callPopup(modalTemplate, 'text');
}
/**
* Inserts a file embed into the message.
* @param {number} messageId
+97
View File
@@ -1,6 +1,7 @@
import { getContext } from "./extensions.js";
import { getRequestHeaders } from "../script.js";
import { isMobile } from "./RossAscends-mods.js";
import { collapseNewlines } from "./power-user.js";
/**
* Pagination status string template.
@@ -1066,3 +1067,99 @@ export function uuidv4() {
return v.toString(16);
});
}
function postProcessText(text) {
// Collapse multiple newlines into one
text = collapseNewlines(text);
// Trim leading and trailing whitespace, and remove empty lines
text = text.split('\n').map(l => l.trim()).filter(Boolean).join('\n');
// Remove carriage returns
text = text.replace(/\r/g, '');
// Normalize unicode spaces
text = text.replace(/\u00A0/g, ' ');
// Collapse multiple spaces into one (except for newlines)
text = text.replace(/ {2,}/g, ' ');
// Remove leading and trailing spaces
text = text.trim();
return text;
}
/**
* Use pdf.js to load and parse text from PDF pages
* @param {Blob} blob PDF file blob
* @returns {Promise<string>} A promise that resolves to the parsed text.
*/
export async function extractTextFromPDF(blob) {
async function initPdfJs() {
const promises = [];
const workerPromise = new Promise((resolve, reject) => {
const workerScript = document.createElement('script');
workerScript.type = 'module';
workerScript.async = true;
workerScript.src = 'lib/pdf.worker.mjs';
workerScript.onload = resolve;
workerScript.onerror = reject;
document.head.appendChild(workerScript);
});
promises.push(workerPromise);
const pdfjsPromise = new Promise((resolve, reject) => {
const pdfjsScript = document.createElement('script');
pdfjsScript.type = 'module';
pdfjsScript.async = true;
pdfjsScript.src = 'lib/pdf.mjs';
pdfjsScript.onload = resolve;
pdfjsScript.onerror = reject;
document.head.appendChild(pdfjsScript);
});
promises.push(pdfjsPromise);
return Promise.all(promises);
}
if (!('pdfjsLib' in window)) {
await initPdfJs();
}
const buffer = await getFileBuffer(blob);
const pdf = await pdfjsLib.getDocument(buffer).promise;
const pages = [];
for (let i = 1; i <= pdf.numPages; i++) {
const page = await pdf.getPage(i);
const textContent = await page.getTextContent();
const text = textContent.items.map(item => item.str).join(' ');
pages.push(text);
}
return postProcessText(pages.join('\n'));
}
/**
* Use DOMParser to load and parse text from HTML
* @param {Blob} blob HTML content blob
* @returns {Promise<string>} A promise that resolves to the parsed text.
*/
export async function extractTextFromHTML(blob) {
const html = await blob.text();
const domParser = new DOMParser();
const document = domParser.parseFromString(DOMPurify.sanitize(html), 'text/html');
const text = postProcessText(document.body.textContent);
return text;
}
/**
* Use showdown to load and parse text from Markdown
* @param {Blob} blob Markdown content blob
* @returns {Promise<string>} A promise that resolves to the parsed text.
*/
export async function extractTextFromMarkdown(blob) {
const markdown = await blob.text();
const converter = new showdown.Converter();
const html = converter.makeHtml(markdown);
const domParser = new DOMParser();
const document = domParser.parseFromString(DOMPurify.sanitize(html), 'text/html');
const text = postProcessText(document.body.textContent);
return text;
}
+27
View File
@@ -3,6 +3,7 @@ const fs = require('fs');
const sanitize = require('sanitize-filename');
const fetch = require('node-fetch').default;
const { finished } = require('stream/promises');
const writeFileSyncAtomic = require('write-file-atomic').sync;
const { DIRECTORIES, UNSAFE_EXTENSIONS } = require('./constants');
const VALID_CATEGORIES = ["bgm", "ambient", "blip", "live2d"];
@@ -297,6 +298,32 @@ function registerEndpoints(app, jsonParser) {
return response.sendStatus(500);
}
});
app.post('/api/file/upload', jsonParser, async (request, response) => {
try {
if (!request.body.name) {
return response.status(400).send("No upload name specified");
}
if (!request.body.data) {
return response.status(400).send("No upload data specified");
}
const safeInput = checkAssetFileName(request.body.name);
if (!safeInput) {
return response.status(400).send("Invalid upload name");
}
const pathToUpload = path.join(DIRECTORIES.files, safeInput);
writeFileSyncAtomic(pathToUpload, request.body.data, 'base64');
const url = path.normalize(pathToUpload.replace('public' + path.sep, ''));
return response.send({ path: url });
} catch (error) {
console.log(error);
return response.sendStatus(500);
}
});
}
module.exports = {
+1
View File
@@ -24,6 +24,7 @@ const DIRECTORIES = {
quickreplies: 'public/QuickReplies',
assets: 'public/assets',
comfyWorkflows: 'public/user/workflows',
files: 'public/user/files',
};
const UNSAFE_EXTENSIONS = [