Files
SillyTavern/public/scripts/extensions/tts/volcengine.js
T
Crush0 eaa6a00e97 Volcengine tts (#5003)
* feat(tts): Add support for Volcengine TTS provider

* refactor: Remove the redundant comments in the Volcengine TTS-related code.

* fix(volcengine): Fix the audio data processing logic in the voice generation interface

* feat(tts): Enhance Volcengine TTS functionality and improve error handling

- Return more detailed error information when generating voice fails
- Add multiple preset voice options and support custom voice management
- Reconstruct the audio stream processing logic to enhance reliability
- Improve the UI interface, adding a voice selection dropdown menu and operation buttons

* refactor(tts): Optimize the code structure and error handling of the Volcengine TTS provider
- Remove the unused "voices" array and "model" parameter
- Improve the text processing logic, eliminating unnecessary separators
- Standardize the error handling logic, simplifying the status code checks
- Fix the DOM operation method, using "createElement" instead of string concatenation
- Ensure the existence check of the "customVoices" array

* Fix: Change the Content-Type of the audio response to audio/mpeg.

* Clean-up

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
2026-01-26 20:45:38 +02:00

317 lines
12 KiB
JavaScript

import { event_types, eventSource, getRequestHeaders } from '../../../script.js';
import { SECRET_KEYS, secret_state } from '../../secrets.js';
import { saveTtsProviderSettings, initVoiceMap } from './index.js';
import { Popup } from '../../popup.js';
export { VolcengineTtsProvider };
/**
 * TTS provider for the Volcengine (Doubao) speech synthesis service.
 *
 * Renders the provider settings UI, keeps the provider settings in sync with
 * that UI, manages a user-defined list of extra speaker ids, and requests
 * audio through the `/api/volcengine/generate-voice` endpoint.
 */
class VolcengineTtsProvider {
    /**
     * Built-in speaker presets. Each entry uses the speaker id as both the
     * display name and the voice id.
     * @type {{name: string, voice_id: string, lang: string}[]}
     */
    static voices = [
        'zh_female_xiaohe_uranus_bigtts',
        'zh_female_vv_uranus_bigtts',
        'saturn_zh_female_keainvsheng_tob',
        'saturn_zh_female_tiaopigongzhu_tob',
        'saturn_zh_female_cancan_tob',
        'saturn_zh_male_shuanglangshaonian_tob',
        'saturn_zh_male_tiancaitongzhuo_tob',
        'zh_male_taocheng_uranus_bigtts',
    ].map((id) => ({ name: id, voice_id: id, lang: 'cl' }));

    /** Active settings; assigned by {@link VolcengineTtsProvider#loadSettings}. */
    settings;

    audioElement = document.createElement('audio');

    /** Values used for every setting that the stored settings do not override. */
    defaultSettings = {
        voiceMap: {},
        customVoices: [],
        resource_id: '',
        speed: 0,
        provider_endpoint: 'https://openspeech.bytedance.com/api/v3/tts/unidirectional',
    };

    /**
     * Converts a raw speed value (for example the string returned by an input
     * element) into a finite number.
     * @param {unknown} raw Value to convert.
     * @returns {number|null} The numeric speed, or `null` when `raw` is blank or not a finite number.
     */
    static normalizeSpeed(raw) {
        if (raw === null || raw === undefined) {
            return null;
        }
        // Number('') and Number('  ') are 0, which would silently turn a
        // cleared input into a real speed value.
        if (typeof raw === 'string' && raw.trim() === '') {
            return null;
        }
        const speed = Number(raw);
        return Number.isFinite(speed) ? speed : null;
    }

    /**
     * Removes every literal `...` sequence from the text.
     * @param {string} text Text to clean up.
     * @returns {string} The text without `...` sequences.
     */
    processText(text) {
        return text.replaceAll('...', '');
    }

    constructor() {
        // Kept on the instance so dispose() can unregister exactly this function.
        this.handler = async (/** @type {string} */ key) => {
            if (![SECRET_KEYS.VOLCENGINE_APP_ID, SECRET_KEYS.VOLCENGINE_ACCESS_KEY].includes(key)) return;
            $('#volcengine-tts-app-id').toggleClass('success', !!secret_state[SECRET_KEYS.VOLCENGINE_APP_ID]);
            $('#volcengine-tts-access-key').toggleClass('success', !!secret_state[SECRET_KEYS.VOLCENGINE_ACCESS_KEY]);
            await this.onRefreshClick();
        };
    }

    /** Unregisters the secret-change listeners added by loadSettings(). */
    dispose() {
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            eventSource.removeListener(event, this.handler);
        });
    }

    /**
     * Generates a short sample with the given voice and plays it.
     * @param {string} voice Speaker id to preview.
     * @returns {Promise<void>}
     */
    async previewTtsVoice(voice) {
        const text = 'Hello! Nice to meet you!';
        const response = await this.generateTts(text, voice);
        const objectUrl = URL.createObjectURL(await response.blob());
        const player = new Audio(objectUrl);

        // The object URL keeps the blob alive until it is revoked, so release
        // it as soon as playback can no longer use it.
        let released = false;
        const release = () => {
            if (released) return;
            released = true;
            URL.revokeObjectURL(objectUrl);
        };
        player.addEventListener('ended', release, { once: true });
        player.addEventListener('error', release, { once: true });

        player.play().catch(e => {
            console.error('Error playing audio:', e);
            release();
        });
    }

    /**
     * @returns {Promise<{name: string, voice_id: string, lang: string}[]>} Built-in and custom voices.
     */
    async fetchTtsVoiceObjects() {
        return this.getAllVoices();
    }

    /** Markup for the provider settings panel. */
    get settingsHtml() {
        return `
        <div>Volcengine (Doubao) TTS Configuration.</div>
        <small>Hint: Volcengine (Doubao) TTS configuration items.</small>
        <small>Please refer to the <a href="https://www.volcengine.com/docs/6561/1598757" target="_blank">documentation</a> to obtain the configuration items.</small>
        <div class="flex-container alignItemsCenter">
            <div id="volcengine-tts-app-id" class="menu_button menu_button_icon manage-api-keys" data-key="volcengine_app_id">
                <i class="fa-solid fa-key"></i>
                <span>App ID</span>
            </div>
            <div id="volcengine-tts-access-key" class="menu_button menu_button_icon manage-api-keys" data-key="volcengine_access_key">
                <i class="fa-solid fa-key"></i>
                <span>Access Key</span>
            </div>
        </div>
        <div>
            <label for="volcengine-tts-resource-id">Resource ID:</label>
            <input type="text" class="text_pole" id="volcengine-tts-resource-id">
        </div>
        <label for="volcengine-tts-voice-select">Custom Voice (Speaker):</label>
        <div class="tts_custom_voices">
            <select id="volcengine-tts-voice-select">
            </select>
            <i title="Add" id="volcengine-tts-add-voice" class="tts-button fa-solid fa-plus fa-xl success" tabindex="0" role="button"></i>
            <i title="Delete" id="volcengine-tts-delete-voice" class="tts-button fa-solid fa-xmark fa-xl failure" tabindex="0" role="button"></i>
        </div>
        <div>
            <label for="volcengine-tts-speed">Speed:</label>
            <div class="flex-container">
                <div class="range-block-range">
                    <input type="range" id="volcengine-tts-speed" min="-50" max="100" step="1">
                </div>
                <div class="range-block-counter">
                    <input type="number" min="-50" max="100" step="1" data-for="volcengine-tts-speed" id="volcengine-tts-speed_counter">
                </div>
            </div>
        </div>
        <div>
            <label for="volcengine-tts-provider-endpoint">Provider Endpoint:</label>
            <input type="text" class="text_pole" id="volcengine-tts-provider-endpoint">
        </div>
        `;
    }

    /**
     * Looks up a voice by its name.
     * @param {string} voiceName Name to search for.
     * @returns {Promise<{name: string, voice_id: string, lang: string}|undefined>} The voice, or `undefined` when no voice has that name.
     */
    async getVoice(voiceName) {
        return this.getAllVoices().find(voice => voice.name === voiceName);
    }

    /**
     * @returns {{name: string, voice_id: string, lang: string}[]} A new array with the built-in presets followed by the custom voices.
     */
    getAllVoices() {
        // Settings may not be loaded yet; in that case only the presets exist.
        const customVoices = this.settings?.customVoices ?? [];
        return [
            ...VolcengineTtsProvider.voices,
            ...customVoices.map((id) => ({ name: id, voice_id: id, lang: 'cl' })),
        ];
    }

    /** Rebuilds the custom-voice dropdown from the stored custom voices. */
    populateVoices() {
        const voiceSelect = $('#volcengine-tts-voice-select');
        voiceSelect.empty();
        for (const customVoice of this.settings.customVoices) {
            // Built with createElement/textContent so the name is never parsed as HTML.
            const option = document.createElement('option');
            option.value = customVoice;
            option.textContent = customVoice;
            voiceSelect.append(option);
        }
    }

    async onRefreshClick() {
        return await this.checkReady();
    }

    /** Copies the current UI values into the settings and saves them. */
    onSettingsChange() {
        this.settings.resource_id = $('#volcengine-tts-resource-id').val();
        // Input elements report strings; keep speed numeric like its default,
        // and keep the previous value when the field holds no valid number.
        const speed = VolcengineTtsProvider.normalizeSpeed($('#volcengine-tts-speed').val());
        if (speed !== null) {
            this.settings.speed = speed;
        }
        this.settings.provider_endpoint = $('#volcengine-tts-provider-endpoint').val();
        saveTtsProviderSettings();
        this.changeTTSSettings();
    }

    /** Mirrors the stored speed into both the slider and the numeric counter. */
    async changeTTSSettings() {
        const speed = this.settings.speed;
        $('#volcengine-tts-speed').val(speed);
        $('#volcengine-tts-speed_counter').val(speed);
    }

    /**
     * Applies stored settings on top of the defaults, fills the settings UI
     * and wires its event handlers.
     * @param {object} settings Stored provider settings; may be empty.
     * @returns {Promise<void>}
     * @throws {string} When `settings` contains a key that is not in `defaultSettings`.
     *     The thrown value is a plain string, kept for backward compatibility.
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Copy the container defaults as well; a plain spread would share the
        // same array/object with defaultSettings and let edits leak into it.
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
            customVoices: [...this.defaultSettings.customVoices],
        };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (!(key in this.settings)) {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
            this.settings[key] = settings[key];
        }

        if (!Array.isArray(this.settings.customVoices)) {
            this.settings.customVoices = [];
        }
        // Previously saved settings may hold the speed as a string.
        this.settings.speed = VolcengineTtsProvider.normalizeSpeed(this.settings.speed) ?? this.defaultSettings.speed;

        const onFieldChange = () => this.onSettingsChange();
        $('#volcengine-tts-resource-id').val(this.settings.resource_id).on('change', onFieldChange);
        $('#volcengine-tts-add-voice').on('click', () => this.createNewVoice());
        $('#volcengine-tts-delete-voice').on('click', () => this.deleteSelectedVoice());
        this.populateVoices();

        // Speed control: the slider and the numeric counter share one handler.
        const onSpeedInput = (e) => {
            const speed = VolcengineTtsProvider.normalizeSpeed($(e.target).val());
            if (speed === null) {
                // Empty or half-typed value; wait for a usable number.
                return;
            }
            this.settings.speed = speed;
            saveTtsProviderSettings();
            // Writes the value back to both inputs so they stay in sync.
            this.changeTTSSettings();
        };
        $('#volcengine-tts-speed').val(this.settings.speed).on('input change', onSpeedInput);
        $('#volcengine-tts-speed_counter').val(this.settings.speed).on('input change', onSpeedInput);

        $('#volcengine-tts-provider-endpoint').val(this.settings.provider_endpoint).on('change', onFieldChange);

        // Initialize secret keys UI
        $('#volcengine-tts-app-id').toggleClass('success', !!secret_state[SECRET_KEYS.VOLCENGINE_APP_ID]);
        $('#volcengine-tts-access-key').toggleClass('success', !!secret_state[SECRET_KEYS.VOLCENGINE_ACCESS_KEY]);
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            eventSource.on(event, this.handler);
        });

        await this.checkReady();
        console.info('Volcengine TTS: Settings loaded');
    }

    /**
     * Asks the user for a speaker id and adds it to the custom voices.
     * Does nothing when the prompt is cancelled or left blank.
     * @returns {Promise<void>}
     */
    async createNewVoice() {
        const input = await Popup.show.input('Voice name: ', null);
        const name = typeof input === 'string' ? input.trim() : '';
        if (!name) {
            return;
        }
        // Check the presets too: a custom voice that repeats a built-in name
        // would appear twice in getAllVoices().
        if (this.getAllVoices().some(voice => voice.name === name)) {
            toastr.error('Voice name should be unique.');
            return;
        }
        this.settings.customVoices.push(name);
        this.populateVoices();
        initVoiceMap();
        saveTtsProviderSettings();
    }

    /**
     * Removes the voice selected in the dropdown after user confirmation.
     * @returns {Promise<void>}
     */
    async deleteSelectedVoice() {
        const selectedVoiceName = $('#volcengine-tts-voice-select').val();
        if (!selectedVoiceName) {
            toastr.error('Please select a voice first.');
            return;
        }
        const isConfirmed = await Popup.show.confirm(`Are you sure you want to delete the selected voice ${selectedVoiceName}?`);
        if (!isConfirmed) {
            return;
        }
        const voiceIndex = this.settings.customVoices.indexOf(selectedVoiceName);
        if (voiceIndex !== -1) {
            this.settings.customVoices.splice(voiceIndex, 1);
        }
        this.populateVoices();
        initVoiceMap();
        saveTtsProviderSettings();
    }

    /** Refreshes the UI from the settings; a failure there is not propagated. */
    async checkReady() {
        await Promise.allSettled([this.changeTTSSettings()]);
    }

    /**
     * @param {string} text Text to synthesize.
     * @param {string} speaker Speaker id.
     * @returns {Promise<Response>} The successful fetch response carrying the audio.
     * @throws {Error} When the request does not succeed.
     */
    async generateTts(text, speaker) {
        return await this.fetchTtsGeneration(text, speaker);
    }

    /**
     * POSTs a generation request to `/api/volcengine/generate-voice`.
     * @param {string} text Text to synthesize.
     * @param {string} voice_speaker Speaker id.
     * @returns {Promise<Response>} The response when its status is OK.
     * @throws {Error} With the HTTP status and response text when the status is not OK.
     */
    async fetchTtsGeneration(text, voice_speaker) {
        console.info(`Generating new TTS for voice_id ${voice_speaker}`);
        const response = await fetch('/api/volcengine/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                'provider_endpoint': this.settings.provider_endpoint,
                'resource_id': this.settings.resource_id,
                'text': text,
                'voice_speaker': voice_speaker,
                'speed': this.settings.speed,
            }),
        });
        if (!response.ok) {
            const errorText = await response.text();
            console.error(`HTTP ${response.status}: ${errorText}`);
            toastr.error(errorText);
            throw new Error(`HTTP ${response.status}: ${errorText}`);
        }
        return response;
    }
}