Fix MiniMax (TTS) pitch on API calls being the wrong range (#4578)

* fix: change MiniMax TTS pitch from float range 0.5-2.0 to integer range -12 to 12

Fixes #4570

* refactor: replace hardcoded defaults with defaultSettings in MiniMaxTtsProvider config

* refactor: restructure TTS settings with min/max ranges and add value clamping, adjust volume range

* fix: flatten MiniMaxTtsProvider settings to use default values from complex field definitions

* Adjust block spacing

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
Wolfsblvt
2025-10-02 20:03:22 +02:00
committed by GitHub
parent 9415185a89
commit d2f3a85742
2 changed files with 49 additions and 16 deletions
@@ -1,3 +1,8 @@
.minimax_tts_settings>.tts_block {
gap: 5px;
margin: 5px 0;
}
.minimax-custom-item {
display: flex;
justify-content: space-between;
+44 -16
View File
@@ -19,9 +19,9 @@ class MiniMaxTtsProvider {
apiHost: 'https://api.minimax.io',
model: 'speech-02-hd',
voiceMap: {},
speed: 1.0,
volume: 1.0,
pitch: 1.0,
speed: { default: 1.0, min: 0.5, max: 2.0, step: 0.1 },
volume: { default: 1.0, min: 0.0, max: 10.0, step: 0.1 },
pitch: { default: 0, min: -12, max: 12, step: 1 },
audioSampleRate: 32000,
bitrate: 128000,
format: 'mp3',
@@ -84,15 +84,15 @@ class MiniMaxTtsProvider {
<div class="tts_block">
<label for="minimax_tts_speed">Speed: <span id="minimax_tts_speed_output"></span></label>
<input id="minimax_tts_speed" type="range" value="${this.defaultSettings.speed}" min="0.5" max="2.0" step="0.1" />
<input id="minimax_tts_speed" type="range" value="${this.defaultSettings.speed.default}" min="${this.defaultSettings.speed.min}" max="${this.defaultSettings.speed.max}" step="${this.defaultSettings.speed.step}" />
</div>
<div class="tts_block">
<label for="minimax_tts_volume">Volume: <span id="minimax_tts_volume_output"></span></label>
<input id="minimax_tts_volume" type="range" value="${this.defaultSettings.volume}" min="0.1" max="2.0" step="0.1" />
<input id="minimax_tts_volume" type="range" value="${this.defaultSettings.volume.default}" min="${this.defaultSettings.volume.min}" max="${this.defaultSettings.volume.max}" step="${this.defaultSettings.volume.step}" />
</div>
<div class="tts_block">
<label for="minimax_tts_pitch">Pitch: <span id="minimax_tts_pitch_output"></span></label>
<input id="minimax_tts_pitch" type="range" value="${this.defaultSettings.pitch}" min="0.5" max="2.0" step="0.1" />
<input id="minimax_tts_pitch" type="range" value="${this.defaultSettings.pitch.default}" min="${this.defaultSettings.pitch.min}" max="${this.defaultSettings.pitch.max}" step="${this.defaultSettings.pitch.step}" />
</div>
<div class="tts_block">
<label for="minimax_tts_format">Audio Format</label>
@@ -190,14 +190,14 @@ class MiniMaxTtsProvider {
this.settings.apiHost = $('#minimax_tts_api_host').val();
this.settings.speed = parseFloat($('#minimax_tts_speed').val().toString());
this.settings.volume = parseFloat($('#minimax_tts_volume').val().toString());
this.settings.pitch = parseFloat($('#minimax_tts_pitch').val().toString());
this.settings.pitch = parseInt($('#minimax_tts_pitch').val().toString());
this.settings.model = $('#minimax_tts_model').find(':selected').val();
this.settings.format = $('#minimax_tts_format').find(':selected').val();
this.settings.customVoiceId = $('#minimax_tts_custom_voice_id').val();
$('#minimax_tts_speed_output').text(this.settings.speed.toFixed(1));
$('#minimax_tts_volume_output').text(this.settings.volume.toFixed(1));
$('#minimax_tts_pitch_output').text(this.settings.pitch.toFixed(1));
$('#minimax_tts_pitch_output').text(this.settings.pitch);
saveTtsProviderSettings();
}
@@ -458,6 +458,16 @@ class MiniMaxTtsProvider {
// Only accept keys defined in defaultSettings
this.settings = { ...this.defaultSettings };
// Flatten the settings fields with default/min/max definitions so the actual values are used
this.settings = Object.fromEntries(
Object.entries(this.defaultSettings).map(([key, value]) => {
if (value && typeof value === 'object' && 'default' in value) {
return [key, value.default];
}
return [key, value];
}),
);
for (const key in settings) {
if (key in this.settings) {
this.settings[key] = settings[key];
@@ -470,6 +480,21 @@ class MiniMaxTtsProvider {
if (!this.settings.customModels) this.settings.customModels = [];
if (!this.settings.customVoices) this.settings.customVoices = [];
// # Migrate settings
// Pitch value changed from float to int. If it's a float, let's try to extrapolate it to the new range
if (!Number.isInteger(this.settings.pitch)) {
const oldPitch = parseFloat(this.settings.pitch);
if (!isNaN(oldPitch)) {
// map old [0.5..1.0] to [-12..0], and [1.0..2.0] to [0..12] (old default was 1.0, new default is 0)
const newPitch = (oldPitch < 1.0) ? (oldPitch - 1.0) * 24 : (oldPitch - 1.0) * 12;
this.settings.pitch = Math.max(-12, Math.min(12, Math.round(newPitch)));
console.info(`MiniMax TTS: Migrated pitch from ${oldPitch} to ${this.settings.pitch}`);
} else {
this.settings.pitch = 0;
console.info(`MiniMax TTS: Migration reset pitch to default ${this.settings.pitch}`);
}
}
$('#minimax_tts_api_host').val(this.settings.apiHost || 'https://api.minimax.io');
$('#minimax_tts_model').val(this.settings.model);
$('#minimax_tts_speed').val(this.settings.speed);
@@ -546,7 +571,7 @@ class MiniMaxTtsProvider {
$('#minimax_tts_speed_output').text(this.settings.speed.toFixed(1));
$('#minimax_tts_volume_output').text(this.settings.volume.toFixed(1));
$('#minimax_tts_pitch_output').text(this.settings.pitch.toFixed(1));
$('#minimax_tts_pitch_output').text(this.settings.pitch);
// Initialize custom configuration display
this.updateCustomModelsDisplay();
@@ -756,17 +781,20 @@ class MiniMaxTtsProvider {
throw error;
}
/** @param {number} number @param {number} lower @param {number} upper @returns {number} */
const clamp = (number, lower, upper) => Math.min(Math.max(number, lower), upper);
const requestBody = {
text: inputText,
voiceId: voiceId,
apiHost: this.settings.apiHost,
model: this.settings.model || 'speech-02-hd',
speed: Number(this.settings.speed) || 1.0,
volume: Number(this.settings.volume) || 1.0,
pitch: Number(this.settings.pitch) || 1.0,
audioSampleRate: Number(this.settings.audioSampleRate) || 32000,
bitrate: Number(this.settings.bitrate) || 128000,
format: this.settings.format || 'mp3',
model: this.settings.model || this.defaultSettings.model,
speed: clamp(Number(this.settings.speed) || this.defaultSettings.speed.default, this.defaultSettings.speed.min, this.defaultSettings.speed.max),
volume: clamp(Number(this.settings.volume) || this.defaultSettings.volume.default, this.defaultSettings.volume.min, this.defaultSettings.volume.max),
pitch: clamp(Math.round(Number(this.settings.pitch)) || this.defaultSettings.pitch.default, this.defaultSettings.pitch.min, this.defaultSettings.pitch.max),
audioSampleRate: Number(this.settings.audioSampleRate) || this.defaultSettings.audioSampleRate,
bitrate: Number(this.settings.bitrate) || this.defaultSettings.bitrate,
format: this.settings.format || this.defaultSettings.format,
language: language,
};