Google: Add Imagen image generation
This commit is contained in:
@@ -58,6 +58,7 @@ import { commonEnumProviders } from '../../slash-commands/SlashCommandCommonEnum
|
||||
import { ToolManager } from '../../tool-calling.js';
|
||||
import { MacrosParser } from '../../macros.js';
|
||||
import { t } from '../../i18n.js';
|
||||
import { oai_settings } from '../../openai.js';
|
||||
|
||||
export { MODULE_NAME };
|
||||
|
||||
@@ -85,6 +86,7 @@ const sources = {
|
||||
bfl: 'bfl',
|
||||
falai: 'falai',
|
||||
xai: 'xai',
|
||||
google: 'google',
|
||||
};
|
||||
|
||||
const initiators = {
|
||||
@@ -330,6 +332,10 @@ const defaultSettings = {
|
||||
|
||||
// BFL API settings
|
||||
bfl_upsampling: false,
|
||||
|
||||
// Google settings
|
||||
google_api: 'makersuite',
|
||||
google_enhance: true,
|
||||
};
|
||||
|
||||
const writePromptFieldsDebounced = debounce(writePromptFields, debounce_timeout.relaxed);
|
||||
@@ -509,6 +515,8 @@ async function loadSettings() {
|
||||
$('#sd_huggingface_model_id').val(extension_settings.sd.huggingface_model_id);
|
||||
$('#sd_function_tool').prop('checked', extension_settings.sd.function_tool);
|
||||
$('#sd_bfl_upsampling').prop('checked', extension_settings.sd.bfl_upsampling);
|
||||
$('#sd_google_api').val(extension_settings.sd.google_api);
|
||||
$('#sd_google_enhance').prop('checked', extension_settings.sd.google_enhance);
|
||||
|
||||
for (const style of extension_settings.sd.styles) {
|
||||
const option = document.createElement('option');
|
||||
@@ -1277,6 +1285,7 @@ async function onModelChange() {
|
||||
sources.bfl,
|
||||
sources.falai,
|
||||
sources.xai,
|
||||
sources.google,
|
||||
];
|
||||
|
||||
if (cloudSources.includes(extension_settings.sd.source)) {
|
||||
@@ -1498,6 +1507,9 @@ async function loadSamplers() {
|
||||
case sources.xai:
|
||||
samplers = ['N/A'];
|
||||
break;
|
||||
case sources.google:
|
||||
samplers = ['N/A'];
|
||||
break;
|
||||
}
|
||||
|
||||
for (const sampler of samplers) {
|
||||
@@ -1694,6 +1706,9 @@ async function loadModels() {
|
||||
case sources.xai:
|
||||
models = await loadXAIModels();
|
||||
break;
|
||||
case sources.google:
|
||||
models = await loadGoogleModels();
|
||||
break;
|
||||
}
|
||||
|
||||
for (const model of models) {
|
||||
@@ -2023,6 +2038,21 @@ async function loadNovelModels() {
|
||||
];
|
||||
}
|
||||
|
||||
async function loadGoogleModels() {
|
||||
return [
|
||||
'imagen-4.0-generate-preview-06-06',
|
||||
'imagen-4.0-fast-generate-preview-06-06',
|
||||
'imagen-4.0-ultra-generate-preview-06-06',
|
||||
'imagen-3.0-generate-002',
|
||||
'imagen-3.0-generate-001',
|
||||
'imagen-3.0-fast-generate-001',
|
||||
'imagen-3.0-capability-001',
|
||||
'imagegeneration@006',
|
||||
'imagegeneration@005',
|
||||
'imagegeneration@002',
|
||||
].map(name => ({ value: name, text: name }));
|
||||
}
|
||||
|
||||
function loadNovelSchedulers() {
|
||||
return ['karras', 'native', 'exponential', 'polyexponential'];
|
||||
}
|
||||
@@ -2105,6 +2135,9 @@ async function loadSchedulers() {
|
||||
case sources.xai:
|
||||
schedulers = ['N/A'];
|
||||
break;
|
||||
case sources.google:
|
||||
schedulers = ['N/A'];
|
||||
break;
|
||||
}
|
||||
|
||||
for (const scheduler of schedulers) {
|
||||
@@ -2199,6 +2232,9 @@ async function loadVaes() {
|
||||
case sources.xai:
|
||||
vaes = ['N/A'];
|
||||
break;
|
||||
case sources.google:
|
||||
vaes = ['N/A'];
|
||||
break;
|
||||
}
|
||||
|
||||
for (const vae of vaes) {
|
||||
@@ -2779,6 +2815,9 @@ async function sendGenerationRequest(generationType, prompt, additionalNegativeP
|
||||
case sources.xai:
|
||||
result = await generateXAIImage(prefixedPrompt, negativePrompt, signal);
|
||||
break;
|
||||
case sources.google:
|
||||
result = await generateGoogleImage(prefixedPrompt, negativePrompt, signal);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!result.data) {
|
||||
@@ -2916,20 +2955,39 @@ async function generateExtrasImage(prompt, negativePrompt, signal) {
|
||||
* Gets an aspect ratio for Stability that is the closest to the given width and height.
|
||||
* @param {number} width Target width
|
||||
* @param {number} height Target height
|
||||
* @param {'google'|'stability'} source Source of the request, used to determine aspect ratio
|
||||
* @returns {string} Closest aspect ratio as a string
|
||||
*/
|
||||
function getClosestAspectRatio(width, height) {
|
||||
const aspectRatios = {
|
||||
'16:9': 16 / 9,
|
||||
'1:1': 1,
|
||||
'21:9': 21 / 9,
|
||||
'2:3': 2 / 3,
|
||||
'3:2': 3 / 2,
|
||||
'4:5': 4 / 5,
|
||||
'5:4': 5 / 4,
|
||||
'9:16': 9 / 16,
|
||||
'9:21': 9 / 21,
|
||||
};
|
||||
function getClosestAspectRatio(width, height, source) {
|
||||
function getAspectRatios() {
|
||||
switch (source) {
|
||||
case 'stability':
|
||||
return {
|
||||
'16:9': 16 / 9,
|
||||
'1:1': 1,
|
||||
'21:9': 21 / 9,
|
||||
'2:3': 2 / 3,
|
||||
'3:2': 3 / 2,
|
||||
'4:5': 4 / 5,
|
||||
'5:4': 5 / 4,
|
||||
'9:16': 9 / 16,
|
||||
'9:21': 9 / 21,
|
||||
};
|
||||
case 'google':
|
||||
return {
|
||||
'1:1': 1,
|
||||
'16:9': 16 / 9,
|
||||
'9:16': 9 / 16,
|
||||
'4:3': 4 / 3,
|
||||
'3:4': 3 / 4,
|
||||
};
|
||||
default:
|
||||
console.warn(`Unknown source "${source}" for aspect ratio calculation.`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
const aspectRatios = getAspectRatios() || { '1:1': 1 };
|
||||
|
||||
const aspectRatio = width / height;
|
||||
|
||||
@@ -2968,7 +3026,7 @@ async function generateStabilityImage(prompt, negativePrompt, signal) {
|
||||
payload: {
|
||||
prompt: prompt.slice(0, PROMPT_LIMIT),
|
||||
negative_prompt: negativePrompt.slice(0, PROMPT_LIMIT),
|
||||
aspect_ratio: getClosestAspectRatio(extension_settings.sd.width, extension_settings.sd.height),
|
||||
aspect_ratio: getClosestAspectRatio(extension_settings.sd.width, extension_settings.sd.height, 'stability'),
|
||||
seed: extension_settings.sd.seed >= 0 ? extension_settings.sd.seed : undefined,
|
||||
style_preset: extension_settings.sd.stability_style_preset,
|
||||
output_format: IMAGE_FORMAT,
|
||||
@@ -3619,7 +3677,41 @@ async function generateFalaiImage(prompt, negativePrompt, signal) {
|
||||
return { format: 'jpg', data: data.image };
|
||||
} else {
|
||||
const text = await result.text();
|
||||
console.log(text);
|
||||
throw new Error(text);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates an image using the Google Vertex AI API.
|
||||
* @param {string} prompt The main instruction used to guide the image generation.
|
||||
* @param {string} negativePrompt The instruction used to restrict the image generation.
|
||||
* @param {AbortSignal} signal An AbortSignal object that can be used to cancel the request.
|
||||
* @returns {Promise<{format: string, data: string}>} A promise that resolves when the image generation and processing are complete.
|
||||
*/
|
||||
async function generateGoogleImage(prompt, negativePrompt, signal) {
|
||||
const result = await fetch('/api/google/generate-image', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders(),
|
||||
signal: signal,
|
||||
body: JSON.stringify({
|
||||
prompt: prompt,
|
||||
aspect_ratio: getClosestAspectRatio(extension_settings.sd.width, extension_settings.sd.height, 'google'),
|
||||
negative_prompt: negativePrompt,
|
||||
model: extension_settings.sd.model,
|
||||
enhance: extension_settings.sd.google_enhance,
|
||||
api: extension_settings.sd.google_api || 'makersuite',
|
||||
seed: extension_settings.sd.seed >= 0 ? extension_settings.sd.seed : undefined,
|
||||
vertexai_auth_mode: oai_settings.vertexai_auth_mode,
|
||||
vertexai_region: oai_settings.vertexai_region,
|
||||
vertexai_express_project_id: oai_settings.vertexai_express_project_id,
|
||||
}),
|
||||
});
|
||||
|
||||
if (result.ok) {
|
||||
const data = await result.json();
|
||||
return { format: 'jpg', data: data.image };
|
||||
} else {
|
||||
const text = await result.text();
|
||||
throw new Error(text);
|
||||
}
|
||||
}
|
||||
@@ -3913,6 +4005,8 @@ function isValidState() {
|
||||
return secret_state[SECRET_KEYS.FALAI];
|
||||
case sources.xai:
|
||||
return secret_state[SECRET_KEYS.XAI];
|
||||
case sources.google:
|
||||
return secret_state[SECRET_KEYS.MAKERSUITE] || secret_state[SECRET_KEYS.VERTEXAI] || secret_state[SECRET_KEYS.VERTEXAI_SERVICE_ACCOUNT];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4573,6 +4667,15 @@ jQuery(async () => {
|
||||
$('#sd_function_tool').on('input', onFunctionToolInput);
|
||||
$('#sd_bfl_upsampling').on('input', onBflUpsamplingInput);
|
||||
|
||||
$('#sd_google_api').on('input', function () {
|
||||
extension_settings.sd.google_api = String($(this).val());
|
||||
saveSettingsDebounced();
|
||||
});
|
||||
$('#sd_google_enhance').on('input', function () {
|
||||
extension_settings.sd.google_enhance = $(this).prop('checked');
|
||||
saveSettingsDebounced();
|
||||
});
|
||||
|
||||
if (!CSS.supports('field-sizing', 'content')) {
|
||||
$('.sd_settings .inline-drawer-toggle').on('click', function () {
|
||||
initScrollHeight($('#sd_prompt_prefix'));
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
<option value="drawthings">DrawThings HTTP API</option>
|
||||
<option value="extras">Extras API (deprecated)</option>
|
||||
<option value="falai">FAL.AI</option>
|
||||
<option value="google">Google AI</option>
|
||||
<option value="huggingface">HuggingFace Inference API (serverless)</option>
|
||||
<option value="nanogpt">NanoGPT</option>
|
||||
<option value="novel">NovelAI Diffusion</option>
|
||||
@@ -281,6 +282,26 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div data-sd-source="google">
|
||||
<div class="flex-container">
|
||||
<div class="flex1">
|
||||
<label for="sd_google_api" data-i18n="API Type">API Type</label>
|
||||
<select id="sd_google_api" class="text_pole">
|
||||
<option value="makersuite">Google AI Studio</option>
|
||||
<option value="vertexai">Google Vertex AI</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex-container">
|
||||
<label class="flex1 checkbox_label" for="sd_google_enhance" title="Enables prompt enhancing (passes prompts through an LLM to add detail).">
|
||||
<input id="sd_google_enhance" type="checkbox" />
|
||||
<span data-i18n="Enhance">
|
||||
Enhance
|
||||
</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex-container">
|
||||
<div class="flex1">
|
||||
<label for="sd_model" data-i18n="Model">Model</label>
|
||||
|
||||
@@ -418,3 +418,68 @@ router.post('/generate-native-tts', async (request, response) => {
|
||||
return response.end();
|
||||
}
|
||||
});
|
||||
|
||||
router.post('/generate-image', async (request, response) => {
|
||||
try {
|
||||
const model = request.body.model || 'imagen-3.0-generate-002';
|
||||
const { url, headers, apiName } = await getGoogleApiConfig(request, model, 'predict');
|
||||
|
||||
// block_none for safetySetting is currently not supported.
|
||||
// AI Studio is stricter than Vertex AI.
|
||||
const safetySetting = request.body.api === 'vertexai'
|
||||
? 'block_only_high'
|
||||
: 'block_low_and_above';
|
||||
|
||||
const requestBody = {
|
||||
instances: [{
|
||||
prompt: request.body.prompt || '',
|
||||
}],
|
||||
parameters: {
|
||||
sampleCount: 1,
|
||||
seed: Number(request.body.seed ?? Math.floor(Math.random() * 1000000)),
|
||||
enhancePrompt: Boolean(request.body.enhance ?? false),
|
||||
negativePrompt: request.body.negative_prompt || undefined,
|
||||
aspectRatio: String(request.body.aspect_ratio || '1:1'),
|
||||
personGeneration: 'allow_all',
|
||||
language: 'auto',
|
||||
safetySetting: safetySetting,
|
||||
addWatermark: false,
|
||||
outputOptions: {
|
||||
mimeType: 'image/jpeg',
|
||||
compressionQuality: 100,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
console.debug(`${apiName} image generation request:`, model, requestBody);
|
||||
|
||||
const result = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: headers,
|
||||
body: JSON.stringify(requestBody),
|
||||
});
|
||||
|
||||
if (!result.ok) {
|
||||
const errorText = await result.text();
|
||||
console.warn(`${apiName} image generation error: ${result.status} ${result.statusText}`, errorText);
|
||||
return response.sendStatus(500);
|
||||
}
|
||||
|
||||
/** @type {any} */
|
||||
const data = await result.json();
|
||||
const imagePart = data?.predictions?.[0]?.bytesBase64Encoded;
|
||||
|
||||
if (!imagePart) {
|
||||
console.warn(`${apiName} image generation error: No image data found in response`);
|
||||
return response.sendStatus(500);
|
||||
}
|
||||
|
||||
return response.send({ image: imagePart });
|
||||
} catch (error) {
|
||||
console.error('Google Image generation failed:', error);
|
||||
if (!response.headersSent) {
|
||||
return response.sendStatus(500);
|
||||
}
|
||||
return response.end();
|
||||
}
|
||||
});
|
||||
|
||||
@@ -1327,17 +1327,6 @@ xai.post('/generate', async (request, response) => {
|
||||
}
|
||||
});
|
||||
|
||||
router.use('/comfy', comfy);
|
||||
router.use('/together', together);
|
||||
router.use('/drawthings', drawthings);
|
||||
router.use('/pollinations', pollinations);
|
||||
router.use('/stability', stability);
|
||||
router.use('/huggingface', huggingface);
|
||||
router.use('/nanogpt', nanogpt);
|
||||
router.use('/bfl', bfl);
|
||||
router.use('/falai', falai);
|
||||
router.use('/xai', xai);
|
||||
|
||||
const aimlapi = express.Router();
|
||||
|
||||
aimlapi.post('/models', async (request, response) => {
|
||||
@@ -1422,4 +1411,14 @@ aimlapi.post('/generate-image', async (req, res) => {
|
||||
}
|
||||
});
|
||||
|
||||
router.use('/comfy', comfy);
|
||||
router.use('/together', together);
|
||||
router.use('/drawthings', drawthings);
|
||||
router.use('/pollinations', pollinations);
|
||||
router.use('/stability', stability);
|
||||
router.use('/huggingface', huggingface);
|
||||
router.use('/nanogpt', nanogpt);
|
||||
router.use('/bfl', bfl);
|
||||
router.use('/falai', falai);
|
||||
router.use('/xai', xai);
|
||||
router.use('/aimlapi', aimlapi);
|
||||
|
||||
Reference in New Issue
Block a user