Z.AI: Video inlining and 'coding' captions

Closes #4899
This commit is contained in:
Cohee
2025-12-17 23:31:14 +02:00
parent 995fa10fa4
commit c92939e56c
6 changed files with 30 additions and 9 deletions
+1 -1
View File
@@ -1998,7 +1998,7 @@
<i class="icon-supported fa-solid fa-image" title="Supported by the current model" data-i18n="[title]Supported by the current model"></i>
<i class="icon-unsupported fa-solid fa-image" title="Unsupported by the current model" data-i18n="[title]Unsupported by the current model"></i>
</div>
<div id="openai_video_inlining_supported" data-source="makersuite,vertexai,openrouter">
<div id="openai_video_inlining_supported" data-source="makersuite,vertexai,openrouter,zai">
<i class="icon-supported fa-solid fa-film" title="Supported by the current model" data-i18n="[title]Supported by the current model"></i>
<i class="icon-unsupported fa-solid fa-film" title="Unsupported by the current model" data-i18n="[title]Unsupported by the current model"></i>
</div>
+1 -1
View File
@@ -455,7 +455,7 @@ function isVideoCaptioningAvailable() {
return false;
}
return ['google', 'vertexai'].includes(extension_settings.caption.multimodal_api);
return ['google', 'vertexai', 'zai'].includes(extension_settings.caption.multimodal_api);
}
jQuery(async function () {
@@ -177,9 +177,6 @@
<option data-type="custom" value="custom_current" data-i18n="currently_selected">[Currently selected]</option>
</select>
</div>
<div data-type="zai">
<b>Will use Common API. Coding API is not supported!</b>
</div>
<div data-type="ollama">
<div>
The model must be downloaded first! Do it with the <code>ollama pull</code> command or <a href="#" id="caption_ollama_pull">click here</a>.
+5 -1
View File
@@ -1,7 +1,7 @@
import { CONNECT_API_MAP, getRequestHeaders } from '../../script.js';
import { extension_settings, openThirdPartyExtensionMenu } from '../extensions.js';
import { t } from '../i18n.js';
import { oai_settings, proxies } from '../openai.js';
import { oai_settings, proxies, ZAI_ENDPOINT } from '../openai.js';
import { SECRET_KEYS, secret_state } from '../secrets.js';
import { textgen_types, textgenerationwebui_settings } from '../textgen-settings.js';
import { getTokenCountAsync } from '../tokenizers.js';
@@ -115,6 +115,10 @@ export async function getMultimodalCaption(base64Img, prompt) {
requestBody.custom_exclude_body = oai_settings.custom_exclude_body;
}
if (extension_settings.caption.multimodal_api === 'zai') {
requestBody.zai_endpoint = oai_settings.zai_endpoint || ZAI_ENDPOINT.COMMON;
}
function getEndpointUrl() {
switch (extension_settings.caption.multimodal_api) {
case 'google':
+6 -1
View File
@@ -5870,12 +5870,15 @@ export function isVideoInliningSupported() {
return false;
}
// Only Gemini models support video for now
const videoSupportedModels = [
// Gemini
'gemini-2.0',
'gemini-2.5',
'gemini-exp-1206',
'gemini-3',
// Z.AI (GLM)
'glm-4.5v',
'glm-4.6v',
];
switch (oai_settings.chat_completion_source) {
@@ -5885,6 +5888,8 @@ export function isVideoInliningSupported() {
return videoSupportedModels.some(model => oai_settings.vertexai_model.includes(model));
case chat_completion_sources.OPENROUTER:
return (Array.isArray(model_list) && model_list.find(m => m.id === oai_settings.openrouter_model)?.architecture?.input_modalities?.includes('video'));
case chat_completion_sources.ZAI:
return videoSupportedModels.some(model => oai_settings.zai_model.includes(model));
default:
return false;
}
+17 -2
View File
@@ -8,7 +8,7 @@ import express from 'express';
import { getConfigValue, mergeObjectWithYaml, excludeKeysByYaml, trimV1, delay } from '../util.js';
import { setAdditionalHeaders } from '../additional-headers.js';
import { readSecret, SECRET_KEYS } from './secrets.js';
import { AIMLAPI_HEADERS, OPENROUTER_HEADERS } from '../constants.js';
import { AIMLAPI_HEADERS, OPENROUTER_HEADERS, ZAI_ENDPOINT } from '../constants.js';
export const router = express.Router();
@@ -191,7 +191,22 @@ router.post('/caption-image', async (request, response) => {
}
if (request.body.api === 'zai') {
apiUrl = 'https://api.z.ai/api/paas/v4/chat/completions';
apiUrl = request.body.zai_endpoint === ZAI_ENDPOINT.CODING
? 'https://api.z.ai/api/coding/paas/v4/chat/completions'
: 'https://api.z.ai/api/paas/v4/chat/completions';
// Handle video inlining for Z.AI: when the submitted media is a video data
// URL, the content part is re-labelled from `image_url` to `video_url`.
// The subtype class also admits '.', '+' and '-' so that media types such as
// `video/x-matroska` or `video/x-msvideo` are recognized as video as well
// (a bare \w+ stops at the hyphen and would leave them as `image_url`).
if (/data:video\/[\w.+-]+;base64,/.test(request.body.image)) {
    // Only the first message carrying an array of content parts is inspected.
    const message = body.messages.find(msg => Array.isArray(msg.content));
    if (message) {
        const imgContent = message.content.find(c => c.type === 'image_url');
        if (imgContent) {
            // Move the payload under the `video_url` key and drop the old key.
            imgContent.type = 'video_url';
            imgContent.video_url = imgContent.image_url;
            delete imgContent.image_url;
        }
    }
}
}
if (['koboldcpp', 'vllm', 'llamacpp', 'ooba'].includes(request.body.api)) {