Add caching system prompt feature for OpenRouter Gemini (#4903)
* feat: add caching system prompt for OpenRouter Gemini * fix: resolve reviews
This commit is contained in:
@@ -287,6 +287,12 @@ claude:
|
||||
gemini:
|
||||
# API endpoint version ("v1beta" or "v1alpha")
|
||||
apiVersion: 'v1beta'
|
||||
# Enables caching of the system prompt (if supported). Only for OpenRouter.
|
||||
# -- IMPORTANT! --
|
||||
# Use only when the prompt before the chat history is static and doesn't change between requests
|
||||
# (e.g. no {{random}} macro, and no lorebooks other than those inserted as in-chat injections).
|
||||
# Otherwise, you'll just waste money on cache misses.
|
||||
enableSystemPromptCache: false
|
||||
# https://ai.google.dev/gemini-api/docs/imagen#imagen-configuration
|
||||
image:
|
||||
# Leave empty to use the API-default value.
|
||||
|
||||
@@ -45,7 +45,7 @@ import {
|
||||
addAssistantPrefix,
|
||||
embedOpenRouterMedia,
|
||||
addReasoningContentToToolCalls,
|
||||
cachingSystemPromptForOpenRouterClaude,
|
||||
cachingSystemPromptForOpenRouter,
|
||||
addOpenRouterSignatures,
|
||||
} from '../../prompt-converters.js';
|
||||
|
||||
@@ -84,6 +84,58 @@ const API_COMETAPI = 'https://api.cometapi.com/v1';
|
||||
const API_ZAI_COMMON = 'https://api.z.ai/api/paas/v4';
|
||||
const API_ZAI_CODING = 'https://api.z.ai/api/coding/paas/v4';
|
||||
const API_SILICONFLOW = 'https://api.siliconflow.com/v1';
|
||||
const API_OPENROUTER = 'https://openrouter.ai/api/v1';
|
||||
|
||||
/**
 * Cache for cacheable (writing) OpenRouter model IDs.
 * Filled from the OpenRouter model list; entries are only ever added, never removed.
 * @type {string[]}
 */
const openRouterCacheableModels = [];

/**
 * How long (in milliseconds) a successfully fetched model list is treated as authoritative.
 * While the list is fresh, a model that is not in `openRouterCacheableModels` is reported as
 * non-cacheable without another network round trip.
 * @type {number}
 */
const OPENROUTER_MODEL_LIST_TTL_MS = 10 * 60 * 1000;

/**
 * Timestamp (ms since epoch) of the last model list that was fetched and parsed successfully.
 * `0` means no list has been processed yet.
 * @type {number}
 */
let openRouterModelListFetchedAt = 0;

/**
 * Checks if an OpenRouter model supports prompt cache writing.
 *
 * A model counts as cacheable when its entry in the OpenRouter model list has a non-null
 * `pricing.input_cache_write` value. Both outcomes are cached: every cacheable model found in
 * a successful response is remembered, and while that response is fresh
 * (see `OPENROUTER_MODEL_LIST_TTL_MS`) any other model is answered with `false` without
 * calling the API again. Failed or malformed responses are not cached, so the next call retries.
 *
 * This function never throws; any failure is logged as a warning and reported as `false`.
 *
 * @param {string} modelId - The OpenRouter model ID
 * @returns {Promise<boolean>} `true` if the model supports writing cache
 */
async function isOpenRouterModelCacheable(modelId) {
    if (openRouterCacheableModels.includes(modelId)) {
        return true;
    }

    // The last successful fetch already recorded every cacheable model, so a miss against
    // a still-fresh list is a definitive "no" and does not need another download.
    if (Date.now() - openRouterModelListFetchedAt < OPENROUTER_MODEL_LIST_TTL_MS) {
        return false;
    }

    try {
        const response = await fetch(`${API_OPENROUTER}/models`, {
            method: 'GET',
            headers: { 'Accept': 'application/json' },
            // Do not let a slow model list stall the caller for more than 5 seconds.
            signal: AbortSignal.timeout(5000),
        });

        if (!response.ok) {
            console.warn(`OpenRouter models API returned ${response.status}: ${response.statusText}`);
            return false;
        }

        /** @type {any} */
        const data = await response.json();

        if (!Array.isArray(data?.data)) {
            console.warn('OpenRouter API response format unexpected');
            return false;
        }

        // Remember every cacheable model from this response, not just the requested one,
        // so later lookups for other models are answered from memory as well.
        for (const model of data.data) {
            const id = model?.id;
            if (typeof id !== 'string' || model?.pricing?.input_cache_write == null) {
                continue;
            }
            if (!openRouterCacheableModels.includes(id)) {
                openRouterCacheableModels.push(id);
            }
        }

        // Only a fully parsed list is marked as fresh; error paths above leave the timestamp alone.
        openRouterModelListFetchedAt = Date.now();

        return openRouterCacheableModels.includes(modelId);
    } catch (error) {
        // Anything can be thrown in JavaScript, so do not assume an Error instance.
        const reason = error instanceof Error ? error.message : error;
        console.warn(`Failed to check OpenRouter cache support for ${modelId}:`, reason);
        return false;
    }
}
|
||||
|
||||
/**
|
||||
* Gets OpenRouter transforms based on the request.
|
||||
@@ -2051,22 +2103,30 @@ router.post('/generate', async function (request, response) {
|
||||
const cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1, 'number');
|
||||
const isClaude3or4 = /anthropic\/claude-(3|opus-4|sonnet-4|haiku-4)/.test(request.body.model);
|
||||
const cacheTTL = getConfigValue('claude.extendedTTL', false, 'boolean') ? '1h' : '5m';
|
||||
|
||||
const isGemini = /google\/gemini/.test(request.body.model);
|
||||
const isCacheableGemini = isGemini && await isOpenRouterModelCacheable(request.body.model);
|
||||
const enableGeminiSystemPromptCache = getConfigValue('gemini.enableSystemPromptCache', false, 'boolean');
|
||||
|
||||
if (Array.isArray(request.body.messages)) {
|
||||
embedOpenRouterMedia(request.body.messages);
|
||||
addOpenRouterSignatures(request.body.messages, request.body.model);
|
||||
|
||||
if (isClaude3or4) {
|
||||
if (enableSystemPromptCache) {
|
||||
cachingSystemPromptForOpenRouterClaude(request.body.messages, cacheTTL);
|
||||
cachingSystemPromptForOpenRouter(request.body.messages, cacheTTL);
|
||||
}
|
||||
|
||||
if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0) {
|
||||
cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth, cacheTTL);
|
||||
}
|
||||
}
|
||||
|
||||
if (isCacheableGemini && enableGeminiSystemPromptCache) {
|
||||
cachingSystemPromptForOpenRouter(request.body.messages);
|
||||
}
|
||||
}
|
||||
|
||||
const isGemini = /google\/gemini/.test(request.body.model);
|
||||
if (isGemini) {
|
||||
bodyParams['safety_settings'] = GEMINI_SAFETY;
|
||||
}
|
||||
|
||||
@@ -1057,12 +1057,12 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth, ttl)
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds cache_control to the system prompt for OpenRouter Claude requests.
|
||||
* Adds cache_control to the system prompt for OpenRouter requests.
|
||||
*
|
||||
* @param {object[]} messages Array of messages
|
||||
* @param {string} ttl TTL value
|
||||
* @param {string} [ttl] TTL value (optional)
|
||||
*/
|
||||
export function cachingSystemPromptForOpenRouterClaude(messages, ttl) {
|
||||
export function cachingSystemPromptForOpenRouter(messages, ttl = undefined) {
|
||||
if (!Array.isArray(messages) || messages.length === 0) {
|
||||
return;
|
||||
}
|
||||
@@ -1078,6 +1078,10 @@ export function cachingSystemPromptForOpenRouterClaude(messages, ttl) {
|
||||
return;
|
||||
}
|
||||
|
||||
const cacheControl = ttl
|
||||
? { type: 'ephemeral', ttl }
|
||||
: { type: 'ephemeral' };
|
||||
|
||||
if (Array.isArray(systemMessage.content)) {
|
||||
const hasExistingCacheControl = systemMessage.content.some(part => part?.cache_control);
|
||||
if (hasExistingCacheControl) {
|
||||
@@ -1086,7 +1090,7 @@ export function cachingSystemPromptForOpenRouterClaude(messages, ttl) {
|
||||
|
||||
for (let i = systemMessage.content.length - 1; i >= 0; i--) {
|
||||
if (systemMessage.content[i]?.type === 'text') {
|
||||
systemMessage.content[i].cache_control = { type: 'ephemeral', ttl };
|
||||
systemMessage.content[i].cache_control = cacheControl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -1095,7 +1099,7 @@ export function cachingSystemPromptForOpenRouterClaude(messages, ttl) {
|
||||
{
|
||||
type: 'text',
|
||||
text: systemMessage.content,
|
||||
cache_control: { type: 'ephemeral', ttl },
|
||||
cache_control: cacheControl,
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user