Add caching system prompt feature for OpenRouter Gemini (#4903)

* feat: add caching system prompt for OpenRouter Gemini

* fix: resolve reviews
This commit is contained in:
Chanho Chung
2025-12-21 02:01:42 +09:00
committed by GitHub
parent c75df956c5
commit ca43796795
3 changed files with 78 additions and 8 deletions
+6
View File
@@ -287,6 +287,12 @@ claude:
gemini:
# API endpoint version ("v1beta" or "v1alpha")
apiVersion: 'v1beta'
# Enables caching of the system prompt (if supported). Only for OpenRouter.
# -- IMPORTANT! --
# Use only when the prompt before the chat history is static and doesn't change between requests
# (e.g. no {{random}} macros and no lorebook entries other than in-chat injections).
# Otherwise, you'll just waste money on cache misses.
enableSystemPromptCache: false
# https://ai.google.dev/gemini-api/docs/imagen#imagen-configuration
image:
# Leave empty to use the API-default value.
+63 -3
View File
@@ -45,7 +45,7 @@ import {
addAssistantPrefix,
embedOpenRouterMedia,
addReasoningContentToToolCalls,
cachingSystemPromptForOpenRouterClaude,
cachingSystemPromptForOpenRouter,
addOpenRouterSignatures,
} from '../../prompt-converters.js';
@@ -84,6 +84,58 @@ const API_COMETAPI = 'https://api.cometapi.com/v1';
const API_ZAI_COMMON = 'https://api.z.ai/api/paas/v4';
const API_ZAI_CODING = 'https://api.z.ai/api/coding/paas/v4';
const API_SILICONFLOW = 'https://api.siliconflow.com/v1';
const API_OPENROUTER = 'https://openrouter.ai/api/v1';

/**
 * IDs of OpenRouter models already confirmed to support prompt cache writing.
 * Entries are only ever added, so a positive answer is remembered for the
 * lifetime of the process.
 * @type {string[]}
 */
const openRouterCacheableModels = [];

/**
 * How long (in milliseconds) a "no cache-write support" answer is remembered
 * before the model listing is consulted again.
 */
const OPENROUTER_NON_CACHEABLE_TTL_MS = 60 * 60 * 1000;

/**
 * IDs of OpenRouter models whose listing showed no cache-write pricing,
 * mapped to the timestamp (ms since epoch) at which that answer expires.
 * @type {Map<string, number>}
 */
const openRouterNonCacheableModels = new Map();

/**
 * Checks if an OpenRouter model supports prompt cache writing.
 *
 * A model counts as cacheable when its entry in the `/models` listing has a
 * non-null `pricing.input_cache_write` field. Both answers are memoized to
 * avoid repeated API calls: positive answers indefinitely, negative answers
 * for `OPENROUTER_NON_CACHEABLE_TTL_MS`. Failures (non-2xx status, unexpected
 * response shape, network error, timeout) are NOT memoized, so the next call
 * retries.
 *
 * Never throws; every failure is logged with `console.warn` and reported as `false`.
 *
 * @param {string} modelId - The OpenRouter model ID
 * @returns {Promise<boolean>} `true` if the model supports writing cache
 */
async function isOpenRouterModelCacheable(modelId) {
    if (openRouterCacheableModels.includes(modelId)) {
        return true;
    }

    // Serve a remembered negative answer while it is still fresh; without this,
    // every request for a non-cacheable model would re-download the full listing.
    const negativeExpiry = openRouterNonCacheableModels.get(modelId);
    if (negativeExpiry !== undefined) {
        if (Date.now() < negativeExpiry) {
            return false;
        }
        openRouterNonCacheableModels.delete(modelId);
    }

    try {
        const response = await fetch(`${API_OPENROUTER}/models`, {
            method: 'GET',
            headers: { 'Accept': 'application/json' },
            // Abort the lookup after 5 seconds instead of waiting indefinitely.
            signal: AbortSignal.timeout(5000),
        });

        if (!response.ok) {
            console.warn(`OpenRouter models API returned ${response.status}: ${response.statusText}`);
            return false;
        }

        /** @type {any} */
        const data = await response.json();

        if (!Array.isArray(data?.data)) {
            console.warn('OpenRouter API response format unexpected');
            return false;
        }

        const model = data.data.find(m => m.id === modelId);
        // `!= null` deliberately matches both `null` and `undefined`.
        const supportsCache = model?.pricing?.input_cache_write != null;

        if (supportsCache) {
            openRouterCacheableModels.push(modelId);
        } else {
            openRouterNonCacheableModels.set(modelId, Date.now() + OPENROUTER_NON_CACHEABLE_TTL_MS);
        }

        return supportsCache;
    } catch (error) {
        // `error` may be any thrown value, not necessarily an Error instance.
        console.warn(`Failed to check OpenRouter cache support for ${modelId}:`, error?.message ?? error);
        return false;
    }
}
/**
* Gets OpenRouter transforms based on the request.
@@ -2051,22 +2103,30 @@ router.post('/generate', async function (request, response) {
const cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1, 'number');
const isClaude3or4 = /anthropic\/claude-(3|opus-4|sonnet-4|haiku-4)/.test(request.body.model);
const cacheTTL = getConfigValue('claude.extendedTTL', false, 'boolean') ? '1h' : '5m';
const isGemini = /google\/gemini/.test(request.body.model);
const isCacheableGemini = isGemini && await isOpenRouterModelCacheable(request.body.model);
const enableGeminiSystemPromptCache = getConfigValue('gemini.enableSystemPromptCache', false, 'boolean');
if (Array.isArray(request.body.messages)) {
embedOpenRouterMedia(request.body.messages);
addOpenRouterSignatures(request.body.messages, request.body.model);
if (isClaude3or4) {
if (enableSystemPromptCache) {
cachingSystemPromptForOpenRouterClaude(request.body.messages, cacheTTL);
cachingSystemPromptForOpenRouter(request.body.messages, cacheTTL);
}
if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0) {
cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth, cacheTTL);
}
}
if (isCacheableGemini && enableGeminiSystemPromptCache) {
cachingSystemPromptForOpenRouter(request.body.messages);
}
}
const isGemini = /google\/gemini/.test(request.body.model);
if (isGemini) {
bodyParams['safety_settings'] = GEMINI_SAFETY;
}
+9 -5
View File
@@ -1057,12 +1057,12 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth, ttl)
}
/**
* Adds cache_control to the system prompt for OpenRouter Claude requests.
* Adds cache_control to the system prompt for OpenRouter requests.
*
* @param {object[]} messages Array of messages
* @param {string} ttl TTL value
* @param {string} [ttl] TTL value (optional)
*/
export function cachingSystemPromptForOpenRouterClaude(messages, ttl) {
export function cachingSystemPromptForOpenRouter(messages, ttl = undefined) {
if (!Array.isArray(messages) || messages.length === 0) {
return;
}
@@ -1078,6 +1078,10 @@ export function cachingSystemPromptForOpenRouterClaude(messages, ttl) {
return;
}
const cacheControl = ttl
? { type: 'ephemeral', ttl }
: { type: 'ephemeral' };
if (Array.isArray(systemMessage.content)) {
const hasExistingCacheControl = systemMessage.content.some(part => part?.cache_control);
if (hasExistingCacheControl) {
@@ -1086,7 +1090,7 @@ export function cachingSystemPromptForOpenRouterClaude(messages, ttl) {
for (let i = systemMessage.content.length - 1; i >= 0; i--) {
if (systemMessage.content[i]?.type === 'text') {
systemMessage.content[i].cache_control = { type: 'ephemeral', ttl };
systemMessage.content[i].cache_control = cacheControl;
return;
}
}
@@ -1095,7 +1099,7 @@ export function cachingSystemPromptForOpenRouterClaude(messages, ttl) {
{
type: 'text',
text: systemMessage.content,
cache_control: { type: 'ephemeral', ttl },
cache_control: cacheControl,
},
];
}