fix: set max output tokens to 16384 for all LLM providers

OpenAI's default output limit (4096 tokens) was too low for structured synthesis output with multiple categories and articles per category, which caused truncated JSON. Set the limit to 16384 for both OpenAI APIs (Responses and Chat Completions) and for Gemini; Anthropic was already set to 16384.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
3 months ago · 8a18b70aff
parent fdb3110407
commit 8a18b70aff
2 changed files with 4 additions and 1 deletion
--- a/backend/src/services/llm/gemini.rs
+++ b/backend/src/services/llm/gemini.rs
@@ -138,7 +138,8 @@ fn build_request_body(
        },
        "generationConfig": {
            "responseMimeType": "application/json",
-            "responseSchema": response_schema
+            "responseSchema": response_schema,
+            "maxOutputTokens": 16384
        }
    });
--- a/backend/src/services/llm/openai.rs
+++ b/backend/src/services/llm/openai.rs
@@ -44,6 +44,7 @@ impl OpenAiProvider {
            "model": model,
            "instructions": system_prompt,
            "input": user_prompt,
+            "max_output_tokens": 16384,
            "text": {
                "format": {
                    "type": "json_schema",
@@ -116,6 +117,7 @@ impl OpenAiProvider {
                    "content": user_prompt
                }
            ],
+            "max_tokens": 16384,
            "response_format": {
                "type": "json_schema",
                "json_schema": {