diff --git a/backend/src/handlers/api_keys.rs b/backend/src/handlers/api_keys.rs index 57412c5..c300420 100644 --- a/backend/src/handlers/api_keys.rs +++ b/backend/src/handlers/api_keys.rs @@ -142,7 +142,7 @@ pub async fn test_key( let test_model = get_default_model_for_provider(&state, &provider).await?; let result = llm_provider - .generate_rewrite_pass( + .call_llm( &test_model, "You are a test assistant. Respond in JSON as instructed.", "Say hello in one word.", diff --git a/backend/src/services/llm/anthropic.rs b/backend/src/services/llm/anthropic.rs index 7064504..7ddc532 100644 --- a/backend/src/services/llm/anthropic.rs +++ b/backend/src/services/llm/anthropic.rs @@ -1,8 +1,8 @@ //! Anthropic LLM provider implementation. //! //! Implements the `LlmProvider` trait using the Anthropic Messages API. -//! - **Pass 1 (search)**: Messages API with `web_search_20250305` tool -//! - **Pass 2 (rewrite)**: Messages API without tools, JSON via prompt instructions +//! Uses the Messages API without web search tools; structured output is +//! enforced via schema instructions embedded in the system prompt. use async_trait::async_trait; use serde_json::Value; @@ -16,9 +16,6 @@ const ANTHROPIC_VERSION: &str = "2023-06-01"; /// Default max tokens for Anthropic responses. const DEFAULT_MAX_TOKENS: u32 = 16384; -/// Maximum web search uses per request. -const WEB_SEARCH_MAX_USES: u32 = 10; - /// Anthropic provider. /// /// Holds the API key and an HTTP client for making requests @@ -36,19 +33,20 @@ impl AnthropicProvider { http_client, } } +} - /// Execute a request to the Anthropic Messages API. - /// - /// Sends a POST to `https://api.anthropic.com/v1/messages` with the - /// appropriate headers and body. When `include_web_search` is true, - /// the `web_search_20250305` tool is included. - async fn call_messages_api( +#[async_trait] +impl LlmProvider for AnthropicProvider { + fn provider_id(&self) -> &str { + "anthropic" + } + + async fn call_llm( &self, model: &str, system_prompt: &str, user_prompt: &str, response_schema: &Value, - include_web_search: bool, ) -> Result { // Anthropic doesn't have native JSON schema enforcement like OpenAI/Gemini. // We embed the schema in the system prompt to instruct Claude to respond with @@ -62,7 +60,7 @@ impl AnthropicProvider { let full_system_prompt = format!("{}{}", system_prompt, schema_instruction); - let mut body = serde_json::json!({ + let body = serde_json::json!({ "model": model, "max_tokens": DEFAULT_MAX_TOKENS, "system": full_system_prompt, @@ -72,14 +70,6 @@ impl AnthropicProvider { }] }); - if include_web_search { - body["tools"] = serde_json::json!([{ - "type": "web_search_20250305", - "name": "web_search", - "max_uses": WEB_SEARCH_MAX_USES - }]); - } - let response = self .http_client .post("https://api.anthropic.com/v1/messages") @@ -115,39 +105,6 @@ impl AnthropicProvider { } } -#[async_trait] -impl LlmProvider for AnthropicProvider { - fn provider_id(&self) -> &str { - "anthropic" - } - - async fn generate_search_pass( - &self, - model: &str, - system_prompt: &str, - user_prompt: &str, - response_schema: &Value, - ) -> Result { - self.call_messages_api(model, system_prompt, user_prompt, response_schema, true) - .await - } - - async fn generate_rewrite_pass( - &self, - model: &str, - system_prompt: &str, - user_prompt: &str, - response_schema: &Value, - ) -> Result { - self.call_messages_api(model, system_prompt, user_prompt, response_schema, false) - .await - } - - fn supports_web_search(&self) -> bool { - true - } -} - /// Extract the text content from an Anthropic Messages API response. /// /// The response structure is: @@ -155,7 +112,6 @@ impl LlmProvider for AnthropicProvider { /// { /// "content": [ /// { "type": "text", "text": "..." }, -/// { "type": "web_search_tool_result", ... }, /// { "type": "text", "text": "{...json...}" } /// ] /// } @@ -281,7 +237,6 @@ mod tests { fn anthropic_provider_metadata() { let provider = AnthropicProvider::new("test-key".into(), reqwest::Client::new()); assert_eq!(provider.provider_id(), "anthropic"); - assert!(provider.supports_web_search()); } // ── Content extraction ────────────────────────────────────── @@ -307,30 +262,13 @@ mod tests { } #[test] - fn extract_content_with_web_search_results() { - // When web_search is used, the response may contain tool results interleaved with text + fn extract_content_with_multiple_text_blocks() { + // When there are multiple text blocks, we take the last one let response = serde_json::json!({ "content": [ { "type": "text", - "text": "Let me search for that information." - }, - { - "type": "server_tool_use", - "id": "srvtoolu_123", - "name": "web_search", - "input": { "query": "AI news this week" } - }, - { - "type": "web_search_tool_result", - "tool_use_id": "srvtoolu_123", - "content": [ - { - "type": "web_search_result", - "url": "https://example.com/ai-news", - "title": "AI News" - } - ] + "text": "Let me think about that." }, { "type": "text", diff --git a/backend/src/services/llm/factory.rs b/backend/src/services/llm/factory.rs index c86a3a5..5bf1242 100644 --- a/backend/src/services/llm/factory.rs +++ b/backend/src/services/llm/factory.rs @@ -60,21 +60,18 @@ mod tests { fn factory_creates_gemini_provider() { let provider = create_provider("gemini", "test-key".into()).unwrap(); assert_eq!(provider.provider_id(), "gemini"); - assert!(provider.supports_web_search()); } #[test] fn factory_creates_openai_provider() { let provider = create_provider("openai", "test-key".into()).unwrap(); assert_eq!(provider.provider_id(), "openai"); - assert!(provider.supports_web_search()); } #[test] fn factory_creates_anthropic_provider() { let provider = create_provider("anthropic", "test-key".into()).unwrap(); assert_eq!(provider.provider_id(), "anthropic"); - assert!(provider.supports_web_search()); } #[test] diff --git a/backend/src/services/llm/gemini.rs b/backend/src/services/llm/gemini.rs index 4f33cf4..0dfbfcc 100644 --- a/backend/src/services/llm/gemini.rs +++ b/backend/src/services/llm/gemini.rs @@ -1,8 +1,7 @@ //! Google Gemini LLM provider implementation. //! //! Implements the `LlmProvider` trait using the Gemini REST API. -//! Supports both web search grounding (Pass 1) and plain structured -//! output (Pass 2) via the `generateContent` endpoint. +//! Uses the `generateContent` endpoint with structured JSON output. use async_trait::async_trait; use serde_json::Value; @@ -75,56 +74,25 @@ impl LlmProvider for GeminiProvider { "gemini" } - async fn generate_search_pass( + async fn call_llm( &self, model: &str, system_prompt: &str, user_prompt: &str, response_schema: &Value, ) -> Result { - let body = build_request_body( - system_prompt, - user_prompt, - response_schema, - true, // include googleSearch tool - ); - - self.generate_content(model, &body).await - } - - async fn generate_rewrite_pass( - &self, - model: &str, - system_prompt: &str, - user_prompt: &str, - response_schema: &Value, - ) -> Result { - let body = build_request_body( - system_prompt, - user_prompt, - response_schema, - false, // no tools for rewrite - ); - + let body = build_request_body(system_prompt, user_prompt, response_schema); self.generate_content(model, &body).await } - - fn supports_web_search(&self) -> bool { - true - } } /// Build the JSON request body for the Gemini `generateContent` endpoint. -/// -/// When `include_search` is true, the `googleSearch` tool is included -/// to enable web search grounding (Pass 1). fn build_request_body( system_prompt: &str, user_prompt: &str, response_schema: &Value, - include_search: bool, ) -> Value { - let mut body = serde_json::json!({ + serde_json::json!({ "contents": [{ "role": "user", "parts": [{ @@ -141,15 +109,7 @@ fn build_request_body( "responseSchema": response_schema, "maxOutputTokens": 16384 } - }); - - if include_search { - body["tools"] = serde_json::json!([{ - "googleSearch": {} - }]); - } - - body + }) } /// Extract the text content from a Gemini API response. @@ -222,7 +182,7 @@ mod tests { use super::*; #[test] - fn build_request_body_with_search() { + fn build_request_body_structure() { let schema = serde_json::json!({ "type": "object", "properties": { @@ -233,7 +193,7 @@ mod tests { } }); - let body = build_request_body("system prompt", "user prompt", &schema, true); + let body = build_request_body("system prompt", "user prompt", &schema); // Verify contents assert_eq!( @@ -251,9 +211,8 @@ mod tests { "system prompt" ); - // Verify tools (googleSearch present) - assert!(body["tools"].is_array()); - assert!(body["tools"][0].get("googleSearch").is_some()); + // No tools key + assert!(body.get("tools").is_none()); // Verify generation config assert_eq!( @@ -263,15 +222,6 @@ mod tests { assert!(body["generationConfig"]["responseSchema"].is_object()); } - #[test] - fn build_request_body_without_search() { - let schema = serde_json::json!({"type": "object"}); - let body = build_request_body("sys", "user", &schema, false); - - // No tools key when search is disabled - assert!(body.get("tools").is_none()); - } - #[test] fn extract_content_valid_response() { let response = serde_json::json!({ @@ -385,12 +335,11 @@ mod tests { } #[test] - fn gemini_provider_supports_web_search() { + fn gemini_provider_metadata() { let provider = GeminiProvider::new( "test-key".into(), reqwest::Client::new(), ); - assert!(provider.supports_web_search()); assert_eq!(provider.provider_id(), "gemini"); } } diff --git a/backend/src/services/llm/mod.rs b/backend/src/services/llm/mod.rs index 6105627..517f22c 100644 --- a/backend/src/services/llm/mod.rs +++ b/backend/src/services/llm/mod.rs @@ -14,64 +14,27 @@ use serde_json::Value; use crate::errors::AppError; -/// Capabilities advertised by an LLM provider. -#[derive(Debug, Clone)] -pub struct ProviderCapabilities { - /// Whether the provider supports native web search grounding. - pub supports_web_search: bool, - /// Whether the provider supports structured output via JSON schema. - pub supports_structured_output: bool, -} - /// Trait defining the contract for LLM provider implementations. /// /// Each provider (Gemini, OpenAI, Anthropic) implements this trait -/// to provide a unified interface for the synthesis generation pipeline. -/// -/// The pipeline uses two passes: -/// - **Search pass**: Generates content with web search grounding (if supported) -/// - **Rewrite pass**: Rewrites/consolidates content with structured output +/// to provide a unified interface for structured LLM calls. #[async_trait] pub trait LlmProvider: Send + Sync { /// Returns the provider identifier (e.g., "gemini", "openai", "anthropic"). fn provider_id(&self) -> &str; - /// Generate content with web search grounding (Pass 1). - /// - /// For providers that support native web search (e.g., Gemini with googleSearch), - /// this pass retrieves and structures information from the web. - /// - /// # Arguments - /// * `model` — The model identifier (e.g., "gemini-2.5-pro") - /// * `system_prompt` — System-level instructions for the model - /// * `user_prompt` — The user's prompt with search criteria - /// * `response_schema` — JSON Schema defining the expected response structure - async fn generate_search_pass( - &self, - model: &str, - system_prompt: &str, - user_prompt: &str, - response_schema: &Value, - ) -> Result; - - /// Generate content without web search (Pass 2). - /// - /// Used for rewriting, consolidating, or reformatting content - /// with structured output but no web search tools. + /// Call the LLM with a prompt and expected JSON schema. /// /// # Arguments - /// * `model` — The model identifier - /// * `system_prompt` — System-level instructions for the model - /// * `user_prompt` — The user's prompt (typically includes content from Pass 1) + /// * `model` — The model identifier (e.g., "gpt-4o-mini") + /// * `system_prompt` — System-level instructions + /// * `user_prompt` — The user's prompt /// * `response_schema` — JSON Schema defining the expected response structure - async fn generate_rewrite_pass( + async fn call_llm( &self, model: &str, system_prompt: &str, user_prompt: &str, response_schema: &Value, ) -> Result; - - /// Whether this provider supports native web search grounding. - fn supports_web_search(&self) -> bool; } diff --git a/backend/src/services/llm/openai.rs b/backend/src/services/llm/openai.rs index ea431be..be510a4 100644 --- a/backend/src/services/llm/openai.rs +++ b/backend/src/services/llm/openai.rs @@ -1,8 +1,7 @@ //! OpenAI LLM provider implementation. //! -//! Implements the `LlmProvider` trait using two OpenAI APIs: -//! - **Pass 1 (search)**: Responses API (`/v1/responses`) with `web_search_preview` tool -//! - **Pass 2 (rewrite)**: Chat Completions API (`/v1/chat/completions`) with structured output +//! Implements the `LlmProvider` trait using the OpenAI Responses API (`/v1/responses`) +//! with structured JSON output via `json_schema` text format. use async_trait::async_trait; use serde_json::Value; @@ -13,7 +12,7 @@ use crate::errors::AppError; /// OpenAI provider. /// /// Holds the API key and an HTTP client for making requests -/// to the OpenAI Responses and Chat Completions APIs. +/// to the OpenAI Responses API. pub struct OpenAiProvider { api_key: String, http_client: reqwest::Client, @@ -27,20 +26,22 @@ impl OpenAiProvider { http_client, } } +} + +#[async_trait] +impl LlmProvider for OpenAiProvider { + fn provider_id(&self) -> &str { + "openai" + } - /// Execute a request to the OpenAI Responses API (Pass 1). - /// - /// Uses the Responses API with `web_search_preview` tool for grounded search results - /// and structured output via `json_schema` text format. - async fn call_responses_api( + async fn call_llm( &self, model: &str, system_prompt: &str, user_prompt: &str, response_schema: &Value, - include_web_search: bool, ) -> Result { - let mut body = serde_json::json!({ + let body = serde_json::json!({ "model": model, "instructions": system_prompt, "input": user_prompt, @@ -55,12 +56,6 @@ impl OpenAiProvider { } }); - if include_web_search { - body["tools"] = serde_json::json!([{ - "type": "web_search_preview" - }]); - } - let response = self .http_client .post("https://api.openai.com/v1/responses") @@ -93,106 +88,6 @@ impl OpenAiProvider { extract_responses_api_content(&response_body) } - - /// Execute a request to the OpenAI Chat Completions API (Pass 2). - /// - /// Uses the Chat Completions API with `json_schema` response format - /// for structured output without web search. - async fn call_chat_completions_api( - &self, - model: &str, - system_prompt: &str, - user_prompt: &str, - response_schema: &Value, - ) -> Result { - let body = serde_json::json!({ - "model": model, - "messages": [ - { - "role": "system", - "content": system_prompt - }, - { - "role": "user", - "content": user_prompt - } - ], - "max_tokens": 16384, - "response_format": { - "type": "json_schema", - "json_schema": { - "name": "synthesis", - "strict": true, - "schema": response_schema - } - } - }); - - let response = self - .http_client - .post("https://api.openai.com/v1/chat/completions") - .header("Authorization", format!("Bearer {}", self.api_key)) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .map_err(|e| { - let kind = if e.is_timeout() { - "timeout" - } else if e.is_connect() { - "connection error" - } else { - "network error" - }; - tracing::error!("OpenAI Chat Completions API request failed: {}", kind); - AppError::Internal(anyhow::anyhow!("Failed to connect to OpenAI API")) - })?; - - let status = response.status(); - let response_body: Value = response.json().await.map_err(|e| { - tracing::error!("Failed to parse OpenAI response body: {}", e); - AppError::Internal(anyhow::anyhow!("Failed to parse OpenAI API response")) - })?; - - if !status.is_success() { - return Err(map_openai_error(status.as_u16(), &response_body)); - } - - extract_chat_completions_content(&response_body) - } -} - -#[async_trait] -impl LlmProvider for OpenAiProvider { - fn provider_id(&self) -> &str { - "openai" - } - - async fn generate_search_pass( - &self, - model: &str, - system_prompt: &str, - user_prompt: &str, - response_schema: &Value, - ) -> Result { - self.call_responses_api(model, system_prompt, user_prompt, response_schema, true) - .await - } - - async fn generate_rewrite_pass( - &self, - model: &str, - system_prompt: &str, - user_prompt: &str, - response_schema: &Value, - ) -> Result { - self.call_chat_completions_api(model, system_prompt, user_prompt, response_schema) - .await - } - - fn supports_web_search(&self) -> bool { - true - } } /// Extract the text content from an OpenAI Responses API response. @@ -257,34 +152,6 @@ fn extract_responses_api_content(response: &Value) -> Result { ))) } -/// Extract the text content from an OpenAI Chat Completions API response. -/// -/// The response structure is: -/// ```json -/// { "choices": [{ "message": { "content": "..." } }] } -/// ``` -fn extract_chat_completions_content(response: &Value) -> Result { - let text = response - .get("choices") - .and_then(|c| c.get(0)) - .and_then(|c| c.get("message")) - .and_then(|m| m.get("content")) - .and_then(|t| t.as_str()) - .ok_or_else(|| { - tracing::error!("Unexpected OpenAI Chat Completions response structure"); - AppError::Internal(anyhow::anyhow!( - "OpenAI Chat Completions API returned an unexpected response structure" - )) - })?; - - serde_json::from_str(text).map_err(|e| { - tracing::error!("Failed to parse OpenAI Chat Completions JSON output: {}", e); - AppError::Internal(anyhow::anyhow!( - "OpenAI returned invalid JSON in structured output" - )) - }) -} - /// Map OpenAI API error responses to appropriate `AppError` variants. /// /// Handles common error codes without exposing internal details. @@ -325,13 +192,12 @@ fn map_openai_error(status: u16, body: &Value) -> AppError { mod tests { use super::*; - // ── Request body tests ────────────────────────────────────── + // ── Provider metadata ─────────────────────────────────────── #[test] fn openai_provider_metadata() { let provider = OpenAiProvider::new("test-key".into(), reqwest::Client::new()); assert_eq!(provider.provider_id(), "openai"); - assert!(provider.supports_web_search()); } // ── Responses API response parsing ────────────────────────── @@ -420,52 +286,6 @@ mod tests { assert!(extract_responses_api_content(&response).is_err()); } - // ── Chat Completions response parsing ─────────────────────── - - #[test] - fn extract_chat_completions_content_valid() { - let response = serde_json::json!({ - "choices": [{ - "message": { - "role": "assistant", - "content": "{\"category_0\": [{\"title\": \"Rewritten\", \"url\": \"https://example.com\", \"summary\": \"Rewritten summary\"}]}" - }, - "finish_reason": "stop" - }] - }); - - let result = extract_chat_completions_content(&response).unwrap(); - assert!(result["category_0"].is_array()); - assert_eq!( - result["category_0"][0]["title"].as_str().unwrap(), - "Rewritten" - ); - } - - #[test] - fn extract_chat_completions_content_missing_choices() { - let response = serde_json::json!({}); - assert!(extract_chat_completions_content(&response).is_err()); - } - - #[test] - fn extract_chat_completions_content_empty_choices() { - let response = serde_json::json!({"choices": []}); - assert!(extract_chat_completions_content(&response).is_err()); - } - - #[test] - fn extract_chat_completions_content_invalid_json() { - let response = serde_json::json!({ - "choices": [{ - "message": { - "content": "this is not json" - } - }] - }); - assert!(extract_chat_completions_content(&response).is_err()); - } - // ── Error mapping tests ───────────────────────────────────── #[test] diff --git a/backend/src/services/source_scraper.rs b/backend/src/services/source_scraper.rs index c272156..9e8f547 100644 --- a/backend/src/services/source_scraper.rs +++ b/backend/src/services/source_scraper.rs @@ -160,7 +160,7 @@ pub async fn extract_article_links_with_llm( let (system, user) = build_link_extraction_prompt(&head_html, &body_html); let schema = build_link_extraction_schema(); - match provider.generate_rewrite_pass(model, &system, &user, &schema).await { + match provider.call_llm(model, &system, &user, &schema).await { Ok(llm_response) => { let urls: Vec = llm_response .get("urls") diff --git a/backend/src/services/synthesis.rs b/backend/src/services/synthesis.rs index c8ce996..f800039 100644 --- a/backend/src/services/synthesis.rs +++ b/backend/src/services/synthesis.rs @@ -510,7 +510,7 @@ async fn run_generation_inner( let llm_start = std::time::Instant::now(); let class_response = provider - .generate_rewrite_pass( + .call_llm( &model_research, &class_system, &class_user, @@ -669,7 +669,7 @@ async fn run_generation_inner( let llm_start = std::time::Instant::now(); let raw_results = provider - .generate_search_pass(&model_research, &system_prompt, &user_prompt, &search_schema) + .call_llm(&model_research, &system_prompt, &user_prompt, &search_schema) .await?; let llm_duration = llm_start.elapsed().as_millis() as u64; log_llm_call(&state.pool, user_id, job_id, "search", &model_research, @@ -815,7 +815,7 @@ async fn run_generation_inner( let llm_start = std::time::Instant::now(); let class_response = provider - .generate_rewrite_pass( + .call_llm( &model_research, &class_system, &class_user, @@ -916,7 +916,7 @@ async fn run_generation_inner( let llm_start = std::time::Instant::now(); let final_results = provider - .generate_rewrite_pass(&model_writing, &rewrite_system, &rewrite_user, &rewrite_schema) + .call_llm(&model_writing, &rewrite_system, &rewrite_user, &rewrite_schema) .await?; let llm_duration = llm_start.elapsed().as_millis() as u64; log_llm_call(&state.pool, user_id, job_id, "rewrite", &model_writing, @@ -1765,7 +1765,7 @@ async fn scrape_single_article_with_llm( ); let schema = crate::services::llm::schema::build_article_extraction_schema(); - match provider.generate_rewrite_pass(&model, &system, &user, &schema).await { + match provider.call_llm(&model, &system, &user, &schema).await { Ok(response) => { let title = response.get("title").and_then(|t| t.as_str()).unwrap_or("").to_string(); let body = response.get("body_text").and_then(|b| b.as_str()).unwrap_or("").to_string();