From 1f8f2ddc9d2fb3ef0797e4bc06eebb79cbeef4f0 Mon Sep 17 00:00:00 2001
From: oabrivard <olivier@abrivard.fr>
Date: Thu, 26 Mar 2026 10:06:50 +0100
Subject: [PATCH] docs: add implementation plan for integration tests with mock
 LLM provider

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../plans/2026-03-26-integration-tests.md     | 591 ++++++++++++++++++
 1 file changed, 591 insertions(+)
 create mode 100644 docs/superpowers/plans/2026-03-26-integration-tests.md
diff --git a/docs/superpowers/plans/2026-03-26-integration-tests.md b/docs/superpowers/plans/2026-03-26-integration-tests.md
new file mode 100644
index 0000000..48a61e2
--- /dev/null
+++ b/docs/superpowers/plans/2026-03-26-integration-tests.md
@@ -0,0 +1,591 @@
+# Integration Tests — Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add integration tests for the generation pipeline using a MockLlmProvider and wiremock, exercising scraping, filtering, classification, and synthesis saving.
+
+**Architecture:** Create a MockLlmProvider that returns canned responses. Refactor `run_generation_inner` to accept an optional provider override for dependency injection. Write focused pipeline tests that call `run_generation_inner` directly with mock provider + wiremock HTTP server.
+
+**Tech Stack:** Rust (tokio, sqlx, wiremock, async-trait), PostgreSQL (test DB)
+
+**Spec:** `docs/superpowers/specs/2026-03-26-integration-tests-design.md`
+
+---
+
+### Task 1: Fix test infrastructure for recent config changes
+
+**Files:**
+- Modify: `backend/tests/common/mod.rs`
+
+The recent polish tasks removed `session_secret` from `AppConfig` and wrapped `master_encryption_key` in `Arc<String>`. The test helper still uses the old config shape.
+
+- [ ] **Step 1: Update `TestApp::new()` config**
+
+In `backend/tests/common/mod.rs`, find the `AppConfig` construction (around line 108). Apply:
+- Remove `session_secret: "a".repeat(64),`
+- Change `master_encryption_key: "ab".repeat(32),` to `master_encryption_key: std::sync::Arc::new("ab".repeat(32)),`
+
+- [ ] **Step 2: Verify integration tests compile**
+
+Run: `cd backend && cargo test --test api_syntheses_test -- --list 2>&1 | head -5`
+Expected: Lists test names without compile errors
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add backend/tests/common/mod.rs
+git commit -m "fix: update test config for session_secret removal and Arc master key"
+```
+
+---
+
+### Task 2: Create MockLlmProvider
+
+**Files:**
+- Create: `backend/src/services/llm/mock.rs`
+- Modify: `backend/src/services/llm/mod.rs`
+
+- [ ] **Step 1: Create the mock provider**
+
+Create `backend/src/services/llm/mock.rs`:
+
+```rust
+//! Mock LLM provider for integration testing.
+//!
+//! Returns canned JSON responses based on prompt content,
+//! enabling pipeline tests without real LLM API calls.
+
+use std::sync::Arc;
+use async_trait::async_trait;
+use serde_json::{json, Value};
+use crate::errors::AppError;
+use super::LlmProvider;
+
+/// A mock LLM provider that answers every call with deterministic, canned JSON.
+pub struct MockLlmProvider {
+    /// Category written into the `category` field of classify responses.
+    default_category: String,
+    /// URLs turned into result items (title, url, summary) for search responses.
+    search_urls: Vec<String>,
+    /// URLs returned under the `urls` key for link extraction responses.
+    link_urls: Vec<String>,
+}
+
+impl MockLlmProvider {
+    /// Build a provider whose classify category is `"Autre"` and whose URL lists are empty.
+    pub fn new() -> Self {
+        Self {
+            default_category: String::from("Autre"),
+            search_urls: vec![],
+            link_urls: vec![],
+        }
+    }
+
+    /// Builder: replace the category reported by classify responses.
+    pub fn with_default_category(mut self, category: &str) -> Self {
+        self.default_category = category.to_owned();
+        self
+    }
+
+    /// Builder: replace the URLs reported by search responses.
+    pub fn with_search_urls(mut self, urls: Vec<String>) -> Self {
+        self.search_urls = urls;
+        self
+    }
+
+    /// Builder: replace the URLs reported by link extraction responses.
+    pub fn with_link_urls(mut self, urls: Vec<String>) -> Self {
+        self.link_urls = urls;
+        self
+    }
+
+    /// Consume the provider and hand it back as an `Arc<dyn LlmProvider>` for the pipeline.
+    pub fn into_arc(self) -> Arc<dyn LlmProvider> {
+        Arc::new(self)
+    }
+}
+
+#[async_trait]
+impl LlmProvider for MockLlmProvider {
+    /// Identifier reported for this provider; always `"mock"`.
+    fn provider_id(&self) -> &str {
+        "mock"
+    }
+
+    /// Return a canned JSON value chosen by keywords found in `system_prompt`
+    /// (matched case-insensitively, checked in order: "classer", "liens", "precis").
+    /// `_model` and `_response_schema` are ignored; this never returns `Err`.
+    async fn call_llm(
+        &self,
+        _model: &str,
+        system_prompt: &str,
+        user_prompt: &str,
+        _response_schema: &Value,
+    ) -> Result<Value, AppError> {
+        let sys_lower = system_prompt.to_lowercase();
+
+        // Classify/summarize call — system prompt contains "classer"
+        if sys_lower.contains("classer") {
+            // Title is the rest of the first "Titre : " line; `strip_prefix` removes the marker exactly once.
+            let title = user_prompt
+                .lines()
+                .find_map(|line| line.strip_prefix("Titre : "))
+                .map(str::to_string)
+                .unwrap_or_else(|| "Mock Article".to_string());
+
+            return Ok(json!({
+                "title": title,
+                "summary": format!("Mock summary for: {}", title),
+                "category": self.default_category,
+            }));
+        }
+
+        // Link extraction call — system prompt contains "liens"
+        if sys_lower.contains("liens") {
+            return Ok(json!({ "urls": self.link_urls }));
+        }
+
+        // Search call — system prompt contains "precis" (from "Tu es un assistant IA precis")
+        if sys_lower.contains("precis") {
+            let items: Vec<Value> = self.search_urls.iter().map(|url| {
+                json!({
+                    "title": format!("Search result: {}", url),
+                    "url": url,
+                    "summary": format!("Mock search summary for {}", url),
+                })
+            }).collect();
+
+            return Ok(json!({ "category_0": items }));
+        }
+
+        // Fallback
+        Ok(json!({"result": "mock response"}))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn mock_provider_returns_classify_response() {
+        let provider = MockLlmProvider::new().with_default_category("AI News");
+        let result = provider
+            .call_llm("model", "Tu dois classer l'article", "Titre : GPT-7 Released\n\nContenu...", &json!({}))
+            .await
+            .unwrap();
+        assert_eq!(result["title"], "GPT-7 Released");
+        assert_eq!(result["category"], "AI News");
+        assert!(result["summary"].as_str().unwrap().contains("GPT-7"));
+    }
+
+    #[tokio::test]
+    async fn mock_provider_returns_search_response() {
+        let provider = MockLlmProvider::new()
+            .with_search_urls(vec!["http://example.com/a".into()]);
+        let result = provider
+            .call_llm("model", "Tu es un assistant IA precis", "Recherche...", &json!({}))
+            .await
+            .unwrap();
+        let items = result["category_0"].as_array().unwrap();
+        assert_eq!(items.len(), 1);
+        assert_eq!(items[0]["url"], "http://example.com/a");
+    }
+
+    #[tokio::test]
+    async fn mock_provider_returns_link_extraction() {
+        let provider = MockLlmProvider::new()
+            .with_link_urls(vec!["http://example.com/post-1".into()]);
+        let result = provider
+            .call_llm("model", "Tu dois identifier les liens", "Links...", &json!({}))
+            .await
+            .unwrap();
+        let expected = json!(["http://example.com/post-1"]); // pins count AND content of `urls`
+        assert_eq!(result["urls"], expected);
+    }
+}
+```
+
+- [ ] **Step 2: Register module in `mod.rs`**
+
+In `backend/src/services/llm/mod.rs`, add after the other `pub mod` declarations:
+
+```rust
+pub mod mock;
+```
+
+- [ ] **Step 3: Build and test**
+
+Run: `cd backend && cargo build && cargo test --lib llm::mock`
+Expected: Build succeeds, 3 mock tests pass
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add backend/src/services/llm/mock.rs backend/src/services/llm/mod.rs
+git commit -m "feat: add MockLlmProvider for integration testing"
+```
+
+---
+
+### Task 3: Dependency injection for pipeline
+
+**Files:**
+- Modify: `backend/src/services/synthesis.rs`
+- Modify: `backend/src/handlers/generation.rs`
+
+- [ ] **Step 1: Add `provider_override` parameter to `run_generation`**
+
+In `backend/src/services/synthesis.rs`, update `run_generation` signature (around line 192):
+
+```rust
+pub async fn run_generation(
+    job_id: Uuid,
+    state: AppState,
+    user_id: Uuid,
+    tx: Arc<watch::Sender<ProgressEvent>>,
+    provider_override: Option<Arc<dyn crate::services::llm::LlmProvider>>,
+) {
+    let result = run_generation_inner(job_id, &state, user_id, &tx, provider_override).await;
+    // ... rest unchanged
+```
+
+- [ ] **Step 2: Add `provider_override` to `run_generation_inner` and make it public**
+
+Change `async fn run_generation_inner` to `pub async fn run_generation_inner` and add the parameter:
+
+```rust
+pub async fn run_generation_inner(
+    job_id: Uuid,
+    state: &AppState,
+    user_id: Uuid,
+    tx: &watch::Sender<ProgressEvent>,
+    provider_override: Option<Arc<dyn crate::services::llm::LlmProvider>>,
+) -> Result<Uuid, AppError> {
+```
+
+- [ ] **Step 3: Use provider_override when provided**
+
+Find the provider creation section (around line 257-258):
+
+```rust
+// Before:
+let (provider_name, api_key) = resolve_provider_and_key(state, user_id, &settings).await?;
+let provider = create_provider(&provider_name, api_key)?;
+```
+
+Replace with:
+
+```rust
+let (provider_name, provider) = if let Some(injected) = provider_override {
+    (injected.provider_id().to_string(), injected) // name comes from the provider itself ("mock" for MockLlmProvider)
+} else {
+    let (pname, api_key) = resolve_provider_and_key(state, user_id, &settings).await?;
+    let p = create_provider(&pname, api_key)?;
+    (pname, p)
+};
+```
+
+- [ ] **Step 4: Handle model resolution for mock provider**
+
+Find the model resolution section (around line 260-265). Add a mock-path branch:
+
+```rust
+let (model_research, model_websearch) = if provider_name == "mock" {
+    let research = if settings.ai_model.is_empty() { "mock-model".to_string() } else { settings.ai_model.clone() };
+    let websearch = if settings.ai_model_websearch.is_empty() { "mock-model".to_string() } else { settings.ai_model_websearch.clone() };
+    (research, websearch)
+} else {
+    // existing resolve_model calls...
+};
+```
+
+Read the file to see how `model_research` and `model_websearch` are currently resolved and wrap that in the `else` branch.
+
+- [ ] **Step 5: Update the handler to pass `None`**
+
+In `backend/src/handlers/generation.rs`, find the `tokio::spawn` call. Update:
+
+```rust
+// Before:
+synthesis::run_generation(job_id, state_clone.clone(), user_id, tx.clone())
+
+// After:
+synthesis::run_generation(job_id, state_clone.clone(), user_id, tx.clone(), None)
+```
+
+- [ ] **Step 6: Build and test**
+
+Run: `cd backend && cargo build && cargo test --lib`
+Expected: All pass (no behavioral change)
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add backend/src/services/synthesis.rs backend/src/handlers/generation.rs
+git commit -m "refactor: add provider_override for pipeline dependency injection"
+```
+
+---
+
+### Task 4: Add wiremock and write pipeline integration tests
+
+**Files:**
+- Modify: `backend/Cargo.toml`
+- Create: `backend/tests/pipeline_test.rs`
+
+- [ ] **Step 1: Add wiremock dependency**
+
+In `backend/Cargo.toml` `[dev-dependencies]`, add:
+
+```toml
+wiremock = "0.6"
+```
+
+- [ ] **Step 2: Create the test file**
+
+Create `backend/tests/pipeline_test.rs`. This file tests the pipeline by calling `run_generation_inner` directly.
+
+The test setup needs:
+1. A `TestApp` for the DB (reuse `mod common;`)
+2. A `wiremock::MockServer` for serving fake HTML pages
+3. A `MockLlmProvider` configured for the test
+4. A `watch::channel` for progress events
+
+```rust
+mod common;
+
+use ai_synth_backend::services::llm::mock::MockLlmProvider;
+use ai_synth_backend::services::synthesis;
+use std::sync::Arc;
+use tokio::sync::watch;
+use wiremock::matchers::{method, path};
+use wiremock::{Mock, MockServer, ResponseTemplate};
+
+/// Helper: start a wiremock server answering `GET /source` (links to three articles) and `GET /article-1..=3`.
+async fn setup_mock_server() -> MockServer {
+    let server = MockServer::start().await;
+
+    // Source page: three anchors whose absolute URLs are built from this server's own base URI
+    Mock::given(method("GET"))
+        .and(path("/source"))
+        .respond_with(ResponseTemplate::new(200).set_body_string(format!(
+            r#"<html><body>
+                <a href="{base}/article-1">Article One</a>
+                <a href="{base}/article-2">Article Two</a>
+                <a href="{base}/article-3">Article Three</a>
+            </body></html>"#,
+            base = server.uri()
+        )))
+        .mount(&server)
+        .await;
+
+    // Article pages: one 200 response per article, each with a numbered <title> and a one-paragraph body
+    for i in 1..=3 {
+        Mock::given(method("GET"))
+            .and(path(format!("/article-{}", i)))
+            .respond_with(ResponseTemplate::new(200).set_body_string(format!(
+                r#"<html>
+                    <head><title>Test Article {i}</title></head>
+                    <body><p>This is the body content of test article {i} about artificial intelligence and technology news.</p></body>
+                </html>"#
+            )))
+            .mount(&server)
+            .await;
+    }
+
+    server
+}
+
+/// Helper: create an authenticated user, save its settings through the API and optionally register one source.
+async fn setup_user(
+    app: &common::TestApp,
+    source_url: Option<&str>,
+    categories: Vec<&str>,
+    max_items: i32,
+) -> (uuid::Uuid, String) {
+    let (user_id, session) = app.create_authenticated_user("pipeline-test@example.com").await;
+
+    // Save the settings through the API
+    let category_values: Vec<serde_json::Value> = categories.into_iter().map(serde_json::Value::from).collect();
+    let payload = serde_json::json!({
+        "theme": "Intelligence Artificielle",
+        "max_age_days": 30,
+        "categories": category_values,
+        "max_items_per_category": max_items,
+        "max_articles_per_source": 10,
+        "use_llm_for_source_links": false,
+        "use_brave_search": false,
+        "article_history_days": 90,
+        "batch_size": 5,
+        "search_agent_behavior": "",
+        "ai_provider": "",
+        "ai_model": "",
+        "ai_model_websearch": "",
+        "rate_limit_max_requests": null,
+        "rate_limit_time_window_seconds": null
+    });
+    let (settings_status, _) = app.put_with_session("/api/v1/settings", &payload, &session).await;
+    assert_eq!(settings_status.as_u16(), 200, "Settings save should succeed");
+
+    // Register the source when one was given
+    if let Some(url) = source_url {
+        let body = serde_json::json!({"title": "Test Source", "url": url});
+        let (source_status, _) = app.post_with_session("/api/v1/sources", &body, &session).await;
+        assert!(source_status.is_success(), "Source creation should succeed");
+    }
+
+    (user_id, session)
+}
+
+#[tokio::test]
+async fn phase1_classifies_scraped_articles() {
+    let app = common::TestApp::new().await;
+    let mock_server = setup_mock_server().await;
+
+    let source_url = format!("{}/source", mock_server.uri());
+    let (user_id, _session) = setup_user(&app, Some(&source_url), vec!["AI News"], 4).await;
+
+    let state = ai_synth_backend::app_state::AppState::new(
+        app.config.clone(),
+        app.pool.clone(),
+        reqwest::Client::new(),
+    );
+
+    let job_id = uuid::Uuid::new_v4();
+    let (tx, _rx) = watch::channel(synthesis::ProgressEvent::Progress {
+        step: "init".into(),
+        message: "Starting...".into(),
+        percent: 0,
+    });
+
+    let classifier = MockLlmProvider::new()
+        .with_default_category("AI News")
+        .into_arc();
+
+    let outcome = synthesis::run_generation_inner(
+        job_id, &state, user_id, &tx, Some(classifier),
+    ).await;
+
+    assert!(outcome.is_ok(), "Generation should succeed: {:?}", outcome.err());
+
+    // The returned id must point at a persisted synthesis row
+    let synthesis_id = outcome.unwrap();
+    let (sections_json,) = sqlx::query_as::<_, (serde_json::Value,)>(
+        "SELECT sections FROM syntheses WHERE id = $1"
+    )
+    .bind(synthesis_id)
+    .fetch_one(&app.pool)
+    .await
+    .expect("Synthesis should exist in DB");
+
+    let sections: Vec<serde_json::Value> = serde_json::from_value(sections_json).unwrap();
+    assert!(!sections.is_empty(), "Should have at least one section");
+
+    // The job must also have written rows into article_history
+    let (history_rows,): (i64,) = sqlx::query_as(
+        "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2"
+    )
+    .bind(user_id)
+    .bind(job_id)
+    .fetch_one(&app.pool)
+    .await
+    .unwrap();
+    assert!(history_rows > 0, "Article history should have entries");
+}
+
+#[tokio::test]
+async fn phase2_search_fills_gaps_when_no_sources() {
+    let app = common::TestApp::new().await;
+    let mock_server = setup_mock_server().await;
+
+    // No source is configured, so Phase 1 produces nothing and Phase 2 search should kick in
+    let (user_id, _session) = setup_user(&app, None, vec!["AI News"], 2).await;
+
+    let base = mock_server.uri();
+    let searcher = MockLlmProvider::new()
+        .with_default_category("AI News")
+        .with_search_urls(
+            (1..=2).map(|n| format!("{base}/article-{n}")).collect(),
+        )
+        .into_arc();
+
+    let job_id = uuid::Uuid::new_v4();
+    let (tx, _rx) = watch::channel(synthesis::ProgressEvent::Progress {
+        step: "init".into(), message: "Starting...".into(), percent: 0,
+    });
+
+    let state = ai_synth_backend::app_state::AppState::new(
+        app.config.clone(), app.pool.clone(), reqwest::Client::new(),
+    );
+
+    let outcome = synthesis::run_generation_inner(
+        job_id, &state, user_id, &tx, Some(searcher),
+    ).await;
+
+    assert!(outcome.is_ok(), "Generation should succeed: {:?}", outcome.err());
+}
+
+#[tokio::test]
+async fn category_overflow_spills_to_autre() {
+    let app = common::TestApp::new().await;
+    let mock_server = setup_mock_server().await;
+
+    let source_url = format!("{}/source", mock_server.uri());
+    // max_items_per_category = 1, but source has 3 articles all classified to same category
+    let (user_id, _session) = setup_user(&app, Some(&source_url), vec!["AI News"], 1).await;
+
+    let mock_provider = MockLlmProvider::new()
+        .with_default_category("AI News") // all go to same category
+        .into_arc();
+
+    let job_id = uuid::Uuid::new_v4();
+    let (tx, _rx) = watch::channel(synthesis::ProgressEvent::Progress {
+        step: "init".into(), message: "Starting...".into(), percent: 0,
+    });
+
+    let state = ai_synth_backend::app_state::AppState::new(
+        app.config.clone(), app.pool.clone(), reqwest::Client::new(),
+    );
+
+    let result = synthesis::run_generation_inner(
+        job_id, &state, user_id, &tx, Some(mock_provider),
+    ).await;
+
+    // Surface the underlying error in the failure message instead of a bare "should succeed"
+    assert!(result.is_ok(), "Generation should succeed: {:?}", result.err());
+    let synthesis_id = result.unwrap();
+    let synthesis = sqlx::query_as::<_, (serde_json::Value,)>(
+        "SELECT sections FROM syntheses WHERE id = $1"
+    )
+    .bind(synthesis_id)
+    .fetch_one(&app.pool)
+    .await
+    .unwrap();
+
+    let sections: Vec<serde_json::Value> = serde_json::from_value(synthesis.0).unwrap();
+    // NOTE(review): nothing is asserted when only one section comes back — confirm the overflow rule and tighten
+    let autre_section = sections.iter().find(|s| s["title"] == "Autre");
+    if sections.len() > 1 {
+        assert!(autre_section.is_some(), "Overflow should go to Autre section");
+    }
+}
+```
+
+**Note to implementer**: The SSRF check is expected to reject `127.0.0.1` (the wiremock address) when fetching source pages, because the `check_ssrf` function in `scraper.rs` is called before source pages are fetched. For these tests to work, either:
+- Make sure the wiremock server address passes the SSRF check — verify how `check_ssrf` treats `localhost`/`127.0.0.1`. If the address is blocked, the Phase 1 test will get no links from sources; in that case, rely on the LLM link extraction mock instead (set `use_llm_for_source_links: true` in settings, and configure `MockLlmProvider::with_link_urls()` to return the article URLs).
+- Or skip the Phase 1 source tests and focus on the Phase 2 search tests.
+
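+Read the `check_ssrf` function to determine which approach works, and adapt the tests accordingly. Also confirm whether article-page fetches go through the same check, since the article URLs in every test point at the wiremock server too.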
+
+- [ ] **Step 3: Build and run tests**
+
+Run: `cd backend && cargo test --test pipeline_test`
+Expected: All tests pass (requires `TEST_DATABASE_URL` env var)
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add backend/Cargo.toml backend/tests/pipeline_test.rs
+git commit -m "feat: add pipeline integration tests with MockLlmProvider and wiremock"
+```