diff --git a/docs/superpowers/plans/2026-03-25-brave-search.md b/docs/superpowers/plans/2026-03-25-brave-search.md new file mode 100644 index 0000000..88c3827 --- /dev/null +++ b/docs/superpowers/plans/2026-03-25-brave-search.md @@ -0,0 +1,705 @@ +# Brave Search Integration — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add Brave Search API as an alternative to LLM web search in Phase 2, with per-article scrape+classify using the same flow as Phase 1. + +**Architecture:** New `brave_search` service module calls the Brave API. A `use_brave_search` setting toggles Phase 2 between LLM search (existing) and Brave search (new). Brave API key stored in existing `user_api_keys` table. The Brave Phase 2 path reuses Phase 1's batched scrape+classify loop. + +**Tech Stack:** Rust (Axum, sqlx, reqwest), SolidJS, PostgreSQL + +**Spec:** `docs/superpowers/specs/2026-03-25-brave-search-design.md` + +--- + +### Task 1: Brave Search service module + +**Files:** +- Create: `backend/src/services/brave_search.rs` +- Modify: `backend/src/services/mod.rs` + +- [ ] **Step 1: Create `brave_search.rs` with `BraveResult` struct and `search` function** + +Create `backend/src/services/brave_search.rs`: + +```rust +//! Brave Search API client. +//! +//! Calls the Brave Web Search API and returns structured results. +//! Used as an alternative to LLM web search in Phase 2. + +use crate::errors::AppError; +use serde::Deserialize; + +/// A single result from the Brave Search API. +#[derive(Debug, Clone)] +pub struct BraveResult { + pub title: String, + pub url: String, + pub description: String, +} + +/// Map `max_age_days` to Brave's `freshness` parameter. 
+fn freshness_from_days(max_age_days: i32) -> &'static str { + match max_age_days { + d if d <= 1 => "pd", + d if d <= 7 => "pw", + d if d <= 30 => "pm", + _ => "py", + } +} + +/// Brave API response structures (only the fields we need). +#[derive(Deserialize)] +struct BraveSearchResponse { + web: Option<BraveWebResults>, +} + +#[derive(Deserialize)] +struct BraveWebResults { + results: Option<Vec<BraveWebResult>>, +} + +#[derive(Deserialize)] +struct BraveWebResult { + title: Option<String>, + url: Option<String>, + description: Option<String>, +} + +/// Search the Brave Web Search API. +/// +/// # Arguments +/// * `http_client` — shared reqwest client +/// * `api_key` — Brave Search subscription token +/// * `query` — search query string +/// * `count` — max results (1-20) +/// * `max_age_days` — mapped to Brave's freshness parameter +pub async fn search( + http_client: &reqwest::Client, + api_key: &str, + query: &str, + count: u32, + max_age_days: i32, +) -> Result<Vec<BraveResult>, AppError> { + let freshness = freshness_from_days(max_age_days); + + let response = http_client + .get("https://api.search.brave.com/res/v1/web/search") + .header("X-Subscription-Token", api_key) + .header("Accept", "application/json") + .query(&[ + ("q", query), + ("count", &count.to_string()), + ("freshness", freshness), + ("search_lang", "fr"), + ]) + .send() + .await + .map_err(|e| { + tracing::warn!(error = %e, "Brave Search API request failed"); + AppError::Internal(anyhow::anyhow!("Brave Search API request failed: {}", e)) + })?; + + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + tracing::warn!(status = %status, body = %body, "Brave Search API error"); + return Err(AppError::Internal(anyhow::anyhow!( + "Brave Search API returned status {}: {}", + status, + body + ))); + } + + let api_response: BraveSearchResponse = response.json().await.map_err(|e| { + AppError::Internal(anyhow::anyhow!("Failed to parse Brave Search response: {}", e)) + })?; + + let results = api_response 
+ .web + .and_then(|w| w.results) + .unwrap_or_default() + .into_iter() + .filter_map(|r| { + let url = r.url?; + if url.is_empty() { + return None; + } + Some(BraveResult { + title: r.title.unwrap_or_default(), + url, + description: r.description.unwrap_or_default(), + }) + }) + .collect(); + + Ok(results) +} + +/// Test the Brave Search API key with a simple query. +/// +/// Returns `Ok(())` if the key is valid, `Err` with a message otherwise. +pub async fn test_api_key( + http_client: &reqwest::Client, + api_key: &str, +) -> Result<(), AppError> { + let response = http_client + .get("https://api.search.brave.com/res/v1/web/search") + .header("X-Subscription-Token", api_key) + .header("Accept", "application/json") + .query(&[("q", "test"), ("count", "1")]) + .send() + .await + .map_err(|e| AppError::Internal(anyhow::anyhow!("Brave API request failed: {}", e)))?; + + if response.status().is_success() { + Ok(()) + } else { + let status = response.status(); + Err(AppError::BadRequest(format!( + "Brave Search API returned status {}. 
Check your API key.", + status + ))) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn freshness_mapping() { + assert_eq!(freshness_from_days(1), "pd"); + assert_eq!(freshness_from_days(0), "pd"); + assert_eq!(freshness_from_days(7), "pw"); + assert_eq!(freshness_from_days(3), "pw"); + assert_eq!(freshness_from_days(30), "pm"); + assert_eq!(freshness_from_days(14), "pm"); + assert_eq!(freshness_from_days(31), "py"); + assert_eq!(freshness_from_days(365), "py"); + } +} +``` + +- [ ] **Step 2: Register module in `mod.rs`** + +In `backend/src/services/mod.rs`, add: + +```rust +pub mod brave_search; +``` + +- [ ] **Step 3: Build and test** + +Run: `cd backend && cargo build && cargo test --lib brave_search` +Expected: Build succeeds, 1 test passes + +- [ ] **Step 4: Commit** + +```bash +git add backend/src/services/brave_search.rs backend/src/services/mod.rs +git commit -m "feat: add Brave Search API client module" +``` + +--- + +### Task 2: Add `use_brave_search` setting + +**Files:** +- Create: `backend/migrations/20260325000022_add_brave_search_setting.sql` +- Modify: `backend/src/models/settings.rs` +- Modify: `backend/src/db/settings.rs` +- Modify: `backend/src/services/prompts.rs` (test fixture) +- Modify: `backend/tests/api_syntheses_test.rs` (test fixture) +- Modify: `e2e/tests/generation-live.spec.ts` (test fixture) +- Modify: `frontend/src/types.ts` +- Modify: `frontend/src/i18n/fr.ts` +- Modify: `CLAUDE.md` + +- [ ] **Step 1: Create migration** + +Create `backend/migrations/20260325000022_add_brave_search_setting.sql`: + +```sql +ALTER TABLE settings ADD COLUMN use_brave_search BOOLEAN NOT NULL DEFAULT false; +``` + +- [ ] **Step 2: Add `use_brave_search` to all Rust settings structs** + +In `backend/src/models/settings.rs`, add `pub use_brave_search: bool` to `UserSettings`, `SettingsResponse`, and `UpdateSettingsRequest` (after `use_llm_for_source_links` in each struct). 
+ +In the `From<UserSettings> for SettingsResponse` impl, add: `use_brave_search: s.use_brave_search,` + +In `Default for UserSettings`, add: `use_brave_search: false,` + +No validation needed (it's a bool). + +- [ ] **Step 3: Update `SettingsRow` and DB queries in `db/settings.rs`** + +Add `use_brave_search: bool` to `SettingsRow`. + +Add `use_brave_search` to the `TryFrom<SettingsRow>` impl. + +Update both SQL queries (INSERT and UPSERT) to include `use_brave_search`: +- Add to column lists and VALUES placeholders (new bind parameter) +- Add to the `ON CONFLICT ... DO UPDATE SET` clause +- Add to RETURNING clause +- Add `.bind()` calls + +- [ ] **Step 4: Update test fixtures** + +In `backend/src/models/settings.rs` test helper `valid_request()`, add: `use_brave_search: false,` + +In `backend/src/services/prompts.rs` test `test_settings()`, add: `use_brave_search: false,` + +In `backend/tests/api_syntheses_test.rs`, add to the settings JSON: `"use_brave_search": false` + +In `e2e/tests/generation-live.spec.ts`, add to the settings object: `use_brave_search: false,` + +- [ ] **Step 5: Update frontend types** + +In `frontend/src/types.ts`, add `use_brave_search: boolean` to `UserSettings` interface (after `use_llm_for_source_links`). + +In `DEFAULT_SETTINGS`, add: `use_brave_search: false,` + +- [ ] **Step 6: Add i18n labels** + +In `frontend/src/i18n/fr.ts`, add after the `'settings.useLlmForSourceLinks'` entry: + +```typescript +'settings.useBraveSearch': 'Utiliser Brave Search pour la recherche web', +'settings.useBraveSearchHelp': 'Remplace la recherche web par IA par l\'API Brave Search pour des resultats plus precis.', +'settings.braveSearch': 'Brave Search', +'settings.braveSearchKey': 'Cle API Brave Search', +'settings.braveSearchKeyHelp': 'Obtenez une cle sur api-dashboard.search.brave.com. 
Le plan gratuit offre 2000 requetes/mois.', +'settings.braveSearchNotConfigured': 'Configurez une cle API Brave Search pour activer cette option.', +``` + +- [ ] **Step 7: Update CLAUDE.md** + +Change `## Database (21 migrations)` to `## Database (22 migrations)`. + +- [ ] **Step 8: Build and test** + +Run: `cd backend && cargo build && cargo test --lib` +Run: `cd frontend && npx tsc --noEmit` +Expected: All pass + +- [ ] **Step 9: Commit** + +```bash +git add backend/migrations/20260325000022_add_brave_search_setting.sql \ + backend/src/models/settings.rs backend/src/db/settings.rs \ + backend/src/services/prompts.rs \ + backend/tests/api_syntheses_test.rs \ + e2e/tests/generation-live.spec.ts \ + frontend/src/types.ts frontend/src/i18n/fr.ts CLAUDE.md +git commit -m "feat: add use_brave_search setting" +``` + +--- + +### Task 3: Handle `brave_search` in API key test endpoint + +**Files:** +- Modify: `backend/src/handlers/api_keys.rs` + +- [ ] **Step 1: Add Brave Search test branch** + +In `backend/src/handlers/api_keys.rs`, update the `test_key` function. 
 Before the line `let llm_provider = factory::create_provider(&provider, decrypted_key)?;` (around line 125), add a branch for `brave_search`: + +```rust +// Handle Brave Search key testing separately (not an LLM provider) +if provider == "brave_search" { + let result = crate::services::brave_search::test_api_key( + &state.http_client, + &decrypted_key, + ).await; + + return match result { + Ok(()) => Ok(Json(TestResult { + success: true, + message: "Brave Search API key is valid and working".into(), + })), + Err(e) => { + let message = match &e { + AppError::BadRequest(msg) => msg.clone(), + _ => "Brave Search API key test failed.".into(), + }; + Ok(Json(TestResult { + success: false, + message, + })) + } + }; +} +``` + +- [ ] **Step 2: Build and test** + +Run: `cd backend && cargo build && cargo test --lib` +Expected: All pass + +- [ ] **Step 3: Commit** + +```bash +git add backend/src/handlers/api_keys.rs +git commit -m "feat: handle brave_search in API key test endpoint" +``` + +--- + +### Task 4: Phase 2 Brave Search pipeline path + +**Files:** +- Modify: `backend/src/services/synthesis.rs` + +This is the core change. When `use_brave_search` is true, Phase 2 calls Brave Search, filters results, then scrapes+classifies using the same batched loop as Phase 1. + +- [ ] **Step 1: Add Brave key resolution helper** + +At the bottom of `synthesis.rs`, near `resolve_provider_and_key` (around line 962), add: + +```rust +/// Decrypt the Brave Search API key for a user. +async fn resolve_brave_key( + state: &AppState, + user_id: Uuid, +) -> Result<String, AppError> { + let master_key = encryption::MasterKey::from_hex(&state.config.master_encryption_key)?; + let key_record = db::api_keys::get_for_user_and_provider( + &state.pool, user_id, "brave_search", + ).await? + .ok_or_else(|| AppError::BadRequest( + "Brave Search est active mais aucune cle API Brave n'est configuree. 
 \ + Veuillez ajouter une cle API Brave Search dans vos parametres.".into(), + ))?; + + encryption::decrypt(&master_key, &key_record.encrypted_key, &key_record.nonce) +} +``` + +- [ ] **Step 2: Add the Brave Phase 2 path** + +In `synthesis.rs`, find the Phase 2 block (starts around line 554 with `// === PHASE 2: Web Search Fallback ===`). The current code checks `if !category_gaps.is_empty()` and then runs the LLM search. Replace the body of that `if` block with a branch: + +```rust +if !category_gaps.is_empty() { + if settings.use_brave_search { + // === BRAVE SEARCH PATH === + emit_progress(tx, "search", "Recherche Brave Search...", 70); + + let brave_key = resolve_brave_key(state, user_id).await?; + let query = format!("{} actualites", settings.theme); + let brave_results = crate::services::brave_search::search( + &state.http_client, &brave_key, &query, 20, settings.max_age_days, + ).await?; + + tracing::info!(results = brave_results.len(), "Brave Search returned results"); + + // Filter Brave results (same filters as existing Phase 2) + let mut brave_urls: Vec<String> = Vec::new(); + for result in &brave_results { + let url_lower = result.url.to_lowercase(); + + // Homepage filter + if let Ok(parsed_url) = url::Url::parse(&result.url) { + let path = parsed_url.path(); + if path.is_empty() || path == "/" { + trace_article(&state.pool, user_id, job_id, &result.url, &result.title, "brave_search", None, None, None, "filtered_homepage", false).await; + continue; + } + } + + // Cross-phase dedup + if seen_urls.contains(&url_lower) { + trace_article(&state.pool, user_id, job_id, &result.url, &result.title, "brave_search", None, None, None, "filtered_cross_phase_dedup", false).await; + continue; + } + + // History dedup + if settings.article_history_days > 0 { + let hash = hash_article_url(&result.url); + let exists = db::article_history::check_urls_exist(&state.pool, user_id, std::slice::from_ref(&hash)).await.unwrap_or_default(); + if exists.contains(&hash) { + 
trace_article(&state.pool, user_id, job_id, &result.url, &result.title, "brave_search", None, None, None, "filtered_history", false).await; + continue; + } + } + + // Source diversity + if let Some(domain) = extract_domain(&result.url) { + let count = source_counts.get(&domain).copied().unwrap_or(0); + if count >= settings.max_articles_per_source as usize { + trace_article(&state.pool, user_id, job_id, &result.url, &result.title, "brave_search", None, None, None, "filtered_diversity", false).await; + continue; + } + } + + seen_urls.insert(url_lower); + brave_urls.push(result.url.clone()); + } + + // Scrape + classify in batches (same loop as Phase 1) + if !brave_urls.is_empty() { + emit_progress(tx, "processing", "Traitement des articles Brave...", 75); + let total_candidates = brave_urls.len(); + let batch_size = settings.batch_size.max(1) as usize; + let mut processed = 0usize; + let mut candidates_iter = brave_urls.into_iter(); + let mut done = false; + + while !done { + let mut batch: Vec<String> = Vec::new(); + while batch.len() < batch_size { + let Some(url) = candidates_iter.next() else { break }; + batch.push(url); + } + + if batch.is_empty() { break; } + + let pct = 75 + ((processed as u32 * 15) / total_candidates.max(1) as u32).min(15); + emit_progress(tx, "processing", &format!("Articles Brave {}-{}/{}...", processed + 1, processed + batch.len(), total_candidates), pct as u8); + + // Scrape batch in parallel + let mut scrape_set = tokio::task::JoinSet::new(); + for url in &batch { + let client = state.http_client.clone(); + let u = url.clone(); + let mad = settings.max_age_days as i64; + scrape_set.spawn(async move { + let result = scrape_single_article(&client, &u, mad).await; + (u, result) + }); + } + + let mut scraped_articles: Vec<(String, String, String)> = Vec::new(); // (url, body_text, page_title) + while let Some(join_result) = scrape_set.join_next().await { + if let Ok((_url, (body_text, page_title, final_url, drop_reason))) = join_result { + if let 
Some(reason) = drop_reason { + trace_article(&state.pool, user_id, job_id, &final_url, &page_title, "brave_search", None, None, None, reason, false).await; + } else { + scraped_articles.push((final_url, body_text, page_title)); + } + } + } + + if scraped_articles.is_empty() { + processed += batch.len(); + continue; + } + + // Classify/summarize in parallel + check_rate_limit(state, &user_rate_limiter, &provider_name).await?; + + let mut classify_set = tokio::task::JoinSet::new(); + for (final_url, body_text, page_title) in &scraped_articles { + let provider_clone = std::sync::Arc::clone(&provider); + let model = model_research.clone(); + let schema = classify_schema.clone(); + let cats = classification_categories.clone(); + let body_snippet: String = body_text.chars().take(500).collect(); + let title = page_title.clone(); + let url = final_url.clone(); + let pool = state.pool.clone(); + let uid = user_id; + let jid = job_id; + + let (class_sys, class_user) = crate::services::prompts::build_article_classify_prompt(&title, &body_snippet, &cats); + let sys = class_sys.clone(); + let usr = class_user.clone(); + let mdl = model.clone(); + + classify_set.spawn(async move { + let llm_start = std::time::Instant::now(); + let result = provider_clone.call_llm(&mdl, &sys, &usr, &schema).await; + let duration = llm_start.elapsed().as_millis() as u64; + + if let Ok(ref resp) = result { + let resp_str = serde_json::to_string_pretty(resp).unwrap_or_default(); + crate::db::llm_call_log::insert(&pool, uid, jid, "classify_summarize", &mdl, &sys, &usr, &resp_str, duration as i32, Some(&url)).await.ok(); + } + + (url, title, result) + }); + } + + while let Some(join_result) = classify_set.join_next().await { + if let Ok((final_url, page_title, llm_result)) = join_result { + let class_response = match llm_result { + Ok(resp) => resp, + Err(e) => { + tracing::warn!(url = %final_url, error = %e, "LLM classify failed, skipping article"); + continue; + } + }; + + let llm_title = 
class_response.get("title").and_then(|t| t.as_str()).unwrap_or(&page_title).to_string(); + let llm_summary = class_response.get("summary").and_then(|s| s.as_str()).unwrap_or("").to_string(); + let mut llm_category = class_response.get("category").and_then(|c| c.as_str()).unwrap_or("Autre").to_string(); + + if !classification_categories.iter().any(|c| c.to_lowercase() == llm_category.to_lowercase()) { + llm_category = "Autre".to_string(); + } + + let cat_key = if llm_category.to_lowercase() == "autre" { + "category_autre".to_string() + } else { + user_categories.iter().position(|c| c.to_lowercase() == llm_category.to_lowercase()) + .map(|i| format!("category_{}", i)) + .unwrap_or_else(|| "category_autre".to_string()) + }; + + let cat_filled = filled_counts.get(&llm_category).copied().unwrap_or(0); + let (final_cat_key, final_cat_name) = if cat_filled >= settings.max_items_per_category as usize && llm_category.to_lowercase() != "autre" { + let autre_filled = filled_counts.get("Autre").copied().unwrap_or(0); + if autre_filled >= settings.max_items_per_category as usize { + continue; + } + ("category_autre".to_string(), "Autre".to_string()) + } else { + (cat_key, llm_category) + }; + + article_scraped.entry(final_cat_key).or_default().push(NewsItem { + title: llm_title, + url: final_url.clone(), + summary: llm_summary, + }); + *filled_counts.entry(final_cat_name).or_insert(0) += 1; + + if let Some(domain) = extract_domain(&final_url) { + *source_counts.entry(domain).or_insert(0) += 1; + } + } + } + + processed += batch.len(); + + let total: usize = article_scraped.values().map(|v| v.len()).sum(); + if total >= max_total { + done = true; + } + } + } + } else { + // === EXISTING LLM SEARCH PATH (unchanged) === + emit_progress(tx, "search", "Recherche d'actualites complementaires...", 70); + check_rate_limit(state, &user_rate_limiter, &provider_name).await?; + + // ... 
 (keep entire existing LLM search block) + } +} +``` + +Also update the source_type in the final save section (around line 677). Currently it checks `if source_url.is_some() { "personalized_source" } else { "web_search" }`. Brave articles have no entry in `url_source`, so this check would label them `"web_search"`. The spec requires `"brave_search"`, so add Brave URLs to `url_source` with a sentinel value. Caveat: the map is keyed by the pre-scrape `result.url`, so if scraping follows a redirect the saved `item.url` may not match and the article falls back to `"web_search"`. + +After filtering each Brave URL (after `seen_urls.insert(url_lower);`), add: + +```rust +url_source.insert(result.url.clone(), "brave_search".to_string()); +``` + +Then update the final save source_type logic (around line 677): + +```rust +let source_type = match url_source.get(&item.url).map(|s| s.as_str()) { + Some("brave_search") => "brave_search", + Some(_) => "personalized_source", + None => "web_search", +}; +trace_article(&state.pool, user_id, job_id, &item.url, &item.title, + source_type, + if source_type == "personalized_source" { url_source.get(&item.url).map(|s| s.as_str()) } else { None }, + Some(&section.title), Some(synthesis.id), "used", true).await; +``` + +- [ ] **Step 3: Build and test** + +Run: `cd backend && cargo build && cargo test --lib` +Expected: All pass + +- [ ] **Step 4: Commit** + +```bash +git add backend/src/services/synthesis.rs +git commit -m "feat: add Brave Search Phase 2 pipeline path" +``` + +--- + +### Task 5: Frontend — Brave Search section in Settings + +**Files:** +- Modify: `frontend/src/pages/Settings.tsx` + +- [ ] **Step 1: Add Brave Search section to Settings page** + +In `frontend/src/pages/Settings.tsx`, add a new section after the "Advanced extraction" section (after the `useLlmSourceLinks` checkbox block, around line 487). This section contains: +- A Brave API key input (standalone, not using `ApiKeyManager`) +- The `use_brave_search` toggle (disabled unless key is configured) + +The section needs to: +1. 
Check if a Brave key exists (from the `apiKeys` resource already loaded for `ApiKeyManager`) +2. Show a key input if not configured, key prefix + delete if configured +3. Show the toggle, disabled if no key + +```tsx +{/* Brave Search */} +
+

+ {t('settings.braveSearch')} +

+

{t('settings.useBraveSearchHelp')}

+ + {/* Brave API Key */} +
+ +

{t('settings.braveSearchKeyHelp')}

+ {/* Key input/display using apiKeysApi directly */} + {/* Similar to ProviderKeyCard but standalone */} +
+ + {/* Toggle */} +
+ + setSettings((prev) => ({ + ...prev, + use_brave_search: e.currentTarget.checked, + })) + } + class="h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 rounded disabled:opacity-50" + /> + +
+ +

{t('settings.braveSearchNotConfigured')}

+
+
+``` + +The `hasBraveKey()` signal checks whether the `apiKeys` resource contains a key with `provider_name === "brave_search"`. + +For the key input, create a minimal inline component with: input field, save button (calls `apiKeysApi.create`), and when configured: prefix display + delete button (calls `apiKeysApi.remove`, and if `use_brave_search` was on, also calls settings save with `use_brave_search: false`). + +- [ ] **Step 2: TypeScript check** + +Run: `cd frontend && npx tsc --noEmit` +Expected: No errors + +- [ ] **Step 3: Commit** + +```bash +git add frontend/src/pages/Settings.tsx +git commit -m "feat: add Brave Search section to Settings page" +```