diff --git a/backend/src/services/synthesis/mod.rs b/backend/src/services/synthesis/mod.rs index 5cdc8ec..4a9693d 100644 --- a/backend/src/services/synthesis/mod.rs +++ b/backend/src/services/synthesis/mod.rs @@ -28,6 +28,7 @@ use crate::services::llm::factory::create_provider; use crate::services::scraper; use crate::services::source_scraper; use crate::services::feed_parser; +use crate::services::site_search; mod helpers; pub(crate) use helpers::{ @@ -156,6 +157,22 @@ pub async fn run_generation_inner( let model_research = Arc::new(model_research); let classification_categories = Arc::new(classification_categories); + // Build the site search fallback provider (Brave if available, else LLM) + let site_search_provider = if settings.use_brave_search { + match resolve_brave_key(state, user_id).await { + Ok(key) => Arc::new(site_search::SiteSearchProvider::Brave { api_key: key }), + Err(_) => Arc::new(site_search::SiteSearchProvider::Llm { + provider: provider.clone(), + model: model_websearch.clone(), + }), + } + } else { + Arc::new(site_search::SiteSearchProvider::Llm { + provider: provider.clone(), + model: model_websearch.clone(), + }) + }; + // === PHASE 1: Personalized Sources === if !sources.is_empty() { emit_progress(tx, "sources", "Analyse des sources personnalisees...", 15); @@ -203,6 +220,9 @@ pub async fn run_generation_inner( let rss_url = source.rss_url.clone(); let rss_discovered_at = source.rss_discovered_at; let max_l = max_links; + let ss_provider = site_search_provider.clone(); + let ss_theme = theme.theme.clone(); + let ss_max_age = theme.max_age_days; join_set.spawn(async move { // Try RSS feed first let feed_result = feed_parser::detect_and_parse_feed( @@ -239,7 +259,29 @@ pub async fn run_generation_inner( feed_parser::FeedResult::Found { .. } => { // Feed found but too few entries — keep the cache, fall back to HTML let links = source_scraper::extract_article_links(&client, &source_url, max_l).await; - (source_url, source_title, links, None) + match links { + Ok(ref l) if l.is_empty() => { + // HTML also returned 0 links — try site search fallback + if let Some(domain) = crate::services::synthesis::extract_domain(&source_url) { + let ss_config = site_search::SiteSearchConfig { + domain, + theme: ss_theme, + max_results: max_l, + max_age_days: ss_max_age, + }; + let ss_links = site_search::search(&client, &ss_config, &ss_provider).await; + if !ss_links.is_empty() { + tracing::info!(source = %source_title, links = ss_links.len(), "Site search fallback produced links"); + (source_url, source_title, Ok(ss_links), None) + } else { + (source_url, source_title, links, None) + } + } else { + (source_url, source_title, links, None) + } + } + _ => (source_url, source_title, links, None), + } } feed_parser::FeedResult::NotFound => { // No feed discovered — fall back to HTML and clear any stale cache @@ -249,7 +291,29 @@ pub async fn run_generation_inner( } else { None }; - (source_url, source_title, links, update) + match links { + Ok(ref l) if l.is_empty() => { + // HTML also returned 0 links — try site search fallback + if let Some(domain) = crate::services::synthesis::extract_domain(&source_url) { + let ss_config = site_search::SiteSearchConfig { + domain, + theme: ss_theme, + max_results: max_l, + max_age_days: ss_max_age, + }; + let ss_links = site_search::search(&client, &ss_config, &ss_provider).await; + if !ss_links.is_empty() { + tracing::info!(source = %source_title, links = ss_links.len(), "Site search fallback produced links"); + (source_url, source_title, Ok(ss_links), update) + } else { + (source_url, source_title, links, update) + } + } else { + (source_url, source_title, links, update) + } + } + _ => (source_url, source_title, links, update), + } } } });