diff --git a/backend/src/services/synthesis.rs b/backend/src/services/synthesis.rs index 516e6c8..464a0fc 100644 --- a/backend/src/services/synthesis.rs +++ b/backend/src/services/synthesis.rs @@ -314,42 +314,28 @@ async fn run_generation_inner( // Step 7b: Filter out homepage URLs (path == "/" or empty) let parsed = filter_homepage_urls(parsed); - // Step 8: Adaptive pipeline — decide whether to scrape+rewrite or use search results directly + // Step 8: Scrape + rewrite pass // - // If the provider supports native web search and the search pass produced high-quality - // results (>70% valid URLs starting with http), we can skip the expensive scrape+rewrite - // pass and use the search results directly. - let final_sections = if provider.supports_web_search() && url_quality_sufficient(&parsed) { - tracing::info!( - provider = provider.provider_id(), - "Search pass URL quality sufficient, skipping scrape+rewrite pass" - ); - emit_progress( - tx, - "finalizing", - "Resultats de recherche de bonne qualite, finalisation directe...", - 85, - ); - build_final_sections(&raw_results, &settings.categories)? - } else { - // Full pipeline: scrape + rewrite - emit_progress(tx, "scraping", "Verification des sources...", 45); - let scraped = scrape_articles(state, &parsed, settings.max_age_days as i64, tx).await; - - // Rate limit check (pass 2) - check_rate_limit(state, &user_rate_limiter, &provider_name)?; + // Always run the full pipeline: the search pass URLs can be hallucinated + // by the LLM (Wikipedia, corporate sites instead of actual articles). + // The scrape pass fetches each URL and validates the content exists, + // then the rewrite pass produces summaries based on actual article content. + emit_progress(tx, "scraping", "Verification des sources...", 45); + let scraped = scrape_articles(state, &parsed, settings.max_age_days as i64, tx).await; + + // Rate limit check (pass 2) + check_rate_limit(state, &user_rate_limiter, &provider_name)?; - // LLM rewrite pass - emit_progress(tx, "rewrite", "Redaction des resumes...", 80); - let (rewrite_system, rewrite_user) = prompts::build_rewrite_prompt(&scraped); + // LLM rewrite pass + emit_progress(tx, "rewrite", "Redaction des resumes...", 80); + let (rewrite_system, rewrite_user) = prompts::build_rewrite_prompt(&scraped); - let final_results = provider - .generate_rewrite_pass(&model_writing, &rewrite_system, &rewrite_user, &schema) - .await?; + let final_results = provider + .generate_rewrite_pass(&model_writing, &rewrite_system, &rewrite_user, &schema) + .await?; - emit_progress(tx, "finalizing", "Finalisation...", 90); - build_final_sections(&final_results, &settings.categories)? - }; + emit_progress(tx, "finalizing", "Finalisation...", 90); + let final_sections = build_final_sections(&final_results, &settings.categories)?; // Step 12: Save synthesis to DB emit_progress(tx, "saving", "Sauvegarde de la synthese...", 95);