mod common; use ai_synth_backend::services::llm::mock::MockLlmProvider; use ai_synth_backend::services::synthesis; use std::sync::Arc; use tokio::sync::watch; use wiremock::matchers::{method, path}; use wiremock::{Mock, MockServer, ResponseTemplate}; async fn setup_mock_server() -> MockServer { let server = MockServer::start().await; // Source page with links to articles (for Phase 1 heuristic extraction) let base = server.uri(); Mock::given(method("GET")) .and(path("/blog")) .respond_with(ResponseTemplate::new(200).set_body_string(format!( r#" Article One Article Two Article Three "# ))) .mount(&server) .await; // Article pages for i in 1..=5 { Mock::given(method("GET")) .and(path(format!("/article-{}", i))) .respond_with(ResponseTemplate::new(200).set_body_string(format!( r#" Test Article {i}

This is the content of test article {i} about artificial intelligence.

"# ))) .mount(&server) .await; } server } async fn setup_user_with_settings( app: &common::TestApp, categories: Vec<&str>, max_items: i32, use_llm_for_links: bool, ) -> (uuid::Uuid, String) { let email = format!("pipeline-{}@test.com", uuid::Uuid::new_v4()); let (user_id, session) = app.create_authenticated_user(&email).await; let categories_json: Vec = categories.iter().map(|c| serde_json::json!(c)).collect(); let settings = serde_json::json!({ "theme": "Intelligence Artificielle", "max_age_days": 365, "categories": categories_json, "max_items_per_category": max_items, "max_articles_per_source": 10, "use_llm_for_source_links": use_llm_for_links, "use_brave_search": false, "article_history_days": 90, "batch_size": 5, "search_agent_behavior": "", "ai_provider": "", "ai_model": "", "ai_model_websearch": "", "rate_limit_max_requests": null, "rate_limit_time_window_seconds": null }); let (status, _) = app.put_with_session("/api/v1/settings", &settings, &session).await; assert_eq!(status.as_u16(), 200, "Settings save should succeed"); (user_id, session) } fn make_progress_channel() -> (Arc>, watch::Receiver) { let (tx, rx) = watch::channel(synthesis::ProgressEvent::Progress { step: "init".into(), message: "Starting...".into(), percent: 0, }); (Arc::new(tx), rx) } #[tokio::test] async fn phase1_with_llm_link_extraction_classifies_articles() { let app = common::TestApp::new().await; let mock_server = setup_mock_server().await; // Use LLM link extraction to bypass SSRF on source page let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 4, true).await; // Add a source pointing to wiremock (same host as article URLs) let source_url = format!("{}/blog", mock_server.uri()); let source = serde_json::json!({"title": "Test Source", "url": source_url}); let (status, _) = app.post_with_session("/api/v1/sources", &source, &session).await; assert!(status.is_success()); // Mock provider: LLM link extraction returns wiremock article URLs (same domain) let article_urls: Vec = (1..=3) .map(|i| format!("{}/article-{}", mock_server.uri(), i)) .collect(); let mock_provider = MockLlmProvider::new() .with_default_category("AI News") .with_link_urls(article_urls) .into_arc(); let job_id = uuid::Uuid::new_v4(); let (tx, _rx) = make_progress_channel(); let state = ai_synth_backend::app_state::AppState::new( app.config.clone(), app.pool.clone(), reqwest::Client::new(), ); let result = synthesis::run_generation_inner( job_id, &state, user_id, &tx, Some(mock_provider), ).await; assert!(result.is_ok(), "Generation should succeed: {:?}", result.err()); let synthesis_id = result.unwrap(); // Verify synthesis was saved with articles let row: (serde_json::Value,) = sqlx::query_as( "SELECT sections FROM syntheses WHERE id = $1" ) .bind(synthesis_id) .fetch_one(&app.pool) .await .expect("Synthesis should exist"); let sections: Vec = serde_json::from_value(row.0).unwrap(); assert!(!sections.is_empty(), "Should have at least one section"); let first_section = §ions[0]; assert_eq!(first_section["title"], "AI News"); let items = first_section["items"].as_array().unwrap(); assert!(!items.is_empty(), "AI News section should have articles"); // Verify article history was recorded let history_count: (i64,) = sqlx::query_as( "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2" ) .bind(user_id) .bind(job_id) .fetch_one(&app.pool) .await .unwrap(); assert!(history_count.0 > 0, "Article history should have entries"); } #[tokio::test] async fn phase2_search_fills_gaps_when_no_sources() { let app = common::TestApp::new().await; let mock_server = setup_mock_server().await; // No sources — Phase 1 produces nothing let (user_id, _session) = setup_user_with_settings(&app, vec!["AI News"], 2, false).await; let mock_provider = MockLlmProvider::new() .with_default_category("AI News") .with_search_urls(vec![ format!("{}/article-1", mock_server.uri()), format!("{}/article-2", mock_server.uri()), ]) .into_arc(); let job_id = uuid::Uuid::new_v4(); let (tx, _rx) = make_progress_channel(); let state = ai_synth_backend::app_state::AppState::new( app.config.clone(), app.pool.clone(), reqwest::Client::new(), ); let result = synthesis::run_generation_inner( job_id, &state, user_id, &tx, Some(mock_provider), ).await; assert!(result.is_ok(), "Generation should succeed: {:?}", result.err()); // Verify synthesis has articles from Phase 2 let synthesis_id = result.unwrap(); let row: (serde_json::Value,) = sqlx::query_as( "SELECT sections FROM syntheses WHERE id = $1" ) .bind(synthesis_id) .fetch_one(&app.pool) .await .unwrap(); let sections: Vec = serde_json::from_value(row.0).unwrap(); assert!(!sections.is_empty(), "Should have sections from Phase 2 search"); } #[tokio::test] async fn category_overflow_spills_to_autre() { let app = common::TestApp::new().await; let mock_server = setup_mock_server().await; // max_items_per_category=1, but LLM classifies all articles to "AI News" let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 1, true).await; let source_url = format!("{}/blog", mock_server.uri()); let source = serde_json::json!({"title": "Test Source", "url": source_url}); app.post_with_session("/api/v1/sources", &source, &session).await; let article_urls: Vec = (1..=3) .map(|i| format!("{}/article-{}", mock_server.uri(), i)) .collect(); let mock_provider = MockLlmProvider::new() .with_default_category("AI News") .with_link_urls(article_urls) .into_arc(); let job_id = uuid::Uuid::new_v4(); let (tx, _rx) = make_progress_channel(); let state = ai_synth_backend::app_state::AppState::new( app.config.clone(), app.pool.clone(), reqwest::Client::new(), ); let result = synthesis::run_generation_inner( job_id, &state, user_id, &tx, Some(mock_provider), ).await; assert!(result.is_ok(), "Generation should succeed"); let synthesis_id = result.unwrap(); let row: (serde_json::Value,) = sqlx::query_as( "SELECT sections FROM syntheses WHERE id = $1" ) .bind(synthesis_id) .fetch_one(&app.pool) .await .unwrap(); let sections: Vec = serde_json::from_value(row.0).unwrap(); // With max_items_per_category=1 and 3 articles all classified as "AI News": // - 1 goes to AI News // - Overflow goes to Autre let ai_section = sections.iter().find(|s| s["title"] == "AI News"); let autre_section = sections.iter().find(|s| s["title"] == "Autre"); assert!(ai_section.is_some(), "Should have AI News section"); let ai_items = ai_section.unwrap()["items"].as_array().unwrap(); assert_eq!(ai_items.len(), 1, "AI News should have exactly 1 item (max)"); if sections.len() > 1 { assert!(autre_section.is_some(), "Overflow should create Autre section"); } }