diff --git a/backend/Cargo.lock b/backend/Cargo.lock index 08dc5f6..184164a 100644 --- a/backend/Cargo.lock +++ b/backend/Cargo.lock @@ -87,6 +87,7 @@ dependencies = [ "tracing-subscriber", "url", "uuid", + "wiremock", "zeroize", ] @@ -161,6 +162,16 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "async-trait" version = "0.1.89" @@ -549,6 +560,24 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "deadpool" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b" +dependencies = [ + "deadpool-runtime", + "lazy_static", + "num_cpus", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" + [[package]] name = "der" version = "0.7.10" @@ -992,6 +1021,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -1664,6 +1699,16 @@ dependencies = [ "libm", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -2021,6 +2066,18 @@ dependencies = [ "bitflags", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + [[package]] name = "regex-automata" version = "0.4.14" @@ -3587,6 +3644,29 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wiremock" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031" +dependencies = [ + "assert-json-diff", + "base64", + "deadpool", + "futures", + "http", + "http-body-util", + "hyper", + "hyper-util", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", + "url", +] + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/backend/Cargo.toml b/backend/Cargo.toml index b94ba2b..5958aca 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -86,3 +86,4 @@ serde_json = "1" uuid = { version = "1", features = ["v4"] } tokio = { version = "1", features = ["full"] } sqlx = { version = "0.8", features = ["runtime-tokio", "tls-rustls", "postgres", "uuid", "chrono", "json", "migrate"] } +wiremock = "0.6" diff --git a/backend/tests/pipeline_test.rs b/backend/tests/pipeline_test.rs new file mode 100644 index 0000000..e9eba57 --- /dev/null +++ b/backend/tests/pipeline_test.rs @@ -0,0 +1,239 @@ +mod common; + +use ai_synth_backend::services::llm::mock::MockLlmProvider; +use ai_synth_backend::services::synthesis; +use std::sync::Arc; +use tokio::sync::watch; +use wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +async fn setup_mock_server() -> MockServer { + let server = MockServer::start().await; + + // Article pages + for i in 1..=5 { + Mock::given(method("GET")) + .and(path(format!("/article-{}", i))) + .respond_with(ResponseTemplate::new(200).set_body_string(format!( + r#" + Test Article {i} +

This is the content of test article {i} about artificial intelligence.

+ "# + ))) + .mount(&server) + .await; + } + + server +} + +async fn setup_user_with_settings( + app: &common::TestApp, + categories: Vec<&str>, + max_items: i32, + use_llm_for_links: bool, +) -> (uuid::Uuid, String) { + let email = format!("pipeline-{}@test.com", uuid::Uuid::new_v4()); + let (user_id, session) = app.create_authenticated_user(&email).await; + + let categories_json: Vec = categories.iter().map(|c| serde_json::json!(c)).collect(); + let settings = serde_json::json!({ + "theme": "Intelligence Artificielle", + "max_age_days": 365, + "categories": categories_json, + "max_items_per_category": max_items, + "max_articles_per_source": 10, + "use_llm_for_source_links": use_llm_for_links, + "use_brave_search": false, + "article_history_days": 90, + "batch_size": 5, + "search_agent_behavior": "", + "ai_provider": "", + "ai_model": "", + "ai_model_websearch": "", + "rate_limit_max_requests": null, + "rate_limit_time_window_seconds": null + }); + let (status, _) = app.put_with_session("/api/v1/settings", &settings, &session).await; + assert_eq!(status.as_u16(), 200, "Settings save should succeed"); + + (user_id, session) +} + +fn make_progress_channel() -> (Arc>, watch::Receiver) { + let (tx, rx) = watch::channel(synthesis::ProgressEvent::Progress { + step: "init".into(), + message: "Starting...".into(), + percent: 0, + }); + (Arc::new(tx), rx) +} + +#[tokio::test] +async fn phase1_with_llm_link_extraction_classifies_articles() { + let app = common::TestApp::new().await; + let mock_server = setup_mock_server().await; + + // Use LLM link extraction to bypass SSRF on source page + let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 4, true).await; + + // Add a source (URL doesn't matter much — LLM mock will return article URLs) + let source = serde_json::json!({"title": "Test Source", "url": "https://example.com/blog"}); + let (status, _) = app.post_with_session("/api/v1/sources", &source, &session).await; + assert!(status.is_success()); + + // Mock provider: LLM link extraction returns wiremock article URLs + let article_urls: Vec = (1..=3) + .map(|i| format!("{}/article-{}", mock_server.uri(), i)) + .collect(); + + let mock_provider = MockLlmProvider::new() + .with_default_category("AI News") + .with_link_urls(article_urls) + .into_arc(); + + let job_id = uuid::Uuid::new_v4(); + let (tx, _rx) = make_progress_channel(); + + let state = ai_synth_backend::app_state::AppState::new( + app.config.clone(), app.pool.clone(), reqwest::Client::new(), + ); + + let result = synthesis::run_generation_inner( + job_id, &state, user_id, &tx, Some(mock_provider), + ).await; + + assert!(result.is_ok(), "Generation should succeed: {:?}", result.err()); + + let synthesis_id = result.unwrap(); + + // Verify synthesis was saved with articles + let row: (serde_json::Value,) = sqlx::query_as( + "SELECT sections FROM syntheses WHERE id = $1" + ) + .bind(synthesis_id) + .fetch_one(&app.pool) + .await + .expect("Synthesis should exist"); + + let sections: Vec = serde_json::from_value(row.0).unwrap(); + assert!(!sections.is_empty(), "Should have at least one section"); + + let first_section = §ions[0]; + assert_eq!(first_section["title"], "AI News"); + let items = first_section["items"].as_array().unwrap(); + assert!(!items.is_empty(), "AI News section should have articles"); + + // Verify article history was recorded + let history_count: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2" + ) + .bind(user_id) + .bind(job_id) + .fetch_one(&app.pool) + .await + .unwrap(); + assert!(history_count.0 > 0, "Article history should have entries"); +} + +#[tokio::test] +async fn phase2_search_fills_gaps_when_no_sources() { + let app = common::TestApp::new().await; + let mock_server = setup_mock_server().await; + + // No sources — Phase 1 produces nothing + let (user_id, _session) = setup_user_with_settings(&app, vec!["AI News"], 2, false).await; + + let mock_provider = MockLlmProvider::new() + .with_default_category("AI News") + .with_search_urls(vec![ + format!("{}/article-1", mock_server.uri()), + format!("{}/article-2", mock_server.uri()), + ]) + .into_arc(); + + let job_id = uuid::Uuid::new_v4(); + let (tx, _rx) = make_progress_channel(); + + let state = ai_synth_backend::app_state::AppState::new( + app.config.clone(), app.pool.clone(), reqwest::Client::new(), + ); + + let result = synthesis::run_generation_inner( + job_id, &state, user_id, &tx, Some(mock_provider), + ).await; + + assert!(result.is_ok(), "Generation should succeed: {:?}", result.err()); + + // Verify synthesis has articles from Phase 2 + let synthesis_id = result.unwrap(); + let row: (serde_json::Value,) = sqlx::query_as( + "SELECT sections FROM syntheses WHERE id = $1" + ) + .bind(synthesis_id) + .fetch_one(&app.pool) + .await + .unwrap(); + + let sections: Vec = serde_json::from_value(row.0).unwrap(); + assert!(!sections.is_empty(), "Should have sections from Phase 2 search"); +} + +#[tokio::test] +async fn category_overflow_spills_to_autre() { + let app = common::TestApp::new().await; + let mock_server = setup_mock_server().await; + + // max_items_per_category=1, but LLM classifies all articles to "AI News" + let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 1, true).await; + + let source = serde_json::json!({"title": "Test Source", "url": "https://example.com/blog"}); + app.post_with_session("/api/v1/sources", &source, &session).await; + + let article_urls: Vec = (1..=3) + .map(|i| format!("{}/article-{}", mock_server.uri(), i)) + .collect(); + + let mock_provider = MockLlmProvider::new() + .with_default_category("AI News") + .with_link_urls(article_urls) + .into_arc(); + + let job_id = uuid::Uuid::new_v4(); + let (tx, _rx) = make_progress_channel(); + + let state = ai_synth_backend::app_state::AppState::new( + app.config.clone(), app.pool.clone(), reqwest::Client::new(), + ); + + let result = synthesis::run_generation_inner( + job_id, &state, user_id, &tx, Some(mock_provider), + ).await; + + assert!(result.is_ok(), "Generation should succeed"); + + let synthesis_id = result.unwrap(); + let row: (serde_json::Value,) = sqlx::query_as( + "SELECT sections FROM syntheses WHERE id = $1" + ) + .bind(synthesis_id) + .fetch_one(&app.pool) + .await + .unwrap(); + + let sections: Vec = serde_json::from_value(row.0).unwrap(); + + // With max_items_per_category=1 and 3 articles all classified as "AI News": + // - 1 goes to AI News + // - Overflow goes to Autre + let ai_section = sections.iter().find(|s| s["title"] == "AI News"); + let autre_section = sections.iter().find(|s| s["title"] == "Autre"); + + assert!(ai_section.is_some(), "Should have AI News section"); + let ai_items = ai_section.unwrap()["items"].as_array().unwrap(); + assert_eq!(ai_items.len(), 1, "AI News should have exactly 1 item (max)"); + + if sections.len() > 1 { + assert!(autre_section.is_some(), "Overflow should create Autre section"); + } +}