diff --git a/backend/Cargo.lock b/backend/Cargo.lock
index 08dc5f6..184164a 100644
--- a/backend/Cargo.lock
+++ b/backend/Cargo.lock
@@ -87,6 +87,7 @@ dependencies = [
"tracing-subscriber",
"url",
"uuid",
+ "wiremock",
"zeroize",
]
@@ -161,6 +162,16 @@ version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
+[[package]]
+name = "assert-json-diff"
+version = "2.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
[[package]]
name = "async-trait"
version = "0.1.89"
@@ -549,6 +560,24 @@ dependencies = [
"parking_lot_core",
]
+[[package]]
+name = "deadpool"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b"
+dependencies = [
+ "deadpool-runtime",
+ "lazy_static",
+ "num_cpus",
+ "tokio",
+]
+
+[[package]]
+name = "deadpool-runtime"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b"
+
[[package]]
name = "der"
version = "0.7.10"
@@ -992,6 +1021,12 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
[[package]]
name = "hex"
version = "0.4.3"
@@ -1664,6 +1699,16 @@ dependencies = [
"libm",
]
+[[package]]
+name = "num_cpus"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
+dependencies = [
+ "hermit-abi",
+ "libc",
+]
+
[[package]]
name = "once_cell"
version = "1.21.4"
@@ -2021,6 +2066,18 @@ dependencies = [
"bitflags",
]
+[[package]]
+name = "regex"
+version = "1.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
[[package]]
name = "regex-automata"
version = "0.4.14"
@@ -3587,6 +3644,29 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+[[package]]
+name = "wiremock"
+version = "0.6.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031"
+dependencies = [
+ "assert-json-diff",
+ "base64",
+ "deadpool",
+ "futures",
+ "http",
+ "http-body-util",
+ "hyper",
+ "hyper-util",
+ "log",
+ "once_cell",
+ "regex",
+ "serde",
+ "serde_json",
+ "tokio",
+ "url",
+]
+
[[package]]
name = "wit-bindgen"
version = "0.51.0"
diff --git a/backend/Cargo.toml b/backend/Cargo.toml
index b94ba2b..5958aca 100644
--- a/backend/Cargo.toml
+++ b/backend/Cargo.toml
@@ -86,3 +86,4 @@ serde_json = "1"
uuid = { version = "1", features = ["v4"] }
tokio = { version = "1", features = ["full"] }
sqlx = { version = "0.8", features = ["runtime-tokio", "tls-rustls", "postgres", "uuid", "chrono", "json", "migrate"] }
+# HTTP mock server; tests/pipeline_test.rs uses it to serve fake article pages.
+wiremock = "0.6"
diff --git a/backend/tests/pipeline_test.rs b/backend/tests/pipeline_test.rs
new file mode 100644
index 0000000..e9eba57
--- /dev/null
+++ b/backend/tests/pipeline_test.rs
@@ -0,0 +1,239 @@
+mod common;
+
+use ai_synth_backend::services::llm::mock::MockLlmProvider;
+use ai_synth_backend::services::synthesis;
+use std::sync::Arc;
+use tokio::sync::watch;
+use wiremock::matchers::{method, path};
+use wiremock::{Mock, MockServer, ResponseTemplate};
+
+/// Starts a wiremock server that serves five fake article pages.
+///
+/// `GET /article-1` through `GET /article-5` each answer `200` with a small
+/// HTML document whose title and body text mention the article number, so the
+/// tests in this file can hand out article URLs that resolve locally.
+async fn setup_mock_server() -> MockServer {
+    let server = MockServer::start().await;
+
+    // Article pages
+    // NOTE(review): the markup below was reconstructed after the tags were
+    // stripped from this patch — confirm it matches the intended fixture.
+    for i in 1..=5 {
+        Mock::given(method("GET"))
+            .and(path(format!("/article-{}", i)))
+            .respond_with(ResponseTemplate::new(200).set_body_string(format!(
+                r#"
+                <html><head><title>Test Article {i}</title></head>
+                <body><article><p>This is the content of test article {i} about artificial intelligence.</p></article></body></html>
+                "#
+            )))
+            .mount(&server)
+            .await;
+    }
+
+    server
+}
+
+/// Creates an authenticated test user and saves generation settings for it.
+///
+/// * `categories` – category names written to the `categories` setting.
+/// * `max_items` – value for `max_items_per_category`.
+/// * `use_llm_for_links` – value for `use_llm_for_source_links`.
+///
+/// All other settings are fixed test values (Brave search disabled, no
+/// provider/model overrides, no rate limit). Panics if the settings endpoint
+/// does not answer `200`.
+///
+/// Returns the new user's id together with its session token.
+async fn setup_user_with_settings(
+    app: &common::TestApp,
+    categories: Vec<&str>,
+    max_items: i32,
+    use_llm_for_links: bool,
+) -> (uuid::Uuid, String) {
+    // A random address keeps users from different tests from colliding.
+    let email = format!("pipeline-{}@test.com", uuid::Uuid::new_v4());
+    let (user_id, session) = app.create_authenticated_user(&email).await;
+
+    let categories_json: Vec<serde_json::Value> =
+        categories.iter().map(|c| serde_json::json!(c)).collect();
+    let settings = serde_json::json!({
+        "theme": "Intelligence Artificielle",
+        "max_age_days": 365,
+        "categories": categories_json,
+        "max_items_per_category": max_items,
+        "max_articles_per_source": 10,
+        "use_llm_for_source_links": use_llm_for_links,
+        "use_brave_search": false,
+        "article_history_days": 90,
+        "batch_size": 5,
+        "search_agent_behavior": "",
+        "ai_provider": "",
+        "ai_model": "",
+        "ai_model_websearch": "",
+        "rate_limit_max_requests": null,
+        "rate_limit_time_window_seconds": null
+    });
+    let (status, _) = app.put_with_session("/api/v1/settings", &settings, &session).await;
+    assert_eq!(status.as_u16(), 200, "Settings save should succeed");
+
+    (user_id, session)
+}
+
+/// Builds the progress channel handed to `run_generation_inner`.
+///
+/// The channel is seeded with an initial `Progress` event (`step = "init"`,
+/// `percent = 0`). The sender is wrapped in an `Arc`; the receiver is returned
+/// so the caller can keep it alive for the duration of the test.
+fn make_progress_channel() -> (
+    Arc<watch::Sender<synthesis::ProgressEvent>>,
+    watch::Receiver<synthesis::ProgressEvent>,
+) {
+    let (tx, rx) = watch::channel(synthesis::ProgressEvent::Progress {
+        step: "init".into(),
+        message: "Starting...".into(),
+        percent: 0,
+    });
+    (Arc::new(tx), rx)
+}
+
+#[tokio::test]
+async fn phase1_with_llm_link_extraction_classifies_articles() {
+ let app = common::TestApp::new().await;
+ let mock_server = setup_mock_server().await;
+
+ // Use LLM link extraction to bypass SSRF on source page
+ let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 4, true).await;
+
+ // Add a source (URL doesn't matter much — LLM mock will return article URLs)
+ let source = serde_json::json!({"title": "Test Source", "url": "https://example.com/blog"});
+ let (status, _) = app.post_with_session("/api/v1/sources", &source, &session).await;
+ assert!(status.is_success());
+
+ // Mock provider: LLM link extraction returns wiremock article URLs
+ let article_urls: Vec = (1..=3)
+ .map(|i| format!("{}/article-{}", mock_server.uri(), i))
+ .collect();
+
+ let mock_provider = MockLlmProvider::new()
+ .with_default_category("AI News")
+ .with_link_urls(article_urls)
+ .into_arc();
+
+ let job_id = uuid::Uuid::new_v4();
+ let (tx, _rx) = make_progress_channel();
+
+ let state = ai_synth_backend::app_state::AppState::new(
+ app.config.clone(), app.pool.clone(), reqwest::Client::new(),
+ );
+
+ let result = synthesis::run_generation_inner(
+ job_id, &state, user_id, &tx, Some(mock_provider),
+ ).await;
+
+ assert!(result.is_ok(), "Generation should succeed: {:?}", result.err());
+
+ let synthesis_id = result.unwrap();
+
+ // Verify synthesis was saved with articles
+ let row: (serde_json::Value,) = sqlx::query_as(
+ "SELECT sections FROM syntheses WHERE id = $1"
+ )
+ .bind(synthesis_id)
+ .fetch_one(&app.pool)
+ .await
+ .expect("Synthesis should exist");
+
+ let sections: Vec = serde_json::from_value(row.0).unwrap();
+ assert!(!sections.is_empty(), "Should have at least one section");
+
+ let first_section = §ions[0];
+ assert_eq!(first_section["title"], "AI News");
+ let items = first_section["items"].as_array().unwrap();
+ assert!(!items.is_empty(), "AI News section should have articles");
+
+ // Verify article history was recorded
+ let history_count: (i64,) = sqlx::query_as(
+ "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2"
+ )
+ .bind(user_id)
+ .bind(job_id)
+ .fetch_one(&app.pool)
+ .await
+ .unwrap();
+ assert!(history_count.0 > 0, "Article history should have entries");
+}
+
+/// Phase 2 search: the user has no sources, so only the two search URLs
+/// returned by the mock provider can supply articles. The saved synthesis
+/// must contain at least one section.
+#[tokio::test]
+async fn phase2_search_fills_gaps_when_no_sources() {
+    let app = common::TestApp::new().await;
+    let mock_server = setup_mock_server().await;
+
+    // No sources — Phase 1 produces nothing
+    let (user_id, _session) = setup_user_with_settings(&app, vec!["AI News"], 2, false).await;
+
+    let mock_provider = MockLlmProvider::new()
+        .with_default_category("AI News")
+        .with_search_urls(vec![
+            format!("{}/article-1", mock_server.uri()),
+            format!("{}/article-2", mock_server.uri()),
+        ])
+        .into_arc();
+
+    let job_id = uuid::Uuid::new_v4();
+    // Bind the receiver to a name so it is not dropped while the job runs.
+    let (tx, _rx) = make_progress_channel();
+
+    let state = ai_synth_backend::app_state::AppState::new(
+        app.config.clone(),
+        app.pool.clone(),
+        reqwest::Client::new(),
+    );
+
+    let result =
+        synthesis::run_generation_inner(job_id, &state, user_id, &tx, Some(mock_provider)).await;
+
+    // `expect` reports the error value on failure.
+    let synthesis_id = result.expect("Generation should succeed");
+
+    // Verify synthesis has articles from Phase 2
+    let row: (serde_json::Value,) = sqlx::query_as("SELECT sections FROM syntheses WHERE id = $1")
+        .bind(synthesis_id)
+        .fetch_one(&app.pool)
+        .await
+        .unwrap();
+
+    let sections: Vec<serde_json::Value> = serde_json::from_value(row.0).unwrap();
+    assert!(!sections.is_empty(), "Should have sections from Phase 2 search");
+}
+
+/// Category overflow: with `max_items_per_category = 1` and three articles
+/// that the mock LLM all classifies as "AI News", the "AI News" section must
+/// hold exactly one item. If the synthesis has more than one section, one of
+/// them must be titled "Autre".
+#[tokio::test]
+async fn category_overflow_spills_to_autre() {
+    let app = common::TestApp::new().await;
+    let mock_server = setup_mock_server().await;
+
+    // max_items_per_category=1, but LLM classifies all articles to "AI News"
+    let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 1, true).await;
+
+    let source = serde_json::json!({"title": "Test Source", "url": "https://example.com/blog"});
+    let (status, _) = app.post_with_session("/api/v1/sources", &source, &session).await;
+    // Fail here if the source was not created, rather than later in generation.
+    assert!(status.is_success(), "Source creation should succeed");
+
+    let article_urls: Vec<String> = (1..=3)
+        .map(|i| format!("{}/article-{}", mock_server.uri(), i))
+        .collect();
+
+    let mock_provider = MockLlmProvider::new()
+        .with_default_category("AI News")
+        .with_link_urls(article_urls)
+        .into_arc();
+
+    let job_id = uuid::Uuid::new_v4();
+    // Bind the receiver to a name so it is not dropped while the job runs.
+    let (tx, _rx) = make_progress_channel();
+
+    let state = ai_synth_backend::app_state::AppState::new(
+        app.config.clone(),
+        app.pool.clone(),
+        reqwest::Client::new(),
+    );
+
+    let result =
+        synthesis::run_generation_inner(job_id, &state, user_id, &tx, Some(mock_provider)).await;
+
+    // `expect` reports the error value on failure.
+    let synthesis_id = result.expect("Generation should succeed");
+
+    let row: (serde_json::Value,) = sqlx::query_as("SELECT sections FROM syntheses WHERE id = $1")
+        .bind(synthesis_id)
+        .fetch_one(&app.pool)
+        .await
+        .unwrap();
+
+    let sections: Vec<serde_json::Value> = serde_json::from_value(row.0).unwrap();
+
+    // With max_items_per_category=1 and 3 articles all classified as "AI News":
+    // - 1 goes to AI News
+    // - Overflow goes to Autre
+    let ai_section = sections
+        .iter()
+        .find(|s| s["title"] == "AI News")
+        .expect("Should have AI News section");
+    let autre_section = sections.iter().find(|s| s["title"] == "Autre");
+
+    let ai_items = ai_section["items"].as_array().unwrap();
+    assert_eq!(ai_items.len(), 1, "AI News should have exactly 1 item (max)");
+
+    // The overflow check only applies when more than one section was produced.
+    if sections.len() > 1 {
+        assert!(autre_section.is_some(), "Overflow should create Autre section");
+    }
+}