feat: add pipeline integration tests with MockLlmProvider and wiremock
Add three integration tests that exercise the synthesis generation pipeline end-to-end, using MockLlmProvider for the LLM and wiremock for HTTP mocking:
- phase1_with_llm_link_extraction_classifies_articles
- phase2_search_fills_gaps_when_no_sources
- category_overflow_spills_to_autre

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
parent
ccecaa2d13
commit
370e033506
@ -0,0 +1,239 @@
|
||||
mod common;
|
||||
|
||||
use ai_synth_backend::services::llm::mock::MockLlmProvider;
|
||||
use ai_synth_backend::services::synthesis;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::watch;
|
||||
use wiremock::matchers::{method, path};
|
||||
use wiremock::{Mock, MockServer, ResponseTemplate};
|
||||
|
||||
async fn setup_mock_server() -> MockServer {
|
||||
let server = MockServer::start().await;
|
||||
|
||||
// Article pages
|
||||
for i in 1..=5 {
|
||||
Mock::given(method("GET"))
|
||||
.and(path(format!("/article-{}", i)))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_string(format!(
|
||||
r#"<html>
|
||||
<head><title>Test Article {i}</title></head>
|
||||
<body><p>This is the content of test article {i} about artificial intelligence.</p></body>
|
||||
</html>"#
|
||||
)))
|
||||
.mount(&server)
|
||||
.await;
|
||||
}
|
||||
|
||||
server
|
||||
}
|
||||
|
||||
async fn setup_user_with_settings(
|
||||
app: &common::TestApp,
|
||||
categories: Vec<&str>,
|
||||
max_items: i32,
|
||||
use_llm_for_links: bool,
|
||||
) -> (uuid::Uuid, String) {
|
||||
let email = format!("pipeline-{}@test.com", uuid::Uuid::new_v4());
|
||||
let (user_id, session) = app.create_authenticated_user(&email).await;
|
||||
|
||||
let categories_json: Vec<serde_json::Value> = categories.iter().map(|c| serde_json::json!(c)).collect();
|
||||
let settings = serde_json::json!({
|
||||
"theme": "Intelligence Artificielle",
|
||||
"max_age_days": 365,
|
||||
"categories": categories_json,
|
||||
"max_items_per_category": max_items,
|
||||
"max_articles_per_source": 10,
|
||||
"use_llm_for_source_links": use_llm_for_links,
|
||||
"use_brave_search": false,
|
||||
"article_history_days": 90,
|
||||
"batch_size": 5,
|
||||
"search_agent_behavior": "",
|
||||
"ai_provider": "",
|
||||
"ai_model": "",
|
||||
"ai_model_websearch": "",
|
||||
"rate_limit_max_requests": null,
|
||||
"rate_limit_time_window_seconds": null
|
||||
});
|
||||
let (status, _) = app.put_with_session("/api/v1/settings", &settings, &session).await;
|
||||
assert_eq!(status.as_u16(), 200, "Settings save should succeed");
|
||||
|
||||
(user_id, session)
|
||||
}
|
||||
|
||||
/// Builds the watch channel handed to the generation pipeline for progress
/// reporting.
///
/// The channel starts out holding an `"init"` progress event at 0 percent.
/// The sender is returned wrapped in an `Arc`; the receiver is returned as-is.
fn make_progress_channel() -> (
    Arc<watch::Sender<synthesis::ProgressEvent>>,
    watch::Receiver<synthesis::ProgressEvent>,
) {
    let initial_event = synthesis::ProgressEvent::Progress {
        step: "init".into(),
        message: "Starting...".into(),
        percent: 0,
    };

    let (sender, receiver) = watch::channel(initial_event);
    (Arc::new(sender), receiver)
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn phase1_with_llm_link_extraction_classifies_articles() {
|
||||
let app = common::TestApp::new().await;
|
||||
let mock_server = setup_mock_server().await;
|
||||
|
||||
// Use LLM link extraction to bypass SSRF on source page
|
||||
let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 4, true).await;
|
||||
|
||||
// Add a source (URL doesn't matter much — LLM mock will return article URLs)
|
||||
let source = serde_json::json!({"title": "Test Source", "url": "https://example.com/blog"});
|
||||
let (status, _) = app.post_with_session("/api/v1/sources", &source, &session).await;
|
||||
assert!(status.is_success());
|
||||
|
||||
// Mock provider: LLM link extraction returns wiremock article URLs
|
||||
let article_urls: Vec<String> = (1..=3)
|
||||
.map(|i| format!("{}/article-{}", mock_server.uri(), i))
|
||||
.collect();
|
||||
|
||||
let mock_provider = MockLlmProvider::new()
|
||||
.with_default_category("AI News")
|
||||
.with_link_urls(article_urls)
|
||||
.into_arc();
|
||||
|
||||
let job_id = uuid::Uuid::new_v4();
|
||||
let (tx, _rx) = make_progress_channel();
|
||||
|
||||
let state = ai_synth_backend::app_state::AppState::new(
|
||||
app.config.clone(), app.pool.clone(), reqwest::Client::new(),
|
||||
);
|
||||
|
||||
let result = synthesis::run_generation_inner(
|
||||
job_id, &state, user_id, &tx, Some(mock_provider),
|
||||
).await;
|
||||
|
||||
assert!(result.is_ok(), "Generation should succeed: {:?}", result.err());
|
||||
|
||||
let synthesis_id = result.unwrap();
|
||||
|
||||
// Verify synthesis was saved with articles
|
||||
let row: (serde_json::Value,) = sqlx::query_as(
|
||||
"SELECT sections FROM syntheses WHERE id = $1"
|
||||
)
|
||||
.bind(synthesis_id)
|
||||
.fetch_one(&app.pool)
|
||||
.await
|
||||
.expect("Synthesis should exist");
|
||||
|
||||
let sections: Vec<serde_json::Value> = serde_json::from_value(row.0).unwrap();
|
||||
assert!(!sections.is_empty(), "Should have at least one section");
|
||||
|
||||
let first_section = §ions[0];
|
||||
assert_eq!(first_section["title"], "AI News");
|
||||
let items = first_section["items"].as_array().unwrap();
|
||||
assert!(!items.is_empty(), "AI News section should have articles");
|
||||
|
||||
// Verify article history was recorded
|
||||
let history_count: (i64,) = sqlx::query_as(
|
||||
"SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2"
|
||||
)
|
||||
.bind(user_id)
|
||||
.bind(job_id)
|
||||
.fetch_one(&app.pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(history_count.0 > 0, "Article history should have entries");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn phase2_search_fills_gaps_when_no_sources() {
|
||||
let app = common::TestApp::new().await;
|
||||
let mock_server = setup_mock_server().await;
|
||||
|
||||
// No sources — Phase 1 produces nothing
|
||||
let (user_id, _session) = setup_user_with_settings(&app, vec!["AI News"], 2, false).await;
|
||||
|
||||
let mock_provider = MockLlmProvider::new()
|
||||
.with_default_category("AI News")
|
||||
.with_search_urls(vec![
|
||||
format!("{}/article-1", mock_server.uri()),
|
||||
format!("{}/article-2", mock_server.uri()),
|
||||
])
|
||||
.into_arc();
|
||||
|
||||
let job_id = uuid::Uuid::new_v4();
|
||||
let (tx, _rx) = make_progress_channel();
|
||||
|
||||
let state = ai_synth_backend::app_state::AppState::new(
|
||||
app.config.clone(), app.pool.clone(), reqwest::Client::new(),
|
||||
);
|
||||
|
||||
let result = synthesis::run_generation_inner(
|
||||
job_id, &state, user_id, &tx, Some(mock_provider),
|
||||
).await;
|
||||
|
||||
assert!(result.is_ok(), "Generation should succeed: {:?}", result.err());
|
||||
|
||||
// Verify synthesis has articles from Phase 2
|
||||
let synthesis_id = result.unwrap();
|
||||
let row: (serde_json::Value,) = sqlx::query_as(
|
||||
"SELECT sections FROM syntheses WHERE id = $1"
|
||||
)
|
||||
.bind(synthesis_id)
|
||||
.fetch_one(&app.pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let sections: Vec<serde_json::Value> = serde_json::from_value(row.0).unwrap();
|
||||
assert!(!sections.is_empty(), "Should have sections from Phase 2 search");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn category_overflow_spills_to_autre() {
|
||||
let app = common::TestApp::new().await;
|
||||
let mock_server = setup_mock_server().await;
|
||||
|
||||
// max_items_per_category=1, but LLM classifies all articles to "AI News"
|
||||
let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 1, true).await;
|
||||
|
||||
let source = serde_json::json!({"title": "Test Source", "url": "https://example.com/blog"});
|
||||
app.post_with_session("/api/v1/sources", &source, &session).await;
|
||||
|
||||
let article_urls: Vec<String> = (1..=3)
|
||||
.map(|i| format!("{}/article-{}", mock_server.uri(), i))
|
||||
.collect();
|
||||
|
||||
let mock_provider = MockLlmProvider::new()
|
||||
.with_default_category("AI News")
|
||||
.with_link_urls(article_urls)
|
||||
.into_arc();
|
||||
|
||||
let job_id = uuid::Uuid::new_v4();
|
||||
let (tx, _rx) = make_progress_channel();
|
||||
|
||||
let state = ai_synth_backend::app_state::AppState::new(
|
||||
app.config.clone(), app.pool.clone(), reqwest::Client::new(),
|
||||
);
|
||||
|
||||
let result = synthesis::run_generation_inner(
|
||||
job_id, &state, user_id, &tx, Some(mock_provider),
|
||||
).await;
|
||||
|
||||
assert!(result.is_ok(), "Generation should succeed");
|
||||
|
||||
let synthesis_id = result.unwrap();
|
||||
let row: (serde_json::Value,) = sqlx::query_as(
|
||||
"SELECT sections FROM syntheses WHERE id = $1"
|
||||
)
|
||||
.bind(synthesis_id)
|
||||
.fetch_one(&app.pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let sections: Vec<serde_json::Value> = serde_json::from_value(row.0).unwrap();
|
||||
|
||||
// With max_items_per_category=1 and 3 articles all classified as "AI News":
|
||||
// - 1 goes to AI News
|
||||
// - Overflow goes to Autre
|
||||
let ai_section = sections.iter().find(|s| s["title"] == "AI News");
|
||||
let autre_section = sections.iter().find(|s| s["title"] == "Autre");
|
||||
|
||||
assert!(ai_section.is_some(), "Should have AI News section");
|
||||
let ai_items = ai_section.unwrap()["items"].as_array().unwrap();
|
||||
assert_eq!(ai_items.len(), 1, "AI News should have exactly 1 item (max)");
|
||||
|
||||
if sections.len() > 1 {
|
||||
assert!(autre_section.is_some(), "Overflow should create Autre section");
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue