You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
712 lines
26 KiB
Rust
712 lines
26 KiB
Rust
mod common;
|
|
|
|
use ai_synth_backend::services::llm::mock::MockLlmProvider;
|
|
use ai_synth_backend::services::synthesis;
|
|
use std::sync::atomic::AtomicBool;
|
|
use std::sync::Arc;
|
|
use tokio::sync::watch;
|
|
use wiremock::matchers::{method, path};
|
|
use wiremock::{Mock, MockServer, ResponseTemplate};
|
|
|
|
async fn setup_mock_server() -> MockServer {
|
|
let server = MockServer::start().await;
|
|
|
|
// Source page with links to articles (for Phase 1 heuristic extraction)
|
|
let base = server.uri();
|
|
Mock::given(method("GET"))
|
|
.and(path("/blog"))
|
|
.respond_with(ResponseTemplate::new(200).set_body_string(format!(
|
|
r#"<html><body>
|
|
<a href="{base}/article-1">Article One</a>
|
|
<a href="{base}/article-2">Article Two</a>
|
|
<a href="{base}/article-3">Article Three</a>
|
|
</body></html>"#
|
|
)))
|
|
.mount(&server)
|
|
.await;
|
|
|
|
// Article pages
|
|
for i in 1..=5 {
|
|
Mock::given(method("GET"))
|
|
.and(path(format!("/article-{}", i)))
|
|
.respond_with(ResponseTemplate::new(200).set_body_string(format!(
|
|
r#"<html>
|
|
<head><title>Test Article {i}</title></head>
|
|
<body><p>This is the content of test article {i} about artificial intelligence.</p></body>
|
|
</html>"#
|
|
)))
|
|
.mount(&server)
|
|
.await;
|
|
}
|
|
|
|
server
|
|
}
|
|
|
|
async fn setup_user_with_settings(
|
|
app: &common::TestApp,
|
|
categories: Vec<&str>,
|
|
max_items: i32,
|
|
) -> (uuid::Uuid, String, uuid::Uuid) {
|
|
let email = format!("pipeline-{}@test.com", uuid::Uuid::new_v4());
|
|
let (user_id, session) = app.create_authenticated_user(&email).await;
|
|
|
|
let settings = serde_json::json!({
|
|
"max_articles_per_source": 10,
|
|
"max_links_per_source": 8,
|
|
"use_brave_search": false,
|
|
"article_history_days": 90,
|
|
"batch_size": 5,
|
|
"source_extraction_window": 3,
|
|
"search_agent_behavior": "",
|
|
"ai_provider": "",
|
|
"ai_model": "",
|
|
"ai_model_websearch": "",
|
|
"rate_limit_max_requests": null,
|
|
"rate_limit_time_window_seconds": null
|
|
});
|
|
let (status, _) = app.put_with_session("/api/v1/settings", &settings, &session).await;
|
|
assert_eq!(status.as_u16(), 200, "Settings save should succeed");
|
|
|
|
// Create a theme for the pipeline
|
|
let categories_json: Vec<serde_json::Value> = categories.iter().map(|c| serde_json::json!(c)).collect();
|
|
let theme_body = serde_json::json!({
|
|
"name": "Test Theme",
|
|
"theme": "Intelligence Artificielle",
|
|
"categories": categories_json,
|
|
"max_items_per_category": max_items,
|
|
"max_age_days": 365,
|
|
"summary_length": 3
|
|
});
|
|
let (theme_status, theme_resp) = app.post_with_session("/api/v1/themes", &theme_body, &session).await;
|
|
assert_eq!(theme_status.as_u16(), 201, "Theme creation should succeed");
|
|
let theme_id: uuid::Uuid = theme_resp["id"].as_str().unwrap().parse().unwrap();
|
|
|
|
(user_id, session, theme_id)
|
|
}
|
|
|
|
/// Builds the progress channel handed to the generation pipeline.
///
/// The channel is seeded with an initial `Progress` event (step `"init"`,
/// message `"Starting..."`, 0 percent). The sender is returned wrapped in an
/// `Arc`, alongside the receiver.
fn make_progress_channel() -> (Arc<watch::Sender<synthesis::ProgressEvent>>, watch::Receiver<synthesis::ProgressEvent>) {
    let initial_event = synthesis::ProgressEvent::Progress {
        step: "init".into(),
        message: "Starting...".into(),
        percent: 0,
    };
    let (sender, receiver) = watch::channel(initial_event);
    (Arc::new(sender), receiver)
}
|
|
|
|
#[tokio::test]
|
|
async fn phase1_heuristic_extraction_classifies_articles() {
|
|
let app = common::TestApp::new().await;
|
|
let mock_server = setup_mock_server().await;
|
|
|
|
let (user_id, session, theme_id) = setup_user_with_settings(&app, vec!["AI News"], 4).await;
|
|
|
|
// Add a source pointing to wiremock (same host as article URLs)
|
|
let source_url = format!("{}/blog", mock_server.uri());
|
|
let source = serde_json::json!({"title": "Test Source", "url": source_url, "theme_id": theme_id.to_string()});
|
|
let (status, _) = app.post_with_session("/api/v1/sources", &source, &session).await;
|
|
assert!(status.is_success());
|
|
|
|
let mock_provider = MockLlmProvider::new()
|
|
.with_default_category("AI News")
|
|
.into_arc();
|
|
|
|
let job_id = uuid::Uuid::new_v4();
|
|
let (tx, _rx) = make_progress_channel();
|
|
|
|
let state = ai_synth_backend::app_state::AppState::new(
|
|
app.config.clone(), app.pool.clone(), reqwest::Client::new(),
|
|
);
|
|
|
|
let result = synthesis::run_generation_inner(
|
|
job_id, &state, user_id, theme_id, &tx, Some(mock_provider), &AtomicBool::new(false),
|
|
).await;
|
|
|
|
assert!(result.is_ok(), "Generation should succeed: {:?}", result.err());
|
|
|
|
let synthesis_id = result.unwrap();
|
|
|
|
// Verify synthesis was saved with articles
|
|
let row: (serde_json::Value,) = sqlx::query_as(
|
|
"SELECT sections FROM syntheses WHERE id = $1"
|
|
)
|
|
.bind(synthesis_id)
|
|
.fetch_one(&app.pool)
|
|
.await
|
|
.expect("Synthesis should exist");
|
|
|
|
let sections: Vec<serde_json::Value> = serde_json::from_value(row.0).unwrap();
|
|
assert!(!sections.is_empty(), "Should have at least one section");
|
|
|
|
let first_section = §ions[0];
|
|
assert_eq!(first_section["title"], "AI News");
|
|
let items = first_section["items"].as_array().unwrap();
|
|
assert!(!items.is_empty(), "AI News section should have articles");
|
|
|
|
// Verify article history was recorded
|
|
let history_count: (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2"
|
|
)
|
|
.bind(user_id)
|
|
.bind(job_id)
|
|
.fetch_one(&app.pool)
|
|
.await
|
|
.unwrap();
|
|
assert!(history_count.0 > 0, "Article history should have entries");
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn phase2_search_fills_gaps_when_no_sources() {
|
|
let app = common::TestApp::new().await;
|
|
let mock_server = setup_mock_server().await;
|
|
|
|
// No sources -- Phase 1 produces nothing
|
|
let (user_id, _session, theme_id) = setup_user_with_settings(&app, vec!["AI News"], 2).await;
|
|
|
|
let mock_provider = MockLlmProvider::new()
|
|
.with_default_category("AI News")
|
|
.with_search_urls(vec![
|
|
format!("{}/article-1", mock_server.uri()),
|
|
format!("{}/article-2", mock_server.uri()),
|
|
])
|
|
.into_arc();
|
|
|
|
let job_id = uuid::Uuid::new_v4();
|
|
let (tx, _rx) = make_progress_channel();
|
|
|
|
let state = ai_synth_backend::app_state::AppState::new(
|
|
app.config.clone(), app.pool.clone(), reqwest::Client::new(),
|
|
);
|
|
|
|
let result = synthesis::run_generation_inner(
|
|
job_id, &state, user_id, theme_id, &tx, Some(mock_provider), &AtomicBool::new(false),
|
|
).await;
|
|
|
|
assert!(result.is_ok(), "Generation should succeed: {:?}", result.err());
|
|
|
|
// Verify synthesis has articles from Phase 2
|
|
let synthesis_id = result.unwrap();
|
|
let row: (serde_json::Value,) = sqlx::query_as(
|
|
"SELECT sections FROM syntheses WHERE id = $1"
|
|
)
|
|
.bind(synthesis_id)
|
|
.fetch_one(&app.pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
let sections: Vec<serde_json::Value> = serde_json::from_value(row.0).unwrap();
|
|
assert!(!sections.is_empty(), "Should have sections from Phase 2 search");
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn category_overflow_spills_to_autre() {
|
|
let app = common::TestApp::new().await;
|
|
let mock_server = setup_mock_server().await;
|
|
|
|
// max_items_per_category=1, but LLM classifies all articles to "AI News"
|
|
let (user_id, session, theme_id) = setup_user_with_settings(&app, vec!["AI News"], 1).await;
|
|
|
|
let source_url = format!("{}/blog", mock_server.uri());
|
|
let source = serde_json::json!({"title": "Test Source", "url": source_url, "theme_id": theme_id.to_string()});
|
|
app.post_with_session("/api/v1/sources", &source, &session).await;
|
|
|
|
let mock_provider = MockLlmProvider::new()
|
|
.with_default_category("AI News")
|
|
.into_arc();
|
|
|
|
let job_id = uuid::Uuid::new_v4();
|
|
let (tx, _rx) = make_progress_channel();
|
|
|
|
let state = ai_synth_backend::app_state::AppState::new(
|
|
app.config.clone(), app.pool.clone(), reqwest::Client::new(),
|
|
);
|
|
|
|
let result = synthesis::run_generation_inner(
|
|
job_id, &state, user_id, theme_id, &tx, Some(mock_provider), &AtomicBool::new(false),
|
|
).await;
|
|
|
|
assert!(result.is_ok(), "Generation should succeed");
|
|
|
|
let synthesis_id = result.unwrap();
|
|
let row: (serde_json::Value,) = sqlx::query_as(
|
|
"SELECT sections FROM syntheses WHERE id = $1"
|
|
)
|
|
.bind(synthesis_id)
|
|
.fetch_one(&app.pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
let sections: Vec<serde_json::Value> = serde_json::from_value(row.0).unwrap();
|
|
|
|
// With max_items_per_category=1 and 3 articles all classified as "AI News":
|
|
// - 1 goes to AI News
|
|
// - Overflow goes to Autre
|
|
let ai_section = sections.iter().find(|s| s["title"] == "AI News");
|
|
let divers_section = sections.iter().find(|s| s["title"] == "Divers");
|
|
|
|
assert!(ai_section.is_some(), "Should have AI News section");
|
|
let ai_items = ai_section.unwrap()["items"].as_array().unwrap();
|
|
assert_eq!(ai_items.len(), 1, "AI News should have exactly 1 item (max)");
|
|
|
|
if sections.len() > 1 {
|
|
assert!(divers_section.is_some(), "Overflow should create Divers section");
|
|
}
|
|
}
|
|
|
|
// ── GAP-03: Brave Search pipeline path ────────────────────────────────
//
// The Brave Search code path (`use_brave_search: true`) cannot be tested in
// integration tests without a real Brave API key. The pipeline calls
// `resolve_brave_key`, which decrypts a key stored via `user_api_keys`, so we
// would need working AES-256-GCM encryption round-tripping in the test harness.
// The LLM-based web search fallback (Phase 2 without Brave) is already covered
// by `phase2_search_fills_gaps_when_no_sources`.
|
|
|
|
// ── GAP-05: Source diversity cap ──────────────────────────────────────
|
|
|
|
#[tokio::test]
|
|
async fn source_diversity_limits_articles_per_source() {
|
|
let app = common::TestApp::new().await;
|
|
let mock_server = setup_mock_server().await;
|
|
|
|
// Create user with default settings first, then override max_articles_per_source
|
|
let email = format!("diversity-{}@test.com", uuid::Uuid::new_v4());
|
|
let (user_id, session) = app.create_authenticated_user(&email).await;
|
|
|
|
// Create theme
|
|
let theme_body = serde_json::json!({
|
|
"name": "Diversity Theme",
|
|
"theme": "Intelligence Artificielle",
|
|
"categories": ["AI News"],
|
|
"max_items_per_category": 10,
|
|
"max_age_days": 365,
|
|
"summary_length": 1
|
|
});
|
|
let (theme_status, theme_resp) = app
|
|
.post_with_session("/api/v1/themes", &theme_body, &session)
|
|
.await;
|
|
assert_eq!(theme_status.as_u16(), 201, "Theme creation should succeed");
|
|
let theme_id: uuid::Uuid = theme_resp["id"].as_str().unwrap().parse().unwrap();
|
|
|
|
// Update settings: max_articles_per_source = 1 so only 1 article from
|
|
// the mock server domain is accepted (the source page has 3 links).
|
|
let settings = serde_json::json!({
|
|
"max_articles_per_source": 1,
|
|
"max_links_per_source": 8,
|
|
"use_brave_search": false,
|
|
"article_history_days": 0,
|
|
"batch_size": 1,
|
|
"source_extraction_window": 3,
|
|
"search_agent_behavior": "",
|
|
"ai_provider": "",
|
|
"ai_model": "",
|
|
"ai_model_websearch": "",
|
|
"rate_limit_max_requests": null,
|
|
"rate_limit_time_window_seconds": null
|
|
});
|
|
let (settings_status, _) = app
|
|
.put_with_session("/api/v1/settings", &settings, &session)
|
|
.await;
|
|
assert_eq!(settings_status.as_u16(), 200, "Settings save should succeed");
|
|
|
|
// Add source pointing to mock server
|
|
let source_url = format!("{}/blog", mock_server.uri());
|
|
let source = serde_json::json!({
|
|
"title": "Diversity Source",
|
|
"url": source_url,
|
|
"theme_id": theme_id.to_string()
|
|
});
|
|
let (source_status, _) = app
|
|
.post_with_session("/api/v1/sources", &source, &session)
|
|
.await;
|
|
assert!(source_status.is_success(), "Source creation should succeed");
|
|
|
|
// Run pipeline
|
|
let mock_provider = MockLlmProvider::new()
|
|
.with_default_category("AI News")
|
|
.into_arc();
|
|
|
|
let job_id = uuid::Uuid::new_v4();
|
|
let (tx, _rx) = make_progress_channel();
|
|
|
|
let state = ai_synth_backend::app_state::AppState::new(
|
|
app.config.clone(),
|
|
app.pool.clone(),
|
|
reqwest::Client::new(),
|
|
);
|
|
|
|
let result = synthesis::run_generation_inner(
|
|
job_id, &state, user_id, theme_id, &tx, Some(mock_provider), &AtomicBool::new(false),
|
|
)
|
|
.await;
|
|
|
|
assert!(result.is_ok(), "Generation should succeed: {:?}", result.err());
|
|
|
|
// Verify that some articles were filtered by source diversity
|
|
let diversity_count: (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2 AND status = 'filtered_diversity'"
|
|
)
|
|
.bind(user_id)
|
|
.bind(job_id)
|
|
.fetch_one(&app.pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
// The mock source page has 3 article links, all from the same domain.
|
|
// With max_articles_per_source=1, at least 2 should be filtered.
|
|
assert!(
|
|
diversity_count.0 > 0,
|
|
"Should have diversity-filtered articles (got 0)"
|
|
);
|
|
|
|
// Verify the synthesis only contains 1 article (the cap)
|
|
let synthesis_id = result.unwrap();
|
|
let row: (serde_json::Value,) =
|
|
sqlx::query_as("SELECT sections FROM syntheses WHERE id = $1")
|
|
.bind(synthesis_id)
|
|
.fetch_one(&app.pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
let sections: Vec<serde_json::Value> = serde_json::from_value(row.0).unwrap();
|
|
let total_items: usize = sections
|
|
.iter()
|
|
.filter_map(|s| s["items"].as_array())
|
|
.map(|items| items.len())
|
|
.sum();
|
|
assert_eq!(
|
|
total_items, 1,
|
|
"With max_articles_per_source=1, only 1 article should appear in the synthesis"
|
|
);
|
|
}
|
|
|
|
// ── GAP-07: Article history dedup across syntheses ────────────────────
|
|
|
|
#[tokio::test]
|
|
async fn article_history_dedup_prevents_repeat_articles() {
|
|
let app = common::TestApp::new().await;
|
|
let mock_server = setup_mock_server().await;
|
|
|
|
let email = format!("dedup-{}@test.com", uuid::Uuid::new_v4());
|
|
let (user_id, session) = app.create_authenticated_user(&email).await;
|
|
|
|
// Create theme
|
|
let theme_body = serde_json::json!({
|
|
"name": "Dedup Theme",
|
|
"theme": "Intelligence Artificielle",
|
|
"categories": ["AI News"],
|
|
"max_items_per_category": 10,
|
|
"max_age_days": 365,
|
|
"summary_length": 1
|
|
});
|
|
let (theme_status, theme_resp) = app
|
|
.post_with_session("/api/v1/themes", &theme_body, &session)
|
|
.await;
|
|
assert_eq!(theme_status.as_u16(), 201, "Theme creation should succeed");
|
|
let theme_id: uuid::Uuid = theme_resp["id"].as_str().unwrap().parse().unwrap();
|
|
|
|
// Settings with article_history_days > 0 to enable dedup
|
|
let settings = serde_json::json!({
|
|
"max_articles_per_source": 10,
|
|
"max_links_per_source": 8,
|
|
"use_brave_search": false,
|
|
"article_history_days": 90,
|
|
"batch_size": 5,
|
|
"source_extraction_window": 3,
|
|
"search_agent_behavior": "",
|
|
"ai_provider": "",
|
|
"ai_model": "",
|
|
"ai_model_websearch": "",
|
|
"rate_limit_max_requests": null,
|
|
"rate_limit_time_window_seconds": null
|
|
});
|
|
let (settings_status, _) = app
|
|
.put_with_session("/api/v1/settings", &settings, &session)
|
|
.await;
|
|
assert_eq!(settings_status.as_u16(), 200, "Settings save should succeed");
|
|
|
|
// Add source
|
|
let source_url = format!("{}/blog", mock_server.uri());
|
|
let source = serde_json::json!({
|
|
"title": "Dedup Source",
|
|
"url": source_url,
|
|
"theme_id": theme_id.to_string()
|
|
});
|
|
let (source_status, _) = app
|
|
.post_with_session("/api/v1/sources", &source, &session)
|
|
.await;
|
|
assert!(source_status.is_success(), "Source creation should succeed");
|
|
|
|
// ── First generation ──────────────────────────────────────────────
|
|
let mock1 = MockLlmProvider::new()
|
|
.with_default_category("AI News")
|
|
.into_arc();
|
|
let job1 = uuid::Uuid::new_v4();
|
|
let (tx1, _rx1) = make_progress_channel();
|
|
let state1 = ai_synth_backend::app_state::AppState::new(
|
|
app.config.clone(),
|
|
app.pool.clone(),
|
|
reqwest::Client::new(),
|
|
);
|
|
|
|
let result1 = synthesis::run_generation_inner(
|
|
job1, &state1, user_id, theme_id, &tx1, Some(mock1), &AtomicBool::new(false),
|
|
)
|
|
.await;
|
|
assert!(result1.is_ok(), "First generation should succeed: {:?}", result1.err());
|
|
|
|
// Verify first run produced articles
|
|
let used_count_1: (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2 AND status = 'used'"
|
|
)
|
|
.bind(user_id)
|
|
.bind(job1)
|
|
.fetch_one(&app.pool)
|
|
.await
|
|
.unwrap();
|
|
assert!(used_count_1.0 > 0, "First run should produce used articles");
|
|
|
|
// ── Second generation — same source, same articles ────────────────
|
|
let mock2 = MockLlmProvider::new()
|
|
.with_default_category("AI News")
|
|
.into_arc();
|
|
let job2 = uuid::Uuid::new_v4();
|
|
let (tx2, _rx2) = make_progress_channel();
|
|
let state2 = ai_synth_backend::app_state::AppState::new(
|
|
app.config.clone(),
|
|
app.pool.clone(),
|
|
reqwest::Client::new(),
|
|
);
|
|
|
|
// The second run scrapes the same URLs, which are already in article_history.
|
|
// They should be filtered out as "filtered_history".
|
|
let _result2 = synthesis::run_generation_inner(
|
|
job2, &state2, user_id, theme_id, &tx2, Some(mock2), &AtomicBool::new(false),
|
|
)
|
|
.await;
|
|
// The second run may succeed (empty synthesis) or fail (no valid articles).
|
|
// Either way, history-dedup entries must exist.
|
|
|
|
let dedup_count: (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2 AND status = 'filtered_history'"
|
|
)
|
|
.bind(user_id)
|
|
.bind(job2)
|
|
.fetch_one(&app.pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
assert!(
|
|
dedup_count.0 > 0,
|
|
"Second run should have history-deduped articles (got 0)"
|
|
);
|
|
}
|
|
|
|
// ── GAP-08: Preferred source ordering ─────────────────────────────────
//
// The pipeline places preferred sources before non-preferred ones in the
// processing order (`ordered_sources = [preferred, non_preferred].concat()`).
// Within each wave, articles from preferred source URLs are also placed first
// before being shuffled independently within their group (preferred_urls and
// other_urls are each shuffled separately).
//
// Deterministic ordering at the individual article level cannot be guaranteed
// because articles within the preferred group are randomly shuffled.
//
// What we can verify deterministically:
// - With a preferred source and a non-preferred source, both contribute
//   articles to the synthesis (i.e., preferred ordering does not prevent
//   non-preferred sources from being processed).
// - The article_history table records entries from both sources.
//
// A test that tries to assert "article from preferred source appears before
// article from non-preferred source in the synthesis" would be flaky due to
// the intentional shuffle within each group.
|
|
|
|
#[tokio::test]
|
|
async fn preferred_sources_processed_first() {
|
|
let app = common::TestApp::new().await;
|
|
|
|
// Set up two source pages on the mock server:
|
|
// /blog-a (preferred) with /article-pref
|
|
// /blog-b (non-preferred) with /article-norm
|
|
let server = wiremock::MockServer::start().await;
|
|
let base = server.uri();
|
|
|
|
// Preferred source page
|
|
Mock::given(method("GET"))
|
|
.and(path("/blog-a"))
|
|
.respond_with(ResponseTemplate::new(200).set_body_string(format!(
|
|
r#"<html><body><a href="{base}/article-pref">Preferred Article</a></body></html>"#
|
|
)))
|
|
.mount(&server)
|
|
.await;
|
|
|
|
// Non-preferred source page
|
|
Mock::given(method("GET"))
|
|
.and(path("/blog-b"))
|
|
.respond_with(ResponseTemplate::new(200).set_body_string(format!(
|
|
r#"<html><body><a href="{base}/article-norm">Normal Article</a></body></html>"#
|
|
)))
|
|
.mount(&server)
|
|
.await;
|
|
|
|
// Article pages
|
|
Mock::given(method("GET"))
|
|
.and(path("/article-pref"))
|
|
.respond_with(ResponseTemplate::new(200).set_body_string(
|
|
r#"<html><head><title>Preferred Article</title></head>
|
|
<body><p>This is a preferred article about artificial intelligence research.</p></body></html>"#
|
|
.to_string(),
|
|
))
|
|
.mount(&server)
|
|
.await;
|
|
|
|
Mock::given(method("GET"))
|
|
.and(path("/article-norm"))
|
|
.respond_with(ResponseTemplate::new(200).set_body_string(
|
|
r#"<html><head><title>Normal Article</title></head>
|
|
<body><p>This is a normal article about machine learning and AI systems.</p></body></html>"#
|
|
.to_string(),
|
|
))
|
|
.mount(&server)
|
|
.await;
|
|
|
|
let email = format!("preferred-order-{}@test.com", uuid::Uuid::new_v4());
|
|
let (user_id, session) = app.create_authenticated_user(&email).await;
|
|
|
|
// Create theme
|
|
let theme_body = serde_json::json!({
|
|
"name": "Preferred Test Theme",
|
|
"theme": "Intelligence Artificielle",
|
|
"categories": ["AI News"],
|
|
"max_items_per_category": 10,
|
|
"max_age_days": 365,
|
|
"summary_length": 1
|
|
});
|
|
let (theme_status, theme_resp) = app
|
|
.post_with_session("/api/v1/themes", &theme_body, &session)
|
|
.await;
|
|
assert_eq!(theme_status.as_u16(), 201, "Theme creation should succeed");
|
|
let theme_id: uuid::Uuid = theme_resp["id"].as_str().unwrap().parse().unwrap();
|
|
|
|
// Settings: batch_size=1, source_extraction_window=10 so both sources
|
|
// are processed in a single wave, article_history_days=90 to enable tracing
|
|
let settings = serde_json::json!({
|
|
"max_articles_per_source": 10,
|
|
"max_links_per_source": 8,
|
|
"use_brave_search": false,
|
|
"article_history_days": 90,
|
|
"batch_size": 1,
|
|
"source_extraction_window": 10,
|
|
"search_agent_behavior": "",
|
|
"ai_provider": "",
|
|
"ai_model": "",
|
|
"ai_model_websearch": "",
|
|
"rate_limit_max_requests": null,
|
|
"rate_limit_time_window_seconds": null
|
|
});
|
|
let (settings_status, _) = app
|
|
.put_with_session("/api/v1/settings", &settings, &session)
|
|
.await;
|
|
assert_eq!(settings_status.as_u16(), 200, "Settings save should succeed");
|
|
|
|
// Create source A (will be marked preferred)
|
|
let source_a_body = serde_json::json!({
|
|
"title": "Source A (preferred)",
|
|
"url": format!("{}/blog-a", base),
|
|
"theme_id": theme_id.to_string()
|
|
});
|
|
let (status_a, resp_a) = app
|
|
.post_with_session("/api/v1/sources", &source_a_body, &session)
|
|
.await;
|
|
assert!(status_a.is_success(), "Source A creation should succeed");
|
|
let source_a_id = resp_a["id"].as_str().expect("Source A should have an id");
|
|
|
|
// Create source B (non-preferred)
|
|
let source_b_body = serde_json::json!({
|
|
"title": "Source B (normal)",
|
|
"url": format!("{}/blog-b", base),
|
|
"theme_id": theme_id.to_string()
|
|
});
|
|
let (status_b, _) = app
|
|
.post_with_session("/api/v1/sources", &source_b_body, &session)
|
|
.await;
|
|
assert!(status_b.is_success(), "Source B creation should succeed");
|
|
|
|
// Mark source A as preferred
|
|
let pref_body = serde_json::json!({ "source_ids": [source_a_id], "theme_id": theme_id });
|
|
let (pref_status, _) = app
|
|
.put_with_session("/api/v1/sources/preferred", &pref_body, &session)
|
|
.await;
|
|
assert_eq!(pref_status.as_u16(), 204, "Setting preferred sources should succeed");
|
|
|
|
// Run the pipeline
|
|
let mock_provider = MockLlmProvider::new()
|
|
.with_default_category("AI News")
|
|
.into_arc();
|
|
|
|
let job_id = uuid::Uuid::new_v4();
|
|
let (tx, _rx) = make_progress_channel();
|
|
|
|
let state = ai_synth_backend::app_state::AppState::new(
|
|
app.config.clone(),
|
|
app.pool.clone(),
|
|
reqwest::Client::new(),
|
|
);
|
|
|
|
let result = synthesis::run_generation_inner(
|
|
job_id,
|
|
&state,
|
|
user_id,
|
|
theme_id,
|
|
&tx,
|
|
Some(mock_provider),
|
|
&AtomicBool::new(false),
|
|
)
|
|
.await;
|
|
|
|
assert!(result.is_ok(), "Generation should succeed: {:?}", result.err());
|
|
|
|
// Verify both sources contributed articles to article_history.
|
|
// This confirms the preferred-first ordering does not prevent non-preferred
|
|
// sources from being processed. Asserting the exact order within the
|
|
// synthesis is not done here because articles within each group are
|
|
// randomly shuffled by the pipeline.
|
|
let used_count: (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2 AND status = 'used'",
|
|
)
|
|
.bind(user_id)
|
|
.bind(job_id)
|
|
.fetch_one(&app.pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
assert!(
|
|
used_count.0 >= 2,
|
|
"Both preferred and non-preferred source articles should appear in history (got {})",
|
|
used_count.0
|
|
);
|
|
|
|
// Verify the synthesis has articles from both sources
|
|
let synthesis_id = result.unwrap();
|
|
let row: (serde_json::Value,) =
|
|
sqlx::query_as("SELECT sections FROM syntheses WHERE id = $1")
|
|
.bind(synthesis_id)
|
|
.fetch_one(&app.pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
let sections: Vec<serde_json::Value> = serde_json::from_value(row.0).unwrap();
|
|
let total_items: usize = sections
|
|
.iter()
|
|
.filter_map(|s| s["items"].as_array())
|
|
.map(|items| items.len())
|
|
.sum();
|
|
|
|
assert!(
|
|
total_items >= 2,
|
|
"Synthesis should contain articles from both sources (got {})",
|
|
total_items
|
|
);
|
|
}
|