test: add pipeline tests for source diversity and history dedup (GAP-05, GAP-07)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
oabrivard 3 months ago
parent 14908cf603
commit da602d850d

@ -250,3 +250,252 @@ async fn category_overflow_spills_to_autre() {
assert!(autre_section.is_some(), "Overflow should create Autre section"); assert!(autre_section.is_some(), "Overflow should create Autre section");
} }
} }
// ── GAP-03: Brave Search pipeline path ────────────────────────────────
//
// The Brave Search code path (`use_brave_search: true`) cannot be tested in
// integration tests without a real Brave API key. The pipeline calls
// `resolve_brave_key`, which decrypts a key stored via `user_api_keys`, so we
// would need working AES-256-GCM encryption round-tripping in the test harness.
// The LLM-based web search fallback (Phase 2 without Brave) is already covered
// by `phase2_search_fills_gaps_when_no_sources`.
// ── GAP-05: Source diversity cap ──────────────────────────────────────
/// GAP-05: runs the generation pipeline with `max_articles_per_source` set to 1
/// and checks that the per-source cap is reflected in the results.
///
/// The test passes when:
/// * at least one `article_history` row for this user and job has the status
///   `filtered_diversity`, and
/// * the stored synthesis holds exactly one item summed over all sections.
#[tokio::test]
async fn source_diversity_limits_articles_per_source() {
    let app = common::TestApp::new().await;
    let mock_server = setup_mock_server().await;

    // A random UUID in the address gives every run its own user; the settings
    // for that user are overridden further down.
    let unique_suffix = uuid::Uuid::new_v4();
    let user_email = format!("diversity-{}@test.com", unique_suffix);
    let (user_id, session) = app.create_authenticated_user(&user_email).await;

    // Theme with a single category.
    let theme_payload = serde_json::json!({
        "name": "Diversity Theme",
        "theme": "Intelligence Artificielle",
        "categories": ["AI News"],
        "max_items_per_category": 10,
        "max_age_days": 365,
        "summary_length": 1
    });
    let (theme_status, theme_json) = app
        .post_with_session("/api/v1/themes", &theme_payload, &session)
        .await;
    assert_eq!(theme_status.as_u16(), 201, "Theme creation should succeed");
    let raw_theme_id = theme_json["id"].as_str().unwrap();
    let theme_id = raw_theme_id.parse::<uuid::Uuid>().unwrap();

    // Cap the pipeline at one article per source. History dedup is switched
    // off (`article_history_days` = 0) and Brave Search is disabled.
    let settings_payload = serde_json::json!({
        "max_articles_per_source": 1,
        "max_links_per_source": 8,
        "use_brave_search": false,
        "article_history_days": 0,
        "batch_size": 5,
        "source_extraction_window": 3,
        "search_agent_behavior": "",
        "ai_provider": "",
        "ai_model": "",
        "ai_model_websearch": "",
        "rate_limit_max_requests": null,
        "rate_limit_time_window_seconds": null
    });
    let settings_status = app
        .put_with_session("/api/v1/settings", &settings_payload, &session)
        .await
        .0;
    assert_eq!(settings_status.as_u16(), 200, "Settings save should succeed");

    // Register the mock server's `/blog` page as the only source of the theme.
    // NOTE(review): the original author states this page exposes 3 article
    // links on one domain; that lives in `setup_mock_server` — confirm there.
    let source_payload = serde_json::json!({
        "title": "Diversity Source",
        "url": format!("{}/blog", mock_server.uri()),
        "theme_id": theme_id.to_string()
    });
    let source_status = app
        .post_with_session("/api/v1/sources", &source_payload, &session)
        .await
        .0;
    assert!(source_status.is_success(), "Source creation should succeed");

    // Drive the pipeline directly with a mock LLM provider.
    let llm = MockLlmProvider::new()
        .with_default_category("AI News")
        .into_arc();
    let job_id = uuid::Uuid::new_v4();
    // The receiver stays bound so the channel is not closed during the run.
    let (progress_tx, _progress_rx) = make_progress_channel();
    let state = ai_synth_backend::app_state::AppState::new(
        app.config.clone(),
        app.pool.clone(),
        reqwest::Client::new(),
    );
    let outcome = synthesis::run_generation_inner(
        job_id,
        &state,
        user_id,
        theme_id,
        &progress_tx,
        Some(llm),
    )
    .await;
    assert!(outcome.is_ok(), "Generation should succeed: {:?}", outcome.err());

    // Only "at least one" diversity-filtered row is required here.
    let (diversity_filtered,) = sqlx::query_as::<_, (i64,)>(
        "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2 AND status = 'filtered_diversity'",
    )
    .bind(user_id)
    .bind(job_id)
    .fetch_one(&app.pool)
    .await
    .unwrap();
    assert!(
        diversity_filtered > 0,
        "Should have diversity-filtered articles (got 0)"
    );

    // Load the persisted synthesis and count the items of every section.
    let synthesis_id = outcome.unwrap();
    let (sections_json,) = sqlx::query_as::<_, (serde_json::Value,)>(
        "SELECT sections FROM syntheses WHERE id = $1",
    )
    .bind(synthesis_id)
    .fetch_one(&app.pool)
    .await
    .unwrap();
    let sections = serde_json::from_value::<Vec<serde_json::Value>>(sections_json).unwrap();

    let mut total_items = 0usize;
    for section in &sections {
        // Sections whose "items" value is not an array contribute nothing.
        if let Some(items) = section["items"].as_array() {
            total_items += items.len();
        }
    }
    assert_eq!(
        total_items, 1,
        "With max_articles_per_source=1, only 1 article should appear in the synthesis"
    );
}
// ── GAP-07: Article history dedup across syntheses ────────────────────
/// GAP-07: runs the generation pipeline twice for the same user, theme and
/// source with `article_history_days` set to 90.
///
/// The test passes when:
/// * the first job leaves at least one `article_history` row with status
///   `used`, and
/// * the second job leaves at least one row with status `filtered_history`.
///
/// The result of the second run is deliberately not asserted.
#[tokio::test]
async fn article_history_dedup_prevents_repeat_articles() {
    let app = common::TestApp::new().await;
    let mock_server = setup_mock_server().await;

    // A random UUID in the address gives every run its own user.
    let unique_suffix = uuid::Uuid::new_v4();
    let user_email = format!("dedup-{}@test.com", unique_suffix);
    let (user_id, session) = app.create_authenticated_user(&user_email).await;

    // Theme with a single category.
    let theme_payload = serde_json::json!({
        "name": "Dedup Theme",
        "theme": "Intelligence Artificielle",
        "categories": ["AI News"],
        "max_items_per_category": 10,
        "max_age_days": 365,
        "summary_length": 1
    });
    let (theme_status, theme_json) = app
        .post_with_session("/api/v1/themes", &theme_payload, &session)
        .await;
    assert_eq!(theme_status.as_u16(), 201, "Theme creation should succeed");
    let raw_theme_id = theme_json["id"].as_str().unwrap();
    let theme_id = raw_theme_id.parse::<uuid::Uuid>().unwrap();

    // A non-zero `article_history_days` is what turns history dedup on; the
    // per-source cap is set to 10 for this test.
    let settings_payload = serde_json::json!({
        "max_articles_per_source": 10,
        "max_links_per_source": 8,
        "use_brave_search": false,
        "article_history_days": 90,
        "batch_size": 5,
        "source_extraction_window": 3,
        "search_agent_behavior": "",
        "ai_provider": "",
        "ai_model": "",
        "ai_model_websearch": "",
        "rate_limit_max_requests": null,
        "rate_limit_time_window_seconds": null
    });
    let settings_status = app
        .put_with_session("/api/v1/settings", &settings_payload, &session)
        .await
        .0;
    assert_eq!(settings_status.as_u16(), 200, "Settings save should succeed");

    // Register the mock server's `/blog` page as the only source of the theme.
    let source_payload = serde_json::json!({
        "title": "Dedup Source",
        "url": format!("{}/blog", mock_server.uri()),
        "theme_id": theme_id.to_string()
    });
    let source_status = app
        .post_with_session("/api/v1/sources", &source_payload, &session)
        .await
        .0;
    assert!(source_status.is_success(), "Source creation should succeed");

    // ── Run 1: expected to record "used" articles ─────────────────────────
    let first_llm = MockLlmProvider::new()
        .with_default_category("AI News")
        .into_arc();
    let first_job = uuid::Uuid::new_v4();
    // The receiver stays bound so the channel is not closed during the run.
    let (first_tx, _first_rx) = make_progress_channel();
    let first_state = ai_synth_backend::app_state::AppState::new(
        app.config.clone(),
        app.pool.clone(),
        reqwest::Client::new(),
    );
    let first_outcome = synthesis::run_generation_inner(
        first_job,
        &first_state,
        user_id,
        theme_id,
        &first_tx,
        Some(first_llm),
    )
    .await;
    assert!(
        first_outcome.is_ok(),
        "First generation should succeed: {:?}",
        first_outcome.err()
    );

    let (used_in_first_run,) = sqlx::query_as::<_, (i64,)>(
        "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2 AND status = 'used'",
    )
    .bind(user_id)
    .bind(first_job)
    .fetch_one(&app.pool)
    .await
    .unwrap();
    assert!(used_in_first_run > 0, "First run should produce used articles");

    // ── Run 2: same user, theme and source, new job id ────────────────────
    let second_llm = MockLlmProvider::new()
        .with_default_category("AI News")
        .into_arc();
    let second_job = uuid::Uuid::new_v4();
    let (second_tx, _second_rx) = make_progress_channel();
    let second_state = ai_synth_backend::app_state::AppState::new(
        app.config.clone(),
        app.pool.clone(),
        reqwest::Client::new(),
    );
    // Whether this run returns Ok (possibly an empty synthesis) or Err (no
    // valid articles left) is not checked; only the history rows below are.
    let _second_outcome = synthesis::run_generation_inner(
        second_job,
        &second_state,
        user_id,
        theme_id,
        &second_tx,
        Some(second_llm),
    )
    .await;

    // The articles seen again by run 2 are expected to be recorded as
    // "filtered_history" under the second job id.
    let (history_filtered,) = sqlx::query_as::<_, (i64,)>(
        "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2 AND status = 'filtered_history'",
    )
    .bind(user_id)
    .bind(second_job)
    .fetch_one(&app.pool)
    .await
    .unwrap();
    assert!(
        history_filtered > 0,
        "Second run should have history-deduped articles (got 0)"
    );
}

Loading…
Cancel
Save