test: add integration test for site_search fallback in pipeline

Verifies that when a source page returns no article links (blocked/empty),
the pipeline does not crash and still produces article_history entries via
the site_search fallback path or Phase 2.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
master
oabrivard 2 months ago
parent 75ab2470f2
commit 1bac084d98

@ -869,3 +869,82 @@ async fn phase1_rss_feed_extraction_persists_rss_url() {
assert!(rss_row2.0.is_some(), "rss_url should still be set after second generation");
}
// ── Site search fallback ─────────────────────────────────────────────
/// Runs a full generation against a source whose page contains no article links.
///
/// The mock server serves an "Access Denied" page at `/blocked-site` and three
/// article pages at `/article-1` .. `/article-3`. The mock LLM provider is
/// configured with those three article URLs as its search results. The test
/// passes when `run_generation_inner` returns `Ok` and at least one
/// `article_history` row exists for this user and job.
///
/// NOTE(review): the assertions do not distinguish which pipeline path produced
/// the history rows (site search fallback vs. Phase 2) — only that some exist.
#[tokio::test]
async fn phase1_site_search_fallback_when_source_returns_no_links() {
    let app = common::TestApp::new().await;
    let mock_server = MockServer::start().await;
    let base_url = mock_server.uri();

    // The source page itself: a 200 response with no links in it, standing in
    // for a Cloudflare-style block or an otherwise empty page.
    let blocked_page = ResponseTemplate::new(200).set_body_string(
        "<html><head><title>Access Denied</title></head><body><p>Please verify you are human.</p></body></html>",
    );
    Mock::given(method("GET"))
        .and(path("/blocked-site"))
        .respond_with(blocked_page)
        .mount(&mock_server)
        .await;

    // The article pages that the mock provider's search URLs point at.
    for i in 1..=3 {
        let article_html = format!(
            r#"<html>
<head><title>Fallback Article {i}</title></head>
<body><p>This is a fallback article {i} about artificial intelligence.</p></body>
</html>"#
        );
        Mock::given(method("GET"))
            .and(path(format!("/article-{i}")))
            .respond_with(ResponseTemplate::new(200).set_body_string(article_html))
            .mount(&mock_server)
            .await;
    }

    let (user_id, session, theme_id) = setup_user_with_settings(&app, vec!["AI News"], 4).await;

    // Register a source whose URL is the blocked page.
    let blocked_url = format!("{base_url}/blocked-site");
    let source_payload = serde_json::json!({
        "title": "Blocked Source",
        "url": blocked_url,
        "theme_id": theme_id.to_string()
    });
    let (create_status, _) = app
        .post_with_session("/api/v1/sources", &source_payload, &session)
        .await;
    assert!(create_status.is_success(), "Source creation should succeed");

    // The search URLs handed to the mock provider play the role of the LLM
    // site search returning articles.
    let fallback_urls: Vec<String> = (1..=3)
        .map(|i| format!("{base_url}/article-{i}"))
        .collect();
    let llm_provider = MockLlmProvider::new()
        .with_default_category("AI News")
        .with_search_urls(fallback_urls)
        .into_arc();

    let job_id = uuid::Uuid::new_v4();
    // `_rx` is kept bound so the receiving end stays alive for the whole run.
    let (tx, _rx) = make_progress_channel();
    let cancel_flag = AtomicBool::new(false);
    let state = ai_synth_backend::app_state::AppState::new(
        app.config.clone(),
        app.pool.clone(),
        reqwest::Client::new(),
    );

    let result = synthesis::run_generation_inner(
        job_id,
        &state,
        user_id,
        theme_id,
        &tx,
        Some(llm_provider),
        &cancel_flag,
    )
    .await;
    assert!(result.is_ok(), "Generation should succeed: {:?}", result.err());

    // History rows may come from site_search or from Phase 2; either is accepted.
    let (history_rows,): (i64,) = sqlx::query_as(
        "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2",
    )
    .bind(user_id)
    .bind(job_id)
    .fetch_one(&app.pool)
    .await
    .unwrap();
    assert!(history_rows > 0, "Should have article history entries from fallback");
}

Loading…
Cancel
Save