test: add integration test for RSS feed discovery and persistence in pipeline

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
oabrivard 2 months ago
parent e2ce401ea6
commit 1cb7bf6c6f

@ -709,3 +709,163 @@ async fn preferred_sources_processed_first() {
total_items
);
}
// ── RSS feed integration ─────────────────────────────────────────────
/// End-to-end check that a generation run discovers a source's RSS feed and
/// persists it on the `sources` row.
///
/// Flow:
/// 1. A mock server serves a blog page whose `<head>` advertises `/feed.xml`,
///    the feed itself (five items), and the five article pages it links to.
/// 2. A source pointing at the blog page (not at the feed) is created through
///    the API.
/// 3. A first generation must succeed, yield a synthesis with at least one
///    section, record `personalized_source` rows in `article_history` for the
///    job, and leave `rss_url` / `rss_discovered_at` populated on the source.
/// 4. A second generation, run against a freshly built `AppState`, must also
///    succeed and leave the stored `rss_url` unchanged.
#[tokio::test]
async fn phase1_rss_feed_extraction_persists_rss_url() {
    let app = common::TestApp::new().await;
    let server = MockServer::start().await;
    let base = server.uri();

    // Source page with RSS link discovery
    Mock::given(method("GET"))
        .and(path("/blog-rss"))
        .respond_with(ResponseTemplate::new(200).set_body_string(format!(
            r#"<html><head>
<link rel="alternate" type="application/rss+xml" href="{base}/feed.xml">
</head><body><p>Blog homepage</p></body></html>"#
        )))
        .mount(&server)
        .await;

    // RSS feed with 5 articles.
    // The day-of-week in each pubDate must agree with the calendar date:
    // strict RFC 2822 parsers reject a mismatched weekday.
    // NOTE(review): these dates are fixed; if the pipeline applies a freshness
    // window to feed items, this fixture will age out — confirm.
    Mock::given(method("GET"))
        .and(path("/feed.xml"))
        .respond_with(ResponseTemplate::new(200).set_body_raw(
            format!(
                r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Blog</title>
<item><title>Article 1</title><link>{base}/article-1</link><pubDate>Fri, 03 Apr 2026 10:00:00 GMT</pubDate></item>
<item><title>Article 2</title><link>{base}/article-2</link><pubDate>Thu, 02 Apr 2026 10:00:00 GMT</pubDate></item>
<item><title>Article 3</title><link>{base}/article-3</link><pubDate>Wed, 01 Apr 2026 10:00:00 GMT</pubDate></item>
<item><title>Article 4</title><link>{base}/article-4</link><pubDate>Tue, 31 Mar 2026 10:00:00 GMT</pubDate></item>
<item><title>Article 5</title><link>{base}/article-5</link><pubDate>Mon, 30 Mar 2026 10:00:00 GMT</pubDate></item>
</channel>
</rss>"#
            ),
            "application/rss+xml",
        ))
        .mount(&server)
        .await;

    // Article pages referenced by the feed items
    for i in 1..=5 {
        Mock::given(method("GET"))
            .and(path(format!("/article-{i}")))
            .respond_with(ResponseTemplate::new(200).set_body_string(format!(
                r#"<html>
<head><title>RSS Article {i}</title></head>
<body><p>This is RSS article {i} about artificial intelligence.</p></body>
</html>"#
            )))
            .mount(&server)
            .await;
    }

    let (user_id, session, theme_id) = setup_user_with_settings(&app, vec!["AI News"], 4).await;

    // Add a source pointing to the blog page (not the feed directly)
    let source_url = format!("{base}/blog-rss");
    let source = serde_json::json!({
        "title": "RSS Source",
        "url": source_url,
        "theme_id": theme_id.to_string()
    });
    let (status, _) = app
        .post_with_session("/api/v1/sources", &source, &session)
        .await;
    assert!(status.is_success(), "Source creation should succeed");

    // ── First generation: discovers and uses RSS feed ─────────────────
    let mock_provider = MockLlmProvider::new()
        .with_default_category("AI News")
        .into_arc();
    let job_id = uuid::Uuid::new_v4();
    let (tx, _rx) = make_progress_channel();
    let state = ai_synth_backend::app_state::AppState::new(
        app.config.clone(),
        app.pool.clone(),
        reqwest::Client::new(),
    );
    let synthesis_id = synthesis::run_generation_inner(
        job_id,
        &state,
        user_id,
        theme_id,
        &tx,
        Some(mock_provider),
        &AtomicBool::new(false),
    )
    .await
    .expect("First generation should succeed");

    // Verify synthesis has articles
    let (sections_json,): (serde_json::Value,) =
        sqlx::query_as("SELECT sections FROM syntheses WHERE id = $1")
            .bind(synthesis_id)
            .fetch_one(&app.pool)
            .await
            .expect("Synthesis should exist");
    let sections: Vec<serde_json::Value> = serde_json::from_value(sections_json)
        .expect("sections column should deserialize into a list of JSON values");
    assert!(!sections.is_empty(), "Should have at least one section");

    // Verify article history has personalized_source entries
    let (history_count,): (i64,) = sqlx::query_as(
        "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2 AND source_type = 'personalized_source'",
    )
    .bind(user_id)
    .bind(job_id)
    .fetch_one(&app.pool)
    .await
    .unwrap();
    assert!(
        history_count > 0,
        "Should have personalized_source entries in article history"
    );

    // ── Key assertion: RSS URL was persisted to the source ────────────
    let (rss_url, rss_discovered_at): (Option<String>, Option<chrono::DateTime<chrono::Utc>>) =
        sqlx::query_as(
            "SELECT rss_url, rss_discovered_at FROM sources WHERE user_id = $1 AND url = $2",
        )
        .bind(user_id)
        .bind(&source_url)
        .fetch_one(&app.pool)
        .await
        .expect("Source should exist");
    let rss_url = rss_url.expect("rss_url should be set after generation");
    assert!(
        rss_url.contains("/feed.xml"),
        "rss_url should point to the discovered feed: {rss_url:?}"
    );
    assert!(rss_discovered_at.is_some(), "rss_discovered_at should be set");

    // ── Second generation: runs with the RSS URL already persisted ────
    // A fresh AppState and HTTP client are built for this run.
    // NOTE(review): presumably so that nothing held in the first state
    // instance can mask a missing database write — confirm.
    let mock_provider2 = MockLlmProvider::new()
        .with_default_category("AI News")
        .into_arc();
    let job_id2 = uuid::Uuid::new_v4();
    let (tx2, _rx2) = make_progress_channel();
    let state2 = ai_synth_backend::app_state::AppState::new(
        app.config.clone(),
        app.pool.clone(),
        reqwest::Client::new(),
    );
    // Second run may produce empty synthesis (all articles already in history)
    // but should not error
    synthesis::run_generation_inner(
        job_id2,
        &state2,
        user_id,
        theme_id,
        &tx2,
        Some(mock_provider2),
        &AtomicBool::new(false),
    )
    .await
    .expect("Second generation should succeed");

    // RSS URL should still be persisted, and unchanged from the first run
    let (rss_url_after,): (Option<String>,) =
        sqlx::query_as("SELECT rss_url FROM sources WHERE user_id = $1 AND url = $2")
            .bind(user_id)
            .bind(&source_url)
            .fetch_one(&app.pool)
            .await
            .unwrap();
    assert_eq!(
        rss_url_after.as_deref(),
        Some(rss_url.as_str()),
        "rss_url should still be set (and unchanged) after second generation"
    );
}

Loading…
Cancel
Save