diff --git a/backend/tests/pipeline_test.rs b/backend/tests/pipeline_test.rs
index a6ed0a1..46d4988 100644
--- a/backend/tests/pipeline_test.rs
+++ b/backend/tests/pipeline_test.rs
@@ -709,3 +709,163 @@ async fn preferred_sources_processed_first() {
total_items
);
}
+
+// ── RSS feed integration ─────────────────────────────────────────────
+
+#[tokio::test]
+async fn phase1_rss_feed_extraction_persists_rss_url() {
+ let app = common::TestApp::new().await;
+ let server = MockServer::start().await;
+ let base = server.uri();
+
+ // Blog page served at /blog-rss: the URL the source is registered with (the feed is mocked separately at /feed.xml)
+ Mock::given(method("GET"))
+ .and(path("/blog-rss"))
+ .respond_with(ResponseTemplate::new(200).set_body_string(format!(
+ r#"
+
+ Blog homepage
"#
+ )))
+ .mount(&server)
+ .await;
+
+ // RSS feed served at /feed.xml as application/rss+xml; its five entries reference /article-1 .. /article-5 on the mock server
+ Mock::given(method("GET"))
+ .and(path("/feed.xml"))
+ .respond_with(ResponseTemplate::new(200).set_body_raw(
+ format!(
+ r#"
+
+
+ Test Blog
+ - Article 1{base}/article-1Thu, 03 Apr 2026 10:00:00 GMT
+ - Article 2{base}/article-2Wed, 02 Apr 2026 10:00:00 GMT
+ - Article 3{base}/article-3Tue, 01 Apr 2026 10:00:00 GMT
+ - Article 4{base}/article-4Mon, 31 Mar 2026 10:00:00 GMT
+ - Article 5{base}/article-5Sun, 30 Mar 2026 10:00:00 GMT
+
+"#
+ ),
+ "application/rss+xml",
+ ))
+ .mount(&server)
+ .await;
+
+ // Article pages: one mock response per /article-{i} path referenced by the feed entries
+ for i in 1..=5 {
+ Mock::given(method("GET"))
+ .and(path(format!("/article-{}", i)))
+ .respond_with(ResponseTemplate::new(200).set_body_string(format!(
+ r#"
+ RSS Article {i}
+ This is RSS article {i} about artificial intelligence.
+ "#
+ )))
+ .mount(&server)
+ .await;
+ }
+
+ let (user_id, session, theme_id) = setup_user_with_settings(&app, vec!["AI News"], 4).await;
+
+ // Add a source pointing to the blog page (not the feed directly)
+ let source_url = format!("{}/blog-rss", base);
+ let source = serde_json::json!({
+ "title": "RSS Source",
+ "url": source_url,
+ "theme_id": theme_id.to_string()
+ });
+ let (status, _) = app.post_with_session("/api/v1/sources", &source, &session).await;
+ assert!(status.is_success(), "Source creation should succeed");
+
+ // ── First generation: discovers and uses RSS feed ─────────────────
+ let mock_provider = MockLlmProvider::new()
+ .with_default_category("AI News")
+ .into_arc();
+
+ let job_id = uuid::Uuid::new_v4();
+ let (tx, _rx) = make_progress_channel();
+
+ let state = ai_synth_backend::app_state::AppState::new(
+ app.config.clone(), app.pool.clone(), reqwest::Client::new(),
+ );
+
+ let result = synthesis::run_generation_inner(
+ job_id, &state, user_id, theme_id, &tx, Some(mock_provider), &AtomicBool::new(false),
+ ).await;
+
+ assert!(result.is_ok(), "First generation should succeed: {:?}", result.err());
+
+ // Verify the stored synthesis row has a non-empty sections array
+ let synthesis_id = result.unwrap();
+ let row: (serde_json::Value,) = sqlx::query_as(
+ "SELECT sections FROM syntheses WHERE id = $1"
+ )
+ .bind(synthesis_id)
+ .fetch_one(&app.pool)
+ .await
+ .expect("Synthesis should exist");
+
+ let sections: Vec = serde_json::from_value(row.0).unwrap();
+ assert!(!sections.is_empty(), "Should have at least one section");
+
+ // Verify this job recorded article_history rows with source_type = 'personalized_source'
+ let history_count: (i64,) = sqlx::query_as(
+ "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2 AND source_type = 'personalized_source'"
+ )
+ .bind(user_id)
+ .bind(job_id)
+ .fetch_one(&app.pool)
+ .await
+ .unwrap();
+ assert!(history_count.0 > 0, "Should have personalized_source entries in article history");
+
+ // ── Key assertion: RSS URL was persisted to the source ────────────
+ let rss_row: (Option, Option>,) = sqlx::query_as(
+ "SELECT rss_url, rss_discovered_at FROM sources WHERE user_id = $1 AND url = $2"
+ )
+ .bind(user_id)
+ .bind(&source_url)
+ .fetch_one(&app.pool)
+ .await
+ .expect("Source should exist");
+
+ assert!(rss_row.0.is_some(), "rss_url should be set after generation");
+ assert!(
+ rss_row.0.as_ref().unwrap().contains("/feed.xml"),
+ "rss_url should point to the discovered feed: {:?}",
+ rss_row.0
+ );
+ assert!(rss_row.1.is_some(), "rss_discovered_at should be set");
+
+ // ── Second generation: rerun with rss_url already stored on the source ──
+ let mock_provider2 = MockLlmProvider::new()
+ .with_default_category("AI News")
+ .into_arc();
+
+ let job_id2 = uuid::Uuid::new_v4();
+ let (tx2, _rx2) = make_progress_channel();
+
+ let state2 = ai_synth_backend::app_state::AppState::new(
+ app.config.clone(), app.pool.clone(), reqwest::Client::new(),
+ );
+
+ let result2 = synthesis::run_generation_inner(
+ job_id2, &state2, user_id, theme_id, &tx2, Some(mock_provider2), &AtomicBool::new(false),
+ ).await;
+
+ // Second run may produce empty synthesis (all articles already in history)
+ // but should not error
+ assert!(result2.is_ok(), "Second generation should succeed: {:?}", result2.err());
+
+ // rss_url must still be set on the source row after the second run
+ let rss_row2: (Option,) = sqlx::query_as(
+ "SELECT rss_url FROM sources WHERE user_id = $1 AND url = $2"
+ )
+ .bind(user_id)
+ .bind(&source_url)
+ .fetch_one(&app.pool)
+ .await
+ .unwrap();
+
+ assert!(rss_row2.0.is_some(), "rss_url should still be set after second generation");
+}