diff --git a/backend/tests/pipeline_test.rs b/backend/tests/pipeline_test.rs index a6ed0a1..46d4988 100644 --- a/backend/tests/pipeline_test.rs +++ b/backend/tests/pipeline_test.rs @@ -709,3 +709,163 @@ async fn preferred_sources_processed_first() { total_items ); } + +// ── RSS feed integration ───────────────────────────────────────────── + +/** Integration test: registers a source whose URL is the mock `/blog-rss` page, runs `synthesis::run_generation_inner` twice, and checks that the matching `sources` row has an `rss_url` containing `/feed.xml` and a non-null `rss_discovered_at` after the first run, and that `rss_url` is still set after the second run. Mock endpoints mounted: `/blog-rss`, `/feed.xml` (served as `application/rss+xml`, referencing `{base}/article-1` .. `{base}/article-5`) and `/article-1` .. `/article-5`. NOTE(review): in this copy of the patch the raw-string bodies show no markup and some generic arguments are missing (`Vec =`, `(Option, Option>,)`) - presumably stripped during extraction; confirm against the real file before applying. */ #[tokio::test] +async fn phase1_rss_feed_extraction_persists_rss_url() { + let app = common::TestApp::new().await; + let server = MockServer::start().await; + let base = server.uri(); + + // Source page with RSS link discovery + Mock::given(method("GET")) + .and(path("/blog-rss")) + .respond_with(ResponseTemplate::new(200).set_body_string(format!( + r#" + +

Blog homepage

"# + ))) + .mount(&server) + .await; + + // RSS feed with 5 articles + Mock::given(method("GET")) + .and(path("/feed.xml")) + .respond_with(ResponseTemplate::new(200).set_body_raw( + format!( + r#" + + + Test Blog + Article 1{base}/article-1Thu, 03 Apr 2026 10:00:00 GMT + Article 2{base}/article-2Wed, 02 Apr 2026 10:00:00 GMT + Article 3{base}/article-3Tue, 01 Apr 2026 10:00:00 GMT + Article 4{base}/article-4Mon, 31 Mar 2026 10:00:00 GMT + Article 5{base}/article-5Sun, 30 Mar 2026 10:00:00 GMT + +"# + ), + "application/rss+xml", + )) + .mount(&server) + .await; + + // Article pages + for i in 1..=5 { + Mock::given(method("GET")) + .and(path(format!("/article-{}", i))) + .respond_with(ResponseTemplate::new(200).set_body_string(format!( + r#" + RSS Article {i} +

This is RSS article {i} about artificial intelligence.

+ "# + ))) + .mount(&server) + .await; + } + + let (user_id, session, theme_id) = setup_user_with_settings(&app, vec!["AI News"], 4).await; + + // Add a source pointing to the blog page (not the feed directly) + let source_url = format!("{}/blog-rss", base); + let source = serde_json::json!({ + "title": "RSS Source", + "url": source_url, + "theme_id": theme_id.to_string() + }); + let (status, _) = app.post_with_session("/api/v1/sources", &source, &session).await; + assert!(status.is_success(), "Source creation should succeed"); + + // ── First generation: discovers and uses RSS feed ───────────────── + let mock_provider = MockLlmProvider::new() + .with_default_category("AI News") + .into_arc(); + + let job_id = uuid::Uuid::new_v4(); + let (tx, _rx) = make_progress_channel(); + + let state = ai_synth_backend::app_state::AppState::new( + app.config.clone(), app.pool.clone(), reqwest::Client::new(), + ); + + let result = synthesis::run_generation_inner( + job_id, &state, user_id, theme_id, &tx, Some(mock_provider), &AtomicBool::new(false), + ).await; + + assert!(result.is_ok(), "First generation should succeed: {:?}", result.err()); + + // Verify synthesis has articles + let synthesis_id = result.unwrap(); + let row: (serde_json::Value,) = sqlx::query_as( + "SELECT sections FROM syntheses WHERE id = $1" + ) + .bind(synthesis_id) + .fetch_one(&app.pool) + .await + .expect("Synthesis should exist"); + + let sections: Vec = serde_json::from_value(row.0).unwrap(); + assert!(!sections.is_empty(), "Should have at least one section"); + + // Verify article history has personalized_source entries + let history_count: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2 AND source_type = 'personalized_source'" + ) + .bind(user_id) + .bind(job_id) + .fetch_one(&app.pool) + .await + .unwrap(); + assert!(history_count.0 > 0, "Should have personalized_source entries in article history"); + + // ── Key assertion: RSS URL was persisted 
to the source ──────────── + let rss_row: (Option, Option>,) = sqlx::query_as( + "SELECT rss_url, rss_discovered_at FROM sources WHERE user_id = $1 AND url = $2" + ) + .bind(user_id) + .bind(&source_url) + .fetch_one(&app.pool) + .await + .expect("Source should exist"); + + assert!(rss_row.0.is_some(), "rss_url should be set after generation"); + assert!( + rss_row.0.as_ref().unwrap().contains("/feed.xml"), + "rss_url should point to the discovered feed: {:?}", + rss_row.0 + ); + assert!(rss_row.1.is_some(), "rss_discovered_at should be set"); + + // ── Second generation: uses cached RSS URL ─────────────────────── + let mock_provider2 = MockLlmProvider::new() + .with_default_category("AI News") + .into_arc(); + + let job_id2 = uuid::Uuid::new_v4(); + let (tx2, _rx2) = make_progress_channel(); + + let state2 = ai_synth_backend::app_state::AppState::new( + app.config.clone(), app.pool.clone(), reqwest::Client::new(), + ); + + let result2 = synthesis::run_generation_inner( + job_id2, &state2, user_id, theme_id, &tx2, Some(mock_provider2), &AtomicBool::new(false), + ).await; + + // Second run may produce empty synthesis (all articles already in history) + // but should not error + assert!(result2.is_ok(), "Second generation should succeed: {:?}", result2.err()); + + // RSS URL should still be persisted + let rss_row2: (Option,) = sqlx::query_as( + "SELECT rss_url FROM sources WHERE user_id = $1 AND url = $2" + ) + .bind(user_id) + .bind(&source_url) + .fetch_one(&app.pool) + .await + .unwrap(); + + /* NOTE(review): the checks after the second run are only that it returned Ok and that rss_url is still non-null; nothing in this test asserts that the second run actually read the stored rss_url rather than discovering the feed again. */ assert!(rss_row2.0.is_some(), "rss_url should still be set after second generation"); +}