diff --git a/backend/src/services/feed_parser.rs b/backend/src/services/feed_parser.rs
index e6d9943..6234a26 100644
--- a/backend/src/services/feed_parser.rs
+++ b/backend/src/services/feed_parser.rs
@@ -684,4 +684,84 @@ mod tests {
             FeedResult::NotFound => panic!("Expected Found after re-discovery"),
         }
     }
+
+    /// End-to-end check of the two outcomes the ingestion pipeline relies on.
+    ///
+    /// 1. A source with a fresh cached feed URL is parsed straight from that
+    ///    URL, yielding every item ordered newest first.
+    /// 2. A source that serves only HTML yields `FeedResult::NotFound`, which
+    ///    is the signal for the caller to fall back to HTML scraping.
+    #[tokio::test]
+    async fn full_flow_rss_first_with_html_fallback() {
+        skip_ssrf_for_test();
+
+        // Source 1: has an RSS feed with 5 articles. Each weekday name matches
+        // its calendar date, because a strict RFC 2822 parser may reject a
+        // pubDate whose day-of-week disagrees with the date.
+        let server1 = MockServer::start().await;
+        let rss_body = r#"<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0"><channel><title>Blog</title>
+  <item><title>A1</title><link>https://blog.example.com/1</link><pubDate>Fri, 03 Apr 2026 10:00:00 GMT</pubDate></item>
+  <item><title>A2</title><link>https://blog.example.com/2</link><pubDate>Thu, 02 Apr 2026 10:00:00 GMT</pubDate></item>
+  <item><title>A3</title><link>https://blog.example.com/3</link><pubDate>Wed, 01 Apr 2026 10:00:00 GMT</pubDate></item>
+  <item><title>A4</title><link>https://blog.example.com/4</link><pubDate>Tue, 31 Mar 2026 10:00:00 GMT</pubDate></item>
+  <item><title>A5</title><link>https://blog.example.com/5</link><pubDate>Mon, 30 Mar 2026 10:00:00 GMT</pubDate></item>
+</channel></rss>"#;
+
+        Mock::given(method("GET"))
+            .respond_with(ResponseTemplate::new(200).set_body_raw(rss_body, "application/rss+xml"))
+            .mount(&server1)
+            .await;
+
+        let client = reqwest::Client::new();
+
+        // With cached RSS URL (fresh) — should use RSS directly
+        let result = detect_and_parse_feed(
+            &client,
+            "https://blog.example.com",
+            Some(&server1.uri()),
+            Some(Utc::now()),
+            10,
+        )
+        .await;
+
+        match result {
+            FeedResult::Found { entries, .. } => {
+                assert_eq!(entries.len(), 5);
+
+                // Require every date to be present: comparing only the pairs
+                // that happen to have dates would let the ordering check pass
+                // vacuously if date parsing broke.
+                let dates: Vec<_> = entries
+                    .iter()
+                    .map(|entry| entry.published_date.as_ref())
+                    .collect();
+                assert!(
+                    dates.iter().all(|date| date.is_some()),
+                    "Every entry should carry a parsed published date"
+                );
+                assert!(
+                    dates.windows(2).all(|pair| pair[0] >= pair[1]),
+                    "Entries should be sorted newest first"
+                );
+            }
+            FeedResult::NotFound => panic!("Expected Found"),
+        }
+
+        // Source 2: no RSS feed, only HTML — should return NotFound
+        let server2 = MockServer::start().await;
+        let html = r#"<html><head><title>No feed</title></head><body>
+            <a href="/article-1">Article 1</a>
+        </body></html>"#;
+
+        Mock::given(method("GET"))
+            .respond_with(ResponseTemplate::new(200).set_body_string(html))
+            .mount(&server2)
+            .await;
+
+        let result = detect_and_parse_feed(&client, &server2.uri(), None, None, 10).await;
+
+        // No feed found — pipeline would fall back to source_scraper
+        assert!(matches!(result, FeedResult::NotFound));
+    }
 }