test: add end-to-end RSS flow test for feed_parser

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
master
oabrivard 2 months ago
parent 027c576302
commit 7e1ab0996b

@@ -684,4 +684,71 @@ mod tests {
FeedResult::NotFound => panic!("Expected Found after re-discovery"),
}
}
/// End-to-end check of `detect_and_parse_feed` against two mock sources.
///
/// 1. A server that answers every GET with a five-item RSS document, passed in
///    as an already-known feed URL: the call must yield `FeedResult::Found`
///    with all five entries ordered newest first.
/// 2. A server that answers every GET with plain HTML and no feed: the call
///    must yield `FeedResult::NotFound`.
#[tokio::test]
async fn full_flow_rss_first_with_html_fallback() {
    // NOTE(review): presumably relaxes the SSRF guard so the loopback mock
    // servers are reachable — confirm against the helper's definition.
    skip_ssrf_for_test();

    // Source 1: has an RSS feed with 5 articles.
    // The weekday names must agree with the calendar dates (03 Apr 2026 is a
    // Friday); a parser that validates RFC 2822 weekdays rejects mismatches.
    let rss_server = MockServer::start().await;
    let rss_body = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel><title>Blog</title>
<item><title>A1</title><link>https://blog.example.com/1</link><pubDate>Fri, 03 Apr 2026 10:00:00 GMT</pubDate></item>
<item><title>A2</title><link>https://blog.example.com/2</link><pubDate>Thu, 02 Apr 2026 10:00:00 GMT</pubDate></item>
<item><title>A3</title><link>https://blog.example.com/3</link><pubDate>Wed, 01 Apr 2026 10:00:00 GMT</pubDate></item>
<item><title>A4</title><link>https://blog.example.com/4</link><pubDate>Tue, 31 Mar 2026 10:00:00 GMT</pubDate></item>
<item><title>A5</title><link>https://blog.example.com/5</link><pubDate>Mon, 30 Mar 2026 10:00:00 GMT</pubDate></item>
</channel></rss>"#;
    // No path matcher: every GET to this server returns the RSS document.
    Mock::given(method("GET"))
        .respond_with(ResponseTemplate::new(200).set_body_raw(rss_body, "application/rss+xml"))
        .mount(&rss_server)
        .await;

    let client = reqwest::Client::new();

    // With cached RSS URL (fresh) — should use RSS directly.
    // NOTE(review): the trailing `10` looks like an entry limit — confirm
    // against the signature of `detect_and_parse_feed`.
    let result = detect_and_parse_feed(
        &client,
        "https://blog.example.com",
        Some(&rss_server.uri()),
        Some(Utc::now()),
        10,
    )
    .await;

    match result {
        FeedResult::Found { entries, .. } => {
            assert_eq!(entries.len(), 5);

            // Every fixture item carries a pubDate, so a missing date means
            // date parsing failed; fail loudly instead of letting the
            // ordering check pass vacuously.
            let dates: Vec<_> = entries
                .iter()
                .filter_map(|entry| entry.published_date.as_ref())
                .collect();
            assert_eq!(
                dates.len(),
                entries.len(),
                "Every entry should have a published date"
            );

            // Verify sorted newest first by comparing each adjacent pair.
            assert!(
                dates.windows(2).all(|pair| pair[0] >= pair[1]),
                "Entries should be sorted newest first"
            );
        }
        FeedResult::NotFound => panic!("Expected Found"),
    }

    // Source 2: no RSS feed, only HTML — should return NotFound.
    let html_server = MockServer::start().await;
    let html = r#"<html><head><title>No feed</title></head><body>
<a href="/article-1">Article 1</a>
</body></html>"#;
    // Every GET (including any feed-discovery probes) receives this HTML page.
    Mock::given(method("GET"))
        .respond_with(ResponseTemplate::new(200).set_body_string(html))
        .mount(&html_server)
        .await;

    let result = detect_and_parse_feed(&client, &html_server.uri(), None, None, 10).await;

    // No feed found — pipeline would fall back to source_scraper.
    assert!(matches!(result, FeedResult::NotFound));
}
}

Loading…
Cancel
Save