|
|
|
@ -684,4 +684,71 @@ mod tests {
|
|
|
|
FeedResult::NotFound => panic!("Expected Found after re-discovery"),
|
|
|
|
FeedResult::NotFound => panic!("Expected Found after re-discovery"),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
|
|
|
async fn full_flow_rss_first_with_html_fallback() {
|
|
|
|
|
|
|
|
skip_ssrf_for_test();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Source 1: has an RSS feed with 5 articles
|
|
|
|
|
|
|
|
let server1 = MockServer::start().await;
|
|
|
|
|
|
|
|
let rss_body = r#"<?xml version="1.0" encoding="UTF-8"?>
|
|
|
|
|
|
|
|
<rss version="2.0"><channel><title>Blog</title>
|
|
|
|
|
|
|
|
<item><title>A1</title><link>https://blog.example.com/1</link><pubDate>Thu, 03 Apr 2026 10:00:00 GMT</pubDate></item>
|
|
|
|
|
|
|
|
<item><title>A2</title><link>https://blog.example.com/2</link><pubDate>Wed, 02 Apr 2026 10:00:00 GMT</pubDate></item>
|
|
|
|
|
|
|
|
<item><title>A3</title><link>https://blog.example.com/3</link><pubDate>Tue, 01 Apr 2026 10:00:00 GMT</pubDate></item>
|
|
|
|
|
|
|
|
<item><title>A4</title><link>https://blog.example.com/4</link><pubDate>Mon, 31 Mar 2026 10:00:00 GMT</pubDate></item>
|
|
|
|
|
|
|
|
<item><title>A5</title><link>https://blog.example.com/5</link><pubDate>Sun, 30 Mar 2026 10:00:00 GMT</pubDate></item>
|
|
|
|
|
|
|
|
</channel></rss>"#;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Mock::given(method("GET"))
|
|
|
|
|
|
|
|
.respond_with(ResponseTemplate::new(200).set_body_raw(rss_body, "application/rss+xml"))
|
|
|
|
|
|
|
|
.mount(&server1)
|
|
|
|
|
|
|
|
.await;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let client = reqwest::Client::new();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// With cached RSS URL (fresh) — should use RSS directly
|
|
|
|
|
|
|
|
let result = detect_and_parse_feed(
|
|
|
|
|
|
|
|
&client,
|
|
|
|
|
|
|
|
"https://blog.example.com",
|
|
|
|
|
|
|
|
Some(&server1.uri()),
|
|
|
|
|
|
|
|
Some(Utc::now()),
|
|
|
|
|
|
|
|
10,
|
|
|
|
|
|
|
|
).await;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
match result {
|
|
|
|
|
|
|
|
FeedResult::Found { entries, .. } => {
|
|
|
|
|
|
|
|
assert_eq!(entries.len(), 5);
|
|
|
|
|
|
|
|
// Verify sorted newest first
|
|
|
|
|
|
|
|
for i in 0..entries.len() - 1 {
|
|
|
|
|
|
|
|
if let (Some(a), Some(b)) = (&entries[i].published_date, &entries[i + 1].published_date) {
|
|
|
|
|
|
|
|
assert!(a >= b, "Entries should be sorted newest first");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
FeedResult::NotFound => panic!("Expected Found"),
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Source 2: no RSS feed, only HTML — should return NotFound
|
|
|
|
|
|
|
|
let server2 = MockServer::start().await;
|
|
|
|
|
|
|
|
let html = r#"<html><head><title>No feed</title></head><body>
|
|
|
|
|
|
|
|
<a href="/article-1">Article 1</a>
|
|
|
|
|
|
|
|
</body></html>"#;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Mock::given(method("GET"))
|
|
|
|
|
|
|
|
.respond_with(ResponseTemplate::new(200).set_body_string(html))
|
|
|
|
|
|
|
|
.mount(&server2)
|
|
|
|
|
|
|
|
.await;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let result = detect_and_parse_feed(
|
|
|
|
|
|
|
|
&client,
|
|
|
|
|
|
|
|
&server2.uri(),
|
|
|
|
|
|
|
|
None,
|
|
|
|
|
|
|
|
None,
|
|
|
|
|
|
|
|
10,
|
|
|
|
|
|
|
|
).await;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// No feed found — pipeline would fall back to source_scraper
|
|
|
|
|
|
|
|
assert!(matches!(result, FeedResult::NotFound));
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|