@ -199,6 +199,66 @@ pub async fn discover_feed(
None
}
/// Detect and parse an RSS/Atom feed for a source URL.
///
/// Orchestrates the discovery and parsing logic:
/// - If `rss_url` is cached and fresh (< 30 days), parse it directly.
/// - If `rss_url` is cached but stale (>= 30 days), re-discover from `source_url`.
/// - If no `rss_url` cached, attempt discovery from `source_url`.
///
/// Returns `FeedResult::Found` with the feed URL and sorted entries,
/// or `FeedResult::NotFound` if no feed could be found/parsed.
pub async fn detect_and_parse_feed (
http_client : & reqwest ::Client ,
source_url : & str ,
rss_url : Option < & str > ,
rss_discovered_at : Option < DateTime < Utc > > ,
max_links : usize ,
) -> FeedResult {
// Case 1: Cached and fresh — use directly
if let Some ( cached_url ) = rss_url {
let is_fresh = rss_discovered_at
. map ( | d | Utc ::now ( ) . signed_duration_since ( d ) . num_days ( ) < REDISCOVERY_DAYS )
. unwrap_or ( false ) ;
if is_fresh {
match parse_feed ( http_client , cached_url , max_links ) . await {
Ok ( entries ) if ! entries . is_empty ( ) = > {
return FeedResult ::Found {
feed_url : cached_url . to_string ( ) ,
entries ,
} ;
}
_ = > {
tracing ::warn ! ( url = cached_url , "Cached feed failed to parse, attempting re-discovery" ) ;
}
}
}
}
// Case 2: No cache or stale — discover
let discovered = discover_feed ( http_client , source_url ) . await ;
if let Some ( feed_url ) = discovered {
match parse_feed ( http_client , & feed_url , max_links ) . await {
Ok ( entries ) if ! entries . is_empty ( ) = > {
return FeedResult ::Found {
feed_url ,
entries ,
} ;
}
Ok ( _ ) = > {
tracing ::info ! ( url = feed_url , "Discovered feed is empty" ) ;
}
Err ( e ) = > {
tracing ::warn ! ( url = feed_url , error = % e , "Discovered feed failed to parse" ) ;
}
}
}
FeedResult ::NotFound
}
#[ cfg(test) ]
mod tests {
use super ::* ;
@ -450,4 +510,158 @@ mod tests {
assert! ( feed_url . starts_with ( & server . uri ( ) ) ) ;
assert! ( feed_url . ends_with ( "/feed.xml" ) ) ;
}
// Cached + fresh path: a cached feed URL whose discovery timestamp is "now"
// must be parsed directly.
//
// Setup: the mock server answers every GET (no path matcher) with an RSS 2.0
// document containing three <item> elements. The cached `rss_url` points at
// the mock server, while `source_url` is an unrelated address; because the
// cached feed parses to a non-empty list, `detect_and_parse_feed` returns
// before reaching the discovery step.
//
// Expectation: `FeedResult::Found` carrying exactly three entries.
#[ tokio::test ]
async fn detect_and_parse_cached_fresh_feed ( ) {
let server = MockServer ::start ( ) . await ;
let rss_body = r#"<?xml version="1.0" encoding="UTF-8" ? >
< rss version = "2.0" > < channel > < title > T < / title >
< item > < title > A1 < / title > < link > https ://example.com/1</link><pubDate>Thu, 03 Apr 2026 10:00:00 GMT</pubDate></item>
< item > < title > A2 < / title > < link > https ://example.com/2</link><pubDate>Wed, 02 Apr 2026 10:00:00 GMT</pubDate></item>
< item > < title > A3 < / title > < link > https ://example.com/3</link><pubDate>Tue, 01 Apr 2026 10:00:00 GMT</pubDate></item>
< / channel > < / rss > " #;
Mock ::given ( method ( "GET" ) )
. respond_with ( ResponseTemplate ::new ( 200 ) . set_body_raw ( rss_body , "application/rss+xml" ) )
. mount ( & server )
. await ;
let client = reqwest ::Client ::new ( ) ;
let result = detect_and_parse_feed (
& client ,
"https://example.com" ,
Some ( & server . uri ( ) ) ,
Some ( Utc ::now ( ) ) , // fresh
10 ,
) . await ;
match result {
FeedResult ::Found { entries , .. } = > assert_eq! ( entries . len ( ) , 3 ) ,
FeedResult ::NotFound = > panic! ( "Expected Found" ) ,
}
}
// No-cache path: with neither a cached feed URL nor a discovery timestamp,
// the feed must be discovered from the source page and then parsed.
//
// Setup: the mock server serves two routes.
//   - "/"         -> an HTML page whose <link rel="alternate"> tag advertises
//                    the feed at "<server>/feed.xml".
//   - "/feed.xml" -> an RSS 2.0 document with three <item> elements.
// `source_url` is the mock server's root, and both cache arguments are `None`.
//
// Expectation: `FeedResult::Found` whose `feed_url` contains "/feed.xml" and
// which carries exactly three entries.
#[ tokio::test ]
async fn detect_and_parse_no_cache_discovers_feed ( ) {
let server = MockServer ::start ( ) . await ;
// Absolute feed URL that the HTML source page advertises in its feed link.
let feed_path = format! ( "{}/feed.xml" , server . uri ( ) ) ;
let html = format! (
r #" < html > < head >
< link rel = "alternate" type = "application/rss+xml" href = "{}" >
< / head > < body > < / body > < / html > " #,
feed_path
) ;
let rss_body = r#"<?xml version="1.0" encoding="UTF-8" ? >
< rss version = "2.0" > < channel > < title > T < / title >
< item > < title > A1 < / title > < link > https ://example.com/1</link><pubDate>Thu, 03 Apr 2026 10:00:00 GMT</pubDate></item>
< item > < title > A2 < / title > < link > https ://example.com/2</link><pubDate>Wed, 02 Apr 2026 10:00:00 GMT</pubDate></item>
< item > < title > A3 < / title > < link > https ://example.com/3</link><pubDate>Tue, 01 Apr 2026 10:00:00 GMT</pubDate></item>
< / channel > < / rss > " #;
// Mock: source page returns HTML
Mock ::given ( method ( "GET" ) )
. and ( wiremock ::matchers ::path ( "/" ) )
. respond_with ( ResponseTemplate ::new ( 200 ) . set_body_string ( html ) )
. mount ( & server )
. await ;
// Mock: feed URL returns RSS
Mock ::given ( method ( "GET" ) )
. and ( wiremock ::matchers ::path ( "/feed.xml" ) )
. respond_with ( ResponseTemplate ::new ( 200 ) . set_body_raw ( rss_body , "application/rss+xml" ) )
. mount ( & server )
. await ;
let client = reqwest ::Client ::new ( ) ;
let result = detect_and_parse_feed (
& client ,
& server . uri ( ) ,
None , // no cache
None ,
10 ,
) . await ;
match result {
FeedResult ::Found { feed_url , entries } = > {
assert! ( feed_url . contains ( "/feed.xml" ) ) ;
assert_eq! ( entries . len ( ) , 3 ) ;
}
FeedResult ::NotFound = > panic! ( "Expected Found" ) ,
}
}
// Negative path: nothing is cached and the source page advertises no feed,
// so detection must end in `FeedResult::NotFound`.
#[tokio::test]
async fn detect_and_parse_no_feed_returns_not_found() {
    let mock_server = MockServer::start().await;

    // Every GET, whatever the path, receives a plain HTML page without a feed link.
    let plain_page = ResponseTemplate::new(200)
        .set_body_string("<html><head><title>No feed</title></head><body></body></html>");
    Mock::given(method("GET"))
        .respond_with(plain_page)
        .mount(&mock_server)
        .await;

    let http = reqwest::Client::new();
    let base_url = mock_server.uri();

    // No cached feed URL and no discovery timestamp: goes straight to discovery.
    let outcome = detect_and_parse_feed(&http, &base_url, None, None, 10).await;

    assert!(matches!(outcome, FeedResult::NotFound));
}
// Stale-cache path: a cached feed URL discovered 31 days ago must be ignored
// in favour of re-discovery from the source page.
//
// Setup: the mock server serves two routes.
//   - "/"         -> an HTML page whose <link rel="alternate"> tag advertises
//                    the feed at "<server>/feed.xml".
//   - "/feed.xml" -> an RSS 2.0 document with three <item> elements.
// The cached `rss_url` is "https://old-feed.example.com/rss" with a discovery
// timestamp 31 days in the past. `detect_and_parse_feed` only parses the
// cached URL when it is fresh, so the test relies on that URL being skipped.
//
// Expectation: `FeedResult::Found` whose `feed_url` contains "/feed.xml"
// (the newly discovered feed, not the cached one) with exactly three entries.
#[ tokio::test ]
async fn detect_and_parse_stale_cache_rediscovers ( ) {
let server = MockServer ::start ( ) . await ;
// Absolute feed URL that the HTML source page advertises in its feed link.
let feed_path = format! ( "{}/feed.xml" , server . uri ( ) ) ;
let html = format! (
r #" < html > < head >
< link rel = "alternate" type = "application/rss+xml" href = "{}" >
< / head > < body > < / body > < / html > " #,
feed_path
) ;
let rss_body = r#"<?xml version="1.0" encoding="UTF-8" ? >
< rss version = "2.0" > < channel > < title > T < / title >
< item > < title > A1 < / title > < link > https ://example.com/1</link><pubDate>Thu, 03 Apr 2026 10:00:00 GMT</pubDate></item>
< item > < title > A2 < / title > < link > https ://example.com/2</link><pubDate>Wed, 02 Apr 2026 10:00:00 GMT</pubDate></item>
< item > < title > A3 < / title > < link > https ://example.com/3</link><pubDate>Tue, 01 Apr 2026 10:00:00 GMT</pubDate></item>
< / channel > < / rss > " #;
Mock ::given ( method ( "GET" ) )
. and ( wiremock ::matchers ::path ( "/" ) )
. respond_with ( ResponseTemplate ::new ( 200 ) . set_body_string ( html ) )
. mount ( & server )
. await ;
Mock ::given ( method ( "GET" ) )
. and ( wiremock ::matchers ::path ( "/feed.xml" ) )
. respond_with ( ResponseTemplate ::new ( 200 ) . set_body_raw ( rss_body , "application/rss+xml" ) )
. mount ( & server )
. await ;
let client = reqwest ::Client ::new ( ) ;
let stale_date = Utc ::now ( ) - chrono ::Duration ::days ( 31 ) ;
let result = detect_and_parse_feed (
& client ,
& server . uri ( ) ,
Some ( "https://old-feed.example.com/rss" ) , // stale cached URL
Some ( stale_date ) ,
10 ,
) . await ;
match result {
FeedResult ::Found { feed_url , entries } = > {
assert! ( feed_url . contains ( "/feed.xml" ) , "Should discover new feed URL" ) ;
assert_eq! ( entries . len ( ) , 3 ) ;
}
FeedResult ::NotFound = > panic! ( "Expected Found after re-discovery" ) ,
}
}
}