@ -281,32 +281,79 @@ async fn run_generation_inner(
let last_source = db ::article_history ::get_last_source_url ( & state . pool , user_id ) . await . unwrap_or ( None ) ;
let last_source = db ::article_history ::get_last_source_url ( & state . pool , user_id ) . await . unwrap_or ( None ) ;
let rotated_sources = rotate_sources ( sources . clone ( ) , last_source . as_deref ( ) ) ;
let rotated_sources = rotate_sources ( sources . clone ( ) , last_source . as_deref ( ) ) ;
let max_sources = rotated_sources . len ( ) . min ( 10 ) ;
let max_links = 15 usize ;
let max_links = 10 usize ;
// 1a. Extract article links + filter against history
// 1a. Extract article links from source pages (parallel, max 5 concurrent)
let mut candidate_urls : Vec < ( String , String ) > = Vec ::new ( ) ; // (article_url, source_url)
let mut candidate_urls : Vec < ( String , String ) > = Vec ::new ( ) ;
{
for source in rotated_sources . iter ( ) . take ( max_sources ) {
let mut join_set = tokio ::task ::JoinSet ::new ( ) ;
let links = if settings . use_llm_for_source_links {
let mut pending = rotated_sources . iter ( ) . peekable ( ) ;
let max_concurrent = 5 ;
// Seed initial tasks
for _ in 0 .. max_concurrent {
if let Some ( source ) = pending . next ( ) {
let client = state . http_client . clone ( ) ;
let source_url = source . url . clone ( ) ;
let source_title = source . title . clone ( ) ;
let use_llm = settings . use_llm_for_source_links ;
let provider_clone = std ::sync ::Arc ::clone ( & provider ) ;
let model = model_research . clone ( ) ;
let max_l = max_links ;
join_set . spawn ( async move {
let links = if use_llm {
source_scraper ::extract_article_links_with_llm (
source_scraper ::extract_article_links_with_llm (
& state . http_client , & source . url , max_links , & provider , & model_research ,
& client, & source _url, max_l , & provider_clone , & model ,
) . await
) . await
} else {
} else {
source_scraper ::extract_article_links (
source_scraper ::extract_article_links (
& state . http_client , & source . url , max_links ,
& client, & source _url, max_l ,
) . await
) . await
} ;
} ;
( source_url , source_title , links )
} ) ;
}
}
if let Ok ( links ) = links {
while let Some ( join_result ) = join_set . join_next ( ) . await {
tracing ::info ! ( source = % source . title , links = links . len ( ) , "Extracted links from source" ) ;
if let Ok ( ( source_url , source_title , links_result ) ) = join_result {
match links_result {
Ok ( links ) = > {
tracing ::info ! ( source = % source_title , links = links . len ( ) , "Extracted links from source" ) ;
for link in links {
for link in links {
if seen_urls . insert ( link . to_lowercase ( ) ) {
if seen_urls . insert ( link . to_lowercase ( ) ) {
candidate_urls . push ( ( link , source . url . clone ( ) ) ) ;
candidate_urls . push ( ( link , source_url . clone ( ) ) ) ;
}
}
}
Err ( e ) = > {
tracing ::warn ! ( source = % source_title , error = % e , "Failed to extract links" ) ;
}
}
}
}
}
} else if let Err ( e ) = links {
tracing ::warn ! ( source = % source . title , error = % e , "Failed to extract links" ) ;
// Spawn next task
if let Some ( source ) = pending . next ( ) {
let client = state . http_client . clone ( ) ;
let source_url = source . url . clone ( ) ;
let source_title = source . title . clone ( ) ;
let use_llm = settings . use_llm_for_source_links ;
let provider_clone = std ::sync ::Arc ::clone ( & provider ) ;
let model = model_research . clone ( ) ;
let max_l = max_links ;
join_set . spawn ( async move {
let links = if use_llm {
source_scraper ::extract_article_links_with_llm (
& client , & source_url , max_l , & provider_clone , & model ,
) . await
} else {
source_scraper ::extract_article_links (
& client , & source_url , max_l ,
) . await
} ;
( source_url , source_title , links )
} ) ;
}
}
}
}
}
@ -324,6 +371,10 @@ async fn run_generation_inner(
}
}
}
}
// Shuffle candidates to interleave articles from different sources
use rand ::seq ::SliceRandom ;
candidate_urls . shuffle ( & mut rand ::thread_rng ( ) ) ;
// Track url -> source
// Track url -> source
for ( url , source_url ) in & candidate_urls {
for ( url , source_url ) in & candidate_urls {
url_source . insert ( url . clone ( ) , source_url . clone ( ) ) ;
url_source . insert ( url . clone ( ) , source_url . clone ( ) ) ;