@ -375,7 +375,11 @@ async fn run_generation_inner(
. await ? ;
emit_progress ( tx , "finalizing" , "Finalisation..." , 90 ) ;
let final_sections = build_final_sections ( & final_results , & settings . categories ) ? ;
let mut final_sections = build_final_sections ( & final_results , & settings . categories ) ? ;
// Restore validated URLs from scraped data — the LLM rewrite pass may
// hallucinate different URLs despite being told to preserve them.
restore_scraped_urls ( & mut final_sections , & scraped , & settings . categories ) ;
// Step 12: Save synthesis to DB
emit_progress ( tx , "saving" , "Sauvegarde de la synthese..." , 95 ) ;
@ -884,6 +888,37 @@ fn build_final_sections(
Ok ( sections )
}
/// Restore validated URLs from scraped data into the final sections.
///
/// The LLM rewrite pass may hallucinate different URLs despite being
/// instructed to preserve them. This function replaces each article's URL
/// with the original scraped URL by matching on position (category index,
/// item index within category).
fn restore_scraped_urls (
sections : & mut [ NewsSection ] ,
scraped : & std ::collections ::HashMap < String , Vec < ScrapedNewsItem > > ,
categories : & [ String ] ,
) {
for ( i , section ) in sections . iter_mut ( ) . enumerate ( ) {
let key = format! ( "category_{}" , i ) ;
if let Some ( scraped_items ) = scraped . get ( & key ) {
for ( j , item ) in section . items . iter_mut ( ) . enumerate ( ) {
if let Some ( scraped_item ) = scraped_items . get ( j ) {
if item . url ! = scraped_item . url {
tracing ::debug ! (
category = % categories . get ( i ) . unwrap_or ( & key ) ,
original = % scraped_item . url ,
hallucinated = % item . url ,
"Restored hallucinated URL to scraped original"
) ;
item . url = scraped_item . url . clone ( ) ;
}
}
}
}
}
}
/// Minimum ratio of valid URLs required to skip the scrape+rewrite pass and
/// use the search pass results directly.
///
/// A URL counts as valid when it starts with `http`. The value is a fraction,
/// so `0.70` corresponds to 70% of the URLs.
///
/// NOTE(review): whether a ratio exactly equal to the threshold qualifies
/// depends on the comparison at the use site, which is not visible here.
const URL_QUALITY_THRESHOLD : f64 = 0.70 ;