@ -316,6 +316,8 @@ async fn run_generation_inner(
let mut filled_counts : HashMap < String , usize > = HashMap ::new ( ) ;
let mut filled_counts : HashMap < String , usize > = HashMap ::new ( ) ;
// Combined scraped articles keyed by category
// Combined scraped articles keyed by category
let mut all_scraped : HashMap < String , Vec < ScrapedNewsItem > > = HashMap ::new ( ) ;
let mut all_scraped : HashMap < String , Vec < ScrapedNewsItem > > = HashMap ::new ( ) ;
// Overflow articles that didn't fit any category (used for fill-up)
let mut all_overflow : Vec < ScrapedNewsItem > = Vec ::new ( ) ;
// Track all URLs seen (for cross-phase dedup)
// Track all URLs seen (for cross-phase dedup)
let mut seen_urls : std ::collections ::HashSet < String > = std ::collections ::HashSet ::new ( ) ;
let mut seen_urls : std ::collections ::HashSet < String > = std ::collections ::HashSet ::new ( ) ;
@ -499,6 +501,8 @@ async fn run_generation_inner(
& mut filled_counts ,
& mut filled_counts ,
) ;
) ;
all_overflow . extend ( phase1_overflow ) ;
// Merge into all_scraped and track URLs
// Merge into all_scraped and track URLs
for ( cat_key , items ) in phase1_classified {
for ( cat_key , items ) in phase1_classified {
for item in & items {
for item in & items {
@ -706,6 +710,8 @@ async fn run_generation_inner(
& mut filled_counts ,
& mut filled_counts ,
) ;
) ;
all_overflow . extend ( phase2_overflow ) ;
// Merge Phase 2 into all_scraped
// Merge Phase 2 into all_scraped
for ( cat_key , items ) in phase2_classified {
for ( cat_key , items ) in phase2_classified {
for item in & items {
for item in & items {
@ -719,6 +725,59 @@ async fn run_generation_inner(
// ═══════════════════════════════════════════════════════════════
// ═══════════════════════════════════════════════════════════════
// COMBINED REWRITE PASS
// COMBINED REWRITE PASS
// ═══════════════════════════════════════════════════════════════
// ═══════════════════════════════════════════════════════════════
// Fill-up: when the combined article count is below
// SYNTHESIS_MIN_FILL_RATIO x (number of categories x max items per category),
// top up the "Autre" bucket with overflow articles that no category had room for.
let total_articles: usize = all_scraped.values().map(|v| v.len()).sum();
let max_articles = settings.categories.len() * settings.max_items_per_category as usize;
let target = (SYNTHESIS_MIN_FILL_RATIO * max_articles as f64).ceil() as usize;
// saturating_sub: an already well-filled synthesis yields a shortfall of 0.
let shortfall = target.saturating_sub(total_articles);
if shortfall > 0 && !all_overflow.is_empty() {
    tracing::info!(
        total = total_articles,
        target = target,
        shortfall = shortfall,
        overflow_available = all_overflow.len(),
        "Synthesis under-filled, adding overflow to Autre"
    );

    // Single pass over everything already selected:
    //  - per-domain tallies, for source diversity enforcement;
    //  - the set of URLs already present, so the fill-up never re-adds an
    //    article that is already in a category (or twice from overflow).
    let mut domain_counts: HashMap<String, usize> = HashMap::new();
    let mut present_urls: std::collections::HashSet<String> =
        std::collections::HashSet::new();
    for item in all_scraped.values().flatten() {
        present_urls.insert(item.url.clone());
        if let Some(domain) = extract_domain(&item.url) {
            *domain_counts.entry(domain).or_insert(0) += 1;
        }
    }

    let max_per_source = settings.max_articles_per_source as usize;

    // Collect accepted articles first so the "Autre" entry is only created
    // (and its key only allocated) when there is something to add.
    let mut accepted: Vec<ScrapedNewsItem> = Vec::new();
    for article in all_overflow {
        if accepted.len() >= shortfall {
            break;
        }
        // Skip articles whose URL is already part of the synthesis.
        if present_urls.contains(&article.url) {
            continue;
        }
        // Enforce source diversity on overflow articles. Articles whose
        // domain cannot be extracted are not subject to the per-source cap.
        if let Some(domain) = extract_domain(&article.url) {
            let count = domain_counts.entry(domain).or_insert(0);
            if *count >= max_per_source {
                continue;
            }
            *count += 1;
        }
        present_urls.insert(article.url.clone());
        accepted.push(article);
    }

    let added = accepted.len();
    if added > 0 {
        all_scraped
            .entry("category_autre".to_string())
            .or_default()
            .extend(accepted);
        tracing::info!(added = added, "Added overflow articles to Autre");
    }
}
if all_scraped . values ( ) . all ( | items | items . is_empty ( ) ) {
if all_scraped . values ( ) . all ( | items | items . is_empty ( ) ) {
return Err ( AppError ::BadRequest (
return Err ( AppError ::BadRequest (
"Aucun article valide trouve. Verifiez vos sources et categories." . into ( ) ,
"Aucun article valide trouve. Verifiez vos sources et categories." . into ( ) ,
@ -2571,4 +2630,42 @@ mod tests {
let h2 = hash_article_url ( "https://example.com/article-2" ) ;
let h2 = hash_article_url ( "https://example.com/article-2" ) ;
assert_ne! ( h1 , h2 ) ;
assert_ne! ( h1 , h2 ) ;
}
}
// ── fill-up calculation tests ───────────────────────────────
// Pins the fill-up target arithmetic: with 4 categories of 4 items each the
// capacity is 16, and 75% of that, rounded up, is 12.
#[test]
fn fillup_target_calculation() {
    let categories = 4;
    let items_per_category = 4;
    let capacity = categories * items_per_category;
    let scaled = 0.75_f64 * capacity as f64;
    let expected_minimum = scaled.ceil() as usize;
    assert_eq!(expected_minimum, 12);
}
// When the article total already exceeds the target, the shortfall must
// clamp to zero instead of underflowing.
#[test]
fn fillup_shortfall_saturating() {
    let (goal, have): (usize, usize) = (12, 15);
    let missing = goal.saturating_sub(have);
    assert_eq!(missing, 0);
}
// Six articles are all assigned to "AI News" with a per-category cap of 2.
// Expected outcome asserted below: 2 stay in the requested category, 2 land
// in "Autre", and the remaining 2 are returned as overflow.
#[test]
fn classification_overflow_collected_when_all_full() {
    use crate::models::synthesis::ScrapedNewsItem;

    // Build the six input articles with distinct titles and URLs.
    let mut articles: Vec<ScrapedNewsItem> = Vec::new();
    for i in 0..6 {
        articles.push(ScrapedNewsItem {
            title: format!("Art{}", i),
            url: format!("https://a.com/{}", i),
            summary: "s".into(),
            original_title: "t".into(),
            scraped_content: "c".into(),
        });
    }

    let categories = vec!["AI News".to_string(), "Autre".to_string()];

    // Classifier response that sends every article index to "AI News".
    let assignments = (0..6)
        .map(|i| serde_json::json!({ "index": i, "category": "AI News" }))
        .collect::<Vec<_>>();
    let response = serde_json::json!({ "assignments": assignments });

    let mut filled = HashMap::new();
    let (result, overflow) =
        parse_classification_response(&response, &articles, &categories, 2, &mut filled);

    // AI News capped at 2, Autre gets 2, remaining 2 go to overflow
    let in_ai_news = result.get("category_0").map(|items| items.len());
    let in_autre = result.get("category_autre").map(|items| items.len());
    assert_eq!(in_ai_news, Some(2));
    assert_eq!(in_autre, Some(2));
    assert_eq!(overflow.len(), 2, "2 articles should overflow when both categories are full");
}
}
}