@ -522,6 +522,24 @@ pub async fn run_generation_inner(
}
}
}
}
// Read the "date" field of the classification response. The result is None
// when the key is absent, when as_str() yields None, or when the string is empty.
let llm_date = class_response . get ( "date" ) . and_then ( | d | d . as_str ( ) ) . filter ( | s | ! s . is_empty ( ) ) . map ( | s | s . to_string ( ) ) ;
// Articles without a date are stored under the "category_no_date" key and
// never reach the assign_category call that follows this block.
if llm_date . is_none ( ) {
// Title falls back to the page title, summary to an empty string.
let llm_title = class_response . get ( "title" ) . and_then ( | t | t . as_str ( ) ) . unwrap_or ( & page_title ) . to_string ( ) ;
let llm_summary = class_response . get ( "summary" ) . and_then ( | s | s . as_str ( ) ) . unwrap_or ( "" ) . to_string ( ) ;
article_scraped . entry ( "category_no_date" . to_string ( ) ) . or_default ( ) . push ( NewsItem {
title : llm_title ,
url : final_url . clone ( ) ,
summary : llm_summary ,
date : None ,
} ) ;
// Count this article against the domain of source_url. If extract_domain
// returns nothing, unwrap_or_default substitutes the default value
// (presumably an empty string), so a counter is incremented either way.
// NOTE(review): the other undated-article branch in this function counts by
// final_url and skips the increment when no domain is found; confirm which
// behavior is intended here.
let source_domain = extract_domain ( & source_url ) . unwrap_or_default ( ) ;
* source_counts . entry ( source_domain ) . or_insert ( 0 ) + = 1 ;
// Nothing more to do for this article; move on to the next iteration.
continue ;
}
let Some ( ( final_cat_key , final_cat_name , llm_title , llm_summary ) ) = assign_category (
let Some ( ( final_cat_key , final_cat_name , llm_title , llm_summary ) ) = assign_category (
& class_response , & page_title , & user_categories , & classification_categories ,
& class_response , & page_title , & user_categories , & classification_categories ,
& filled_counts , settings . max_items_per_category as usize ,
& filled_counts , settings . max_items_per_category as usize ,
@ -529,7 +547,6 @@ pub async fn run_generation_inner(
continue ;
continue ;
} ;
} ;
let llm_date = class_response . get ( "date" ) . and_then ( | d | d . as_str ( ) ) . filter ( | s | ! s . is_empty ( ) ) . map ( | s | s . to_string ( ) ) ;
article_scraped . entry ( final_cat_key ) . or_default ( ) . push ( NewsItem {
article_scraped . entry ( final_cat_key ) . or_default ( ) . push ( NewsItem {
title : llm_title ,
title : llm_title ,
url : final_url . clone ( ) ,
url : final_url . clone ( ) ,
@ -747,6 +764,25 @@ pub async fn run_generation_inner(
}
}
}
}
// Read the "date" field of the classification response. The result is None
// when the key is absent, when as_str() yields None, or when the string is empty.
let llm_date = class_response . get ( "date" ) . and_then ( | d | d . as_str ( ) ) . filter ( | s | ! s . is_empty ( ) ) . map ( | s | s . to_string ( ) ) ;
// Articles without a date are stored under the "category_no_date" key and
// never reach the assign_category call that follows this block.
if llm_date . is_none ( ) {
// Title falls back to the page title, summary to an empty string.
let llm_title = class_response . get ( "title" ) . and_then ( | t | t . as_str ( ) ) . unwrap_or ( & page_title ) . to_string ( ) ;
let llm_summary = class_response . get ( "summary" ) . and_then ( | s | s . as_str ( ) ) . unwrap_or ( "" ) . to_string ( ) ;
article_scraped . entry ( "category_no_date" . to_string ( ) ) . or_default ( ) . push ( NewsItem {
title : llm_title ,
url : final_url . clone ( ) ,
summary : llm_summary ,
date : None ,
} ) ;
// Count this article against the domain of final_url; when extract_domain
// returns None, no counter is touched.
// NOTE(review): the other undated-article branch in this function counts by
// source_url and still increments (under a default key) when no domain is
// found; confirm which behavior is intended here.
if let Some ( domain ) = extract_domain ( & final_url ) {
* source_counts . entry ( domain ) . or_insert ( 0 ) + = 1 ;
}
// Nothing more to do for this article; move on to the next iteration.
continue ;
}
let Some ( ( final_cat_key , final_cat_name , llm_title , llm_summary ) ) = assign_category (
let Some ( ( final_cat_key , final_cat_name , llm_title , llm_summary ) ) = assign_category (
& class_response , & page_title , & user_categories , & classification_categories ,
& class_response , & page_title , & user_categories , & classification_categories ,
& filled_counts , settings . max_items_per_category as usize ,
& filled_counts , settings . max_items_per_category as usize ,
@ -754,7 +790,6 @@ pub async fn run_generation_inner(
continue ;
continue ;
} ;
} ;
let llm_date = class_response . get ( "date" ) . and_then ( | d | d . as_str ( ) ) . filter ( | s | ! s . is_empty ( ) ) . map ( | s | s . to_string ( ) ) ;
article_scraped . entry ( final_cat_key ) . or_default ( ) . push ( NewsItem {
article_scraped . entry ( final_cat_key ) . or_default ( ) . push ( NewsItem {
title : llm_title ,
title : llm_title ,
url : final_url . clone ( ) ,
url : final_url . clone ( ) ,
@ -885,6 +920,11 @@ pub async fn run_generation_inner(
final_sections . push ( NewsSection { title : "Divers" . to_string ( ) , items : autre_items . clone ( ) } ) ;
final_sections . push ( NewsSection { title : "Divers" . to_string ( ) , items : autre_items . clone ( ) } ) ;
}
}
}
}
// Append a section titled "Articles sans date" holding a clone of the items
// collected under the "category_no_date" key. Nothing is added when the key
// is absent or its list is empty. The section is pushed after whatever has
// already been added to final_sections at this point.
if let Some ( no_date_items ) = article_scraped . get ( "category_no_date" ) {
if ! no_date_items . is_empty ( ) {
final_sections . push ( NewsSection { title : "Articles sans date" . to_string ( ) , items : no_date_items . clone ( ) } ) ;
}
}
let sections_json = serde_json ::to_value ( & final_sections ) . map_err ( | e | AppError ::Internal ( anyhow ::anyhow ! ( "Failed to serialize: {}" , e ) ) ) ? ;
let sections_json = serde_json ::to_value ( & final_sections ) . map_err ( | e | AppError ::Internal ( anyhow ::anyhow ! ( "Failed to serialize: {}" , e ) ) ) ? ;
let sections_json = sanitize_json_null_bytes ( sections_json ) ;
let sections_json = sanitize_json_null_bytes ( sections_json ) ;