diff --git a/backend/src/services/synthesis.rs b/backend/src/services/synthesis.rs index b001ad3..d28e243 100644 --- a/backend/src/services/synthesis.rs +++ b/backend/src/services/synthesis.rs @@ -522,6 +522,24 @@ pub async fn run_generation_inner( } } + let llm_date = class_response.get("date").and_then(|d| d.as_str()).filter(|s| !s.is_empty()).map(|s| s.to_string()); + + // Articles without any date go to "Articles sans date" category + if llm_date.is_none() { + let llm_title = class_response.get("title").and_then(|t| t.as_str()).unwrap_or(&page_title).to_string(); + let llm_summary = class_response.get("summary").and_then(|s| s.as_str()).unwrap_or("").to_string(); + article_scraped.entry("category_no_date".to_string()).or_default().push(NewsItem { + title: llm_title, + url: final_url.clone(), + summary: llm_summary, + date: None, + }); + + let source_domain = extract_domain(&source_url).unwrap_or_default(); + *source_counts.entry(source_domain).or_insert(0) += 1; + continue; + } + let Some((final_cat_key, final_cat_name, llm_title, llm_summary)) = assign_category( &class_response, &page_title, &user_categories, &classification_categories, &filled_counts, settings.max_items_per_category as usize, @@ -529,7 +547,6 @@ pub async fn run_generation_inner( continue; }; - let llm_date = class_response.get("date").and_then(|d| d.as_str()).filter(|s| !s.is_empty()).map(|s| s.to_string()); article_scraped.entry(final_cat_key).or_default().push(NewsItem { title: llm_title, url: final_url.clone(), @@ -747,6 +764,25 @@ pub async fn run_generation_inner( } } + let llm_date = class_response.get("date").and_then(|d| d.as_str()).filter(|s| !s.is_empty()).map(|s| s.to_string()); + + // Articles without any date go to "Articles sans date" category + if llm_date.is_none() { + let llm_title = class_response.get("title").and_then(|t| t.as_str()).unwrap_or(&page_title).to_string(); + let llm_summary = class_response.get("summary").and_then(|s| s.as_str()).unwrap_or("").to_string(); + article_scraped.entry("category_no_date".to_string()).or_default().push(NewsItem { + title: llm_title, + url: final_url.clone(), + summary: llm_summary, + date: None, + }); + + if let Some(domain) = extract_domain(&final_url) { + *source_counts.entry(domain).or_insert(0) += 1; + } + continue; + } + let Some((final_cat_key, final_cat_name, llm_title, llm_summary)) = assign_category( &class_response, &page_title, &user_categories, &classification_categories, &filled_counts, settings.max_items_per_category as usize, @@ -754,7 +790,6 @@ pub async fn run_generation_inner( continue; }; - let llm_date = class_response.get("date").and_then(|d| d.as_str()).filter(|s| !s.is_empty()).map(|s| s.to_string()); article_scraped.entry(final_cat_key).or_default().push(NewsItem { title: llm_title, url: final_url.clone(), @@ -885,6 +920,11 @@ pub async fn run_generation_inner( final_sections.push(NewsSection { title: "Divers".to_string(), items: autre_items.clone() }); } } + if let Some(no_date_items) = article_scraped.get("category_no_date") { + if !no_date_items.is_empty() { + final_sections.push(NewsSection { title: "Articles sans date".to_string(), items: no_date_items.clone() }); + } + } let sections_json = serde_json::to_value(&final_sections).map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to serialize: {}", e)))?; let sections_json = sanitize_json_null_bytes(sections_json);