diff --git a/backend/src/services/scraper.rs b/backend/src/services/scraper.rs
index bc63974..f625949 100644
--- a/backend/src/services/scraper.rs
+++ b/backend/src/services/scraper.rs
@@ -601,6 +601,53 @@ pub fn parse_date_string(s: &str) -> Option<DateTime<Utc>> {
         return Some(naive.and_utc());
     }
 
+    // Numeric and English-month layouts, tried in priority order. Day-first
+    // comes before month-first, so an ambiguous "03/04/2026" is read as
+    // 3 April; the US numeric layout only wins when the day-first reading is
+    // impossible (e.g. "03/25/2026").
+    for fmt in &["%d/%m/%Y", "%d-%m-%Y", "%m/%d/%Y", "%B %d, %Y", "%b %d, %Y"] {
+        if let Ok(naive) = NaiveDate::parse_from_str(s, fmt) {
+            return naive.and_hms_opt(0, 0, 0).map(|ndt| ndt.and_utc());
+        }
+    }
+
+    // French textual dates: "25 mars 2026", "15 février 2026", "1er mai 2026".
+    // chrono only knows English month names, so the French name is swapped for
+    // its English equivalent before parsing.
+    const FRENCH_MONTHS: &[(&str, &str)] = &[
+        ("janvier", "January"),
+        ("février", "February"),
+        ("fevrier", "February"),
+        ("mars", "March"),
+        ("avril", "April"),
+        ("mai", "May"),
+        ("juin", "June"),
+        ("juillet", "July"),
+        ("août", "August"),
+        ("aout", "August"),
+        ("septembre", "September"),
+        ("octobre", "October"),
+        ("novembre", "November"),
+        ("décembre", "December"),
+        ("decembre", "December"),
+    ];
+    let lowered = s.to_lowercase();
+    // The first of the month is written "1er" in French; %d needs bare digits.
+    let lowered = match lowered.strip_prefix("1er") {
+        Some(rest) => format!("1{rest}"),
+        None => lowered,
+    };
+    let french_month = FRENCH_MONTHS.iter().find(|(fr, _)| lowered.contains(*fr));
+    let english = match french_month {
+        Some((fr, en)) => lowered.replacen(*fr, en, 1),
+        None => lowered,
+    };
+    for fmt in &["%d %B %Y", "%d %b %Y"] {
+        if let Ok(naive) = NaiveDate::parse_from_str(&english, fmt) {
+            return naive.and_hms_opt(0, 0, 0).map(|ndt| ndt.and_utc());
+        }
+    }
+
     None
 }
 
@@ -1130,6 +1177,48 @@ mod tests {
         assert!(parse_date_string("").is_none());
     }
 
+    #[test]
+    fn test_parse_european_date_slash() {
+        let dt = parse_date_string("25/03/2026").expect("dd/mm/yyyy should parse");
+        assert_eq!(dt.format("%Y-%m-%d").to_string(), "2026-03-25");
+    }
+
+    #[test]
+    fn test_parse_european_date_dash() {
+        let dt = parse_date_string("25-03-2026").expect("dd-mm-yyyy should parse");
+        assert_eq!(dt.format("%Y-%m-%d").to_string(), "2026-03-25");
+    }
+
+    #[test]
+    fn test_parse_us_date_numeric() {
+        let dt = parse_date_string("03/25/2026").expect("mm/dd/yyyy should parse");
+        assert_eq!(dt.format("%Y-%m-%d").to_string(), "2026-03-25");
+    }
+
+    #[test]
+    fn test_parse_us_date_long_month() {
+        let dt = parse_date_string("March 25, 2026").expect("long US date should parse");
+        assert_eq!(dt.format("%Y-%m-%d").to_string(), "2026-03-25");
+    }
+
+    #[test]
+    fn test_parse_french_date() {
+        let dt = parse_date_string("25 mars 2026").expect("French date should parse");
+        assert_eq!(dt.format("%Y-%m-%d").to_string(), "2026-03-25");
+    }
+
+    #[test]
+    fn test_parse_french_date_accent() {
+        let dt = parse_date_string("15 février 2026").expect("accented month should parse");
+        assert_eq!(dt.format("%Y-%m-%d").to_string(), "2026-02-15");
+    }
+
+    #[test]
+    fn test_parse_french_date_first_of_month() {
+        let dt = parse_date_string("1er mai 2026").expect("\"1er\" day should parse");
+        assert_eq!(dt.format("%Y-%m-%d").to_string(), "2026-05-01");
+    }
+
     // ── Scheme Validation ───────────────────────────────────────────
 
     #[test]
diff --git a/backend/src/services/synthesis/mod.rs b/backend/src/services/synthesis/mod.rs
index 9a67df9..2c018d3 100644
--- a/backend/src/services/synthesis/mod.rs
+++ b/backend/src/services/synthesis/mod.rs
@@ -576,11 +576,6 @@ pub async fn run_generation_inner(
             final_sections.push(NewsSection { title: "Divers".to_string(), items: autre_items.clone() });
         }
     }
-    if let Some(no_date_items) = article_scraped.get("category_no_date") {
-        if !no_date_items.is_empty() {
-            final_sections.push(NewsSection { title: "Articles sans date".to_string(), items: no_date_items.clone() });
-        }
-    }
 
     let sections_json = serde_json::to_value(&final_sections).map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to serialize: {}", e)))?;
     let sections_json = sanitize_json_null_bytes(sections_json);
@@ -789,23 +784,6 @@ async fn scrape_and_classify_batch(
             .and_then(extract_domain)
             .or_else(|| extract_domain(&final_url));
 
-        // Articles without any date go to "Articles sans date" category
-        if llm_date.is_none() {
-            let llm_title = class_response.get("title").and_then(|t| t.as_str()).unwrap_or(&page_title).to_string();
-            let llm_summary = class_response.get("summary").and_then(|s| s.as_str()).unwrap_or("").to_string();
-            article_scraped.entry("category_no_date".to_string()).or_default().push(NewsItem {
-                title: llm_title,
-                url: final_url.clone(),
-                summary: llm_summary,
-                date: None,
-            });
-
-            if let Some(domain) = count_domain {
-                *source_counts.entry(domain).or_insert(0) += 1;
-            }
-            continue;
-        }
-
         let Some((final_cat_key, final_cat_name, llm_title, llm_summary)) = assign_category(
             &class_response, &page_title, ctx.user_categories, ctx.classification_categories,
             filled_counts, ctx.max_items_per_category,