feat: add French/European/US date formats + remove "Articles sans date" category

Date parser now supports: 25/03/2026, 25-03-2026, 03/25/2026, "March 25, 2026",
25 mars 2026, 15 février 2026, and short month variants. The day-first numeric
formats are tried before the US month-first one.

Articles without dates are no longer routed to a separate category —
they stay in their LLM-classified category with date shown as empty.
This prevents losing good articles in a catch-all section.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
oabrivard 2 months ago
parent 42ced9cfee
commit 9a310bbf19

@ -601,6 +601,35 @@ pub fn parse_date_string(s: &str) -> Option<DateTime<Utc>> {
return Some(naive.and_utc());
}
// Try European/French formats: "25/03/2026", "25-03-2026"
for fmt in &["%d/%m/%Y", "%d-%m-%Y"] {
if let Ok(naive) = NaiveDate::parse_from_str(s, fmt) {
return naive.and_hms_opt(0, 0, 0).map(|ndt| ndt.and_utc());
}
}
// Try US format: "03/25/2026", "March 25, 2026"
for fmt in &["%m/%d/%Y", "%B %d, %Y", "%b %d, %Y"] {
if let Ok(naive) = NaiveDate::parse_from_str(s, fmt) {
return naive.and_hms_opt(0, 0, 0).map(|ndt| ndt.and_utc());
}
}
// Try French format: "25 mars 2026", "25 mar 2026"
// chrono doesn't parse French month names natively, so we translate first
let lowered = s.to_lowercase();
let english = lowered
.replace("janvier", "January").replace("février", "February").replace("fevrier", "February")
.replace("mars", "March").replace("avril", "April").replace("mai", "May")
.replace("juin", "June").replace("juillet", "July").replace("août", "August").replace("aout", "August")
.replace("septembre", "September").replace("octobre", "October")
.replace("novembre", "November").replace("décembre", "December").replace("decembre", "December");
for fmt in &["%d %B %Y", "%d %b %Y"] {
if let Ok(naive) = NaiveDate::parse_from_str(&english, fmt) {
return naive.and_hms_opt(0, 0, 0).map(|ndt| ndt.and_utc());
}
}
None
}
@ -1130,6 +1159,41 @@ mod tests {
assert!(parse_date_string("").is_none());
}
#[test]
fn test_parse_european_date_slash() {
    // A slash-separated, day-first date must resolve to 25 March 2026.
    let rendered = parse_date_string("25/03/2026").map(|d| d.format("%Y-%m-%d").to_string());
    assert_eq!(rendered.as_deref(), Some("2026-03-25"));
}
#[test]
fn test_parse_european_date_dash() {
    // A dash-separated, day-first date must resolve to 25 March 2026.
    let rendered = parse_date_string("25-03-2026").map(|d| d.format("%Y-%m-%d").to_string());
    assert_eq!(rendered.as_deref(), Some("2026-03-25"));
}
#[test]
fn test_parse_us_date_long_month() {
    // US long form: full English month name, day, comma, then year.
    let rendered = parse_date_string("March 25, 2026").map(|d| d.format("%Y-%m-%d").to_string());
    assert_eq!(rendered.as_deref(), Some("2026-03-25"));
}
#[test]
fn test_parse_french_date() {
    // French month name without accents ("mars") must be understood as March.
    let rendered = parse_date_string("25 mars 2026").map(|d| d.format("%Y-%m-%d").to_string());
    assert_eq!(rendered.as_deref(), Some("2026-03-25"));
}
#[test]
fn test_parse_french_date_accent() {
    // Accented French month name ("février") must be understood as February.
    let rendered = parse_date_string("15 février 2026").map(|d| d.format("%Y-%m-%d").to_string());
    assert_eq!(rendered.as_deref(), Some("2026-02-15"));
}
// ── Scheme Validation ───────────────────────────────────────────
#[test]

@ -576,11 +576,6 @@ pub async fn run_generation_inner(
final_sections.push(NewsSection { title: "Divers".to_string(), items: autre_items.clone() });
}
}
if let Some(no_date_items) = article_scraped.get("category_no_date") {
if !no_date_items.is_empty() {
final_sections.push(NewsSection { title: "Articles sans date".to_string(), items: no_date_items.clone() });
}
}
let sections_json = serde_json::to_value(&final_sections).map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to serialize: {}", e)))?;
let sections_json = sanitize_json_null_bytes(sections_json);
@ -789,23 +784,6 @@ async fn scrape_and_classify_batch(
.and_then(extract_domain)
.or_else(|| extract_domain(&final_url));
// Articles without any date go to "Articles sans date" category
if llm_date.is_none() {
let llm_title = class_response.get("title").and_then(|t| t.as_str()).unwrap_or(&page_title).to_string();
let llm_summary = class_response.get("summary").and_then(|s| s.as_str()).unwrap_or("").to_string();
article_scraped.entry("category_no_date".to_string()).or_default().push(NewsItem {
title: llm_title,
url: final_url.clone(),
summary: llm_summary,
date: None,
});
if let Some(domain) = count_domain {
*source_counts.entry(domain).or_insert(0) += 1;
}
continue;
}
let Some((final_cat_key, final_cat_name, llm_title, llm_summary)) = assign_category(
&class_response, &page_title, ctx.user_categories, ctx.classification_categories,
filled_counts, ctx.max_items_per_category,

Loading…
Cancel
Save