From c5a56c8fb88410b09967fc784f78d998f7a2513f Mon Sep 17 00:00:00 2001 From: oabrivard Date: Thu, 26 Mar 2026 15:56:30 +0100 Subject: [PATCH] feat: save publication date in article history and show in synthesis - Add published_date column to article_history table - Add date field to NewsItem (serialized in synthesis JSONB) - Pass LLM-extracted date through ArticleTrace to article history - Display date below article title in SynthesisDetail page Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 2 +- ...24_add_published_date_to_article_history.sql | 2 ++ backend/src/db/article_history.rs | 12 ++++++++---- backend/src/models/synthesis.rs | 5 +++++ backend/src/services/email.rs | 4 ++++ backend/src/services/export.rs | 3 +++ backend/src/services/synthesis.rs | 17 +++++++++++++++++ frontend/src/pages/SynthesisDetail.tsx | 3 +++ frontend/src/types.ts | 1 + 9 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 backend/migrations/20260326000024_add_published_date_to_article_history.sql diff --git a/CLAUDE.md b/CLAUDE.md index 7e67899..2d6ac1e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -117,7 +117,7 @@ cd frontend && npx tsc --noEmit - `GET /api/v1/admin/users` — user list - `PUT /api/v1/admin/users/:id/role` — role management -## Database (23 migrations) +## Database (24 migrations) Tables: `users`, `sessions`, `magic_link_tokens`, `user_settings`, `sources`, `syntheses`, `admin_providers`, `admin_rate_limits`, `user_api_keys`, `audit_log` ## Environment Variables diff --git a/backend/migrations/20260326000024_add_published_date_to_article_history.sql b/backend/migrations/20260326000024_add_published_date_to_article_history.sql new file mode 100644 index 0000000..ab7ec15 --- /dev/null +++ b/backend/migrations/20260326000024_add_published_date_to_article_history.sql @@ -0,0 +1,2 @@ +-- Add published_date to article_history for LLM-extracted dates +ALTER TABLE article_history ADD COLUMN published_date TEXT; diff --git 
a/backend/src/db/article_history.rs b/backend/src/db/article_history.rs index 305a339..42748c2 100644 --- a/backend/src/db/article_history.rs +++ b/backend/src/db/article_history.rs @@ -22,6 +22,7 @@ pub struct ArticleHistoryEntry { pub status: String, pub scraped_ok: bool, pub job_id: Uuid, + pub published_date: Option<String>, } /// Row returned from article_history queries. @@ -106,11 +107,12 @@ pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry] let statuses: Vec<&str> = entries.iter().map(|e| e.status.as_str()).collect(); let scraped_oks: Vec<bool> = entries.iter().map(|e| e.scraped_ok).collect(); let job_ids: Vec<Uuid> = entries.iter().map(|e| e.job_id).collect(); + let published_dates: Vec<Option<&str>> = entries.iter().map(|e| e.published_date.as_deref()).collect(); sqlx::query( r#" - INSERT INTO article_history (user_id, url, url_hash, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id) - SELECT * FROM unnest($1::uuid[], $2::text[], $3::text[], $4::text[], $5::text[], $6::text[], $7::text[], $8::uuid[], $9::text[], $10::bool[], $11::uuid[]) + INSERT INTO article_history (user_id, url, url_hash, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, published_date) + SELECT * FROM unnest($1::uuid[], $2::text[], $3::text[], $4::text[], $5::text[], $6::text[], $7::text[], $8::uuid[], $9::text[], $10::bool[], $11::uuid[], $12::text[]) "#, ) .bind(&user_ids) @@ -124,6 +126,7 @@ pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry] .bind(&statuses) .bind(&scraped_oks) .bind(&job_ids) + .bind(&published_dates) .execute(pool) .await?; @@ -134,8 +137,8 @@ pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry] pub async fn insert_entry(pool: &PgPool, entry: &ArticleHistoryEntry) -> Result<(), AppError> { sqlx::query( r#" - INSERT INTO article_history (user_id, url_hash, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id)
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) + INSERT INTO article_history (user_id, url_hash, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, published_date) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12) "#, ) .bind(entry.user_id) @@ -149,6 +152,7 @@ pub async fn insert_entry(pool: &PgPool, entry: &ArticleHistoryEntry) -> Result< .bind(&entry.status) .bind(entry.scraped_ok) .bind(entry.job_id) + .bind(&entry.published_date) .execute(pool) .await?; Ok(()) diff --git a/backend/src/models/synthesis.rs b/backend/src/models/synthesis.rs index 8c21261..5a60de7 100644 --- a/backend/src/models/synthesis.rs +++ b/backend/src/models/synthesis.rs @@ -14,6 +14,8 @@ pub struct NewsItem { pub title: String, pub url: String, pub summary: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub date: Option<String>, } /// A named section containing a list of news items. @@ -201,6 +203,7 @@ mod tests { title: "Test Article".into(), url: "https://example.com/article".into(), summary: "A brief summary of the article content.".into(), + date: None, }; let json = serde_json::to_value(&item).unwrap(); @@ -220,11 +223,13 @@ mod tests { title: "Article 1".into(), url: "https://example.com/1".into(), summary: "Summary 1".into(), + date: None, }, NewsItem { title: "Article 2".into(), url: "https://example.com/2".into(), summary: "Summary 2".into(), + date: None, }, ], }; diff --git a/backend/src/services/email.rs b/backend/src/services/email.rs index f4e3f15..ae84aa9 100644 --- a/backend/src/services/email.rs +++ b/backend/src/services/email.rs @@ -278,11 +278,13 @@ mod tests { title: "OpenAI lance GPT-5".into(), url: "https://openai.com/gpt5".into(), summary: "OpenAI a annonce GPT-5.".into(), + date: None, }, NewsItem { title: "Google DeepMind Gemini 3".into(), url: "https://deepmind.google/gemini3".into(), summary: "DeepMind presente Gemini 3.".into(), + date: None, }, ], }, @@ -292,6 +294,7 @@ mod tests { title:
"Nouveau papier RLHF".into(), url: "https://arxiv.org/abs/2026.12345".into(), summary: "Approche RLHF prometteuse.".into(), + date: None, }], }, ] @@ -337,6 +340,7 @@ mod tests { title: "Title with \"quotes\" & ".into(), url: "https://example.com/test?a=1&b=2".into(), summary: "Summary with bold attempt.".into(), + date: None, }], }]; diff --git a/backend/src/services/export.rs b/backend/src/services/export.rs index 9f1b4c8..9c030c2 100644 --- a/backend/src/services/export.rs +++ b/backend/src/services/export.rs @@ -335,12 +335,14 @@ mod tests { summary: "OpenAI a annonce la sortie de GPT-5 avec des capacites ameliorees." .into(), + date: None, }, NewsItem { title: "Google DeepMind publie Gemini 3".into(), url: "https://deepmind.google/gemini3".into(), summary: "DeepMind presente Gemini 3, son nouveau modele multimodal.".into(), + date: None, }, ], }, @@ -350,6 +352,7 @@ mod tests { title: "Nouveau papier sur le RLHF".into(), url: "https://arxiv.org/abs/2026.12345".into(), summary: "Une nouvelle approche du RLHF prometteuse.".into(), + date: None, }], }, ] diff --git a/backend/src/services/synthesis.rs b/backend/src/services/synthesis.rs index 49b5c5c..934e3ba 100644 --- a/backend/src/services/synthesis.rs +++ b/backend/src/services/synthesis.rs @@ -386,6 +386,7 @@ pub async fn run_generation_inner( url, title: "", source_type: "personalized_source", source_url: Some(source_url), category: None, synthesis_id: None, status: "filtered_history", scraped_ok: false, + published_date: None, })); } } @@ -429,6 +430,7 @@ pub async fn run_generation_inner( url: &url, title: "", source_type: "personalized_source", source_url: Some(&source_url), category: None, synthesis_id: None, status: "filtered_diversity", scraped_ok: false, + published_date: None, })); continue; } @@ -463,6 +465,7 @@ pub async fn run_generation_inner( url: &final_url, title: &page_title, source_type: "personalized_source", source_url: Some(&source_url), category: None, synthesis_id: None, status: reason, 
scraped_ok: false, + published_date: None, })); } else { scraped_articles.push((final_url, source_url, body_text, page_title)); @@ -534,6 +537,7 @@ pub async fn run_generation_inner( url: &final_url, title: &page_title, source_type: "personalized_source", source_url: Some(&source_url), category: None, synthesis_id: None, status: "filtered_too_old", scraped_ok: true, + published_date: Some(date_str), })); continue; } @@ -548,10 +552,12 @@ pub async fn run_generation_inner( continue; }; + let llm_date = class_response.get("date").and_then(|d| d.as_str()).filter(|s| !s.is_empty()).map(|s| s.to_string()); article_scraped.entry(final_cat_key).or_default().push(NewsItem { title: llm_title, url: final_url.clone(), summary: llm_summary, + date: llm_date, }); *filled_counts.entry(final_cat_name).or_insert(0) += 1; @@ -607,6 +613,7 @@ pub async fn run_generation_inner( url: &result.url, title: &result.title, source_type: "brave_search", source_url: None, category: None, synthesis_id: None, status: reason, scraped_ok: false, + published_date: None, })); continue; } @@ -663,6 +670,7 @@ pub async fn run_generation_inner( url: &final_url, title: &page_title, source_type: "brave_search", source_url: None, category: None, synthesis_id: None, status: reason, scraped_ok: false, + published_date: None, })); } else { scraped_articles.push((final_url, body_text, page_title)); @@ -732,6 +740,7 @@ pub async fn run_generation_inner( url: &final_url, title: &page_title, source_type: "brave_search", source_url: None, category: None, synthesis_id: None, status: "filtered_too_old", scraped_ok: true, + published_date: Some(date_str), })); continue; } @@ -746,10 +755,12 @@ pub async fn run_generation_inner( continue; }; + let llm_date = class_response.get("date").and_then(|d| d.as_str()).filter(|s| !s.is_empty()).map(|s| s.to_string()); article_scraped.entry(final_cat_key).or_default().push(NewsItem { title: llm_title, url: final_url.clone(), summary: llm_summary, + date: llm_date, }); 
*filled_counts.entry(final_cat_name).or_insert(0) += 1; @@ -803,6 +814,7 @@ pub async fn run_generation_inner( url: &item.url, title: &item.title, source_type: "web_search", source_url: None, category: None, synthesis_id: None, status: reason, scraped_ok: false, + published_date: None, })); continue; } @@ -828,6 +840,7 @@ pub async fn run_generation_inner( url: &final_url, title: &item.title, source_type: "web_search", source_url: None, category: None, synthesis_id: None, status: reason, scraped_ok: false, + published_date: None, })); continue; } @@ -836,6 +849,7 @@ pub async fn run_generation_inner( title: item.title, url: final_url, summary: item.summary, + date: None, }); if let Some(domain) = extract_domain(&item.url) { @@ -891,6 +905,7 @@ pub async fn run_generation_inner( source_url: if source_type == "personalized_source" { url_source.get(&item.url).map(|s| s.as_str()) } else { None }, category: Some(&section.title), synthesis_id: Some(synthesis.id), status: "used", scraped_ok: true, + published_date: item.date.as_deref(), })); } } @@ -948,6 +963,7 @@ struct ArticleTrace<'a> { synthesis_id: Option<Uuid>, status: &'a str, scraped_ok: bool, + published_date: Option<&'a str>, } /// Build an article history entry from trace parameters (no DB call). @@ -968,6 +984,7 @@ fn build_trace_entry( status: trace.status.to_string(), scraped_ok: trace.scraped_ok, job_id, + published_date: trace.published_date.map(|s| s.to_string()), } } diff --git a/frontend/src/pages/SynthesisDetail.tsx b/frontend/src/pages/SynthesisDetail.tsx index 231e73b..357b16b 100644 --- a/frontend/src/pages/SynthesisDetail.tsx +++ b/frontend/src/pages/SynthesisDetail.tsx @@ -32,6 +32,9 @@ const NewsItemCard: Component<{ item: NewsItemType }> = (props) => { + +

{props.item.date}

+

{props.item.summary}

diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 659cbba..d9be0e7 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -114,6 +114,7 @@ export interface NewsItem { title: string; url: string; summary: string; + date?: string | null; } export interface NewsSection {