From c5a56c8fb88410b09967fc784f78d998f7a2513f Mon Sep 17 00:00:00 2001
From: oabrivard <olivier@abrivard.fr>
Date: Thu, 26 Mar 2026 15:56:30 +0100
Subject: [PATCH] feat: save publication date in article history and show in
 synthesis

- Add published_date column to article_history table
- Add optional date field to NewsItem (serialized in synthesis JSONB, omitted when absent)
- Pass LLM-extracted date through ArticleTrace to article history
- Display date below article title in SynthesisDetail page

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md                                       |  2 +-
 ...24_add_published_date_to_article_history.sql |  2 ++
 backend/src/db/article_history.rs               | 12 ++++++++----
 backend/src/models/synthesis.rs                 |  5 +++++
 backend/src/services/email.rs                   |  4 ++++
 backend/src/services/export.rs                  |  3 +++
 backend/src/services/synthesis.rs               | 17 +++++++++++++++++
 frontend/src/pages/SynthesisDetail.tsx          |  3 +++
 frontend/src/types.ts                           |  1 +
 9 files changed, 44 insertions(+), 5 deletions(-)
 create mode 100644 backend/migrations/20260326000024_add_published_date_to_article_history.sql
diff --git a/CLAUDE.md b/CLAUDE.md
index 7e67899..2d6ac1e 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -117,7 +117,7 @@ cd frontend && npx tsc --noEmit
 - `GET /api/v1/admin/users` — user list
 - `PUT /api/v1/admin/users/:id/role` — role management
 
-## Database (23 migrations)
+## Database (24 migrations)
 Tables: `users`, `sessions`, `magic_link_tokens`, `user_settings`, `sources`, `syntheses`, `admin_providers`, `admin_rate_limits`, `user_api_keys`, `audit_log`
 
 ## Environment Variables
diff --git a/backend/migrations/20260326000024_add_published_date_to_article_history.sql b/backend/migrations/20260326000024_add_published_date_to_article_history.sql
new file mode 100644
index 0000000..ab7ec15
--- /dev/null
+++ b/backend/migrations/20260326000024_add_published_date_to_article_history.sql
@@ -0,0 +1,2 @@
+-- Add nullable published_date column to article_history; stores the LLM-extracted date as text
+ALTER TABLE article_history ADD COLUMN published_date TEXT;
diff --git a/backend/src/db/article_history.rs b/backend/src/db/article_history.rs
index 305a339..42748c2 100644
--- a/backend/src/db/article_history.rs
+++ b/backend/src/db/article_history.rs
@@ -22,6 +22,7 @@ pub struct ArticleHistoryEntry {
     pub status: String,
     pub scraped_ok: bool,
     pub job_id: Uuid,
+    pub published_date: Option<String>,
 }
 
 /// Row returned from article_history queries.
@@ -106,11 +107,12 @@ pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry]
     let statuses: Vec<&str> = entries.iter().map(|e| e.status.as_str()).collect();
     let scraped_oks: Vec<bool> = entries.iter().map(|e| e.scraped_ok).collect();
     let job_ids: Vec<Uuid> = entries.iter().map(|e| e.job_id).collect();
+    let published_dates: Vec<Option<&str>> = entries.iter().map(|e| e.published_date.as_deref()).collect();
 
     sqlx::query(
         r#"
-        INSERT INTO article_history (user_id, url, url_hash, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id)
-        SELECT * FROM unnest($1::uuid[], $2::text[], $3::text[], $4::text[], $5::text[], $6::text[], $7::text[], $8::uuid[], $9::text[], $10::bool[], $11::uuid[])
+        INSERT INTO article_history (user_id, url, url_hash, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, published_date)
+        SELECT * FROM unnest($1::uuid[], $2::text[], $3::text[], $4::text[], $5::text[], $6::text[], $7::text[], $8::uuid[], $9::text[], $10::bool[], $11::uuid[], $12::text[])
         "#,
     )
     .bind(&user_ids)
@@ -124,6 +126,7 @@ pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry]
     .bind(&statuses)
     .bind(&scraped_oks)
     .bind(&job_ids)
+    .bind(&published_dates)
     .execute(pool)
     .await?;
 
@@ -134,8 +137,8 @@ pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry]
 pub async fn insert_entry(pool: &PgPool, entry: &ArticleHistoryEntry) -> Result<(), AppError> {
     sqlx::query(
         r#"
-        INSERT INTO article_history (user_id, url_hash, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id)
-        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
+        INSERT INTO article_history (user_id, url_hash, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, published_date)
+        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
         "#,
     )
     .bind(entry.user_id)
@@ -149,6 +152,7 @@ pub async fn insert_entry(pool: &PgPool, entry: &ArticleHistoryEntry) -> Result<
     .bind(&entry.status)
     .bind(entry.scraped_ok)
     .bind(entry.job_id)
+    .bind(&entry.published_date)
     .execute(pool)
     .await?;
     Ok(())
diff --git a/backend/src/models/synthesis.rs b/backend/src/models/synthesis.rs
index 8c21261..5a60de7 100644
--- a/backend/src/models/synthesis.rs
+++ b/backend/src/models/synthesis.rs
@@ -14,6 +14,8 @@ pub struct NewsItem {
     pub title: String,
     pub url: String,
     pub summary: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub date: Option<String>,
 }
 
 /// A named section containing a list of news items.
@@ -201,6 +203,7 @@ mod tests {
             title: "Test Article".into(),
             url: "https://example.com/article".into(),
             summary: "A brief summary of the article content.".into(),
+            date: None,
         };
 
         let json = serde_json::to_value(&item).unwrap();
@@ -220,11 +223,13 @@ mod tests {
                     title: "Article 1".into(),
                     url: "https://example.com/1".into(),
                     summary: "Summary 1".into(),
+                    date: None,
                 },
                 NewsItem {
                     title: "Article 2".into(),
                     url: "https://example.com/2".into(),
                     summary: "Summary 2".into(),
+                    date: None,
                 },
             ],
         };
diff --git a/backend/src/services/email.rs b/backend/src/services/email.rs
index f4e3f15..ae84aa9 100644
--- a/backend/src/services/email.rs
+++ b/backend/src/services/email.rs
@@ -278,11 +278,13 @@ mod tests {
                         title: "OpenAI lance GPT-5".into(),
                         url: "https://openai.com/gpt5".into(),
                         summary: "OpenAI a annonce GPT-5.".into(),
+                        date: None,
                     },
                     NewsItem {
                         title: "Google DeepMind Gemini 3".into(),
                         url: "https://deepmind.google/gemini3".into(),
                         summary: "DeepMind presente Gemini 3.".into(),
+                        date: None,
                     },
                 ],
             },
@@ -292,6 +294,7 @@ mod tests {
                     title: "Nouveau papier RLHF".into(),
                     url: "https://arxiv.org/abs/2026.12345".into(),
                     summary: "Approche RLHF prometteuse.".into(),
+                    date: None,
                 }],
             },
         ]
@@ -337,6 +340,7 @@ mod tests {
                 title: "Title with \"quotes\" & <angle>".into(),
                 url: "https://example.com/test?a=1&b=2".into(),
                 summary: "Summary with <b>bold</b> attempt.".into(),
+                date: None,
             }],
         }];
 
diff --git a/backend/src/services/export.rs b/backend/src/services/export.rs
index 9f1b4c8..9c030c2 100644
--- a/backend/src/services/export.rs
+++ b/backend/src/services/export.rs
@@ -335,12 +335,14 @@ mod tests {
                         summary:
                             "OpenAI a annonce la sortie de GPT-5 avec des capacites ameliorees."
                                 .into(),
+                        date: None,
                     },
                     NewsItem {
                         title: "Google DeepMind publie Gemini 3".into(),
                         url: "https://deepmind.google/gemini3".into(),
                         summary:
                             "DeepMind presente Gemini 3, son nouveau modele multimodal.".into(),
+                        date: None,
                     },
                 ],
             },
@@ -350,6 +352,7 @@ mod tests {
                     title: "Nouveau papier sur le RLHF".into(),
                     url: "https://arxiv.org/abs/2026.12345".into(),
                     summary: "Une nouvelle approche du RLHF prometteuse.".into(),
+                    date: None,
                 }],
             },
         ]
diff --git a/backend/src/services/synthesis.rs b/backend/src/services/synthesis.rs
index 49b5c5c..934e3ba 100644
--- a/backend/src/services/synthesis.rs
+++ b/backend/src/services/synthesis.rs
@@ -386,6 +386,7 @@ pub async fn run_generation_inner(
                             url, title: "", source_type: "personalized_source",
                             source_url: Some(source_url), category: None, synthesis_id: None,
                             status: "filtered_history", scraped_ok: false,
+                            published_date: None,
                         }));
                     }
                 }
@@ -429,6 +430,7 @@ pub async fn run_generation_inner(
                         url: &url, title: "", source_type: "personalized_source",
                         source_url: Some(&source_url), category: None, synthesis_id: None,
                         status: "filtered_diversity", scraped_ok: false,
+                        published_date: None,
                     }));
                     continue;
                 }
@@ -463,6 +465,7 @@ pub async fn run_generation_inner(
                             url: &final_url, title: &page_title, source_type: "personalized_source",
                             source_url: Some(&source_url), category: None, synthesis_id: None,
                             status: reason, scraped_ok: false,
+                            published_date: None,
                         }));
                     } else {
                         scraped_articles.push((final_url, source_url, body_text, page_title));
@@ -534,6 +537,7 @@ pub async fn run_generation_inner(
                                         url: &final_url, title: &page_title, source_type: "personalized_source",
                                         source_url: Some(&source_url), category: None, synthesis_id: None,
                                         status: "filtered_too_old", scraped_ok: true,
+                                        published_date: Some(date_str),
                                     }));
                                     continue;
                                 }
@@ -548,10 +552,12 @@ pub async fn run_generation_inner(
                         continue;
                     };
 
+                    let llm_date = class_response.get("date").and_then(|d| d.as_str()).filter(|s| !s.is_empty()).map(|s| s.to_string());
                     article_scraped.entry(final_cat_key).or_default().push(NewsItem {
                         title: llm_title,
                         url: final_url.clone(),
                         summary: llm_summary,
+                        date: llm_date,
                     });
                     *filled_counts.entry(final_cat_name).or_insert(0) += 1;
 
@@ -607,6 +613,7 @@ pub async fn run_generation_inner(
                         url: &result.url, title: &result.title, source_type: "brave_search",
                         source_url: None, category: None, synthesis_id: None,
                         status: reason, scraped_ok: false,
+                        published_date: None,
                     }));
                     continue;
                 }
@@ -663,6 +670,7 @@ pub async fn run_generation_inner(
                                     url: &final_url, title: &page_title, source_type: "brave_search",
                                     source_url: None, category: None, synthesis_id: None,
                                     status: reason, scraped_ok: false,
+                                    published_date: None,
                                 }));
                             } else {
                                 scraped_articles.push((final_url, body_text, page_title));
@@ -732,6 +740,7 @@ pub async fn run_generation_inner(
                                                 url: &final_url, title: &page_title, source_type: "brave_search",
                                                 source_url: None, category: None, synthesis_id: None,
                                                 status: "filtered_too_old", scraped_ok: true,
+                                                published_date: Some(date_str),
                                             }));
                                             continue;
                                         }
@@ -746,10 +755,12 @@ pub async fn run_generation_inner(
                                 continue;
                             };
 
+                            let llm_date = class_response.get("date").and_then(|d| d.as_str()).filter(|s| !s.is_empty()).map(|s| s.to_string());
                             article_scraped.entry(final_cat_key).or_default().push(NewsItem {
                                 title: llm_title,
                                 url: final_url.clone(),
                                 summary: llm_summary,
+                                date: llm_date,
                             });
                             *filled_counts.entry(final_cat_name).or_insert(0) += 1;
 
@@ -803,6 +814,7 @@ pub async fn run_generation_inner(
                             url: &item.url, title: &item.title, source_type: "web_search",
                             source_url: None, category: None, synthesis_id: None,
                             status: reason, scraped_ok: false,
+                            published_date: None,
                         }));
                         continue;
                     }
@@ -828,6 +840,7 @@ pub async fn run_generation_inner(
                         url: &final_url, title: &item.title, source_type: "web_search",
                         source_url: None, category: None, synthesis_id: None,
                         status: reason, scraped_ok: false,
+                        published_date: None,
                     }));
                     continue;
                 }
@@ -836,6 +849,7 @@ pub async fn run_generation_inner(
                     title: item.title,
                     url: final_url,
                     summary: item.summary,
+                    date: None,
                 });
 
                 if let Some(domain) = extract_domain(&item.url) {
@@ -891,6 +905,7 @@ pub async fn run_generation_inner(
                     source_url: if source_type == "personalized_source" { url_source.get(&item.url).map(|s| s.as_str()) } else { None },
                     category: Some(&section.title), synthesis_id: Some(synthesis.id),
                     status: "used", scraped_ok: true,
+                    published_date: item.date.as_deref(),
                 }));
             }
         }
@@ -948,6 +963,7 @@ struct ArticleTrace<'a> {
     synthesis_id: Option<Uuid>,
     status: &'a str,
     scraped_ok: bool,
+    published_date: Option<&'a str>,
 }
 
 /// Build an article history entry from trace parameters (no DB call).
@@ -968,6 +984,7 @@ fn build_trace_entry(
         status: trace.status.to_string(),
         scraped_ok: trace.scraped_ok,
         job_id,
+        published_date: trace.published_date.map(|s| s.to_string()),
     }
 }
 
diff --git a/frontend/src/pages/SynthesisDetail.tsx b/frontend/src/pages/SynthesisDetail.tsx
index 231e73b..357b16b 100644
--- a/frontend/src/pages/SynthesisDetail.tsx
+++ b/frontend/src/pages/SynthesisDetail.tsx
@@ -32,6 +32,9 @@ const NewsItemCard: Component<{ item: NewsItemType }> = (props) => {
           <ExternalLink class="h-4 w-4 text-gray-400 flex-shrink-0" />
         </a>
       </h3>
+      <Show when={props.item.date}>
+        <p class="text-xs text-gray-400 mb-1">{props.item.date}</p>
+      </Show>
       <p class="text-gray-700 leading-relaxed text-sm">
         {props.item.summary}
       </p>
diff --git a/frontend/src/types.ts b/frontend/src/types.ts
index 659cbba..d9be0e7 100644
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@@ -114,6 +114,7 @@ export interface NewsItem {
   title: string;
   url: string;
   summary: string;
+  date?: string | null;
 }
 
 export interface NewsSection {