feat: save publication date in article history and show in synthesis

- Add published_date column to article_history table
- Add date field to NewsItem (serialized in synthesis JSONB)
- Pass LLM-extracted date through ArticleTrace to article history
- Display date below article title in SynthesisDetail page

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
3 months ago · c5a56c8fb8
parent de25a08d51
commit c5a56c8fb8
9 changed files with 44 additions and 5 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -117,7 +117,7 @@ cd frontend && npx tsc --noEmit
 - `GET /api/v1/admin/users` — user list
 - `PUT /api/v1/admin/users/:id/role` — role management
-## Database (23 migrations)
+## Database (24 migrations)
 Tables: `users`, `sessions`, `magic_link_tokens`, `user_settings`, `sources`, `syntheses`, `admin_providers`, `admin_rate_limits`, `user_api_keys`, `audit_log`
 ## Environment Variables
--- a/backend/migrations/20260326000024_add_published_date_to_article_history.sql
+++ b/backend/migrations/20260326000024_add_published_date_to_article_history.sql
@ -0,0 +1,2 @@
 -- Add published_date to article_history for LLM-extracted dates
 ALTER TABLE article_history ADD COLUMN published_date TEXT;
--- a/backend/src/db/article_history.rs
+++ b/backend/src/db/article_history.rs
@ -22,6 +22,7 @@ pub struct ArticleHistoryEntry {
    pub status: String,
    pub scraped_ok: bool,
    pub job_id: Uuid,
    pub published_date: Option<String>,
 }
 /// Row returned from article_history queries.
@ -106,11 +107,12 @@ pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry]
    let statuses: Vec<&str> = entries.iter().map(|e| e.status.as_str()).collect();
    let scraped_oks: Vec<bool> = entries.iter().map(|e| e.scraped_ok).collect();
    let job_ids: Vec<Uuid> = entries.iter().map(|e| e.job_id).collect();
    let published_dates: Vec<Option<&str>> = entries.iter().map(|e| e.published_date.as_deref()).collect();
    sqlx::query(
        r#"
-        INSERT INTO article_history (user_id, url, url_hash, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id)
+        INSERT INTO article_history (user_id, url, url_hash, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, published_date)
-        SELECT * FROM unnest($1::uuid[], $2::text[], $3::text[], $4::text[], $5::text[], $6::text[], $7::text[], $8::uuid[], $9::text[], $10::bool[], $11::uuid[])
+        SELECT * FROM unnest($1::uuid[], $2::text[], $3::text[], $4::text[], $5::text[], $6::text[], $7::text[], $8::uuid[], $9::text[], $10::bool[], $11::uuid[], $12::text[])
        "#,
    )
    .bind(&user_ids)
@ -124,6 +126,7 @@ pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry]
    .bind(&statuses)
    .bind(&scraped_oks)
    .bind(&job_ids)
    .bind(&published_dates)
    .execute(pool)
    .await?;
@ -134,8 +137,8 @@ pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry]
 pub async fn insert_entry(pool: &PgPool, entry: &ArticleHistoryEntry) -> Result<(), AppError> {
    sqlx::query(
        r#"
-        INSERT INTO article_history (user_id, url_hash, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id)
+        INSERT INTO article_history (user_id, url_hash, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, published_date)
-        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
+        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
        "#,
    )
    .bind(entry.user_id)
@ -149,6 +152,7 @@ pub async fn insert_entry(pool: &PgPool, entry: &ArticleHistoryEntry) -> Result<
    .bind(&entry.status)
    .bind(entry.scraped_ok)
    .bind(entry.job_id)
    .bind(&entry.published_date)
    .execute(pool)
    .await?;
    Ok(())
--- a/backend/src/models/synthesis.rs
+++ b/backend/src/models/synthesis.rs
@ -14,6 +14,8 @@ pub struct NewsItem {
    pub title: String,
    pub url: String,
    pub summary: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub date: Option<String>,
 }
 /// A named section containing a list of news items.
@ -201,6 +203,7 @@ mod tests {
            title: "Test Article".into(),
            url: "https://example.com/article".into(),
            summary: "A brief summary of the article content.".into(),
            date: None,
        };
        let json = serde_json::to_value(&item).unwrap();
@ -220,11 +223,13 @@ mod tests {
                    title: "Article 1".into(),
                    url: "https://example.com/1".into(),
                    summary: "Summary 1".into(),
                    date: None,
                },
                NewsItem {
                    title: "Article 2".into(),
                    url: "https://example.com/2".into(),
                    summary: "Summary 2".into(),
                    date: None,
                },
            ],
        };
--- a/backend/src/services/email.rs
+++ b/backend/src/services/email.rs
@ -278,11 +278,13 @@ mod tests {
                        title: "OpenAI lance GPT-5".into(),
                        url: "https://openai.com/gpt5".into(),
                        summary: "OpenAI a annonce GPT-5.".into(),
                        date: None,
                    },
                    NewsItem {
                        title: "Google DeepMind Gemini 3".into(),
                        url: "https://deepmind.google/gemini3".into(),
                        summary: "DeepMind presente Gemini 3.".into(),
                        date: None,
                    },
                ],
            },
@ -292,6 +294,7 @@ mod tests {
                    title: "Nouveau papier RLHF".into(),
                    url: "https://arxiv.org/abs/2026.12345".into(),
                    summary: "Approche RLHF prometteuse.".into(),
                    date: None,
                }],
            },
        ]
@ -337,6 +340,7 @@ mod tests {
                title: "Title with \"quotes\" & <angle>".into(),
                url: "https://example.com/test?a=1&b=2".into(),
                summary: "Summary with <b>bold</b> attempt.".into(),
                date: None,
            }],
        }];
--- a/backend/src/services/export.rs
+++ b/backend/src/services/export.rs
@ -335,12 +335,14 @@ mod tests {
                        summary:
                            "OpenAI a annonce la sortie de GPT-5 avec des capacites ameliorees."
                                .into(),
                        date: None,
                    },
                    NewsItem {
                        title: "Google DeepMind publie Gemini 3".into(),
                        url: "https://deepmind.google/gemini3".into(),
                        summary:
                            "DeepMind presente Gemini 3, son nouveau modele multimodal.".into(),
                        date: None,
                    },
                ],
            },
@ -350,6 +352,7 @@ mod tests {
                    title: "Nouveau papier sur le RLHF".into(),
                    url: "https://arxiv.org/abs/2026.12345".into(),
                    summary: "Une nouvelle approche du RLHF prometteuse.".into(),
                    date: None,
                }],
            },
        ]
--- a/backend/src/services/synthesis.rs
+++ b/backend/src/services/synthesis.rs
@ -386,6 +386,7 @@ pub async fn run_generation_inner(
                            url, title: "", source_type: "personalized_source",
                            source_url: Some(source_url), category: None, synthesis_id: None,
                            status: "filtered_history", scraped_ok: false,
                            published_date: None,
                        }));
                    }
                }
@ -429,6 +430,7 @@ pub async fn run_generation_inner(
                        url: &url, title: "", source_type: "personalized_source",
                        source_url: Some(&source_url), category: None, synthesis_id: None,
                        status: "filtered_diversity", scraped_ok: false,
                        published_date: None,
                    }));
                    continue;
                }
@ -463,6 +465,7 @@ pub async fn run_generation_inner(
                            url: &final_url, title: &page_title, source_type: "personalized_source",
                            source_url: Some(&source_url), category: None, synthesis_id: None,
                            status: reason, scraped_ok: false,
                            published_date: None,
                        }));
                    } else {
                        scraped_articles.push((final_url, source_url, body_text, page_title));
@ -534,6 +537,7 @@ pub async fn run_generation_inner(
                                        url: &final_url, title: &page_title, source_type: "personalized_source",
                                        source_url: Some(&source_url), category: None, synthesis_id: None,
                                        status: "filtered_too_old", scraped_ok: true,
                                        published_date: Some(date_str),
                                    }));
                                    continue;
                                }
@ -548,10 +552,12 @@ pub async fn run_generation_inner(
                        continue;
                    };
                    let llm_date = class_response.get("date").and_then(|d| d.as_str()).filter(|s| !s.is_empty()).map(|s| s.to_string());
                    article_scraped.entry(final_cat_key).or_default().push(NewsItem {
                        title: llm_title,
                        url: final_url.clone(),
                        summary: llm_summary,
                        date: llm_date,
                    });
                    *filled_counts.entry(final_cat_name).or_insert(0) += 1;
@ -607,6 +613,7 @@ pub async fn run_generation_inner(
                        url: &result.url, title: &result.title, source_type: "brave_search",
                        source_url: None, category: None, synthesis_id: None,
                        status: reason, scraped_ok: false,
                        published_date: None,
                    }));
                    continue;
                }
@ -663,6 +670,7 @@ pub async fn run_generation_inner(
                                    url: &final_url, title: &page_title, source_type: "brave_search",
                                    source_url: None, category: None, synthesis_id: None,
                                    status: reason, scraped_ok: false,
                                    published_date: None,
                                }));
                            } else {
                                scraped_articles.push((final_url, body_text, page_title));
@ -732,6 +740,7 @@ pub async fn run_generation_inner(
                                                url: &final_url, title: &page_title, source_type: "brave_search",
                                                source_url: None, category: None, synthesis_id: None,
                                                status: "filtered_too_old", scraped_ok: true,
                                                published_date: Some(date_str),
                                            }));
                                            continue;
                                        }
@ -746,10 +755,12 @@ pub async fn run_generation_inner(
                                continue;
                            };
                            let llm_date = class_response.get("date").and_then(|d| d.as_str()).filter(|s| !s.is_empty()).map(|s| s.to_string());
                            article_scraped.entry(final_cat_key).or_default().push(NewsItem {
                                title: llm_title,
                                url: final_url.clone(),
                                summary: llm_summary,
                                date: llm_date,
                            });
                            *filled_counts.entry(final_cat_name).or_insert(0) += 1;
@ -803,6 +814,7 @@ pub async fn run_generation_inner(
                            url: &item.url, title: &item.title, source_type: "web_search",
                            source_url: None, category: None, synthesis_id: None,
                            status: reason, scraped_ok: false,
                            published_date: None,
                        }));
                        continue;
                    }
@ -828,6 +840,7 @@ pub async fn run_generation_inner(
                        url: &final_url, title: &item.title, source_type: "web_search",
                        source_url: None, category: None, synthesis_id: None,
                        status: reason, scraped_ok: false,
                        published_date: None,
                    }));
                    continue;
                }
@ -836,6 +849,7 @@ pub async fn run_generation_inner(
                    title: item.title,
                    url: final_url,
                    summary: item.summary,
                    date: None,
                });
                if let Some(domain) = extract_domain(&item.url) {
@ -891,6 +905,7 @@ pub async fn run_generation_inner(
                    source_url: if source_type == "personalized_source" { url_source.get(&item.url).map(|s| s.as_str()) } else { None },
                    category: Some(&section.title), synthesis_id: Some(synthesis.id),
                    status: "used", scraped_ok: true,
                    published_date: item.date.as_deref(),
                }));
            }
        }
@ -948,6 +963,7 @@ struct ArticleTrace<'a> {
    synthesis_id: Option<Uuid>,
    status: &'a str,
    scraped_ok: bool,
    published_date: Option<&'a str>,
 }
 /// Build an article history entry from trace parameters (no DB call).
@ -968,6 +984,7 @@ fn build_trace_entry(
        status: trace.status.to_string(),
        scraped_ok: trace.scraped_ok,
        job_id,
        published_date: trace.published_date.map(|s| s.to_string()),
    }
 }
--- a/frontend/src/pages/SynthesisDetail.tsx
+++ b/frontend/src/pages/SynthesisDetail.tsx
@ -32,6 +32,9 @@ const NewsItemCard: Component<{ item: NewsItemType }> = (props) => {
          <ExternalLink class="h-4 w-4 text-gray-400 flex-shrink-0" />
        </a>
      </h3>
      <Show when={props.item.date}>
        <p class="text-xs text-gray-400 mb-1">{props.item.date}</p>
      </Show>
      <p class="text-gray-700 leading-relaxed text-sm">
        {props.item.summary}
      </p>
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@ -114,6 +114,7 @@ export interface NewsItem {
  title: string;
  url: string;
  summary: string;
  date?: string | null;
 }
 export interface NewsSection {
		`@ -0,0 +1,2 @@`
							`-- Add published_date to article_history for LLM-extracted dates`
							`ALTER TABLE article_history ADD COLUMN published_date TEXT;`