feat: save publication date in article history and show in synthesis

- Add published_date column to article_history table
- Add date field to NewsItem (serialized in synthesis JSONB)
- Pass LLM-extracted date through ArticleTrace to article history
- Display date below article title in SynthesisDetail page

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
oabrivard 3 months ago
parent de25a08d51
commit c5a56c8fb8

@ -117,7 +117,7 @@ cd frontend && npx tsc --noEmit
- `GET /api/v1/admin/users` — user list - `GET /api/v1/admin/users` — user list
- `PUT /api/v1/admin/users/:id/role` — role management - `PUT /api/v1/admin/users/:id/role` — role management
## Database (23 migrations) ## Database (24 migrations)
Tables: `users`, `sessions`, `magic_link_tokens`, `user_settings`, `sources`, `syntheses`, `admin_providers`, `admin_rate_limits`, `user_api_keys`, `audit_log` Tables: `users`, `sessions`, `magic_link_tokens`, `user_settings`, `sources`, `syntheses`, `admin_providers`, `admin_rate_limits`, `user_api_keys`, `audit_log`
## Environment Variables ## Environment Variables

@ -0,0 +1,2 @@
-- Add published_date to article_history for LLM-extracted dates.
-- The column is declared without NOT NULL or DEFAULT, so rows that already
-- exist, and new rows written without a date, hold NULL.
-- It is TEXT rather than DATE: the value is bound as an optional string by the
-- insert queries. NOTE(review): the string format is not enforced here --
-- confirm what the LLM actually returns before sorting or filtering on it.
ALTER TABLE article_history ADD COLUMN published_date TEXT;

@ -22,6 +22,7 @@ pub struct ArticleHistoryEntry {
pub status: String, pub status: String,
pub scraped_ok: bool, pub scraped_ok: bool,
pub job_id: Uuid, pub job_id: Uuid,
pub published_date: Option<String>,
} }
/// Row returned from article_history queries. /// Row returned from article_history queries.
@ -106,11 +107,12 @@ pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry]
let statuses: Vec<&str> = entries.iter().map(|e| e.status.as_str()).collect(); let statuses: Vec<&str> = entries.iter().map(|e| e.status.as_str()).collect();
let scraped_oks: Vec<bool> = entries.iter().map(|e| e.scraped_ok).collect(); let scraped_oks: Vec<bool> = entries.iter().map(|e| e.scraped_ok).collect();
let job_ids: Vec<Uuid> = entries.iter().map(|e| e.job_id).collect(); let job_ids: Vec<Uuid> = entries.iter().map(|e| e.job_id).collect();
let published_dates: Vec<Option<&str>> = entries.iter().map(|e| e.published_date.as_deref()).collect();
sqlx::query( sqlx::query(
r#" r#"
INSERT INTO article_history (user_id, url, url_hash, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id) INSERT INTO article_history (user_id, url, url_hash, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, published_date)
SELECT * FROM unnest($1::uuid[], $2::text[], $3::text[], $4::text[], $5::text[], $6::text[], $7::text[], $8::uuid[], $9::text[], $10::bool[], $11::uuid[]) SELECT * FROM unnest($1::uuid[], $2::text[], $3::text[], $4::text[], $5::text[], $6::text[], $7::text[], $8::uuid[], $9::text[], $10::bool[], $11::uuid[], $12::text[])
"#, "#,
) )
.bind(&user_ids) .bind(&user_ids)
@ -124,6 +126,7 @@ pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry]
.bind(&statuses) .bind(&statuses)
.bind(&scraped_oks) .bind(&scraped_oks)
.bind(&job_ids) .bind(&job_ids)
.bind(&published_dates)
.execute(pool) .execute(pool)
.await?; .await?;
@ -134,8 +137,8 @@ pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry]
pub async fn insert_entry(pool: &PgPool, entry: &ArticleHistoryEntry) -> Result<(), AppError> { pub async fn insert_entry(pool: &PgPool, entry: &ArticleHistoryEntry) -> Result<(), AppError> {
sqlx::query( sqlx::query(
r#" r#"
INSERT INTO article_history (user_id, url_hash, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id) INSERT INTO article_history (user_id, url_hash, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, published_date)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
"#, "#,
) )
.bind(entry.user_id) .bind(entry.user_id)
@ -149,6 +152,7 @@ pub async fn insert_entry(pool: &PgPool, entry: &ArticleHistoryEntry) -> Result<
.bind(&entry.status) .bind(&entry.status)
.bind(entry.scraped_ok) .bind(entry.scraped_ok)
.bind(entry.job_id) .bind(entry.job_id)
.bind(&entry.published_date)
.execute(pool) .execute(pool)
.await?; .await?;
Ok(()) Ok(())

@ -14,6 +14,8 @@ pub struct NewsItem {
pub title: String, pub title: String,
pub url: String, pub url: String,
pub summary: String, pub summary: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub date: Option<String>,
} }
/// A named section containing a list of news items. /// A named section containing a list of news items.
@ -201,6 +203,7 @@ mod tests {
title: "Test Article".into(), title: "Test Article".into(),
url: "https://example.com/article".into(), url: "https://example.com/article".into(),
summary: "A brief summary of the article content.".into(), summary: "A brief summary of the article content.".into(),
date: None,
}; };
let json = serde_json::to_value(&item).unwrap(); let json = serde_json::to_value(&item).unwrap();
@ -220,11 +223,13 @@ mod tests {
title: "Article 1".into(), title: "Article 1".into(),
url: "https://example.com/1".into(), url: "https://example.com/1".into(),
summary: "Summary 1".into(), summary: "Summary 1".into(),
date: None,
}, },
NewsItem { NewsItem {
title: "Article 2".into(), title: "Article 2".into(),
url: "https://example.com/2".into(), url: "https://example.com/2".into(),
summary: "Summary 2".into(), summary: "Summary 2".into(),
date: None,
}, },
], ],
}; };

@ -278,11 +278,13 @@ mod tests {
title: "OpenAI lance GPT-5".into(), title: "OpenAI lance GPT-5".into(),
url: "https://openai.com/gpt5".into(), url: "https://openai.com/gpt5".into(),
summary: "OpenAI a annonce GPT-5.".into(), summary: "OpenAI a annonce GPT-5.".into(),
date: None,
}, },
NewsItem { NewsItem {
title: "Google DeepMind Gemini 3".into(), title: "Google DeepMind Gemini 3".into(),
url: "https://deepmind.google/gemini3".into(), url: "https://deepmind.google/gemini3".into(),
summary: "DeepMind presente Gemini 3.".into(), summary: "DeepMind presente Gemini 3.".into(),
date: None,
}, },
], ],
}, },
@ -292,6 +294,7 @@ mod tests {
title: "Nouveau papier RLHF".into(), title: "Nouveau papier RLHF".into(),
url: "https://arxiv.org/abs/2026.12345".into(), url: "https://arxiv.org/abs/2026.12345".into(),
summary: "Approche RLHF prometteuse.".into(), summary: "Approche RLHF prometteuse.".into(),
date: None,
}], }],
}, },
] ]
@ -337,6 +340,7 @@ mod tests {
title: "Title with \"quotes\" & <angle>".into(), title: "Title with \"quotes\" & <angle>".into(),
url: "https://example.com/test?a=1&b=2".into(), url: "https://example.com/test?a=1&b=2".into(),
summary: "Summary with <b>bold</b> attempt.".into(), summary: "Summary with <b>bold</b> attempt.".into(),
date: None,
}], }],
}]; }];

@ -335,12 +335,14 @@ mod tests {
summary: summary:
"OpenAI a annonce la sortie de GPT-5 avec des capacites ameliorees." "OpenAI a annonce la sortie de GPT-5 avec des capacites ameliorees."
.into(), .into(),
date: None,
}, },
NewsItem { NewsItem {
title: "Google DeepMind publie Gemini 3".into(), title: "Google DeepMind publie Gemini 3".into(),
url: "https://deepmind.google/gemini3".into(), url: "https://deepmind.google/gemini3".into(),
summary: summary:
"DeepMind presente Gemini 3, son nouveau modele multimodal.".into(), "DeepMind presente Gemini 3, son nouveau modele multimodal.".into(),
date: None,
}, },
], ],
}, },
@ -350,6 +352,7 @@ mod tests {
title: "Nouveau papier sur le RLHF".into(), title: "Nouveau papier sur le RLHF".into(),
url: "https://arxiv.org/abs/2026.12345".into(), url: "https://arxiv.org/abs/2026.12345".into(),
summary: "Une nouvelle approche du RLHF prometteuse.".into(), summary: "Une nouvelle approche du RLHF prometteuse.".into(),
date: None,
}], }],
}, },
] ]

@ -386,6 +386,7 @@ pub async fn run_generation_inner(
url, title: "", source_type: "personalized_source", url, title: "", source_type: "personalized_source",
source_url: Some(source_url), category: None, synthesis_id: None, source_url: Some(source_url), category: None, synthesis_id: None,
status: "filtered_history", scraped_ok: false, status: "filtered_history", scraped_ok: false,
published_date: None,
})); }));
} }
} }
@ -429,6 +430,7 @@ pub async fn run_generation_inner(
url: &url, title: "", source_type: "personalized_source", url: &url, title: "", source_type: "personalized_source",
source_url: Some(&source_url), category: None, synthesis_id: None, source_url: Some(&source_url), category: None, synthesis_id: None,
status: "filtered_diversity", scraped_ok: false, status: "filtered_diversity", scraped_ok: false,
published_date: None,
})); }));
continue; continue;
} }
@ -463,6 +465,7 @@ pub async fn run_generation_inner(
url: &final_url, title: &page_title, source_type: "personalized_source", url: &final_url, title: &page_title, source_type: "personalized_source",
source_url: Some(&source_url), category: None, synthesis_id: None, source_url: Some(&source_url), category: None, synthesis_id: None,
status: reason, scraped_ok: false, status: reason, scraped_ok: false,
published_date: None,
})); }));
} else { } else {
scraped_articles.push((final_url, source_url, body_text, page_title)); scraped_articles.push((final_url, source_url, body_text, page_title));
@ -534,6 +537,7 @@ pub async fn run_generation_inner(
url: &final_url, title: &page_title, source_type: "personalized_source", url: &final_url, title: &page_title, source_type: "personalized_source",
source_url: Some(&source_url), category: None, synthesis_id: None, source_url: Some(&source_url), category: None, synthesis_id: None,
status: "filtered_too_old", scraped_ok: true, status: "filtered_too_old", scraped_ok: true,
published_date: Some(date_str),
})); }));
continue; continue;
} }
@ -548,10 +552,12 @@ pub async fn run_generation_inner(
continue; continue;
}; };
let llm_date = class_response.get("date").and_then(|d| d.as_str()).filter(|s| !s.is_empty()).map(|s| s.to_string());
article_scraped.entry(final_cat_key).or_default().push(NewsItem { article_scraped.entry(final_cat_key).or_default().push(NewsItem {
title: llm_title, title: llm_title,
url: final_url.clone(), url: final_url.clone(),
summary: llm_summary, summary: llm_summary,
date: llm_date,
}); });
*filled_counts.entry(final_cat_name).or_insert(0) += 1; *filled_counts.entry(final_cat_name).or_insert(0) += 1;
@ -607,6 +613,7 @@ pub async fn run_generation_inner(
url: &result.url, title: &result.title, source_type: "brave_search", url: &result.url, title: &result.title, source_type: "brave_search",
source_url: None, category: None, synthesis_id: None, source_url: None, category: None, synthesis_id: None,
status: reason, scraped_ok: false, status: reason, scraped_ok: false,
published_date: None,
})); }));
continue; continue;
} }
@ -663,6 +670,7 @@ pub async fn run_generation_inner(
url: &final_url, title: &page_title, source_type: "brave_search", url: &final_url, title: &page_title, source_type: "brave_search",
source_url: None, category: None, synthesis_id: None, source_url: None, category: None, synthesis_id: None,
status: reason, scraped_ok: false, status: reason, scraped_ok: false,
published_date: None,
})); }));
} else { } else {
scraped_articles.push((final_url, body_text, page_title)); scraped_articles.push((final_url, body_text, page_title));
@ -732,6 +740,7 @@ pub async fn run_generation_inner(
url: &final_url, title: &page_title, source_type: "brave_search", url: &final_url, title: &page_title, source_type: "brave_search",
source_url: None, category: None, synthesis_id: None, source_url: None, category: None, synthesis_id: None,
status: "filtered_too_old", scraped_ok: true, status: "filtered_too_old", scraped_ok: true,
published_date: Some(date_str),
})); }));
continue; continue;
} }
@ -746,10 +755,12 @@ pub async fn run_generation_inner(
continue; continue;
}; };
let llm_date = class_response.get("date").and_then(|d| d.as_str()).filter(|s| !s.is_empty()).map(|s| s.to_string());
article_scraped.entry(final_cat_key).or_default().push(NewsItem { article_scraped.entry(final_cat_key).or_default().push(NewsItem {
title: llm_title, title: llm_title,
url: final_url.clone(), url: final_url.clone(),
summary: llm_summary, summary: llm_summary,
date: llm_date,
}); });
*filled_counts.entry(final_cat_name).or_insert(0) += 1; *filled_counts.entry(final_cat_name).or_insert(0) += 1;
@ -803,6 +814,7 @@ pub async fn run_generation_inner(
url: &item.url, title: &item.title, source_type: "web_search", url: &item.url, title: &item.title, source_type: "web_search",
source_url: None, category: None, synthesis_id: None, source_url: None, category: None, synthesis_id: None,
status: reason, scraped_ok: false, status: reason, scraped_ok: false,
published_date: None,
})); }));
continue; continue;
} }
@ -828,6 +840,7 @@ pub async fn run_generation_inner(
url: &final_url, title: &item.title, source_type: "web_search", url: &final_url, title: &item.title, source_type: "web_search",
source_url: None, category: None, synthesis_id: None, source_url: None, category: None, synthesis_id: None,
status: reason, scraped_ok: false, status: reason, scraped_ok: false,
published_date: None,
})); }));
continue; continue;
} }
@ -836,6 +849,7 @@ pub async fn run_generation_inner(
title: item.title, title: item.title,
url: final_url, url: final_url,
summary: item.summary, summary: item.summary,
date: None,
}); });
if let Some(domain) = extract_domain(&item.url) { if let Some(domain) = extract_domain(&item.url) {
@ -891,6 +905,7 @@ pub async fn run_generation_inner(
source_url: if source_type == "personalized_source" { url_source.get(&item.url).map(|s| s.as_str()) } else { None }, source_url: if source_type == "personalized_source" { url_source.get(&item.url).map(|s| s.as_str()) } else { None },
category: Some(&section.title), synthesis_id: Some(synthesis.id), category: Some(&section.title), synthesis_id: Some(synthesis.id),
status: "used", scraped_ok: true, status: "used", scraped_ok: true,
published_date: item.date.as_deref(),
})); }));
} }
} }
@ -948,6 +963,7 @@ struct ArticleTrace<'a> {
synthesis_id: Option<Uuid>, synthesis_id: Option<Uuid>,
status: &'a str, status: &'a str,
scraped_ok: bool, scraped_ok: bool,
published_date: Option<&'a str>,
} }
/// Build an article history entry from trace parameters (no DB call). /// Build an article history entry from trace parameters (no DB call).
@ -968,6 +984,7 @@ fn build_trace_entry(
status: trace.status.to_string(), status: trace.status.to_string(),
scraped_ok: trace.scraped_ok, scraped_ok: trace.scraped_ok,
job_id, job_id,
published_date: trace.published_date.map(|s| s.to_string()),
} }
} }

@ -32,6 +32,9 @@ const NewsItemCard: Component<{ item: NewsItemType }> = (props) => {
<ExternalLink class="h-4 w-4 text-gray-400 flex-shrink-0" /> <ExternalLink class="h-4 w-4 text-gray-400 flex-shrink-0" />
</a> </a>
</h3> </h3>
<Show when={props.item.date}>
<p class="text-xs text-gray-400 mb-1">{props.item.date}</p>
</Show>
<p class="text-gray-700 leading-relaxed text-sm"> <p class="text-gray-700 leading-relaxed text-sm">
{props.item.summary} {props.item.summary}
</p> </p>

@ -114,6 +114,7 @@ export interface NewsItem {
title: string; title: string;
url: string; url: string;
summary: string; summary: string;
date?: string | null;
} }
export interface NewsSection { export interface NewsSection {

Loading…
Cancel
Save