From eba721266f2bf424d768e4549f68ff20436991c3 Mon Sep 17 00:00:00 2001 From: oabrivard Date: Tue, 24 Mar 2026 18:52:26 +0100 Subject: [PATCH] feat: article history entry struct + insert/query/cleanup functions Co-Authored-By: Claude Sonnet 4.6 --- backend/src/db/article_history.rs | 136 +++++++++++++++++++++++++++++- 1 file changed, 135 insertions(+), 1 deletion(-) diff --git a/backend/src/db/article_history.rs b/backend/src/db/article_history.rs index be78650..7152109 100644 --- a/backend/src/db/article_history.rs +++ b/backend/src/db/article_history.rs @@ -3,10 +3,43 @@ //! Prevents the same article from appearing in multiple syntheses. use std::collections::HashSet; +use chrono::{DateTime, Utc}; +use serde::Serialize; use sqlx::PgPool; use uuid::Uuid; use crate::errors::AppError; +/// Entry for inserting into article_history with full tracing metadata. +pub struct ArticleHistoryEntry { + pub user_id: Uuid, + pub url: String, + pub url_hash: String, + pub title: String, + pub source_type: String, + pub source_url: Option, + pub category: Option, + pub synthesis_id: Option, + pub status: String, + pub scraped_ok: bool, + pub job_id: Uuid, +} + +/// Row returned from article_history queries. +#[derive(Debug, Clone, Serialize, sqlx::FromRow)] +pub struct ArticleHistoryRow { + pub id: Uuid, + pub url: String, + pub title: String, + pub source_type: String, + pub source_url: Option, + pub category: Option, + pub synthesis_id: Option, + pub status: String, + pub scraped_ok: bool, + pub job_id: Uuid, + pub created_at: DateTime, +} + /// Check which URL hashes already exist in history for this user. /// /// Returns the set of url_hashes that were found (i.e., already used). @@ -56,8 +89,109 @@ pub async fn insert_urls( Ok(()) } +/// Insert a single article history entry with full tracing metadata. +pub async fn insert_entry(pool: &PgPool, entry: &ArticleHistoryEntry) -> Result<(), AppError> { + sqlx::query( + r#" + INSERT INTO article_history (user_id, url_hash, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) + "#, + ) + .bind(entry.user_id) + .bind(&entry.url_hash) + .bind(&entry.url) + .bind(&entry.title) + .bind(&entry.source_type) + .bind(&entry.source_url) + .bind(&entry.category) + .bind(entry.synthesis_id) + .bind(&entry.status) + .bind(entry.scraped_ok) + .bind(entry.job_id) + .execute(pool) + .await?; + Ok(()) +} + +/// List article history with optional filters, paginated. +pub async fn list_history( + pool: &PgPool, + user_id: Uuid, + limit: i64, + offset: i64, + status_filter: Option<&str>, + source_type_filter: Option<&str>, +) -> Result, AppError> { + let rows = sqlx::query_as::<_, ArticleHistoryRow>( + r#" + SELECT id, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, created_at + FROM article_history + WHERE user_id = $1 + AND ($4::TEXT IS NULL OR status = $4) + AND ($5::TEXT IS NULL OR source_type = $5) + ORDER BY created_at DESC + LIMIT $2 OFFSET $3 + "#, + ) + .bind(user_id) + .bind(limit) + .bind(offset) + .bind(status_filter) + .bind(source_type_filter) + .fetch_all(pool) + .await?; + Ok(rows) +} + +/// Count article history entries with optional filters. +pub async fn count_history( + pool: &PgPool, + user_id: Uuid, + status_filter: Option<&str>, + source_type_filter: Option<&str>, +) -> Result { + let row = sqlx::query_scalar::<_, i64>( + r#" + SELECT COUNT(*) FROM article_history + WHERE user_id = $1 + AND ($2::TEXT IS NULL OR status = $2) + AND ($3::TEXT IS NULL OR source_type = $3) + "#, + ) + .bind(user_id) + .bind(status_filter) + .bind(source_type_filter) + .fetch_one(pool) + .await?; + Ok(row) +} + +/// List all article history entries for a generation job. +pub async fn list_by_job_id( + pool: &PgPool, + user_id: Uuid, + job_id: Uuid, +) -> Result, AppError> { + let rows = sqlx::query_as::<_, ArticleHistoryRow>( + r#" + SELECT id, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, created_at + FROM article_history + WHERE user_id = $1 AND job_id = $2 + ORDER BY created_at ASC + "#, + ) + .bind(user_id) + .bind(job_id) + .fetch_all(pool) + .await?; + Ok(rows) +} + /// Delete history entries older than N days for this user. /// +/// Only removes entries where synthesis_id IS NULL (dropped articles). +/// Used articles linked to syntheses stay until the synthesis is deleted. +/// /// Returns the number of deleted rows. pub async fn cleanup_old( pool: &PgPool, @@ -65,7 +199,7 @@ pub async fn cleanup_old( days: i32, ) -> Result { let result = sqlx::query( - "DELETE FROM article_history WHERE user_id = $1 AND created_at < now() - make_interval(days => $2)", + "DELETE FROM article_history WHERE user_id = $1 AND created_at < now() - make_interval(days => $2) AND synthesis_id IS NULL", ) .bind(user_id) .bind(days)