feat: article history entry struct + insert/query/cleanup functions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
master
oabrivard 3 months ago
parent d7afd08eaf
commit eba721266f

@ -3,10 +3,43 @@
//! Prevents the same article from appearing in multiple syntheses.
use std::collections::HashSet;
use chrono::{DateTime, Utc};
use serde::Serialize;
use sqlx::PgPool;
use uuid::Uuid;
use crate::errors::AppError;
/// Entry for inserting into article_history with full tracing metadata.
pub struct ArticleHistoryEntry {
pub user_id: Uuid,
pub url: String,
pub url_hash: String,
pub title: String,
pub source_type: String,
pub source_url: Option<String>,
pub category: Option<String>,
pub synthesis_id: Option<Uuid>,
pub status: String,
pub scraped_ok: bool,
pub job_id: Uuid,
}
/// Row returned from article_history queries.
#[derive(Debug, Clone, Serialize, sqlx::FromRow)]
pub struct ArticleHistoryRow {
pub id: Uuid,
pub url: String,
pub title: String,
pub source_type: String,
pub source_url: Option<String>,
pub category: Option<String>,
pub synthesis_id: Option<Uuid>,
pub status: String,
pub scraped_ok: bool,
pub job_id: Uuid,
pub created_at: DateTime<Utc>,
}
/// Check which URL hashes already exist in history for this user.
///
/// Returns the set of url_hashes that were found (i.e., already used).
@ -56,8 +89,109 @@ pub async fn insert_urls(
Ok(())
}
/// Insert a single article history entry with full tracing metadata.
pub async fn insert_entry(pool: &PgPool, entry: &ArticleHistoryEntry) -> Result<(), AppError> {
sqlx::query(
r#"
INSERT INTO article_history (user_id, url_hash, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
"#,
)
.bind(entry.user_id)
.bind(&entry.url_hash)
.bind(&entry.url)
.bind(&entry.title)
.bind(&entry.source_type)
.bind(&entry.source_url)
.bind(&entry.category)
.bind(entry.synthesis_id)
.bind(&entry.status)
.bind(entry.scraped_ok)
.bind(entry.job_id)
.execute(pool)
.await?;
Ok(())
}
/// List article history with optional filters, paginated.
pub async fn list_history(
pool: &PgPool,
user_id: Uuid,
limit: i64,
offset: i64,
status_filter: Option<&str>,
source_type_filter: Option<&str>,
) -> Result<Vec<ArticleHistoryRow>, AppError> {
let rows = sqlx::query_as::<_, ArticleHistoryRow>(
r#"
SELECT id, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, created_at
FROM article_history
WHERE user_id = $1
AND ($4::TEXT IS NULL OR status = $4)
AND ($5::TEXT IS NULL OR source_type = $5)
ORDER BY created_at DESC
LIMIT $2 OFFSET $3
"#,
)
.bind(user_id)
.bind(limit)
.bind(offset)
.bind(status_filter)
.bind(source_type_filter)
.fetch_all(pool)
.await?;
Ok(rows)
}
/// Count article history entries with optional filters.
pub async fn count_history(
pool: &PgPool,
user_id: Uuid,
status_filter: Option<&str>,
source_type_filter: Option<&str>,
) -> Result<i64, AppError> {
let row = sqlx::query_scalar::<_, i64>(
r#"
SELECT COUNT(*) FROM article_history
WHERE user_id = $1
AND ($2::TEXT IS NULL OR status = $2)
AND ($3::TEXT IS NULL OR source_type = $3)
"#,
)
.bind(user_id)
.bind(status_filter)
.bind(source_type_filter)
.fetch_one(pool)
.await?;
Ok(row)
}
/// List all article history entries for a generation job.
pub async fn list_by_job_id(
pool: &PgPool,
user_id: Uuid,
job_id: Uuid,
) -> Result<Vec<ArticleHistoryRow>, AppError> {
let rows = sqlx::query_as::<_, ArticleHistoryRow>(
r#"
SELECT id, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, created_at
FROM article_history
WHERE user_id = $1 AND job_id = $2
ORDER BY created_at ASC
"#,
)
.bind(user_id)
.bind(job_id)
.fetch_all(pool)
.await?;
Ok(rows)
}
/// Delete history entries older than N days for this user.
///
/// Only removes entries where synthesis_id IS NULL (dropped articles).
/// Used articles linked to syntheses stay until the synthesis is deleted.
///
/// Returns the number of deleted rows.
pub async fn cleanup_old(
pool: &PgPool,
@ -65,7 +199,7 @@ pub async fn cleanup_old(
days: i32,
) -> Result<u64, AppError> {
let result = sqlx::query(
"DELETE FROM article_history WHERE user_id = $1 AND created_at < now() - make_interval(days => $2)",
"DELETE FROM article_history WHERE user_id = $1 AND created_at < now() - make_interval(days => $2) AND synthesis_id IS NULL",
)
.bind(user_id)
.bind(days)

Loading…
Cancel
Save