docs: add article tracing implementation plan (7 tasks)
parent
445dad9963
commit
5a0495b02a
@ -0,0 +1,620 @@
|
|||||||
|
# Article Tracing — Implementation Plan
|
||||||
|
|
||||||
|
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||||
|
|
||||||
|
**Goal:** Track the origin and status of every article candidate in the pipeline, with frontend viewers for debugging synthesis quality.
|
||||||
|
|
||||||
|
**Architecture:** Enrich `article_history` table with metadata (source_type, status, job_id). Insert dropped articles at each filtering step. Two new API endpoints. Two frontend views (global history + per-synthesis provenance).
|
||||||
|
|
||||||
|
**Tech Stack:** Rust (sqlx), SolidJS, PostgreSQL
|
||||||
|
|
||||||
|
**Spec:** `docs/superpowers/specs/2026-03-24-article-tracing-design.md`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 1: Migration — enrich article_history + syntheses.job_id
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `backend/migrations/20260324000016_enrich_article_history.sql`
|
||||||
|
- Modify: `CLAUDE.md`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Create migration**
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Enrich article_history with tracing metadata
|
||||||
|
ALTER TABLE article_history ADD COLUMN title TEXT NOT NULL DEFAULT '';
|
||||||
|
ALTER TABLE article_history ADD COLUMN source_type TEXT NOT NULL DEFAULT 'unknown';
|
||||||
|
ALTER TABLE article_history ADD COLUMN source_url TEXT;
|
||||||
|
ALTER TABLE article_history ADD COLUMN category TEXT;
|
||||||
|
ALTER TABLE article_history ADD COLUMN synthesis_id UUID REFERENCES syntheses(id) ON DELETE SET NULL;
|
||||||
|
ALTER TABLE article_history ADD COLUMN status TEXT NOT NULL DEFAULT 'used';
|
||||||
|
ALTER TABLE article_history ADD COLUMN scraped_ok BOOLEAN NOT NULL DEFAULT true;
|
||||||
|
ALTER TABLE article_history ADD COLUMN job_id UUID NOT NULL DEFAULT gen_random_uuid();
|
||||||
|
|
||||||
|
-- Drop unique index — table is now a trace log
|
||||||
|
DROP INDEX idx_article_history_user_url;
|
||||||
|
CREATE INDEX idx_article_history_user_url ON article_history(user_id, url_hash);
|
||||||
|
CREATE INDEX idx_article_history_job_id ON article_history(job_id);
|
||||||
|
|
||||||
|
-- Store job_id on syntheses for direct provenance lookup
|
||||||
|
ALTER TABLE syntheses ADD COLUMN job_id UUID;
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Update CLAUDE.md migration count to 16**
|
||||||
|
|
||||||
|
- [ ] **Step 3: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add backend/migrations/20260324000016_enrich_article_history.sql CLAUDE.md
|
||||||
|
git commit -m "feat: enrich article_history with tracing metadata + syntheses.job_id"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 2: DB module — ArticleHistoryEntry + insert/query functions
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `backend/src/db/article_history.rs`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Add `ArticleHistoryEntry` struct and `insert_entry` function**
|
||||||
|
|
||||||
|
Add a struct for inserting trace entries and a function to insert them:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
|
||||||
|
/// Entry for inserting into article_history with full tracing metadata.
|
||||||
|
pub struct ArticleHistoryEntry {
|
||||||
|
pub user_id: Uuid,
|
||||||
|
pub url: String,
|
||||||
|
pub url_hash: String,
|
||||||
|
pub title: String,
|
||||||
|
pub source_type: String,
|
||||||
|
pub source_url: Option<String>,
|
||||||
|
pub category: Option<String>,
|
||||||
|
pub synthesis_id: Option<Uuid>,
|
||||||
|
pub status: String,
|
||||||
|
pub scraped_ok: bool,
|
||||||
|
pub job_id: Uuid,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Insert a single article history entry with full tracing metadata.
|
||||||
|
pub async fn insert_entry(pool: &PgPool, entry: &ArticleHistoryEntry) -> Result<(), AppError> {
|
||||||
|
sqlx::query(
|
||||||
|
r#"
|
||||||
|
INSERT INTO article_history (user_id, url_hash, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id)
|
||||||
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(entry.user_id)
|
||||||
|
.bind(&entry.url_hash)
|
||||||
|
.bind(&entry.url)
|
||||||
|
.bind(&entry.title)
|
||||||
|
.bind(&entry.source_type)
|
||||||
|
.bind(&entry.source_url)
|
||||||
|
.bind(&entry.category)
|
||||||
|
.bind(entry.synthesis_id)
|
||||||
|
.bind(&entry.status)
|
||||||
|
.bind(entry.scraped_ok)
|
||||||
|
.bind(entry.job_id)
|
||||||
|
.execute(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Add `ArticleHistoryRow` and query functions**
|
||||||
|
|
||||||
|
```rust
|
||||||
|
/// Row returned from article_history queries.
|
||||||
|
#[derive(Debug, Clone, serde::Serialize, sqlx::FromRow)]
|
||||||
|
pub struct ArticleHistoryRow {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub url: String,
|
||||||
|
pub title: String,
|
||||||
|
pub source_type: String,
|
||||||
|
pub source_url: Option<String>,
|
||||||
|
pub category: Option<String>,
|
||||||
|
pub synthesis_id: Option<Uuid>,
|
||||||
|
pub status: String,
|
||||||
|
pub scraped_ok: bool,
|
||||||
|
pub job_id: Uuid,
|
||||||
|
pub created_at: DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// List article history with optional filters, paginated.
|
||||||
|
pub async fn list_history(
|
||||||
|
pool: &PgPool,
|
||||||
|
user_id: Uuid,
|
||||||
|
limit: i64,
|
||||||
|
offset: i64,
|
||||||
|
status_filter: Option<&str>,
|
||||||
|
source_type_filter: Option<&str>,
|
||||||
|
) -> Result<Vec<ArticleHistoryRow>, AppError> {
|
||||||
|
let rows = sqlx::query_as::<_, ArticleHistoryRow>(
|
||||||
|
r#"
|
||||||
|
SELECT id, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, created_at
|
||||||
|
FROM article_history
|
||||||
|
WHERE user_id = $1
|
||||||
|
AND ($4::TEXT IS NULL OR status = $4)
|
||||||
|
AND ($5::TEXT IS NULL OR source_type = $5)
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT $2 OFFSET $3
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(limit)
|
||||||
|
.bind(offset)
|
||||||
|
.bind(status_filter)
|
||||||
|
.bind(source_type_filter)
|
||||||
|
.fetch_all(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(rows)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Count article history entries with optional filters.
|
||||||
|
pub async fn count_history(
|
||||||
|
pool: &PgPool,
|
||||||
|
user_id: Uuid,
|
||||||
|
status_filter: Option<&str>,
|
||||||
|
source_type_filter: Option<&str>,
|
||||||
|
) -> Result<i64, AppError> {
|
||||||
|
let row = sqlx::query_scalar::<_, i64>(
|
||||||
|
r#"
|
||||||
|
SELECT COUNT(*) FROM article_history
|
||||||
|
WHERE user_id = $1
|
||||||
|
AND ($2::TEXT IS NULL OR status = $2)
|
||||||
|
AND ($3::TEXT IS NULL OR source_type = $3)
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(status_filter)
|
||||||
|
.bind(source_type_filter)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(row)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// List all article history entries for a generation job.
|
||||||
|
pub async fn list_by_job_id(
|
||||||
|
pool: &PgPool,
|
||||||
|
user_id: Uuid,
|
||||||
|
job_id: Uuid,
|
||||||
|
) -> Result<Vec<ArticleHistoryRow>, AppError> {
|
||||||
|
let rows = sqlx::query_as::<_, ArticleHistoryRow>(
|
||||||
|
r#"
|
||||||
|
SELECT id, url, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id, created_at
|
||||||
|
FROM article_history
|
||||||
|
WHERE user_id = $1 AND job_id = $2
|
||||||
|
ORDER BY created_at ASC
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(job_id)
|
||||||
|
.fetch_all(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(rows)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 3: Update `cleanup_old` to preserve used entries**
|
||||||
|
|
||||||
|
Change the DELETE query to only remove entries where `synthesis_id IS NULL`:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
pub async fn cleanup_old(pool: &PgPool, user_id: Uuid, days: i32) -> Result<u64, AppError> {
|
||||||
|
let result = sqlx::query(
|
||||||
|
"DELETE FROM article_history WHERE user_id = $1 AND created_at < now() - make_interval(days => $2) AND synthesis_id IS NULL",
|
||||||
|
)
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(days)
|
||||||
|
.execute(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(result.rows_affected())
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 4: Run tests + commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd backend && cargo test --lib && cargo build
|
||||||
|
git add backend/src/db/article_history.rs
|
||||||
|
git commit -m "feat: article history entry struct + insert/query/cleanup functions"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 3: Update syntheses DB — save job_id
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `backend/src/db/syntheses.rs`
|
||||||
|
- Modify: `backend/src/models/synthesis.rs`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Add `job_id` to Synthesis model**
|
||||||
|
|
||||||
|
In `models/synthesis.rs`, add `pub job_id: Option<Uuid>` to the `Synthesis` struct. It's `Option` because old syntheses won't have it.
|
||||||
|
|
||||||
|
- [ ] **Step 2: Update `create` function to accept and save job_id**
|
||||||
|
|
||||||
|
In `db/syntheses.rs`, change the `create` function signature to accept `job_id: Uuid`:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
pub async fn create(
|
||||||
|
pool: &PgPool,
|
||||||
|
user_id: Uuid,
|
||||||
|
week: &str,
|
||||||
|
sections_json: &serde_json::Value,
|
||||||
|
job_id: Uuid,
|
||||||
|
) -> Result<Synthesis, AppError> {
|
||||||
|
let row = sqlx::query_as::<_, Synthesis>(
|
||||||
|
r#"
|
||||||
|
INSERT INTO syntheses (user_id, week, sections, status, job_id)
|
||||||
|
VALUES ($1, $2, $3, 'completed', $4)
|
||||||
|
RETURNING id, user_id, week, sections, status, created_at, job_id
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(week)
|
||||||
|
.bind(sections_json)
|
||||||
|
.bind(job_id)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(row)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Also update the column lists of the `list_for_user` and `get_by_id` queries to include `job_id`, so the rows still map onto the extended `Synthesis` struct.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Update caller in synthesis.rs**
|
||||||
|
|
||||||
|
In `run_generation_inner`, the call to `db::syntheses::create(...)` needs to pass `job_id` (rename `_job_id` to `job_id` in the function signature). Search for `db::syntheses::create` and add `job_id` as the last argument.
|
||||||
|
|
||||||
|
- [ ] **Step 4: Run tests + commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd backend && cargo test --lib && cargo build
|
||||||
|
git add backend/src/db/syntheses.rs backend/src/models/synthesis.rs backend/src/services/synthesis.rs
|
||||||
|
git commit -m "feat: save job_id on syntheses for provenance lookup"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 4: Pipeline instrumentation — insert dropped articles at each filtering step
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `backend/src/services/synthesis.rs`
|
||||||
|
- Modify: `backend/src/models/synthesis.rs`
|
||||||
|
|
||||||
|
This is the largest task. At each filtering step in `run_generation_inner`, insert dropped articles into `article_history`.
|
||||||
|
|
||||||
|
- [ ] **Step 1: Add `source_url` to ScrapedNewsItem**
|
||||||
|
|
||||||
|
In `models/synthesis.rs`, add `pub source_url: Option<String>` to `ScrapedNewsItem` (after `scraped_content`). Add `#[serde(default)]` to make it optional during deserialization.
|
||||||
|
|
||||||
|
Update all places that construct `ScrapedNewsItem` in `synthesis.rs`:
|
||||||
|
- In `scrape_flat_urls` result handler — set `source_url: None` for now (Step 2 threads the real source page URL through)
|
||||||
|
- In `scrape_articles` result handler — set `source_url: None`
|
||||||
|
|
||||||
|
- [ ] **Step 2: Thread source_url through Phase 1**
|
||||||
|
|
||||||
|
Change `candidate_urls` from `Vec<String>` to `Vec<(String, String)>` — `(article_url, source_page_url)`. Update the source scraping loop to pair each link with its source URL. Update `scrape_flat_urls` to accept `&[(String, String)]` and set `source_url` on each `ScrapedNewsItem`.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Add helper function to insert trace entries**
|
||||||
|
|
||||||
|
Add a convenience function in `synthesis.rs` to reduce boilerplate:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
/// Insert a trace entry into article_history.
|
||||||
|
async fn trace_article(
|
||||||
|
pool: &sqlx::PgPool,
|
||||||
|
user_id: Uuid,
|
||||||
|
job_id: Uuid,
|
||||||
|
url: &str,
|
||||||
|
title: &str,
|
||||||
|
source_type: &str,
|
||||||
|
source_url: Option<&str>,
|
||||||
|
category: Option<&str>,
|
||||||
|
synthesis_id: Option<Uuid>,
|
||||||
|
status: &str,
|
||||||
|
scraped_ok: bool,
|
||||||
|
) {
|
||||||
|
let entry = db::article_history::ArticleHistoryEntry {
|
||||||
|
user_id,
|
||||||
|
url: url.to_string(),
|
||||||
|
url_hash: hash_article_url(url),
|
||||||
|
title: title.to_string(),
|
||||||
|
source_type: source_type.to_string(),
|
||||||
|
source_url: source_url.map(|s| s.to_string()),
|
||||||
|
category: category.map(|s| s.to_string()),
|
||||||
|
synthesis_id,
|
||||||
|
status: status.to_string(),
|
||||||
|
scraped_ok,
|
||||||
|
job_id,
|
||||||
|
};
|
||||||
|
db::article_history::insert_entry(pool, &entry).await.ok();
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 4: Instrument Phase 1 filtering steps**
|
||||||
|
|
||||||
|
At each Phase 1 filtering point, call `trace_article` for dropped articles. Key insertion points:
|
||||||
|
|
||||||
|
After empty content filter:
|
||||||
|
```rust
|
||||||
|
for article in &scraped_articles {
|
||||||
|
if article.scraped_content.trim().is_empty() {
|
||||||
|
trace_article(&state.pool, user_id, job_id, &article.url, &article.title,
|
||||||
|
"personalized_source", article.source_url.as_deref(), None, None,
|
||||||
|
"filtered_empty", false).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
After history filter (articles that matched existing history):
|
||||||
|
```rust
|
||||||
|
// Articles removed by history filter
|
||||||
|
for article in &valid_articles_before_filter {
|
||||||
|
if existing_hashes.contains(&hash_article_url(&article.url)) {
|
||||||
|
trace_article(&state.pool, user_id, job_id, &article.url, &article.title,
|
||||||
|
"personalized_source", article.source_url.as_deref(), None, None,
|
||||||
|
"filtered_history", true).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
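Apply the same `trace_article` pattern at the remaining Phase 1 filtering points (source diversity drops, retry drops, etc.), giving each drop reason its own `status` value in the style of `filtered_empty` / `filtered_history`.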
|
||||||
|
|
||||||
|
- [ ] **Step 5: Instrument Phase 2 filtering steps**
|
||||||
|
|
||||||
|
Same pattern for Phase 2 with `source_type: "web_search"` and `source_url: None`.
|
||||||
|
|
||||||
|
- [ ] **Step 6: Insert used articles after save**
|
||||||
|
|
||||||
|
Replace the old `insert_urls` call with `trace_article` calls for each used article:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
if settings.article_history_days > 0 {
|
||||||
|
for section in &final_sections {
|
||||||
|
for item in &section.items {
|
||||||
|
trace_article(&state.pool, user_id, job_id, &item.url, &item.title,
|
||||||
|
"used", None, Some(&section.title), Some(synthesis.id),
|
||||||
|
"used", true).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 7: Run tests + commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd backend && cargo test --lib && cargo build
|
||||||
|
git add backend/src/services/synthesis.rs backend/src/models/synthesis.rs
|
||||||
|
git commit -m "feat: instrument pipeline with article tracing at every filtering step"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 5: API endpoints — history listing + provenance
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `backend/src/handlers/article_history.rs`
|
||||||
|
- Modify: `backend/src/handlers/mod.rs`
|
||||||
|
- Modify: `backend/src/router.rs`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Create handler module**
|
||||||
|
|
||||||
|
Create `backend/src/handlers/article_history.rs` with two handlers:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
//! Handlers for article history and provenance endpoints.
|
||||||
|
|
||||||
|
use axum::extract::{Path, Query, State};
|
||||||
|
use axum::http::StatusCode;
|
||||||
|
use axum::response::IntoResponse;
|
||||||
|
use axum::Json;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::app_state::AppState;
|
||||||
|
use crate::db;
|
||||||
|
use crate::errors::AppError;
|
||||||
|
use crate::middleware::auth::AuthUser;
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct HistoryQuery {
|
||||||
|
pub limit: Option<i64>,
|
||||||
|
pub offset: Option<i64>,
|
||||||
|
pub status: Option<String>,
|
||||||
|
pub source_type: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// GET /api/v1/article-history
|
||||||
|
pub async fn list_history(
|
||||||
|
auth_user: AuthUser,
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Query(params): Query<HistoryQuery>,
|
||||||
|
) -> Result<impl IntoResponse, AppError> {
|
||||||
|
let limit = params.limit.unwrap_or(50).clamp(1, 200);
|
||||||
|
let offset = params.offset.unwrap_or(0).max(0);
|
||||||
|
|
||||||
|
let items = db::article_history::list_history(
|
||||||
|
&state.pool, auth_user.id, limit, offset,
|
||||||
|
params.status.as_deref(), params.source_type.as_deref(),
|
||||||
|
).await?;
|
||||||
|
|
||||||
|
let total = db::article_history::count_history(
|
||||||
|
&state.pool, auth_user.id,
|
||||||
|
params.status.as_deref(), params.source_type.as_deref(),
|
||||||
|
).await?;
|
||||||
|
|
||||||
|
Ok(Json(serde_json::json!({
|
||||||
|
"items": items,
|
||||||
|
"total": total
|
||||||
|
})))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// GET /api/v1/syntheses/:id/provenance
|
||||||
|
pub async fn get_provenance(
|
||||||
|
auth_user: AuthUser,
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Path(synthesis_id): Path<Uuid>,
|
||||||
|
) -> Result<impl IntoResponse, AppError> {
|
||||||
|
// Get the synthesis to find its job_id
|
||||||
|
let synthesis = db::syntheses::get_by_id_for_user(&state.pool, synthesis_id, auth_user.id)
|
||||||
|
.await?
|
||||||
|
.ok_or_else(|| AppError::NotFound("Synthesis not found".into()))?;
|
||||||
|
|
||||||
|
let job_id = synthesis.job_id.ok_or_else(|| {
|
||||||
|
AppError::NotFound("No tracing data available for this synthesis".into())
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let items = db::article_history::list_by_job_id(&state.pool, auth_user.id, job_id).await?;
|
||||||
|
|
||||||
|
Ok(Json(items))
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Register handler module and add routes**
|
||||||
|
|
||||||
|
In `handlers/mod.rs`, add `pub mod article_history;`.
|
||||||
|
|
||||||
|
In `router.rs`, add routes in the authenticated section:
|
||||||
|
```rust
|
||||||
|
.route("/api/v1/article-history", get(handlers::article_history::list_history))
|
||||||
|
.route("/api/v1/syntheses/:id/provenance", get(handlers::article_history::get_provenance))
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 3: Run tests + commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd backend && cargo test --lib && cargo build
|
||||||
|
git add backend/src/handlers/article_history.rs backend/src/handlers/mod.rs backend/src/router.rs
|
||||||
|
git commit -m "feat: API endpoints for article history listing and provenance"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 6: Frontend — article history page + provenance section
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `frontend/src/pages/ArticleHistory.tsx`
|
||||||
|
- Create: `frontend/src/api/articleHistory.ts`
|
||||||
|
- Modify: `frontend/src/pages/SynthesisDetail.tsx`
|
||||||
|
- Modify: `frontend/src/App.tsx`
|
||||||
|
- Modify: `frontend/src/pages/Settings.tsx`
|
||||||
|
- Modify: `frontend/src/i18n/fr.ts`
|
||||||
|
- Modify: `frontend/src/types.ts`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Add types**
|
||||||
|
|
||||||
|
In `types.ts`:
|
||||||
|
```typescript
|
||||||
|
export interface ArticleHistoryEntry {
|
||||||
|
id: string;
|
||||||
|
url: string;
|
||||||
|
title: string;
|
||||||
|
source_type: string;
|
||||||
|
source_url: string | null;
|
||||||
|
category: string | null;
|
||||||
|
synthesis_id: string | null;
|
||||||
|
status: string;
|
||||||
|
scraped_ok: boolean;
|
||||||
|
job_id: string;
|
||||||
|
created_at: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ArticleHistoryResponse {
|
||||||
|
items: ArticleHistoryEntry[];
|
||||||
|
total: number;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Add API client**
|
||||||
|
|
||||||
|
Create `frontend/src/api/articleHistory.ts`:
|
||||||
|
```typescript
|
||||||
|
import { api } from './client';
|
||||||
|
import type { ArticleHistoryResponse, ArticleHistoryEntry } from '~/types';
|
||||||
|
|
||||||
|
export const articleHistoryApi = {
|
||||||
|
list: (params: { limit?: number; offset?: number; status?: string; source_type?: string } = {}): Promise<ArticleHistoryResponse> => {
|
||||||
|
const query = new URLSearchParams();
|
||||||
|
if (params.limit) query.set('limit', String(params.limit));
|
||||||
|
if (params.offset) query.set('offset', String(params.offset));
|
||||||
|
if (params.status) query.set('status', params.status);
|
||||||
|
if (params.source_type) query.set('source_type', params.source_type);
|
||||||
|
return api.get<ArticleHistoryResponse>(`/article-history?${query.toString()}`);
|
||||||
|
},
|
||||||
|
|
||||||
|
getProvenance: (synthesisId: string): Promise<ArticleHistoryEntry[]> =>
|
||||||
|
api.get<ArticleHistoryEntry[]>(`/syntheses/${synthesisId}/provenance`),
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 3: Add i18n labels**
|
||||||
|
|
||||||
|
In `fr.ts`, add labels for the history page and provenance section (article history title, column headers, status badges, filter labels, provenance section title, empty state messages).
|
||||||
|
|
||||||
|
- [ ] **Step 4: Create ArticleHistory page**
|
||||||
|
|
||||||
|
Create `frontend/src/pages/ArticleHistory.tsx` — a page with:
|
||||||
|
- Filter dropdowns for status and source_type
|
||||||
|
- Paginated table showing article history entries
|
||||||
|
- Color-coded status badges
|
||||||
|
- Clickable URLs and synthesis links
|
||||||
|
|
||||||
|
- [ ] **Step 5: Add route and Settings link**
|
||||||
|
|
||||||
|
In `App.tsx`, add route: `<Route path="/article-history" component={ArticleHistory} />`
|
||||||
|
In `Settings.tsx`, add a button/link to navigate to `/article-history`.
|
||||||
|
|
||||||
|
- [ ] **Step 6: Add provenance section to SynthesisDetail**
|
||||||
|
|
||||||
|
In `SynthesisDetail.tsx`, add a collapsible "Provenance" section at the bottom that calls `articleHistoryApi.getProvenance(id)` and displays the trace table.
|
||||||
|
|
||||||
|
- [ ] **Step 7: Run frontend tests + commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd frontend && npx tsc --noEmit && npx vitest run
|
||||||
|
git add frontend/src/types.ts frontend/src/api/articleHistory.ts frontend/src/pages/ArticleHistory.tsx frontend/src/pages/SynthesisDetail.tsx frontend/src/App.tsx frontend/src/pages/Settings.tsx frontend/src/i18n/fr.ts
|
||||||
|
git commit -m "feat: article history page + provenance section in synthesis detail"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 7: E2E test — verify provenance after generation
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `e2e/tests/generation-live.spec.ts`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Add provenance verification**
|
||||||
|
|
||||||
|
After the synthesis is generated and validated, call the provenance endpoint and verify:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Verify provenance data exists
|
||||||
|
const provResp = await apiCall(page, 'GET', `/api/v1/syntheses/${synthesisId}/provenance`);
|
||||||
|
expect(provResp.status).toBe(200);
|
||||||
|
const provenance = provResp.data;
|
||||||
|
expect(Array.isArray(provenance)).toBe(true);
|
||||||
|
expect(provenance.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
// At least some entries should be 'used'
|
||||||
|
const usedEntries = provenance.filter((e: any) => e.status === 'used');
|
||||||
|
expect(usedEntries.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
// Every used entry should have a synthesis_id
|
||||||
|
for (const entry of usedEntries) {
|
||||||
|
expect(entry.synthesis_id).toBe(synthesisId);
|
||||||
|
expect(entry.job_id).toBeTruthy();
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run E2E + commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd e2e && docker compose -f docker-compose.test.yml down && docker compose -f docker-compose.test.yml up --build -d
|
||||||
|
sleep 25 && npx tsx seed.ts && npx playwright test generation-live --reporter=list
|
||||||
|
git add e2e/tests/generation-live.spec.ts
|
||||||
|
git commit -m "test: verify provenance endpoint returns tracing data after generation"
|
||||||
|
```
|
||||||
Loading…
Reference in New Issue