From f5f065660446ca8f83ddc5d4e32d4b05f8c58656 Mon Sep 17 00:00:00 2001 From: oabrivard Date: Tue, 24 Mar 2026 17:09:13 +0100 Subject: [PATCH] docs: add Autre fill-up implementation plan --- .../plans/2026-03-24-autre-fillup.md | 256 ++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100644 docs/superpowers/plans/2026-03-24-autre-fillup.md diff --git a/docs/superpowers/plans/2026-03-24-autre-fillup.md b/docs/superpowers/plans/2026-03-24-autre-fillup.md new file mode 100644 index 0000000..d756b21 --- /dev/null +++ b/docs/superpowers/plans/2026-03-24-autre-fillup.md @@ -0,0 +1,256 @@ +# "Autre" Fill-Up — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** When total synthesis articles are below 75% capacity, expand "Autre" with overflow articles from classification. + +**Architecture:** Modify `parse_classification_response` to return overflow. Add fill-up logic between Phase 2 and rewrite pass. Single-file change. + +**Tech Stack:** Rust + +**Spec:** `docs/superpowers/specs/2026-03-24-autre-fillup-design.md` + +--- + +### Task 1: Modify `parse_classification_response` to collect overflow + +**Files:** +- Modify: `backend/src/services/synthesis.rs` + +- [ ] **Step 1: Add constant** + +At the top of the helper functions section, add: +```rust +/// Minimum fill ratio for synthesis. If total articles are below this percentage +/// of the maximum capacity, overflow articles are added to "Autre" to compensate. +const SYNTHESIS_MIN_FILL_RATIO: f64 = 0.75; +``` + +- [ ] **Step 2: Change `parse_classification_response` return type and body** + +Change return type from `HashMap<String, Vec<ScrapedNewsItem>>` to `(HashMap<String, Vec<ScrapedNewsItem>>, Vec<ScrapedNewsItem>)`. + +Add `let mut overflow: Vec<ScrapedNewsItem> = Vec::new();` at the start of the function. 
+ +At the two drop points where articles are silently discarded, push to overflow instead: + +**Drop point 1 (line ~1735-1743)** — category full AND Autre full: +```rust + if filled >= max { + let autre_filled = filled_counts.get("Autre").copied().unwrap_or(0); + if autre_filled < max { + result.entry("category_autre".to_string()).or_default().push(articles[index].clone()); + *filled_counts.entry("Autre".to_string()).or_insert(0) += 1; + assigned_indices.insert(index); + } else { + // Both category and Autre full — collect as overflow + overflow.push(articles[index].clone()); + assigned_indices.insert(index); + } + continue; + } +``` + +**Drop point 2 (line ~1752-1759)** — unclassified, Autre full: +```rust + for (i, article) in articles.iter().enumerate() { + if !assigned_indices.contains(&i) { + let autre_filled = filled_counts.get("Autre").copied().unwrap_or(0); + if autre_filled < max { + result.entry("category_autre".to_string()).or_default().push(article.clone()); + *filled_counts.entry("Autre".to_string()).or_insert(0) += 1; + } else { + // Autre full — collect as overflow + overflow.push(article.clone()); + } + } + } +``` + +Change the return from `result` to `(result, overflow)`. + +- [ ] **Step 3: Update 2 production call sites** + +**Phase 1 (line ~494):** +```rust +// Before: +let phase1_classified = parse_classification_response(...); + +// After: +let (phase1_classified, phase1_overflow) = parse_classification_response(...); +``` + +**Phase 2 (line ~701):** +```rust +// Before: +let phase2_classified = parse_classification_response(...); + +// After: +let (phase2_classified, phase2_overflow) = parse_classification_response(...); +``` + +- [ ] **Step 4: Update 5 existing tests** + +All 5 tests that call `parse_classification_response` need to destructure the tuple. 
Change each from: +```rust +let result = parse_classification_response(...); +``` +To: +```rust +let (result, _overflow) = parse_classification_response(...); +``` + +For the test `classification_respects_max_per_category`, also verify overflow is captured: +```rust +let (result, overflow) = parse_classification_response(&response, &articles, &categories, 2, &mut filled); +assert_eq!(result.get("category_0").map(|v| v.len()), Some(2)); +assert!(result.get("category_autre").map(|v| v.len()).unwrap_or(0) > 0); +// Articles that overflowed both AI News AND Autre should be in overflow +assert!(!overflow.is_empty() || filled.get("Autre").copied().unwrap_or(0) <= 2); +``` + +- [ ] **Step 5: Run tests** + +Run: `cd backend && cargo test --lib` +Expected: all tests pass + +- [ ] **Step 6: Commit** + +```bash +git add backend/src/services/synthesis.rs +git commit -m "feat: parse_classification_response collects overflow articles" +``` + +--- + +### Task 2: Add fill-up logic and accumulate overflow in pipeline + +**Files:** +- Modify: `backend/src/services/synthesis.rs` + +- [ ] **Step 1: Accumulate overflow in `run_generation_inner`** + +Add `let mut all_overflow: Vec<ScrapedNewsItem> = Vec::new();` near the `all_scraped` initialization (around line 305). 
+ +After Phase 1 classification (where `phase1_overflow` is captured), add: +```rust +all_overflow.extend(phase1_overflow); +``` + +After Phase 2 classification (where `phase2_overflow` is captured), add: +```rust +all_overflow.extend(phase2_overflow); +``` + +- [ ] **Step 2: Add fill-up logic before rewrite pass** + +Between the "COMBINED REWRITE PASS" header (line ~719) and the empty-check (line ~722), insert: + +```rust + // Fill-up: if total articles are below 75% of max, expand "Autre" with overflow + let total_articles: usize = all_scraped.values().map(|v| v.len()).sum(); + let max_articles = settings.categories.len() * settings.max_items_per_category as usize; + let target = (SYNTHESIS_MIN_FILL_RATIO * max_articles as f64).ceil() as usize; + let shortfall = target.saturating_sub(total_articles); + + if shortfall > 0 && !all_overflow.is_empty() { + tracing::info!( + total = total_articles, + target = target, + shortfall = shortfall, + overflow_available = all_overflow.len(), + "Synthesis under-filled, adding overflow to Autre" + ); + + // Count domain occurrences across all categories for source diversity enforcement + let mut domain_counts: HashMap<String, usize> = HashMap::new(); + for items in all_scraped.values() { + for item in items { + if let Some(domain) = extract_domain(&item.url) { + *domain_counts.entry(domain).or_insert(0) += 1; + } + } + } + + let max_per_source = settings.max_articles_per_source as usize; + let mut added = 0usize; + + for article in all_overflow { + if added >= shortfall { + break; + } + // Enforce source diversity on overflow articles + if let Some(domain) = extract_domain(&article.url) { + let count = domain_counts.get(&domain).copied().unwrap_or(0); + if count >= max_per_source { + continue; + } + *domain_counts.entry(domain).or_insert(0) += 1; + } + all_scraped + .entry("category_autre".to_string()) + .or_default() + .push(article); + added += 1; + } + + if added > 0 { + tracing::info!(added = added, "Added overflow articles to Autre"); + 
} + } +``` + +- [ ] **Step 3: Add unit tests for fill-up calculation** + +```rust + // ── fill-up calculation tests ─────────────────────────────── + + #[test] + fn fillup_target_calculation() { + // 4 categories x 4 items = 16 max, 75% = 12 + let max = 4 * 4; + let target = (0.75_f64 * max as f64).ceil() as usize; + assert_eq!(target, 12); + } + + #[test] + fn fillup_shortfall_saturating() { + // If total exceeds target, shortfall should be 0 (not panic) + let target: usize = 12; + let total: usize = 15; + let shortfall = target.saturating_sub(total); + assert_eq!(shortfall, 0); + } + + #[test] + fn classification_overflow_collected_when_all_full() { + use crate::models::synthesis::ScrapedNewsItem; + let articles: Vec<ScrapedNewsItem> = (0..6).map(|i| ScrapedNewsItem { + title: format!("Art{}", i), url: format!("https://a.com/{}", i), + summary: "s".into(), original_title: "t".into(), scraped_content: "c".into(), + }).collect(); + let categories = vec!["AI News".to_string(), "Autre".to_string()]; + let response = serde_json::json!({ + "assignments": (0..6).map(|i| serde_json::json!({"index": i, "category": "AI News"})).collect::<Vec<_>>() + }); + let mut filled = HashMap::new(); + let (result, overflow) = parse_classification_response(&response, &articles, &categories, 2, &mut filled); + + // AI News capped at 2, Autre gets 2 overflow, remaining 2 go to overflow vec + assert_eq!(result.get("category_0").map(|v| v.len()), Some(2)); + assert_eq!(result.get("category_autre").map(|v| v.len()), Some(2)); + assert_eq!(overflow.len(), 2, "2 articles should overflow when both AI News and Autre are full"); + } +``` + +- [ ] **Step 4: Run tests** + +Run: `cd backend && cargo test --lib` +Expected: all tests pass + +- [ ] **Step 5: Commit** + +```bash +git add backend/src/services/synthesis.rs +git commit -m "feat: Autre fill-up to 75% synthesis target with source diversity enforcement" +```