From 2e87ae854aafc983555ddc3ca72f4324abca6754 Mon Sep 17 00:00:00 2001 From: oabrivard Date: Thu, 26 Mar 2026 09:25:03 +0100 Subject: [PATCH] docs: add implementation plan for polish and optimization items Co-Authored-By: Claude Opus 4.6 (1M context) --- .../plans/2026-03-26-polish-optimizations.md | 415 ++++++++++++++++++ 1 file changed, 415 insertions(+) create mode 100644 docs/superpowers/plans/2026-03-26-polish-optimizations.md diff --git a/docs/superpowers/plans/2026-03-26-polish-optimizations.md b/docs/superpowers/plans/2026-03-26-polish-optimizations.md new file mode 100644 index 0000000..46ce601 --- /dev/null +++ b/docs/superpowers/plans/2026-03-26-polish-optimizations.md @@ -0,0 +1,415 @@ +# Polish & Optimization — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** 8 polish/optimization items from the code audit: batch INSERTs, Arc clones, LazyLock selectors, createResource standardization, Button component usage, default settings alignment, SESSION_SECRET removal, Arc for master key. + +**Architecture:** All tasks are independent — can be executed in any order. No behavioral changes. Pure optimization and cleanup. + +**Tech Stack:** Rust (sqlx, std::sync::LazyLock), SolidJS, TypeScript + +**Spec:** `docs/superpowers/specs/2026-03-26-polish-optimizations-design.md` + +--- + +### Task 1: Remove unused `SESSION_SECRET` + +**Files:** +- Modify: `backend/src/config.rs` +- Modify: `.env.example` + +The simplest task — pure dead code removal. + +- [ ] **Step 1: Remove `session_secret` from `AppConfig`** + +In `backend/src/config.rs`: +- Remove `pub session_secret: String` from the `AppConfig` struct +- Remove `let session_secret = required_var("SESSION_SECRET")?;` from `from_env()` +- Remove `session_secret,` from the struct literal in `from_env()` +- Remove the `session_secret` length validation in `validate()` +- Remove `session_secret: "a".repeat(64),` from all test fixtures +- Remove the `test_validate_short_session_secret` test + +- [ ] **Step 2: Remove from `.env.example`** + +Remove the `SESSION_SECRET=` line from `.env.example`. Also remove any comment about it. + +- [ ] **Step 3: Build and test** + +Run: `cd backend && cargo build && cargo test --lib` +Expected: All pass + +- [ ] **Step 4: Commit** + +```bash +git add backend/src/config.rs .env.example +git commit -m "chore: remove unused SESSION_SECRET env var and config field" +``` + +--- + +### Task 2: Wrap `AppConfig` master key in `Arc` + +**Files:** +- Modify: `backend/src/config.rs` + +- [ ] **Step 1: Change field type to `Arc`** + +In `backend/src/config.rs`: +- Add `use std::sync::Arc;` to imports +- Change `pub master_encryption_key: String` to `pub master_encryption_key: Arc` +- In `from_env()`, wrap: `master_encryption_key: Arc::new(master_encryption_key),` +- In test fixtures, wrap: `master_encryption_key: Arc::new("a".repeat(64)),` + +All call sites use `&state.config.master_encryption_key` which works with `Arc` via `Deref`. + +- [ ] **Step 2: Build and test** + +Run: `cd backend && cargo build && cargo test --lib` +Expected: All pass + +- [ ] **Step 3: Commit** + +```bash +git add backend/src/config.rs +git commit -m "chore: wrap master_encryption_key in Arc to reduce secret copies" +``` + +--- + +### Task 3: Cache CSS selectors with `LazyLock` + +**Files:** +- Modify: `backend/src/services/source_scraper.rs` +- Modify: `backend/src/services/scraper.rs` + +- [ ] **Step 1: Fix `source_scraper.rs` selectors** + +In `backend/src/services/source_scraper.rs`, add at the top (after imports): + +```rust +use std::sync::LazyLock; + +static ANCHOR_SELECTOR: LazyLock = LazyLock::new(|| Selector::parse("a[href]").unwrap()); +``` + +Then replace both `Selector::parse("a[href]").unwrap()` calls (lines 80 and 137) with `&ANCHOR_SELECTOR`. + +- [ ] **Step 2: Fix `scraper.rs` selectors** + +In `backend/src/services/scraper.rs`, there are 14 `Selector::parse` calls. Many use `if let Ok(sel) = Selector::parse(...)` which handles parse failure gracefully. For the static selectors with `.unwrap()` (like `"title"`, `"h1"`, `"body"`), convert to `LazyLock`. For selectors inside `if let Ok(...)`, leave as-is since the pattern already handles failure. + +Add at the top: +```rust +use std::sync::LazyLock; + +static SEL_TITLE: LazyLock = LazyLock::new(|| Selector::parse("title").unwrap()); +static SEL_H1: LazyLock = LazyLock::new(|| Selector::parse("h1").unwrap()); +static SEL_BODY: LazyLock = LazyLock::new(|| Selector::parse("body").unwrap()); +``` + +Read the file carefully. Only convert `Selector::parse` calls that use `.unwrap()` to `LazyLock`. For calls using `if let Ok(sel) = Selector::parse(...)`, leave them as-is — these are error-handled and the selectors may vary. + +- [ ] **Step 3: Build and test** + +Run: `cd backend && cargo build && cargo test --lib` +Expected: All pass + +- [ ] **Step 4: Commit** + +```bash +git add backend/src/services/source_scraper.rs backend/src/services/scraper.rs +git commit -m "perf: cache CSS selectors with LazyLock to avoid re-parsing" +``` + +--- + +### Task 4: Reduce `.clone()` in pipeline with `Arc` + +**Files:** +- Modify: `backend/src/services/synthesis.rs` + +- [ ] **Step 1: Identify clone targets** + +Read `backend/src/services/synthesis.rs`. In `run_generation_inner`, find values that are cloned into every spawned task but never mutated: +- `model_research` (String) — cloned ~4 times per batch iteration +- `classify_schema` (serde_json::Value) — cloned ~2 times per batch +- `classification_categories` (Vec) — cloned ~2 times per batch + +These should be wrapped in `Arc` at the point of creation, before the batch loops. + +- [ ] **Step 2: Wrap immutable values in `Arc`** + +At the point where these values are first created (before the batch loops), wrap them: + +```rust +let model_research = Arc::new(model_research); // was String +let classify_schema = Arc::new(classify_schema); // was serde_json::Value +let classification_categories = Arc::new(classification_categories); // was Vec +``` + +Add `use std::sync::Arc;` if not already imported (it likely is). + +Then in the spawned tasks, change: +```rust +// Before: +let model = model_research.clone(); // clones String +// After: +let model = Arc::clone(&model_research); // clones Arc pointer +``` + +For `model` usage inside the task: `call_llm(&model, ...)` works because `Arc` derefs to `&str`. + +For `classify_schema`: `Arc` derefs to `&Value`. Check that `.clone()` calls in the task body that need an owned `Value` are updated if needed (the task may need `(*schema).clone()` if it passes owned values somewhere — read carefully). + +For `classification_categories`: `Arc>` derefs to `&[String]`. Update any place that calls `.clone()` on the inner vec to use `Arc::clone()` instead. + +**Important:** Only change the variables that are cloned into spawned tasks. Don't change variables that are mutated (like `filled_counts`, `article_scraped`, `source_counts`). + +- [ ] **Step 3: Build and test** + +Run: `cd backend && cargo build && cargo test --lib` +Expected: All pass + +- [ ] **Step 4: Commit** + +```bash +git add backend/src/services/synthesis.rs +git commit -m "perf: use Arc for immutable values in pipeline to reduce cloning" +``` + +--- + +### Task 5: Align frontend default settings with backend + +**Files:** +- Modify: `frontend/src/types.ts` + +- [ ] **Step 1: Update `DEFAULT_SETTINGS`** + +In `frontend/src/types.ts`, update `DEFAULT_SETTINGS` to match the backend's `Default for UserSettings` in `backend/src/models/settings.rs`: + +```typescript +export const DEFAULT_SETTINGS: UserSettings = { + theme: 'Intelligence Artificielle', + max_age_days: 7, + max_items_per_category: 4, + max_articles_per_source: 3, + use_llm_for_source_links: false, + use_brave_search: false, + article_history_days: 90, + batch_size: 5, + search_agent_behavior: '', // backend default is empty string + ai_model: '', + ai_model_websearch: '', + ai_provider: '', + rate_limit_max_requests: null, + rate_limit_time_window_seconds: null, + categories: [ + 'Annonces majeures', + 'Recherche et innovation', + 'Industrie et entreprises', + 'Secteur public', + 'Opinions et analyses', + ], +}; +``` + +Key changes: +- `search_agent_behavior`: `"Tu peux..."` → `''` (backend defaults to empty) +- `categories`: update to match backend's 5 categories exactly + +- [ ] **Step 2: TypeScript check** + +Run: `cd frontend && npx tsc --noEmit` +Expected: No errors + +- [ ] **Step 3: Commit** + +```bash +git add frontend/src/types.ts +git commit -m "chore: align frontend DEFAULT_SETTINGS with backend defaults" +``` + +--- + +### Task 6: Standardize frontend data fetching on `createResource` + +**Files:** +- Modify: Multiple frontend pages + +- [ ] **Step 1: Identify pages to convert** + +Read these pages and check which use `onMount` + `createSignal` for data loading: +- `frontend/src/pages/Home.tsx` +- `frontend/src/pages/ArticleHistory.tsx` +- `frontend/src/pages/SynthesisDetail.tsx` +- `frontend/src/pages/LlmLogs.tsx` + +For each, the pattern to look for is: +```tsx +const [data, setData] = createSignal([]); +const [loading, setLoading] = createSignal(true); +onMount(async () => { + try { setData(await api.fetch()); } + catch { setError(...); } + finally { setLoading(false); } +}); +``` + +- [ ] **Step 2: Convert to `createResource`** + +Replace with: +```tsx +const [data] = createResource(() => api.fetch()); +``` + +Then use `data.loading` instead of `loading()`, `data()` instead of `data()` (same), and `data.error` for error state. + +Only convert pages where it's a clean substitution. Skip pages with complex conditional fetching, SSE, or mutation-heavy patterns. + +- [ ] **Step 3: TypeScript check** + +Run: `cd frontend && npx tsc --noEmit` +Expected: No errors + +- [ ] **Step 4: Commit** + +```bash +git add frontend/src/pages/ +git commit -m "refactor: standardize data fetching on createResource" +``` + +--- + +### Task 7: Use existing Button component + +**Files:** +- Modify: Multiple frontend pages + +- [ ] **Step 1: Read the Button component** + +Read `frontend/src/components/ui/Button.tsx` to understand its API (props, variants, sizes). + +- [ ] **Step 2: Find and replace obvious inline buttons** + +Search for inline buttons with duplicated Tailwind classes across pages. Replace clear-cut cases where the inline button matches the `Button` component's API. Focus on primary action buttons (save, submit, generate) — don't force icon buttons or toggle buttons into the component. + +- [ ] **Step 3: TypeScript check** + +Run: `cd frontend && npx tsc --noEmit` +Expected: No errors + +- [ ] **Step 4: Commit** + +```bash +git add frontend/src/pages/ frontend/src/components/ +git commit -m "refactor: use existing Button component to reduce inline Tailwind duplication" +``` + +--- + +### Task 8: Batch article history INSERTs + +**Files:** +- Modify: `backend/src/db/article_history.rs` +- Modify: `backend/src/services/synthesis.rs` + +- [ ] **Step 1: Add `batch_insert_entries` in `article_history.rs`** + +Add a batch insert function using PostgreSQL `unnest()`: + +```rust +/// Insert multiple article history entries in a single query. +pub async fn batch_insert_entries(pool: &PgPool, entries: &[ArticleHistoryEntry]) -> Result<(), AppError> { + if entries.is_empty() { + return Ok(()); + } + + let user_ids: Vec = entries.iter().map(|e| e.user_id).collect(); + let urls: Vec<&str> = entries.iter().map(|e| e.url.as_str()).collect(); + let url_hashes: Vec<&str> = entries.iter().map(|e| e.url_hash.as_str()).collect(); + let titles: Vec<&str> = entries.iter().map(|e| e.title.as_str()).collect(); + let source_types: Vec<&str> = entries.iter().map(|e| e.source_type.as_str()).collect(); + let source_urls: Vec> = entries.iter().map(|e| e.source_url.as_deref()).collect(); + let categories: Vec> = entries.iter().map(|e| e.category.as_deref()).collect(); + let synthesis_ids: Vec> = entries.iter().map(|e| e.synthesis_id).collect(); + let statuses: Vec<&str> = entries.iter().map(|e| e.status.as_str()).collect(); + let scraped_oks: Vec = entries.iter().map(|e| e.scraped_ok).collect(); + let job_ids: Vec = entries.iter().map(|e| e.job_id).collect(); + + sqlx::query( + r#" + INSERT INTO article_history (user_id, url, url_hash, title, source_type, source_url, category, synthesis_id, status, scraped_ok, job_id) + SELECT * FROM unnest($1::uuid[], $2::text[], $3::text[], $4::text[], $5::text[], $6::text[], $7::text[], $8::uuid[], $9::text[], $10::bool[], $11::uuid[]) + "#, + ) + .bind(&user_ids) + .bind(&urls) + .bind(&url_hashes) + .bind(&titles) + .bind(&source_types) + .bind(&source_urls) + .bind(&categories) + .bind(&synthesis_ids) + .bind(&statuses) + .bind(&scraped_oks) + .bind(&job_ids) + .execute(pool) + .await?; + + Ok(()) +} +``` + +- [ ] **Step 2: Update `synthesis.rs` to batch traces** + +In `backend/src/services/synthesis.rs`, update `trace_article` to collect entries instead of inserting immediately. The approach: + +1. Change `trace_article` to build and return an `ArticleHistoryEntry` instead of inserting: + +```rust +fn build_trace_entry( + user_id: Uuid, + job_id: Uuid, + trace: &ArticleTrace<'_>, +) -> db::article_history::ArticleHistoryEntry { + db::article_history::ArticleHistoryEntry { + user_id, + url: trace.url.to_string(), + url_hash: hash_article_url(trace.url), + title: trace.title.to_string(), + source_type: trace.source_type.to_string(), + source_url: trace.source_url.map(|s| s.to_string()), + category: trace.category.map(|s| s.to_string()), + synthesis_id: trace.synthesis_id, + status: trace.status.to_string(), + scraped_ok: trace.scraped_ok, + job_id, + } +} +``` + +2. In `run_generation_inner`, add a `Vec` to collect traces +3. Replace `trace_article(...).await` calls with `pending_traces.push(build_trace_entry(...))` +4. Flush at key points (end of Phase 1, end of Phase 2, final save): +```rust +if !pending_traces.is_empty() { + db::article_history::batch_insert_entries(&state.pool, &pending_traces).await.ok(); + pending_traces.clear(); +} +``` + +**Important:** This is a large change touching many call sites. Read the file carefully. The `trace_article` calls inside spawned tasks (JoinSet) cannot easily collect into a shared Vec — for those, keep the individual inserts or collect results after the JoinSet completes. + +- [ ] **Step 3: Build and test** + +Run: `cd backend && cargo build && cargo test --lib` +Expected: All pass + +- [ ] **Step 4: Commit** + +```bash +git add backend/src/db/article_history.rs backend/src/services/synthesis.rs +git commit -m "perf: batch article history INSERTs to reduce DB round-trips" +```