From 54d54f2a06dbd0fafb6dc95375115a4da2c2647f Mon Sep 17 00:00:00 2001
From: oabrivard <olivier@abrivard.fr>
Date: Sun, 22 Mar 2026 22:04:26 +0100
Subject: [PATCH] =?UTF-8?q?fix:=20architect=20assessment=20remediation=20?=
 =?UTF-8?q?=E2=80=94=206=20issues=20across=20backend,=20frontend,=20and=20?=
 =?UTF-8?q?infra?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Wire hardened scraper client into runtime (SSRF redirect validation was defined but unused)
- Stream scraper body with per-chunk size limit instead of post-download check (DoS/OOM)
- Persist user rate-limit overrides across generation jobs via AppState DashMap
- Roll back magic-link token on email send failure to prevent quota exhaustion
- Fix API error UX: prefer human message over machine error code in frontend
- Unwrap GET /syntheses { items } wrapper in frontend API layer (contract mismatch)
- Bind Postgres to localhost in docker-compose (was exposed on all interfaces)
- Fix CLAUDE.md: runtime queries not compile-time, 10 migrations not 9

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md                         |  4 ++--
 backend/src/app_state.rs          |  7 ++++++
 backend/src/db/magic_links.rs     | 12 ++++++++++
 backend/src/handlers/auth.rs      | 24 +++++++++++++++-----
 backend/src/main.rs               |  3 ++-
 backend/src/services/scraper.rs   | 32 +++++++++++++++++---------
 backend/src/services/synthesis.rs | 37 ++++++++++++++++++++++++-------
 docker-compose.yml                |  2 +-
 frontend/src/api/client.ts        |  2 +-
 frontend/src/api/syntheses.ts     |  6 +++--
 10 files changed, 99 insertions(+), 30 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index e650746..7a0263c 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -6,7 +6,7 @@ AI Weekly Synth is a self-hosted web application that generates AI-powered weekl
 ## Architecture
 - **Backend**: Rust (Axum) — `backend/`
 - **Frontend**: SolidJS + Tailwind CSS v4 — `frontend/`
-- **Database**: PostgreSQL (via sqlx with compile-time checked queries)
+- **Database**: PostgreSQL (via sqlx with runtime-checked queries)
 - **Deployment**: Docker only (`docker-compose.yml`)
 
 ## Project Structure
@@ -117,7 +117,7 @@ cd frontend && npx tsc --noEmit
 - `GET /api/v1/admin/users` — user list
 - `PUT /api/v1/admin/users/:id/role` — role management
 
-## Database (9 migrations)
+## Database (10 migrations)
 Tables: `users`, `sessions`, `magic_link_tokens`, `user_settings`, `sources`, `syntheses`, `admin_providers`, `admin_rate_limits`, `user_api_keys`, `audit_log`
 
 ## Environment Variables
diff --git a/backend/src/app_state.rs b/backend/src/app_state.rs
index d54a913..493f178 100644
--- a/backend/src/app_state.rs
+++ b/backend/src/app_state.rs
@@ -1,6 +1,8 @@
 //! Shared application state passed to all handlers via Axum's `State` extractor.
 
+use dashmap::DashMap;
 use sqlx::PgPool;
+use uuid::Uuid;
 
 use crate::config::AppConfig;
 use crate::services::rate_limiter::{ProviderRateLimiter, RateLimiter};
@@ -19,6 +21,10 @@ pub struct AppState {
     /// Per-provider rate limiter for LLM API calls.
     /// Loaded from DB at startup, hot-reloaded when admin updates config.
     pub provider_rate_limiter: ProviderRateLimiter,
+    /// Per-user rate limiters for generation jobs.
+    /// Keyed by user_id. Created on first generation, replaced if settings change.
+    /// Tuple: (max_requests, window_seconds, limiter).
+    pub user_rate_limiters: DashMap<Uuid, (i32, i32, RateLimiter)>,
     /// In-memory store for active generation jobs.
     /// Maps job_id -> progress watch channel.
     pub job_store: JobStore,
@@ -42,6 +48,7 @@ impl AppState {
             http_client,
             auth_rate_limiter,
             provider_rate_limiter,
+            user_rate_limiters: DashMap::new(),
             job_store,
         }
     }
diff --git a/backend/src/db/magic_links.rs b/backend/src/db/magic_links.rs
index 1402b13..299e8d7 100644
--- a/backend/src/db/magic_links.rs
+++ b/backend/src/db/magic_links.rs
@@ -86,6 +86,18 @@ pub async fn delete_expired(pool: &PgPool) -> Result<u64, AppError> {
     Ok(result.rows_affected())
 }
 
+/// Delete a magic link token by its hash.
+///
+/// Used to roll back a token when email delivery fails, so it doesn't
+/// consume a quota slot.
+pub async fn delete_by_hash(pool: &PgPool, token_hash: &str) -> Result<(), AppError> {
+    sqlx::query("DELETE FROM magic_tokens WHERE token_hash = $1")
+        .bind(token_hash)
+        .execute(pool)
+        .await?;
+    Ok(())
+}
+
 /// Count unused, non-expired tokens for a given email.
 ///
 /// Used to prevent spamming magic link requests.
diff --git a/backend/src/handlers/auth.rs b/backend/src/handlers/auth.rs
index fa3f78b..c94167e 100644
--- a/backend/src/handlers/auth.rs
+++ b/backend/src/handlers/auth.rs
@@ -96,9 +96,9 @@ pub async fn register(
         tracing::info!(email = %email_lower, "New user registered");
     }
 
-    // Generate and send magic link
+    // Generate and send magic link — roll back token if email send fails
     if let Some(raw_token) = auth::create_magic_link(&state.pool, &email_lower).await? {
-        email::send_magic_link(
+        if let Err(e) = email::send_magic_link(
             &state.http_client,
             &state.config.resend_api_key,
             &state.config.email_from,
@@ -106,7 +106,14 @@ pub async fn register(
             &state.config.app_url,
             &raw_token,
         )
-        .await?;
+        .await
+        {
+            let token_hash = crate::util::token::hash_token(&raw_token);
+            db::magic_links::delete_by_hash(&state.pool, &token_hash)
+                .await
+                .ok();
+            return Err(e);
+        }
     }
 
     Ok((
@@ -151,7 +158,7 @@ pub async fn login(
 
     if existing.is_some() {
         if let Some(raw_token) = auth::create_magic_link(&state.pool, &email_lower).await? {
-            email::send_magic_link(
+            if let Err(e) = email::send_magic_link(
                 &state.http_client,
                 &state.config.resend_api_key,
                 &state.config.email_from,
@@ -159,7 +166,14 @@ pub async fn login(
                 &state.config.app_url,
                 &raw_token,
             )
-            .await?;
+            .await
+            {
+                let token_hash = crate::util::token::hash_token(&raw_token);
+                db::magic_links::delete_by_hash(&state.pool, &token_hash)
+                    .await
+                    .ok();
+                return Err(e);
+            }
         }
     } else {
         // Add a small random delay to prevent timing attacks
diff --git a/backend/src/main.rs b/backend/src/main.rs
index 7691728..77b26f9 100644
--- a/backend/src/main.rs
+++ b/backend/src/main.rs
@@ -15,6 +15,7 @@ use ai_synth_backend::config::AppConfig;
 use ai_synth_backend::db;
 use ai_synth_backend::models::user::UserRole;
 use ai_synth_backend::router;
+use ai_synth_backend::services::scraper;
 
 use crate::cli::{Cli, Commands};
 
@@ -50,7 +51,7 @@ async fn main() -> anyhow::Result<()> {
 
     match cli.command.unwrap_or(Commands::Serve) {
         Commands::Serve => {
-            let http_client = reqwest::Client::new();
+            let http_client = scraper::build_scraper_client()?;
             let state = app_state::AppState::new(config.clone(), pool, http_client);
 
             // Load provider rate limits from DB into in-memory limiter
diff --git a/backend/src/services/scraper.rs b/backend/src/services/scraper.rs
index 41716de..814408f 100644
--- a/backend/src/services/scraper.rs
+++ b/backend/src/services/scraper.rs
@@ -122,7 +122,7 @@ pub async fn scrape_url(
     check_ssrf(&parsed_url).await?;
 
     // Fetch the page
-    let response = http_client
+    let mut response = http_client
         .get(url)
         .send()
         .await
@@ -145,16 +145,28 @@ pub async fn scrape_url(
     // Capture final URL BEFORE consuming the response body (follows redirects)
     let final_url = response.url().clone();
 
-    // Read body with size limit
-    let bytes = response
-        .bytes()
-        .await
-        .map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to read response body: {}", e)))?;
+    // Read body with streaming size limit — bail early to avoid OOM on huge responses.
+    // Check Content-Length first (fast path), then enforce while streaming chunks.
+    if let Some(content_length) = response.content_length() {
+        if content_length as usize > MAX_BODY_SIZE {
+            return Err(AppError::BadRequest(
+                "Response body exceeds 5 MB limit".into(),
+            ));
+        }
+    }
 
-    if bytes.len() > MAX_BODY_SIZE {
-        return Err(AppError::BadRequest(
-            "Response body exceeds 5 MB limit".into(),
-        ));
+    let mut bytes = Vec::new();
+    while let Some(chunk) = response
+        .chunk()
+        .await
+        .map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to read response body: {}", e)))?
+    {
+        if bytes.len() + chunk.len() > MAX_BODY_SIZE {
+            return Err(AppError::BadRequest(
+                "Response body exceeds 5 MB limit".into(),
+            ));
+        }
+        bytes.extend_from_slice(&chunk);
     }
 
     let html_text = String::from_utf8_lossy(&bytes);
diff --git a/backend/src/services/synthesis.rs b/backend/src/services/synthesis.rs
index e1337f4..6a809d1 100644
--- a/backend/src/services/synthesis.rs
+++ b/backend/src/services/synthesis.rs
@@ -289,9 +289,8 @@ async fn run_generation_inner(
         model_research.clone()
     };
 
-    // Build per-user rate limiter if the user has overrides configured.
-    // Created once and reused across both passes so the limit is actually enforced.
-    let user_rate_limiter = build_user_rate_limiter(&settings);
+    // Look up or create per-user rate limiter from AppState so limits persist across jobs.
+    let user_rate_limiter = get_user_rate_limiter(state, &settings, user_id);
 
     // Step 5: Rate limit check (pass 1)
     check_rate_limit(state, &user_rate_limiter, &provider_name)?;
@@ -379,24 +378,46 @@ fn emit_progress(tx: &watch::Sender<ProgressEvent>, step: &str, message: &str, p
     .ok();
 }
 
-/// Build a per-user rate limiter from settings, if both override fields are configured.
+/// Look up or create a per-user rate limiter stored in AppState.
 ///
 /// Returns `None` if the user has no rate limit overrides, in which case the
 /// global provider rate limiter should be used instead.
-fn build_user_rate_limiter(
+///
+/// The limiter is stored in `state.user_rate_limiters` keyed by `user_id` so that
+/// rate limit history persists across generation jobs. If the user's settings have
+/// changed since the limiter was created, a fresh limiter replaces the old one.
+fn get_user_rate_limiter(
+    state: &AppState,
     settings: &UserSettings,
+    user_id: Uuid,
 ) -> Option<crate::services::rate_limiter::RateLimiter> {
     match (
         settings.rate_limit_max_requests,
         settings.rate_limit_time_window_seconds,
     ) {
         (Some(max_req), Some(window_sec)) => {
-            Some(crate::services::rate_limiter::RateLimiter::new(
+            // Reuse existing limiter if settings haven't changed
+            if let Some(entry) = state.user_rate_limiters.get(&user_id) {
+                let (stored_max, stored_window, ref limiter) = *entry;
+                if stored_max == max_req && stored_window == window_sec {
+                    return Some(limiter.clone());
+                }
+            }
+            // Settings changed or first generation — create and store
+            let limiter = crate::services::rate_limiter::RateLimiter::new(
                 max_req as usize,
                 Duration::from_secs(window_sec as u64),
-            ))
+            );
+            state
+                .user_rate_limiters
+                .insert(user_id, (max_req, window_sec, limiter.clone()));
+            Some(limiter)
+        }
+        _ => {
+            // User cleared their overrides — remove stale limiter
+            state.user_rate_limiters.remove(&user_id);
+            None
         }
-        _ => None,
     }
 }
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 0030831..4f7383f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -35,7 +35,7 @@ services:
     networks:
       - internal
     ports:
-      - "5432:5432"
+      - "127.0.0.1:5432:5432"
     healthcheck:
       test: ["CMD-SHELL", "pg_isready -U ai_synth -d ai_synth"]
       interval: 10s
diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts
index 24bc90a..fad5ce9 100644
--- a/frontend/src/api/client.ts
+++ b/frontend/src/api/client.ts
@@ -65,7 +65,7 @@ class ApiClient {
         .catch(() => ({ error: 'Unknown error' }));
       const apiError: ApiError = {
         status: response.status,
-        message: errorBody.error || errorBody.message || `HTTP ${response.status}`,
+        message: errorBody.message || errorBody.error || `HTTP ${response.status}`,
         field_errors: errorBody.field_errors,
       };
       throw apiError;
diff --git a/frontend/src/api/syntheses.ts b/frontend/src/api/syntheses.ts
index 048f35c..de2021a 100644
--- a/frontend/src/api/syntheses.ts
+++ b/frontend/src/api/syntheses.ts
@@ -60,8 +60,10 @@ export async function fetchFile(path: string): Promise<Response> {
 /** Synthesis API endpoints (CRUD, generation, export, email). */
 export const synthesesApi = {
   /** GET /syntheses -- paginated list of the user's syntheses. */
-  list: (limit = 50, offset = 0): Promise<SynthesisListItem[]> =>
-    api.get<SynthesisListItem[]>(`/syntheses?limit=${limit}&offset=${offset}`),
+  list: async (limit = 50, offset = 0): Promise<SynthesisListItem[]> => {
+    const response = await api.get<{ items: SynthesisListItem[] }>(`/syntheses?limit=${limit}&offset=${offset}`);
+    return response.items;
+  },
 
   /** GET /syntheses/:id -- fetch a single synthesis with full content. */
   get: (id: string): Promise<Synthesis> =>