From 54d54f2a06dbd0fafb6dc95375115a4da2c2647f Mon Sep 17 00:00:00 2001 From: oabrivard Date: Sun, 22 Mar 2026 22:04:26 +0100 Subject: [PATCH] =?UTF-8?q?fix:=20architect=20assessment=20remediation=20?= =?UTF-8?q?=E2=80=94=206=20issues=20across=20backend,=20frontend,=20and=20?= =?UTF-8?q?infra?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Wire hardened scraper client into runtime (SSRF redirect validation was defined but unused) - Stream scraper body with per-chunk size limit instead of post-download check (DoS/OOM) - Persist user rate-limit overrides across generation jobs via AppState DashMap - Roll back magic-link token on email send failure to prevent quota exhaustion - Fix API error UX: prefer human message over machine error code in frontend - Unwrap GET /syntheses { items } wrapper in frontend API layer (contract mismatch) - Bind Postgres to localhost in docker-compose (was exposed on all interfaces) - Fix CLAUDE.md: runtime queries not compile-time, 10 migrations not 9 Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 4 ++-- backend/src/app_state.rs | 7 ++++++ backend/src/db/magic_links.rs | 12 ++++++++++ backend/src/handlers/auth.rs | 24 +++++++++++++++----- backend/src/main.rs | 3 ++- backend/src/services/scraper.rs | 32 +++++++++++++++++--------- backend/src/services/synthesis.rs | 37 ++++++++++++++++++++++++------- docker-compose.yml | 2 +- frontend/src/api/client.ts | 2 +- frontend/src/api/syntheses.ts | 6 +++-- 10 files changed, 99 insertions(+), 30 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index e650746..7a0263c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ AI Weekly Synth is a self-hosted web application that generates AI-powered weekl ## Architecture - **Backend**: Rust (Axum) — `backend/` - **Frontend**: SolidJS + Tailwind CSS v4 — `frontend/` -- **Database**: PostgreSQL (via sqlx with compile-time checked queries) +- **Database**: PostgreSQL (via sqlx with runtime-checked queries) - **Deployment**: Docker only (`docker-compose.yml`) ## Project Structure @@ -117,7 +117,7 @@ cd frontend && npx tsc --noEmit - `GET /api/v1/admin/users` — user list - `PUT /api/v1/admin/users/:id/role` — role management -## Database (9 migrations) +## Database (10 migrations) Tables: `users`, `sessions`, `magic_link_tokens`, `user_settings`, `sources`, `syntheses`, `admin_providers`, `admin_rate_limits`, `user_api_keys`, `audit_log` ## Environment Variables diff --git a/backend/src/app_state.rs b/backend/src/app_state.rs index d54a913..493f178 100644 --- a/backend/src/app_state.rs +++ b/backend/src/app_state.rs @@ -1,6 +1,8 @@ //! Shared application state passed to all handlers via Axum's `State` extractor. +use dashmap::DashMap; use sqlx::PgPool; +use uuid::Uuid; use crate::config::AppConfig; use crate::services::rate_limiter::{ProviderRateLimiter, RateLimiter}; @@ -19,6 +21,10 @@ pub struct AppState { /// Per-provider rate limiter for LLM API calls. /// Loaded from DB at startup, hot-reloaded when admin updates config. pub provider_rate_limiter: ProviderRateLimiter, + /// Per-user rate limiters for generation jobs. + /// Keyed by user_id. Created on first generation, replaced if settings change. + /// Tuple: (max_requests, window_seconds, limiter). + pub user_rate_limiters: DashMap, /// In-memory store for active generation jobs. /// Maps job_id -> progress watch channel. pub job_store: JobStore, @@ -42,6 +48,7 @@ impl AppState { http_client, auth_rate_limiter, provider_rate_limiter, + user_rate_limiters: DashMap::new(), job_store, } } diff --git a/backend/src/db/magic_links.rs b/backend/src/db/magic_links.rs index 1402b13..299e8d7 100644 --- a/backend/src/db/magic_links.rs +++ b/backend/src/db/magic_links.rs @@ -86,6 +86,18 @@ pub async fn delete_expired(pool: &PgPool) -> Result { Ok(result.rows_affected()) } +/// Delete a magic link token by its hash. +/// +/// Used to roll back a token when email delivery fails, so it doesn't +/// consume a quota slot. +pub async fn delete_by_hash(pool: &PgPool, token_hash: &str) -> Result<(), AppError> { + sqlx::query("DELETE FROM magic_tokens WHERE token_hash = $1") + .bind(token_hash) + .execute(pool) + .await?; + Ok(()) +} + /// Count unused, non-expired tokens for a given email. /// /// Used to prevent spamming magic link requests. diff --git a/backend/src/handlers/auth.rs b/backend/src/handlers/auth.rs index fa3f78b..c94167e 100644 --- a/backend/src/handlers/auth.rs +++ b/backend/src/handlers/auth.rs @@ -96,9 +96,9 @@ pub async fn register( tracing::info!(email = %email_lower, "New user registered"); } - // Generate and send magic link + // Generate and send magic link — roll back token if email send fails if let Some(raw_token) = auth::create_magic_link(&state.pool, &email_lower).await? { - email::send_magic_link( + if let Err(e) = email::send_magic_link( &state.http_client, &state.config.resend_api_key, &state.config.email_from, @@ -106,7 +106,14 @@ pub async fn register( &state.config.app_url, &raw_token, ) - .await?; + .await + { + let token_hash = crate::util::token::hash_token(&raw_token); + db::magic_links::delete_by_hash(&state.pool, &token_hash) + .await + .ok(); + return Err(e); + } } Ok(( @@ -151,7 +158,7 @@ pub async fn login( if existing.is_some() { if let Some(raw_token) = auth::create_magic_link(&state.pool, &email_lower).await? { - email::send_magic_link( + if let Err(e) = email::send_magic_link( &state.http_client, &state.config.resend_api_key, &state.config.email_from, @@ -159,7 +166,14 @@ pub async fn login( &state.config.app_url, &raw_token, ) - .await?; + .await + { + let token_hash = crate::util::token::hash_token(&raw_token); + db::magic_links::delete_by_hash(&state.pool, &token_hash) + .await + .ok(); + return Err(e); + } } } else { // Add a small random delay to prevent timing attacks diff --git a/backend/src/main.rs b/backend/src/main.rs index 7691728..77b26f9 100644 --- a/backend/src/main.rs +++ b/backend/src/main.rs @@ -15,6 +15,7 @@ use ai_synth_backend::config::AppConfig; use ai_synth_backend::db; use ai_synth_backend::models::user::UserRole; use ai_synth_backend::router; +use ai_synth_backend::services::scraper; use crate::cli::{Cli, Commands}; @@ -50,7 +51,7 @@ async fn main() -> anyhow::Result<()> { match cli.command.unwrap_or(Commands::Serve) { Commands::Serve => { - let http_client = reqwest::Client::new(); + let http_client = scraper::build_scraper_client()?; let state = app_state::AppState::new(config.clone(), pool, http_client); // Load provider rate limits from DB into in-memory limiter diff --git a/backend/src/services/scraper.rs b/backend/src/services/scraper.rs index 41716de..814408f 100644 --- a/backend/src/services/scraper.rs +++ b/backend/src/services/scraper.rs @@ -122,7 +122,7 @@ pub async fn scrape_url( check_ssrf(&parsed_url).await?; // Fetch the page - let response = http_client + let mut response = http_client .get(url) .send() .await @@ -145,16 +145,28 @@ pub async fn scrape_url( // Capture final URL BEFORE consuming the response body (follows redirects) let final_url = response.url().clone(); - // Read body with size limit - let bytes = response - .bytes() - .await - .map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to read response body: {}", e)))?; + // Read body with streaming size limit — bail early to avoid OOM on huge responses. + // Check Content-Length first (fast path), then enforce while streaming chunks. + if let Some(content_length) = response.content_length() { + if content_length as usize > MAX_BODY_SIZE { + return Err(AppError::BadRequest( + "Response body exceeds 5 MB limit".into(), + )); + } + } - if bytes.len() > MAX_BODY_SIZE { - return Err(AppError::BadRequest( - "Response body exceeds 5 MB limit".into(), - )); + let mut bytes = Vec::new(); + while let Some(chunk) = response + .chunk() + .await + .map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to read response body: {}", e)))? + { + if bytes.len() + chunk.len() > MAX_BODY_SIZE { + return Err(AppError::BadRequest( + "Response body exceeds 5 MB limit".into(), + )); + } + bytes.extend_from_slice(&chunk); } let html_text = String::from_utf8_lossy(&bytes); diff --git a/backend/src/services/synthesis.rs b/backend/src/services/synthesis.rs index e1337f4..6a809d1 100644 --- a/backend/src/services/synthesis.rs +++ b/backend/src/services/synthesis.rs @@ -289,9 +289,8 @@ async fn run_generation_inner( model_research.clone() }; - // Build per-user rate limiter if the user has overrides configured. - // Created once and reused across both passes so the limit is actually enforced. - let user_rate_limiter = build_user_rate_limiter(&settings); + // Look up or create per-user rate limiter from AppState so limits persist across jobs. + let user_rate_limiter = get_user_rate_limiter(state, &settings, user_id); // Step 5: Rate limit check (pass 1) check_rate_limit(state, &user_rate_limiter, &provider_name)?; @@ -379,24 +378,46 @@ fn emit_progress(tx: &watch::Sender, step: &str, message: &str, p .ok(); } -/// Build a per-user rate limiter from settings, if both override fields are configured. +/// Look up or create a per-user rate limiter stored in AppState. /// /// Returns `None` if the user has no rate limit overrides, in which case the /// global provider rate limiter should be used instead. -fn build_user_rate_limiter( +/// +/// The limiter is stored in `state.user_rate_limiters` keyed by `user_id` so that +/// rate limit history persists across generation jobs. If the user's settings have +/// changed since the limiter was created, a fresh limiter replaces the old one. +fn get_user_rate_limiter( + state: &AppState, settings: &UserSettings, + user_id: Uuid, ) -> Option { match ( settings.rate_limit_max_requests, settings.rate_limit_time_window_seconds, ) { (Some(max_req), Some(window_sec)) => { - Some(crate::services::rate_limiter::RateLimiter::new( + // Reuse existing limiter if settings haven't changed + if let Some(entry) = state.user_rate_limiters.get(&user_id) { + let (stored_max, stored_window, ref limiter) = *entry; + if stored_max == max_req && stored_window == window_sec { + return Some(limiter.clone()); + } + } + // Settings changed or first generation — create and store + let limiter = crate::services::rate_limiter::RateLimiter::new( max_req as usize, Duration::from_secs(window_sec as u64), - )) + ); + state + .user_rate_limiters + .insert(user_id, (max_req, window_sec, limiter.clone())); + Some(limiter) + } + _ => { + // User cleared their overrides — remove stale limiter + state.user_rate_limiters.remove(&user_id); + None } - _ => None, } } diff --git a/docker-compose.yml b/docker-compose.yml index 0030831..4f7383f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -35,7 +35,7 @@ services: networks: - internal ports: - - "5432:5432" + - "127.0.0.1:5432:5432" healthcheck: test: ["CMD-SHELL", "pg_isready -U ai_synth -d ai_synth"] interval: 10s diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts index 24bc90a..fad5ce9 100644 --- a/frontend/src/api/client.ts +++ b/frontend/src/api/client.ts @@ -65,7 +65,7 @@ class ApiClient { .catch(() => ({ error: 'Unknown error' })); const apiError: ApiError = { status: response.status, - message: errorBody.error || errorBody.message || `HTTP ${response.status}`, + message: errorBody.message || errorBody.error || `HTTP ${response.status}`, field_errors: errorBody.field_errors, }; throw apiError; diff --git a/frontend/src/api/syntheses.ts b/frontend/src/api/syntheses.ts index 048f35c..de2021a 100644 --- a/frontend/src/api/syntheses.ts +++ b/frontend/src/api/syntheses.ts @@ -60,8 +60,10 @@ export async function fetchFile(path: string): Promise { /** Synthesis API endpoints (CRUD, generation, export, email). */ export const synthesesApi = { /** GET /syntheses -- paginated list of the user's syntheses. */ - list: (limit = 50, offset = 0): Promise => - api.get(`/syntheses?limit=${limit}&offset=${offset}`), + list: async (limit = 50, offset = 0): Promise => { + const response = await api.get<{ items: SynthesisListItem[] }>(`/syntheses?limit=${limit}&offset=${offset}`); + return response.items; + }, /** GET /syntheses/:id -- fetch a single synthesis with full content. */ get: (id: string): Promise =>