From 4c6381b09a0751073845a9d4696cd5061f40db5f Mon Sep 17 00:00:00 2001 From: oabrivard Date: Wed, 25 Mar 2026 10:02:00 +0100 Subject: [PATCH] feat: add batch_size setting for Phase 1 parallelism Add a user-configurable batch_size setting (default 5, range 1-20) that controls how many articles are processed in parallel during Phase 1 scrape+classify. Previously hardcoded to 5. Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 2 +- .../20260325000020_add_batch_size_setting.sql | 2 ++ backend/src/db/settings.rs | 17 +++++++---- backend/src/models/settings.rs | 29 +++++++++++++++++++ backend/src/services/prompts.rs | 1 + backend/src/services/synthesis.rs | 2 +- backend/tests/api_syntheses_test.rs | 3 +- e2e/tests/generation-live.spec.ts | 1 + frontend/src/i18n/fr.ts | 2 ++ frontend/src/pages/Settings.tsx | 27 +++++++++++++++++ frontend/src/types.ts | 2 ++ 11 files changed, 79 insertions(+), 9 deletions(-) create mode 100644 backend/migrations/20260325000020_add_batch_size_setting.sql diff --git a/CLAUDE.md b/CLAUDE.md index 1bfd1eb..8565db9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -117,7 +117,7 @@ cd frontend && npx tsc --noEmit - `GET /api/v1/admin/users` — user list - `PUT /api/v1/admin/users/:id/role` — role management -## Database (19 migrations) +## Database (20 migrations) Tables: `users`, `sessions`, `magic_link_tokens`, `user_settings`, `sources`, `syntheses`, `admin_providers`, `admin_rate_limits`, `user_api_keys`, `audit_log` ## Environment Variables diff --git a/backend/migrations/20260325000020_add_batch_size_setting.sql b/backend/migrations/20260325000020_add_batch_size_setting.sql new file mode 100644 index 0000000..88979a8 --- /dev/null +++ b/backend/migrations/20260325000020_add_batch_size_setting.sql @@ -0,0 +1,2 @@ +-- Add batch_size column to settings (parallelism for Phase 1 scrape+classify) +ALTER TABLE settings ADD COLUMN batch_size INTEGER NOT NULL DEFAULT 5; diff --git a/backend/src/db/settings.rs b/backend/src/db/settings.rs index 2e2e688..c049090 100644 --- a/backend/src/db/settings.rs +++ b/backend/src/db/settings.rs @@ -20,6 +20,7 @@ struct SettingsRow { max_articles_per_source: i32, use_llm_for_source_links: bool, article_history_days: i32, + batch_size: i32, search_agent_behavior: String, ai_provider: String, ai_model: String, @@ -46,6 +47,7 @@ impl TryFrom for UserSettings { max_articles_per_source: row.max_articles_per_source, use_llm_for_source_links: row.use_llm_for_source_links, article_history_days: row.article_history_days, + batch_size: row.batch_size, search_agent_behavior: row.search_agent_behavior, ai_provider: row.ai_provider, ai_model: row.ai_model, @@ -72,10 +74,10 @@ pub async fn get_or_create_default( let row = sqlx::query_as::<_, SettingsRow>( r#" - INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) + INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, batch_size) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15) ON CONFLICT (user_id) DO UPDATE SET user_id = settings.user_id - RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at + RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, batch_size, updated_at "#, ) .bind(user_id) @@ -92,6 +94,7 @@ pub async fn get_or_create_default( .bind(defaults.max_articles_per_source) .bind(defaults.use_llm_for_source_links) .bind(defaults.article_history_days) + .bind(defaults.batch_size) .fetch_one(pool) .await?; @@ -110,8 +113,8 @@ pub async fn upsert( let row = sqlx::query_as::<_, SettingsRow>( r#" - INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) + INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, batch_size) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15) ON CONFLICT (user_id) DO UPDATE SET theme = EXCLUDED.theme, max_age_days = EXCLUDED.max_age_days, @@ -126,8 +129,9 @@ pub async fn upsert( max_articles_per_source = EXCLUDED.max_articles_per_source, use_llm_for_source_links = EXCLUDED.use_llm_for_source_links, article_history_days = EXCLUDED.article_history_days, + batch_size = EXCLUDED.batch_size, updated_at = now() - RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at + RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, batch_size, updated_at "#, ) .bind(user_id) @@ -144,6 +148,7 @@ pub async fn upsert( .bind(req.max_articles_per_source) .bind(req.use_llm_for_source_links) .bind(req.article_history_days) + .bind(req.batch_size) .fetch_one(pool) .await?; diff --git a/backend/src/models/settings.rs b/backend/src/models/settings.rs index aead498..2ffb585 100644 --- a/backend/src/models/settings.rs +++ b/backend/src/models/settings.rs @@ -15,6 +15,7 @@ pub struct UserSettings { pub max_articles_per_source: i32, pub use_llm_for_source_links: bool, pub article_history_days: i32, + pub batch_size: i32, pub search_agent_behavior: String, pub ai_provider: String, pub ai_model: String, @@ -34,6 +35,7 @@ pub struct SettingsResponse { pub max_articles_per_source: i32, pub use_llm_for_source_links: bool, pub article_history_days: i32, + pub batch_size: i32, pub search_agent_behavior: String, pub ai_provider: String, pub ai_model: String, @@ -52,6 +54,7 @@ impl From for SettingsResponse { max_articles_per_source: s.max_articles_per_source, use_llm_for_source_links: s.use_llm_for_source_links, article_history_days: s.article_history_days, + batch_size: s.batch_size, search_agent_behavior: s.search_agent_behavior, ai_provider: s.ai_provider, ai_model: s.ai_model, @@ -72,6 +75,7 @@ pub struct UpdateSettingsRequest { pub max_articles_per_source: i32, pub use_llm_for_source_links: bool, pub article_history_days: i32, + pub batch_size: i32, pub search_agent_behavior: String, pub ai_provider: String, pub ai_model: String, @@ -121,6 +125,9 @@ impl UpdateSettingsRequest { if !(0..=365).contains(&self.article_history_days) { return Err("article_history_days must be between 0 and 365".into()); } + if !(1..=20).contains(&self.batch_size) { + return Err("batch_size must be between 1 and 20".into()); + } if self.search_agent_behavior.len() > 2000 { return Err("search_agent_behavior must be at most 2000 characters".into()); } @@ -165,6 +172,7 @@ impl Default for UserSettings { max_articles_per_source: 3, use_llm_for_source_links: false, article_history_days: 90, + batch_size: 5, search_agent_behavior: String::new(), ai_provider: String::new(), ai_model: String::new(), @@ -190,6 +198,7 @@ mod tests { max_articles_per_source: 3, use_llm_for_source_links: false, article_history_days: 90, + batch_size: 5, search_agent_behavior: String::new(), ai_provider: String::new(), ai_model: String::new(), @@ -385,6 +394,26 @@ mod tests { assert!(err.contains("ai_model")); } + #[test] + fn test_validate_batch_size_below_range() { + let req = UpdateSettingsRequest { + batch_size: 0, + ..valid_request() + }; + let err = req.validate().unwrap_err(); + assert!(err.contains("batch_size")); + } + + #[test] + fn test_validate_batch_size_above_range() { + let req = UpdateSettingsRequest { + batch_size: 21, + ..valid_request() + }; + let err = req.validate().unwrap_err(); + assert!(err.contains("batch_size")); + } + #[test] fn test_validate_ai_model_websearch_too_long_rejected() { let req = UpdateSettingsRequest { diff --git a/backend/src/services/prompts.rs b/backend/src/services/prompts.rs index ce40503..84577ef 100644 --- a/backend/src/services/prompts.rs +++ b/backend/src/services/prompts.rs @@ -199,6 +199,7 @@ mod tests { max_articles_per_source: 3, use_llm_for_source_links: false, article_history_days: 90, + batch_size: 5, search_agent_behavior: String::new(), ai_provider: String::new(), ai_model: String::new(), diff --git a/backend/src/services/synthesis.rs b/backend/src/services/synthesis.rs index 6f3422c..c9a97e7 100644 --- a/backend/src/services/synthesis.rs +++ b/backend/src/services/synthesis.rs @@ -398,7 +398,7 @@ async fn run_generation_inner( // 1b. Scrape, classify, summarize in batches of 5 emit_progress(tx, "processing", "Traitement des articles...", 25); let total_candidates = candidate_urls.len(); - let batch_size = 5; + let batch_size = settings.batch_size.max(1) as usize; let mut processed = 0usize; let mut candidates_iter = candidate_urls.into_iter(); let mut done = false; diff --git a/backend/tests/api_syntheses_test.rs b/backend/tests/api_syntheses_test.rs index 9d838f9..dff8ed8 100644 --- a/backend/tests/api_syntheses_test.rs +++ b/backend/tests/api_syntheses_test.rs @@ -633,7 +633,8 @@ async fn generate_pipeline_resolves_model_from_admin_config() { "ai_model_websearch": "", "use_llm_for_source_links": false, "use_llm_for_article_extraction": false, - "article_history_days": 90 + "article_history_days": 90, + "batch_size": 5 }); let (settings_status, _) = app .put_with_session("/api/v1/settings", &settings, &session) diff --git a/e2e/tests/generation-live.spec.ts b/e2e/tests/generation-live.spec.ts index 6b3094a..3ff9109 100644 --- a/e2e/tests/generation-live.spec.ts +++ b/e2e/tests/generation-live.spec.ts @@ -143,6 +143,7 @@ test.describe('Live generation with OpenAI', () => { ai_model_websearch: 'gpt-4o-mini', use_llm_for_source_links: false, article_history_days: 90, + batch_size: 5, }); expect(settingsResp.status).toBe(200); diff --git a/frontend/src/i18n/fr.ts b/frontend/src/i18n/fr.ts index 770e4f4..41c2b7c 100644 --- a/frontend/src/i18n/fr.ts +++ b/frontend/src/i18n/fr.ts @@ -153,6 +153,8 @@ const fr = { 'settings.advancedExtraction': 'Extraction avancee', 'settings.useLlmForSourceLinks': "Utiliser l'IA pour extraire les liens", 'settings.articleHistoryDays': 'Historique des articles (jours)', + 'settings.batchSize': 'Taille du lot de traitement', + 'settings.batchSizeHelp': 'Nombre d\'articles traites en parallele lors de la generation (defaut: 5).', 'settings.export': 'Exporter', 'settings.import': 'Importer', 'settings.exportIncludeKeys': 'Inclure les cles API', diff --git a/frontend/src/pages/Settings.tsx b/frontend/src/pages/Settings.tsx index 8d15a2e..a021168 100644 --- a/frontend/src/pages/Settings.tsx +++ b/frontend/src/pages/Settings.tsx @@ -458,6 +458,33 @@ const Settings: Component = () => { + +
+ +

{t('settings.batchSizeHelp')}

+
+ + setSettings((prev) => ({ + ...prev, + batch_size: + parseInt(e.currentTarget.value) || 5, + })) + } + /> +
+
{/* Advanced extraction */} diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 765e414..909fa79 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -46,6 +46,7 @@ export interface UserSettings { max_articles_per_source: number; use_llm_for_source_links: boolean; article_history_days: number; + batch_size: number; search_agent_behavior: string; ai_model: string; ai_model_websearch: string; @@ -62,6 +63,7 @@ export const DEFAULT_SETTINGS: UserSettings = { max_articles_per_source: 3, use_llm_for_source_links: false, article_history_days: 90, + batch_size: 5, search_agent_behavior: "Tu peux egalement utiliser d'autres sources pertinentes trouvees via la recherche Google.", ai_model: '',