From 8d232c1ade20b7a632b921e64d822667b0bbdc6b Mon Sep 17 00:00:00 2001 From: oabrivard Date: Wed, 25 Mar 2026 08:34:59 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20split=20model=20selection=20=E2=80=94?= =?UTF-8?q?=20scraping=20vs=20websearch=20with=20GPT-5=20models?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 2 +- .../migrations/20260325000019_model_split.sql | 30 +++++++ backend/src/db/providers.rs | 66 +++++++++----- backend/src/db/settings.rs | 18 ++-- backend/src/handlers/admin.rs | 12 ++- backend/src/handlers/api_keys.rs | 4 +- backend/src/handlers/config.rs | 3 +- backend/src/models/provider.rs | 86 +++++++++++++------ backend/src/models/settings.rs | 24 +++--- backend/src/services/prompts.rs | 11 ++- backend/src/services/synthesis.rs | 9 +- backend/tests/api_syntheses_test.rs | 2 +- 12 files changed, 181 insertions(+), 86 deletions(-) create mode 100644 backend/migrations/20260325000019_model_split.sql diff --git a/CLAUDE.md b/CLAUDE.md index 2b73394..1bfd1eb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -117,7 +117,7 @@ cd frontend && npx tsc --noEmit - `GET /api/v1/admin/users` — user list - `PUT /api/v1/admin/users/:id/role` — role management -## Database (18 migrations) +## Database (19 migrations) Tables: `users`, `sessions`, `magic_link_tokens`, `user_settings`, `sources`, `syntheses`, `admin_providers`, `admin_rate_limits`, `user_api_keys`, `audit_log` ## Environment Variables diff --git a/backend/migrations/20260325000019_model_split.sql b/backend/migrations/20260325000019_model_split.sql new file mode 100644 index 0000000..f06988d --- /dev/null +++ b/backend/migrations/20260325000019_model_split.sql @@ -0,0 +1,30 @@ +-- Rename ai_model_writing to ai_model_websearch +ALTER TABLE settings RENAME COLUMN ai_model_writing TO ai_model_websearch; + +-- Restructure admin_providers: rename 'models' to 'models_scraping', add 'models_websearch' +-- First, 
rename the existing models column +ALTER TABLE admin_providers RENAME COLUMN models TO models_scraping; + +-- Add the new models_websearch column (copy from models_scraping initially) +ALTER TABLE admin_providers ADD COLUMN models_websearch JSONB NOT NULL DEFAULT '[]'::jsonb; + +-- Copy models_scraping to models_websearch for all providers (same list initially) +UPDATE admin_providers SET models_websearch = models_scraping; + +-- Update OpenAI with GPT-5 generation models +UPDATE admin_providers SET + models_scraping = '[ + {"model_id": "gpt-5.4-mini", "display_name": "GPT-5.4 Mini", "is_default": false}, + {"model_id": "gpt-5.4-nano", "display_name": "GPT-5.4 Nano", "is_default": false}, + {"model_id": "gpt-5-mini", "display_name": "GPT-5 Mini", "is_default": false}, + {"model_id": "gpt-5-nano", "display_name": "GPT-5 Nano", "is_default": true} + ]'::jsonb, + models_websearch = '[ + {"model_id": "gpt-5.4", "display_name": "GPT-5.4", "is_default": false}, + {"model_id": "gpt-5.4-mini", "display_name": "GPT-5.4 Mini", "is_default": false}, + {"model_id": "gpt-5.2", "display_name": "GPT-5.2", "is_default": false}, + {"model_id": "gpt-5.1", "display_name": "GPT-5.1", "is_default": true}, + {"model_id": "gpt-5-mini", "display_name": "GPT-5 Mini", "is_default": false}, + {"model_id": "gpt-5", "display_name": "GPT-5", "is_default": false} + ]'::jsonb +WHERE provider_name = 'openai'; diff --git a/backend/src/db/providers.rs b/backend/src/db/providers.rs index d07dc73..5c8e1f7 100644 --- a/backend/src/db/providers.rs +++ b/backend/src/db/providers.rs @@ -14,7 +14,8 @@ struct ProviderRow { id: Uuid, provider_name: String, display_name: String, - models: serde_json::Value, + models_scraping: serde_json::Value, + models_websearch: serde_json::Value, is_enabled: bool, created_at: chrono::DateTime, updated_at: chrono::DateTime, @@ -24,16 +25,21 @@ impl TryFrom for AdminProvider { type Error = AppError; fn try_from(row: ProviderRow) -> Result { - let models: Vec = - 
serde_json::from_value(row.models).map_err(|e| { - AppError::Internal(anyhow::anyhow!("Failed to parse provider models JSON: {}", e)) + let models_scraping: Vec = + serde_json::from_value(row.models_scraping).map_err(|e| { + AppError::Internal(anyhow::anyhow!("Failed to parse provider models_scraping JSON: {}", e)) + })?; + let models_websearch: Vec = + serde_json::from_value(row.models_websearch).map_err(|e| { + AppError::Internal(anyhow::anyhow!("Failed to parse provider models_websearch JSON: {}", e)) })?; Ok(Self { id: row.id, provider_name: row.provider_name, display_name: row.display_name, - models, + models_scraping, + models_websearch, is_enabled: row.is_enabled, created_at: row.created_at, updated_at: row.updated_at, @@ -45,7 +51,7 @@ impl TryFrom for AdminProvider { pub async fn list_all(pool: &PgPool) -> Result, AppError> { let rows = sqlx::query_as::<_, ProviderRow>( r#" - SELECT id, provider_name, display_name, models, is_enabled, created_at, updated_at + SELECT id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at FROM admin_providers ORDER BY provider_name "#, @@ -60,7 +66,7 @@ pub async fn list_all(pool: &PgPool) -> Result, AppError> { pub async fn get_by_id(pool: &PgPool, id: Uuid) -> Result, AppError> { let row = sqlx::query_as::<_, ProviderRow>( r#" - SELECT id, provider_name, display_name, models, is_enabled, created_at, updated_at + SELECT id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at FROM admin_providers WHERE id = $1 "#, @@ -76,7 +82,7 @@ pub async fn get_by_id(pool: &PgPool, id: Uuid) -> Result, pub async fn get_by_name(pool: &PgPool, name: &str) -> Result, AppError> { let row = sqlx::query_as::<_, ProviderRow>( r#" - SELECT id, provider_name, display_name, models, is_enabled, created_at, updated_at + SELECT id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at FROM admin_providers WHERE 
provider_name = $1 "#, @@ -95,23 +101,28 @@ pub async fn create( pool: &PgPool, provider_name: &str, display_name: &str, - models: &[ProviderModel], + models_scraping: &[ProviderModel], + models_websearch: &[ProviderModel], is_enabled: bool, ) -> Result { - let models_json = serde_json::to_value(models).map_err(|e| { - AppError::Internal(anyhow::anyhow!("Failed to serialize models: {}", e)) + let models_scraping_json = serde_json::to_value(models_scraping).map_err(|e| { + AppError::Internal(anyhow::anyhow!("Failed to serialize models_scraping: {}", e)) + })?; + let models_websearch_json = serde_json::to_value(models_websearch).map_err(|e| { + AppError::Internal(anyhow::anyhow!("Failed to serialize models_websearch: {}", e)) })?; let row = sqlx::query_as::<_, ProviderRow>( r#" - INSERT INTO admin_providers (provider_name, display_name, models, is_enabled) - VALUES ($1, $2, $3, $4) - RETURNING id, provider_name, display_name, models, is_enabled, created_at, updated_at + INSERT INTO admin_providers (provider_name, display_name, models_scraping, models_websearch, is_enabled) + VALUES ($1, $2, $3, $4, $5) + RETURNING id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at "#, ) .bind(provider_name) .bind(display_name) - .bind(&models_json) + .bind(&models_scraping_json) + .bind(&models_websearch_json) .bind(is_enabled) .fetch_one(pool) .await?; @@ -127,13 +138,20 @@ pub async fn update( pool: &PgPool, id: Uuid, display_name: Option<&str>, - models: Option<&[ProviderModel]>, + models_scraping: Option<&[ProviderModel]>, + models_websearch: Option<&[ProviderModel]>, is_enabled: Option, ) -> Result, AppError> { - let models_json = models + let models_scraping_json = models_scraping + .map(|m| { + serde_json::to_value(m) + .map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to serialize models_scraping: {}", e))) + }) + .transpose()?; + let models_websearch_json = models_websearch .map(|m| { serde_json::to_value(m) - 
.map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to serialize models: {}", e))) + .map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to serialize models_websearch: {}", e))) }) .transpose()?; @@ -141,16 +159,18 @@ pub async fn update( r#" UPDATE admin_providers SET display_name = COALESCE($2, display_name), - models = COALESCE($3, models), - is_enabled = COALESCE($4, is_enabled), + models_scraping = COALESCE($3, models_scraping), + models_websearch = COALESCE($4, models_websearch), + is_enabled = COALESCE($5, is_enabled), updated_at = now() WHERE id = $1 - RETURNING id, provider_name, display_name, models, is_enabled, created_at, updated_at + RETURNING id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at "#, ) .bind(id) .bind(display_name) - .bind(models_json) + .bind(models_scraping_json) + .bind(models_websearch_json) .bind(is_enabled) .fetch_optional(pool) .await?; @@ -176,7 +196,7 @@ pub async fn delete(pool: &PgPool, id: Uuid) -> Result { pub async fn list_enabled(pool: &PgPool) -> Result, AppError> { let rows = sqlx::query_as::<_, ProviderRow>( r#" - SELECT id, provider_name, display_name, models, is_enabled, created_at, updated_at + SELECT id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at FROM admin_providers WHERE is_enabled = true ORDER BY provider_name diff --git a/backend/src/db/settings.rs b/backend/src/db/settings.rs index 00fbfbd..2e2e688 100644 --- a/backend/src/db/settings.rs +++ b/backend/src/db/settings.rs @@ -23,7 +23,7 @@ struct SettingsRow { search_agent_behavior: String, ai_provider: String, ai_model: String, - ai_model_writing: String, + ai_model_websearch: String, rate_limit_max_requests: Option, rate_limit_time_window_seconds: Option, updated_at: chrono::DateTime, @@ -49,7 +49,7 @@ impl TryFrom for UserSettings { search_agent_behavior: row.search_agent_behavior, ai_provider: row.ai_provider, ai_model: row.ai_model, - 
ai_model_writing: row.ai_model_writing, + ai_model_websearch: row.ai_model_websearch, rate_limit_max_requests: row.rate_limit_max_requests, rate_limit_time_window_seconds: row.rate_limit_time_window_seconds, updated_at: row.updated_at, @@ -72,10 +72,10 @@ pub async fn get_or_create_default( let row = sqlx::query_as::<_, SettingsRow>( r#" - INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_writing, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days) + INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) ON CONFLICT (user_id) DO UPDATE SET user_id = settings.user_id - RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_writing, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at + RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at "#, ) .bind(user_id) @@ -86,7 +86,7 @@ pub async fn get_or_create_default( .bind(&defaults.search_agent_behavior) .bind(&defaults.ai_provider) .bind(&defaults.ai_model) - .bind(&defaults.ai_model_writing) + .bind(&defaults.ai_model_websearch) .bind(defaults.rate_limit_max_requests) .bind(defaults.rate_limit_time_window_seconds) .bind(defaults.max_articles_per_source) @@ -110,7 +110,7 @@ 
pub async fn upsert( let row = sqlx::query_as::<_, SettingsRow>( r#" - INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_writing, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days) + INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) ON CONFLICT (user_id) DO UPDATE SET theme = EXCLUDED.theme, @@ -120,14 +120,14 @@ pub async fn upsert( search_agent_behavior = EXCLUDED.search_agent_behavior, ai_provider = EXCLUDED.ai_provider, ai_model = EXCLUDED.ai_model, - ai_model_writing = EXCLUDED.ai_model_writing, + ai_model_websearch = EXCLUDED.ai_model_websearch, rate_limit_max_requests = EXCLUDED.rate_limit_max_requests, rate_limit_time_window_seconds = EXCLUDED.rate_limit_time_window_seconds, max_articles_per_source = EXCLUDED.max_articles_per_source, use_llm_for_source_links = EXCLUDED.use_llm_for_source_links, article_history_days = EXCLUDED.article_history_days, updated_at = now() - RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_writing, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at + RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at "#, ) .bind(user_id) @@ -138,7 +138,7 @@ pub async fn upsert( 
.bind(&req.search_agent_behavior) .bind(&req.ai_provider) .bind(&req.ai_model) - .bind(&req.ai_model_writing) + .bind(&req.ai_model_websearch) .bind(req.rate_limit_max_requests) .bind(req.rate_limit_time_window_seconds) .bind(req.max_articles_per_source) diff --git a/backend/src/handlers/admin.rs b/backend/src/handlers/admin.rs index 9f6f580..0a5c91c 100644 --- a/backend/src/handlers/admin.rs +++ b/backend/src/handlers/admin.rs @@ -70,7 +70,8 @@ pub async fn create_provider( &state.pool, &body.provider_name, &body.display_name, - &body.models, + &body.models_scraping, + &body.models_websearch, body.is_enabled, ) .await?; @@ -86,7 +87,8 @@ pub async fn create_provider( details: Some(serde_json::json!({ "provider_name": provider.provider_name, "display_name": provider.display_name, - "model_count": provider.models.len(), + "model_scraping_count": provider.models_scraping.len(), + "model_websearch_count": provider.models_websearch.len(), "is_enabled": provider.is_enabled, })), }, @@ -121,7 +123,8 @@ pub async fn update_provider( &state.pool, id, body.display_name.as_deref(), - body.models.as_deref(), + body.models_scraping.as_deref(), + body.models_websearch.as_deref(), body.is_enabled, ) .await?; @@ -140,7 +143,8 @@ pub async fn update_provider( "provider_name": provider.provider_name, "updated_fields": { "display_name": body.display_name.is_some(), - "models": body.models.is_some(), + "models_scraping": body.models_scraping.is_some(), + "models_websearch": body.models_websearch.is_some(), "is_enabled": body.is_enabled.is_some(), }, })), diff --git a/backend/src/handlers/api_keys.rs b/backend/src/handlers/api_keys.rs index c300420..028d8d0 100644 --- a/backend/src/handlers/api_keys.rs +++ b/backend/src/handlers/api_keys.rs @@ -218,10 +218,10 @@ async fn get_default_model_for_provider( Some(p) => { // Find the default model, or use the first one let model = p - .models + .models_scraping .iter() .find(|m| m.is_default) - .or_else(|| p.models.first()) + .or_else(|| 
p.models_scraping.first()) .ok_or_else(|| { AppError::BadRequest(format!( "No models configured for provider '{}'", diff --git a/backend/src/handlers/config.rs b/backend/src/handlers/config.rs index 1f74e5e..2963a85 100644 --- a/backend/src/handlers/config.rs +++ b/backend/src/handlers/config.rs @@ -31,7 +31,8 @@ pub async fn list_enabled_providers( .map(|p| ProviderConfigResponse { provider_name: p.provider_name, display_name: p.display_name, - models: p.models.into_iter().map(PublicModelInfo::from).collect(), + models_scraping: p.models_scraping.into_iter().map(PublicModelInfo::from).collect(), + models_websearch: p.models_websearch.into_iter().map(PublicModelInfo::from).collect(), }) .collect(); diff --git a/backend/src/models/provider.rs b/backend/src/models/provider.rs index b628e7d..41a2341 100644 --- a/backend/src/models/provider.rs +++ b/backend/src/models/provider.rs @@ -22,7 +22,8 @@ pub struct AdminProvider { pub id: Uuid, pub provider_name: String, pub display_name: String, - pub models: Vec, + pub models_scraping: Vec, + pub models_websearch: Vec, pub is_enabled: bool, pub created_at: DateTime, pub updated_at: DateTime, @@ -33,7 +34,8 @@ pub struct AdminProvider { pub struct CreateProviderRequest { pub provider_name: String, pub display_name: String, - pub models: Vec, + pub models_scraping: Vec, + pub models_websearch: Vec, #[serde(default = "default_true")] pub is_enabled: bool, } @@ -69,7 +71,8 @@ impl CreateProviderRequest { } validate_display_name(&self.display_name)?; - validate_models(&self.models)?; + validate_models(&self.models_scraping)?; + validate_models(&self.models_websearch)?; Ok(()) } @@ -79,7 +82,8 @@ impl CreateProviderRequest { #[derive(Debug, Deserialize)] pub struct UpdateProviderRequest { pub display_name: Option, - pub models: Option>, + pub models_scraping: Option>, + pub models_websearch: Option>, pub is_enabled: Option, } @@ -89,7 +93,10 @@ impl UpdateProviderRequest { if let Some(ref display) = self.display_name { 
validate_display_name(display)?; } - if let Some(ref models) = self.models { + if let Some(ref models) = self.models_scraping { + validate_models(models)?; + } + if let Some(ref models) = self.models_websearch { validate_models(models)?; } Ok(()) @@ -143,7 +150,8 @@ fn validate_models(models: &[ProviderModel]) -> Result<(), String> { pub struct ProviderConfigResponse { pub provider_name: String, pub display_name: String, - pub models: Vec, + pub models_scraping: Vec, + pub models_websearch: Vec, } /// Public model info (subset of `ProviderModel`). @@ -170,7 +178,8 @@ pub struct AdminProviderResponse { pub id: Uuid, pub provider_name: String, pub display_name: String, - pub models: Vec, + pub models_scraping: Vec, + pub models_websearch: Vec, pub is_enabled: bool, pub created_at: DateTime, pub updated_at: DateTime, @@ -182,7 +191,8 @@ impl From for AdminProviderResponse { id: p.id, provider_name: p.provider_name, display_name: p.display_name, - models: p.models, + models_scraping: p.models_scraping, + models_websearch: p.models_websearch, is_enabled: p.is_enabled, created_at: p.created_at, updated_at: p.updated_at, @@ -194,12 +204,26 @@ impl From for AdminProviderResponse { mod tests { use super::*; + /// Helper to create a sample model list for tests. 
+ fn sample_models() -> Vec { + vec![ProviderModel { + model_id: "m1".into(), + display_name: "Model 1".into(), + is_default: true, + }] + } + #[test] fn test_valid_create_request() { let req = CreateProviderRequest { provider_name: "gemini".into(), display_name: "Google Gemini".into(), - models: vec![ProviderModel { + models_scraping: vec![ProviderModel { + model_id: "gemini-2.5-pro".into(), + display_name: "Gemini 2.5 Pro".into(), + is_default: true, + }], + models_websearch: vec![ProviderModel { model_id: "gemini-2.5-pro".into(), display_name: "Gemini 2.5 Pro".into(), is_default: true, @@ -214,11 +238,8 @@ mod tests { let req = CreateProviderRequest { provider_name: "unknown_provider".into(), display_name: "Unknown".into(), - models: vec![ProviderModel { - model_id: "m1".into(), - display_name: "Model 1".into(), - is_default: false, - }], + models_scraping: sample_models(), + models_websearch: sample_models(), is_enabled: true, }; let err = req.validate().unwrap_err(); @@ -230,11 +251,8 @@ mod tests { let req = CreateProviderRequest { provider_name: " ".into(), display_name: "Some Provider".into(), - models: vec![ProviderModel { - model_id: "m1".into(), - display_name: "Model 1".into(), - is_default: false, - }], + models_scraping: sample_models(), + models_websearch: sample_models(), is_enabled: true, }; let err = req.validate().unwrap_err(); @@ -242,11 +260,25 @@ mod tests { } #[test] - fn test_empty_models_list() { + fn test_empty_models_scraping_list() { + let req = CreateProviderRequest { + provider_name: "openai".into(), + display_name: "OpenAI".into(), + models_scraping: vec![], + models_websearch: sample_models(), + is_enabled: true, + }; + let err = req.validate().unwrap_err(); + assert!(err.contains("At least one model")); + } + + #[test] + fn test_empty_models_websearch_list() { let req = CreateProviderRequest { provider_name: "openai".into(), display_name: "OpenAI".into(), - models: vec![], + models_scraping: sample_models(), + models_websearch: 
vec![], is_enabled: true, }; let err = req.validate().unwrap_err(); @@ -258,7 +290,7 @@ mod tests { let req = CreateProviderRequest { provider_name: "openai".into(), display_name: "OpenAI".into(), - models: vec![ + models_scraping: vec![ ProviderModel { model_id: "gpt-4o".into(), display_name: "GPT-4o".into(), @@ -270,6 +302,7 @@ mod tests { is_default: true, }, ], + models_websearch: sample_models(), is_enabled: true, }; let err = req.validate().unwrap_err(); @@ -281,11 +314,12 @@ mod tests { let req = CreateProviderRequest { provider_name: "anthropic".into(), display_name: "Anthropic".into(), - models: vec![ProviderModel { + models_scraping: vec![ProviderModel { model_id: "".into(), display_name: "Claude".into(), is_default: false, }], + models_websearch: sample_models(), is_enabled: true, }; let err = req.validate().unwrap_err(); @@ -296,7 +330,8 @@ mod tests { fn test_update_request_all_none() { let req = UpdateProviderRequest { display_name: None, - models: None, + models_scraping: None, + models_websearch: None, is_enabled: None, }; assert!(req.validate().is_ok()); @@ -306,7 +341,8 @@ mod tests { fn test_update_request_empty_display_name() { let req = UpdateProviderRequest { display_name: Some("".into()), - models: None, + models_scraping: None, + models_websearch: None, is_enabled: None, }; let err = req.validate().unwrap_err(); diff --git a/backend/src/models/settings.rs b/backend/src/models/settings.rs index 37dd27d..aead498 100644 --- a/backend/src/models/settings.rs +++ b/backend/src/models/settings.rs @@ -18,7 +18,7 @@ pub struct UserSettings { pub search_agent_behavior: String, pub ai_provider: String, pub ai_model: String, - pub ai_model_writing: String, + pub ai_model_websearch: String, pub rate_limit_max_requests: Option, pub rate_limit_time_window_seconds: Option, pub updated_at: DateTime, @@ -37,7 +37,7 @@ pub struct SettingsResponse { pub search_agent_behavior: String, pub ai_provider: String, pub ai_model: String, - pub ai_model_writing: String, 
+ pub ai_model_websearch: String, pub rate_limit_max_requests: Option, pub rate_limit_time_window_seconds: Option, } @@ -55,7 +55,7 @@ impl From for SettingsResponse { search_agent_behavior: s.search_agent_behavior, ai_provider: s.ai_provider, ai_model: s.ai_model, - ai_model_writing: s.ai_model_writing, + ai_model_websearch: s.ai_model_websearch, rate_limit_max_requests: s.rate_limit_max_requests, rate_limit_time_window_seconds: s.rate_limit_time_window_seconds, } @@ -75,7 +75,7 @@ pub struct UpdateSettingsRequest { pub search_agent_behavior: String, pub ai_provider: String, pub ai_model: String, - pub ai_model_writing: String, + pub ai_model_websearch: String, pub rate_limit_max_requests: Option, pub rate_limit_time_window_seconds: Option, } @@ -130,8 +130,8 @@ impl UpdateSettingsRequest { if self.ai_model.len() > 100 { return Err("ai_model must be at most 100 characters".into()); } - if self.ai_model_writing.len() > 100 { - return Err("ai_model_writing must be at most 100 characters".into()); + if self.ai_model_websearch.len() > 100 { + return Err("ai_model_websearch must be at most 100 characters".into()); } if let Some(max_req) = self.rate_limit_max_requests { if max_req < 1 { @@ -168,7 +168,7 @@ impl Default for UserSettings { search_agent_behavior: String::new(), ai_provider: String::new(), ai_model: String::new(), - ai_model_writing: String::new(), + ai_model_websearch: String::new(), rate_limit_max_requests: None, rate_limit_time_window_seconds: None, updated_at: Utc::now(), @@ -193,7 +193,7 @@ mod tests { search_agent_behavior: String::new(), ai_provider: String::new(), ai_model: String::new(), - ai_model_writing: String::new(), + ai_model_websearch: String::new(), rate_limit_max_requests: None, rate_limit_time_window_seconds: None, } @@ -329,7 +329,7 @@ mod tests { let req = UpdateSettingsRequest { ai_provider: "google".into(), ai_model: "gemini-2.5-pro".into(), - ai_model_writing: "gemini-2.5-flash".into(), + ai_model_websearch: 
"gemini-2.5-flash".into(), ..valid_request() }; assert!(req.validate().is_ok()); @@ -386,13 +386,13 @@ mod tests { } #[test] - fn test_validate_ai_model_writing_too_long_rejected() { + fn test_validate_ai_model_websearch_too_long_rejected() { let req = UpdateSettingsRequest { - ai_model_writing: "a".repeat(101), + ai_model_websearch: "a".repeat(101), ..valid_request() }; let err = req.validate().unwrap_err(); - assert!(err.contains("ai_model_writing")); + assert!(err.contains("ai_model_websearch")); } } diff --git a/backend/src/services/prompts.rs b/backend/src/services/prompts.rs index 9ce8b6d..b51e8f8 100644 --- a/backend/src/services/prompts.rs +++ b/backend/src/services/prompts.rs @@ -78,6 +78,7 @@ pub fn build_search_prompt( et un resume provisoire.\n\ Ne retourne JAMAIS des URLs de pages d'accueil (homepage). Fournis toujours des liens \ directs vers des articles specifiques avec un chemin complet (pas juste le nom de domaine).\n\ + Ne change jamais les URLs retournees, et ne les tronque jamais. \ Retourne le resultat au format JSON en utilisant les cles category_0, category_1, etc. \ correspondant a l'ordre des sections ci-dessus.", date = current_date, @@ -127,14 +128,16 @@ pub fn build_link_extraction_prompt(head_html: &str, body_html: &str) -> (String .to_string(); let body_truncated: String = body_html.chars().take(8000).collect(); - + let user_prompt = format!( "Voici le contenu HTML d'une page de blog ou de site d'actualites.\n\n\ \n{head}\n\n\n\ \n{body}\n\n\n\ Extrais UNIQUEMENT les URLs qui pointent vers des articles \ - (pas les liens de navigation, tags, categories, login, pages statiques, etc.).\n\ - Retourne les URLs completes dans le format JSON demande.", + (pas les liens de navigation, tags, categories, login, pages statiques, topics, \ + archive, companies, events, company, event, collections, etc.).\n\ + Retourne les URLs completes, sans les modifier, dans le format JSON demande. 
\ + Ne change jamais les URLs retournees, et ne les tronque jamais.", head = head_html, body = body_truncated, ); @@ -201,7 +204,7 @@ mod tests { search_agent_behavior: String::new(), ai_provider: String::new(), ai_model: String::new(), - ai_model_writing: String::new(), + ai_model_websearch: String::new(), rate_limit_max_requests: None, rate_limit_time_window_seconds: None, updated_at: Utc::now(), diff --git a/backend/src/services/synthesis.rs b/backend/src/services/synthesis.rs index 202d7f0..998f5f8 100644 --- a/backend/src/services/synthesis.rs +++ b/backend/src/services/synthesis.rs @@ -264,6 +264,7 @@ async fn run_generation_inner( let (provider_name, api_key) = resolve_provider_and_key(state, user_id, &settings).await?; let provider = create_provider(&provider_name, api_key)?; let model_research = if !settings.ai_model.is_empty() { settings.ai_model.clone() } else { resolve_model(state, &provider_name).await? }; + let model_websearch = if !settings.ai_model_websearch.is_empty() { settings.ai_model_websearch.clone() } else { model_research.clone() }; let user_rate_limiter = get_user_rate_limiter(state, &settings, user_id); // Tracking structures @@ -475,9 +476,9 @@ async fn run_generation_inner( let (sys_prompt, usr_prompt) = crate::services::prompts::build_search_prompt(&settings, &sources, &current_date, &[], Some(&category_gaps)); let llm_start = std::time::Instant::now(); - let raw_results = provider.call_llm(&model_research, &sys_prompt, &usr_prompt, &search_schema).await?; + let raw_results = provider.call_llm(&model_websearch, &sys_prompt, &usr_prompt, &search_schema).await?; let llm_duration = llm_start.elapsed().as_millis() as u64; - log_llm_call(&state.pool, user_id, job_id, "search", &model_research, &sys_prompt, &usr_prompt, &raw_results, llm_duration).await; + log_llm_call(&state.pool, user_id, job_id, "search", &model_websearch, &sys_prompt, &usr_prompt, &raw_results, llm_duration).await; emit_progress(tx, "parsing", "Analyse des resultats...", 75); 
let parsed = parse_llm_output(&raw_results, &user_categories)?; @@ -848,11 +849,11 @@ async fn resolve_provider_and_key( /// Looks up the first enabled model for the provider from the admin config. /// Falls back to sensible defaults if no admin-configured models exist. async fn resolve_model(state: &AppState, provider_name: &str) -> Result { - // Try to get the default model from the admin_providers JSONB models array + // Try to get the default model from the admin_providers JSONB models_scraping array let model = sqlx::query_scalar::<_, String>( r#" SELECT m->>'model_id' - FROM admin_providers, jsonb_array_elements(models) AS m + FROM admin_providers, jsonb_array_elements(models_scraping) AS m WHERE provider_name = $1 AND is_enabled = true AND (m->>'is_default')::boolean = true LIMIT 1 "#, diff --git a/backend/tests/api_syntheses_test.rs b/backend/tests/api_syntheses_test.rs index 16c23d8..9d838f9 100644 --- a/backend/tests/api_syntheses_test.rs +++ b/backend/tests/api_syntheses_test.rs @@ -630,7 +630,7 @@ async fn generate_pipeline_resolves_model_from_admin_config() { "categories": ["Test Category"], "ai_provider": "openai", "ai_model": "", - "ai_model_writing": "", + "ai_model_websearch": "", "use_llm_for_source_links": false, "use_llm_for_article_extraction": false, "article_history_days": 90