feat: split model selection — scraping vs websearch with GPT-5 models

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
oabrivard 3 months ago
parent 97e484e03f
commit 8d232c1ade

@ -117,7 +117,7 @@ cd frontend && npx tsc --noEmit
- `GET /api/v1/admin/users` — user list - `GET /api/v1/admin/users` — user list
- `PUT /api/v1/admin/users/:id/role` — role management - `PUT /api/v1/admin/users/:id/role` — role management
## Database (18 migrations) ## Database (19 migrations)
Tables: `users`, `sessions`, `magic_link_tokens`, `user_settings`, `sources`, `syntheses`, `admin_providers`, `admin_rate_limits`, `user_api_keys`, `audit_log` Tables: `users`, `sessions`, `magic_link_tokens`, `user_settings`, `sources`, `syntheses`, `admin_providers`, `admin_rate_limits`, `user_api_keys`, `audit_log`
## Environment Variables ## Environment Variables

@ -0,0 +1,30 @@
-- Migration: split model selection into a "scraping" list and a "websearch" list.
--
-- Step 1: rename the per-user column ai_model_writing to ai_model_websearch.
-- A column rename keeps the stored values as they are.
-- NOTE(review): confirm the carried-over values are still valid choices
-- against the per-provider websearch lists defined below.
ALTER TABLE settings RENAME COLUMN ai_model_writing TO ai_model_websearch;
-- Step 2: restructure admin_providers so each provider carries two model lists:
-- the existing 'models' column becomes 'models_scraping', and a new
-- 'models_websearch' column is added next to it.
-- 2a. Rename the existing models column.
ALTER TABLE admin_providers RENAME COLUMN models TO models_scraping;
-- 2b. Add the new models_websearch column. This statement only creates the
-- column with an empty JSON array as its default; the actual copy from
-- models_scraping happens in the UPDATE that follows.
ALTER TABLE admin_providers ADD COLUMN models_websearch JSONB NOT NULL DEFAULT '[]'::jsonb;
-- 2c. Backfill: every provider starts with the same list in both columns.
-- There is no WHERE clause, so this touches all rows.
UPDATE admin_providers SET models_websearch = models_scraping;
-- Step 3: override both lists for the 'openai' provider with GPT-5 generation
-- models. Each list marks exactly one entry as is_default: gpt-5-nano for
-- scraping and gpt-5.1 for websearch. Only rows whose provider_name is
-- 'openai' are changed; other providers keep the lists from step 2c.
UPDATE admin_providers SET
models_scraping = '[
{"model_id": "gpt-5.4-mini", "display_name": "GPT-5.4 Mini", "is_default": false},
{"model_id": "gpt-5.4-nano", "display_name": "GPT-5.4 Nano", "is_default": false},
{"model_id": "gpt-5-mini", "display_name": "GPT-5 Mini", "is_default": false},
{"model_id": "gpt-5-nano", "display_name": "GPT-5 Nano", "is_default": true}
]'::jsonb,
models_websearch = '[
{"model_id": "gpt-5.4", "display_name": "GPT-5.4", "is_default": false},
{"model_id": "gpt-5.4-mini", "display_name": "GPT-5.4 Mini", "is_default": false},
{"model_id": "gpt-5.2", "display_name": "GPT-5.2", "is_default": false},
{"model_id": "gpt-5.1", "display_name": "GPT-5.1", "is_default": true},
{"model_id": "gpt-5-mini", "display_name": "GPT-5 Mini", "is_default": false},
{"model_id": "gpt-5", "display_name": "GPT-5", "is_default": false}
]'::jsonb
WHERE provider_name = 'openai';

@ -14,7 +14,8 @@ struct ProviderRow {
id: Uuid, id: Uuid,
provider_name: String, provider_name: String,
display_name: String, display_name: String,
models: serde_json::Value, models_scraping: serde_json::Value,
models_websearch: serde_json::Value,
is_enabled: bool, is_enabled: bool,
created_at: chrono::DateTime<chrono::Utc>, created_at: chrono::DateTime<chrono::Utc>,
updated_at: chrono::DateTime<chrono::Utc>, updated_at: chrono::DateTime<chrono::Utc>,
@ -24,16 +25,21 @@ impl TryFrom<ProviderRow> for AdminProvider {
type Error = AppError; type Error = AppError;
fn try_from(row: ProviderRow) -> Result<Self, Self::Error> { fn try_from(row: ProviderRow) -> Result<Self, Self::Error> {
let models: Vec<ProviderModel> = let models_scraping: Vec<ProviderModel> =
serde_json::from_value(row.models).map_err(|e| { serde_json::from_value(row.models_scraping).map_err(|e| {
AppError::Internal(anyhow::anyhow!("Failed to parse provider models JSON: {}", e)) AppError::Internal(anyhow::anyhow!("Failed to parse provider models_scraping JSON: {}", e))
})?;
let models_websearch: Vec<ProviderModel> =
serde_json::from_value(row.models_websearch).map_err(|e| {
AppError::Internal(anyhow::anyhow!("Failed to parse provider models_websearch JSON: {}", e))
})?; })?;
Ok(Self { Ok(Self {
id: row.id, id: row.id,
provider_name: row.provider_name, provider_name: row.provider_name,
display_name: row.display_name, display_name: row.display_name,
models, models_scraping,
models_websearch,
is_enabled: row.is_enabled, is_enabled: row.is_enabled,
created_at: row.created_at, created_at: row.created_at,
updated_at: row.updated_at, updated_at: row.updated_at,
@ -45,7 +51,7 @@ impl TryFrom<ProviderRow> for AdminProvider {
pub async fn list_all(pool: &PgPool) -> Result<Vec<AdminProvider>, AppError> { pub async fn list_all(pool: &PgPool) -> Result<Vec<AdminProvider>, AppError> {
let rows = sqlx::query_as::<_, ProviderRow>( let rows = sqlx::query_as::<_, ProviderRow>(
r#" r#"
SELECT id, provider_name, display_name, models, is_enabled, created_at, updated_at SELECT id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at
FROM admin_providers FROM admin_providers
ORDER BY provider_name ORDER BY provider_name
"#, "#,
@ -60,7 +66,7 @@ pub async fn list_all(pool: &PgPool) -> Result<Vec<AdminProvider>, AppError> {
pub async fn get_by_id(pool: &PgPool, id: Uuid) -> Result<Option<AdminProvider>, AppError> { pub async fn get_by_id(pool: &PgPool, id: Uuid) -> Result<Option<AdminProvider>, AppError> {
let row = sqlx::query_as::<_, ProviderRow>( let row = sqlx::query_as::<_, ProviderRow>(
r#" r#"
SELECT id, provider_name, display_name, models, is_enabled, created_at, updated_at SELECT id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at
FROM admin_providers FROM admin_providers
WHERE id = $1 WHERE id = $1
"#, "#,
@ -76,7 +82,7 @@ pub async fn get_by_id(pool: &PgPool, id: Uuid) -> Result<Option<AdminProvider>,
pub async fn get_by_name(pool: &PgPool, name: &str) -> Result<Option<AdminProvider>, AppError> { pub async fn get_by_name(pool: &PgPool, name: &str) -> Result<Option<AdminProvider>, AppError> {
let row = sqlx::query_as::<_, ProviderRow>( let row = sqlx::query_as::<_, ProviderRow>(
r#" r#"
SELECT id, provider_name, display_name, models, is_enabled, created_at, updated_at SELECT id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at
FROM admin_providers FROM admin_providers
WHERE provider_name = $1 WHERE provider_name = $1
"#, "#,
@ -95,23 +101,28 @@ pub async fn create(
pool: &PgPool, pool: &PgPool,
provider_name: &str, provider_name: &str,
display_name: &str, display_name: &str,
models: &[ProviderModel], models_scraping: &[ProviderModel],
models_websearch: &[ProviderModel],
is_enabled: bool, is_enabled: bool,
) -> Result<AdminProvider, AppError> { ) -> Result<AdminProvider, AppError> {
let models_json = serde_json::to_value(models).map_err(|e| { let models_scraping_json = serde_json::to_value(models_scraping).map_err(|e| {
AppError::Internal(anyhow::anyhow!("Failed to serialize models: {}", e)) AppError::Internal(anyhow::anyhow!("Failed to serialize models_scraping: {}", e))
})?;
let models_websearch_json = serde_json::to_value(models_websearch).map_err(|e| {
AppError::Internal(anyhow::anyhow!("Failed to serialize models_websearch: {}", e))
})?; })?;
let row = sqlx::query_as::<_, ProviderRow>( let row = sqlx::query_as::<_, ProviderRow>(
r#" r#"
INSERT INTO admin_providers (provider_name, display_name, models, is_enabled) INSERT INTO admin_providers (provider_name, display_name, models_scraping, models_websearch, is_enabled)
VALUES ($1, $2, $3, $4) VALUES ($1, $2, $3, $4, $5)
RETURNING id, provider_name, display_name, models, is_enabled, created_at, updated_at RETURNING id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at
"#, "#,
) )
.bind(provider_name) .bind(provider_name)
.bind(display_name) .bind(display_name)
.bind(&models_json) .bind(&models_scraping_json)
.bind(&models_websearch_json)
.bind(is_enabled) .bind(is_enabled)
.fetch_one(pool) .fetch_one(pool)
.await?; .await?;
@ -127,13 +138,20 @@ pub async fn update(
pool: &PgPool, pool: &PgPool,
id: Uuid, id: Uuid,
display_name: Option<&str>, display_name: Option<&str>,
models: Option<&[ProviderModel]>, models_scraping: Option<&[ProviderModel]>,
models_websearch: Option<&[ProviderModel]>,
is_enabled: Option<bool>, is_enabled: Option<bool>,
) -> Result<Option<AdminProvider>, AppError> { ) -> Result<Option<AdminProvider>, AppError> {
let models_json = models let models_scraping_json = models_scraping
.map(|m| {
serde_json::to_value(m)
.map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to serialize models_scraping: {}", e)))
})
.transpose()?;
let models_websearch_json = models_websearch
.map(|m| { .map(|m| {
serde_json::to_value(m) serde_json::to_value(m)
.map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to serialize models: {}", e))) .map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to serialize models_websearch: {}", e)))
}) })
.transpose()?; .transpose()?;
@ -141,16 +159,18 @@ pub async fn update(
r#" r#"
UPDATE admin_providers SET UPDATE admin_providers SET
display_name = COALESCE($2, display_name), display_name = COALESCE($2, display_name),
models = COALESCE($3, models), models_scraping = COALESCE($3, models_scraping),
is_enabled = COALESCE($4, is_enabled), models_websearch = COALESCE($4, models_websearch),
is_enabled = COALESCE($5, is_enabled),
updated_at = now() updated_at = now()
WHERE id = $1 WHERE id = $1
RETURNING id, provider_name, display_name, models, is_enabled, created_at, updated_at RETURNING id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at
"#, "#,
) )
.bind(id) .bind(id)
.bind(display_name) .bind(display_name)
.bind(models_json) .bind(models_scraping_json)
.bind(models_websearch_json)
.bind(is_enabled) .bind(is_enabled)
.fetch_optional(pool) .fetch_optional(pool)
.await?; .await?;
@ -176,7 +196,7 @@ pub async fn delete(pool: &PgPool, id: Uuid) -> Result<bool, AppError> {
pub async fn list_enabled(pool: &PgPool) -> Result<Vec<AdminProvider>, AppError> { pub async fn list_enabled(pool: &PgPool) -> Result<Vec<AdminProvider>, AppError> {
let rows = sqlx::query_as::<_, ProviderRow>( let rows = sqlx::query_as::<_, ProviderRow>(
r#" r#"
SELECT id, provider_name, display_name, models, is_enabled, created_at, updated_at SELECT id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at
FROM admin_providers FROM admin_providers
WHERE is_enabled = true WHERE is_enabled = true
ORDER BY provider_name ORDER BY provider_name

@ -23,7 +23,7 @@ struct SettingsRow {
search_agent_behavior: String, search_agent_behavior: String,
ai_provider: String, ai_provider: String,
ai_model: String, ai_model: String,
ai_model_writing: String, ai_model_websearch: String,
rate_limit_max_requests: Option<i32>, rate_limit_max_requests: Option<i32>,
rate_limit_time_window_seconds: Option<i32>, rate_limit_time_window_seconds: Option<i32>,
updated_at: chrono::DateTime<chrono::Utc>, updated_at: chrono::DateTime<chrono::Utc>,
@ -49,7 +49,7 @@ impl TryFrom<SettingsRow> for UserSettings {
search_agent_behavior: row.search_agent_behavior, search_agent_behavior: row.search_agent_behavior,
ai_provider: row.ai_provider, ai_provider: row.ai_provider,
ai_model: row.ai_model, ai_model: row.ai_model,
ai_model_writing: row.ai_model_writing, ai_model_websearch: row.ai_model_websearch,
rate_limit_max_requests: row.rate_limit_max_requests, rate_limit_max_requests: row.rate_limit_max_requests,
rate_limit_time_window_seconds: row.rate_limit_time_window_seconds, rate_limit_time_window_seconds: row.rate_limit_time_window_seconds,
updated_at: row.updated_at, updated_at: row.updated_at,
@ -72,10 +72,10 @@ pub async fn get_or_create_default(
let row = sqlx::query_as::<_, SettingsRow>( let row = sqlx::query_as::<_, SettingsRow>(
r#" r#"
INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_writing, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days) INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
ON CONFLICT (user_id) DO UPDATE SET user_id = settings.user_id ON CONFLICT (user_id) DO UPDATE SET user_id = settings.user_id
RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_writing, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at
"#, "#,
) )
.bind(user_id) .bind(user_id)
@ -86,7 +86,7 @@ pub async fn get_or_create_default(
.bind(&defaults.search_agent_behavior) .bind(&defaults.search_agent_behavior)
.bind(&defaults.ai_provider) .bind(&defaults.ai_provider)
.bind(&defaults.ai_model) .bind(&defaults.ai_model)
.bind(&defaults.ai_model_writing) .bind(&defaults.ai_model_websearch)
.bind(defaults.rate_limit_max_requests) .bind(defaults.rate_limit_max_requests)
.bind(defaults.rate_limit_time_window_seconds) .bind(defaults.rate_limit_time_window_seconds)
.bind(defaults.max_articles_per_source) .bind(defaults.max_articles_per_source)
@ -110,7 +110,7 @@ pub async fn upsert(
let row = sqlx::query_as::<_, SettingsRow>( let row = sqlx::query_as::<_, SettingsRow>(
r#" r#"
INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_writing, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days) INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
ON CONFLICT (user_id) DO UPDATE SET ON CONFLICT (user_id) DO UPDATE SET
theme = EXCLUDED.theme, theme = EXCLUDED.theme,
@ -120,14 +120,14 @@ pub async fn upsert(
search_agent_behavior = EXCLUDED.search_agent_behavior, search_agent_behavior = EXCLUDED.search_agent_behavior,
ai_provider = EXCLUDED.ai_provider, ai_provider = EXCLUDED.ai_provider,
ai_model = EXCLUDED.ai_model, ai_model = EXCLUDED.ai_model,
ai_model_writing = EXCLUDED.ai_model_writing, ai_model_websearch = EXCLUDED.ai_model_websearch,
rate_limit_max_requests = EXCLUDED.rate_limit_max_requests, rate_limit_max_requests = EXCLUDED.rate_limit_max_requests,
rate_limit_time_window_seconds = EXCLUDED.rate_limit_time_window_seconds, rate_limit_time_window_seconds = EXCLUDED.rate_limit_time_window_seconds,
max_articles_per_source = EXCLUDED.max_articles_per_source, max_articles_per_source = EXCLUDED.max_articles_per_source,
use_llm_for_source_links = EXCLUDED.use_llm_for_source_links, use_llm_for_source_links = EXCLUDED.use_llm_for_source_links,
article_history_days = EXCLUDED.article_history_days, article_history_days = EXCLUDED.article_history_days,
updated_at = now() updated_at = now()
RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_writing, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at
"#, "#,
) )
.bind(user_id) .bind(user_id)
@ -138,7 +138,7 @@ pub async fn upsert(
.bind(&req.search_agent_behavior) .bind(&req.search_agent_behavior)
.bind(&req.ai_provider) .bind(&req.ai_provider)
.bind(&req.ai_model) .bind(&req.ai_model)
.bind(&req.ai_model_writing) .bind(&req.ai_model_websearch)
.bind(req.rate_limit_max_requests) .bind(req.rate_limit_max_requests)
.bind(req.rate_limit_time_window_seconds) .bind(req.rate_limit_time_window_seconds)
.bind(req.max_articles_per_source) .bind(req.max_articles_per_source)

@ -70,7 +70,8 @@ pub async fn create_provider(
&state.pool, &state.pool,
&body.provider_name, &body.provider_name,
&body.display_name, &body.display_name,
&body.models, &body.models_scraping,
&body.models_websearch,
body.is_enabled, body.is_enabled,
) )
.await?; .await?;
@ -86,7 +87,8 @@ pub async fn create_provider(
details: Some(serde_json::json!({ details: Some(serde_json::json!({
"provider_name": provider.provider_name, "provider_name": provider.provider_name,
"display_name": provider.display_name, "display_name": provider.display_name,
"model_count": provider.models.len(), "model_scraping_count": provider.models_scraping.len(),
"model_websearch_count": provider.models_websearch.len(),
"is_enabled": provider.is_enabled, "is_enabled": provider.is_enabled,
})), })),
}, },
@ -121,7 +123,8 @@ pub async fn update_provider(
&state.pool, &state.pool,
id, id,
body.display_name.as_deref(), body.display_name.as_deref(),
body.models.as_deref(), body.models_scraping.as_deref(),
body.models_websearch.as_deref(),
body.is_enabled, body.is_enabled,
) )
.await?; .await?;
@ -140,7 +143,8 @@ pub async fn update_provider(
"provider_name": provider.provider_name, "provider_name": provider.provider_name,
"updated_fields": { "updated_fields": {
"display_name": body.display_name.is_some(), "display_name": body.display_name.is_some(),
"models": body.models.is_some(), "models_scraping": body.models_scraping.is_some(),
"models_websearch": body.models_websearch.is_some(),
"is_enabled": body.is_enabled.is_some(), "is_enabled": body.is_enabled.is_some(),
}, },
})), })),

@ -218,10 +218,10 @@ async fn get_default_model_for_provider(
Some(p) => { Some(p) => {
// Find the default model, or use the first one // Find the default model, or use the first one
let model = p let model = p
.models .models_scraping
.iter() .iter()
.find(|m| m.is_default) .find(|m| m.is_default)
.or_else(|| p.models.first()) .or_else(|| p.models_scraping.first())
.ok_or_else(|| { .ok_or_else(|| {
AppError::BadRequest(format!( AppError::BadRequest(format!(
"No models configured for provider '{}'", "No models configured for provider '{}'",

@ -31,7 +31,8 @@ pub async fn list_enabled_providers(
.map(|p| ProviderConfigResponse { .map(|p| ProviderConfigResponse {
provider_name: p.provider_name, provider_name: p.provider_name,
display_name: p.display_name, display_name: p.display_name,
models: p.models.into_iter().map(PublicModelInfo::from).collect(), models_scraping: p.models_scraping.into_iter().map(PublicModelInfo::from).collect(),
models_websearch: p.models_websearch.into_iter().map(PublicModelInfo::from).collect(),
}) })
.collect(); .collect();

@ -22,7 +22,8 @@ pub struct AdminProvider {
pub id: Uuid, pub id: Uuid,
pub provider_name: String, pub provider_name: String,
pub display_name: String, pub display_name: String,
pub models: Vec<ProviderModel>, pub models_scraping: Vec<ProviderModel>,
pub models_websearch: Vec<ProviderModel>,
pub is_enabled: bool, pub is_enabled: bool,
pub created_at: DateTime<Utc>, pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>, pub updated_at: DateTime<Utc>,
@ -33,7 +34,8 @@ pub struct AdminProvider {
pub struct CreateProviderRequest { pub struct CreateProviderRequest {
pub provider_name: String, pub provider_name: String,
pub display_name: String, pub display_name: String,
pub models: Vec<ProviderModel>, pub models_scraping: Vec<ProviderModel>,
pub models_websearch: Vec<ProviderModel>,
#[serde(default = "default_true")] #[serde(default = "default_true")]
pub is_enabled: bool, pub is_enabled: bool,
} }
@ -69,7 +71,8 @@ impl CreateProviderRequest {
} }
validate_display_name(&self.display_name)?; validate_display_name(&self.display_name)?;
validate_models(&self.models)?; validate_models(&self.models_scraping)?;
validate_models(&self.models_websearch)?;
Ok(()) Ok(())
} }
@ -79,7 +82,8 @@ impl CreateProviderRequest {
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize)]
pub struct UpdateProviderRequest { pub struct UpdateProviderRequest {
pub display_name: Option<String>, pub display_name: Option<String>,
pub models: Option<Vec<ProviderModel>>, pub models_scraping: Option<Vec<ProviderModel>>,
pub models_websearch: Option<Vec<ProviderModel>>,
pub is_enabled: Option<bool>, pub is_enabled: Option<bool>,
} }
@ -89,7 +93,10 @@ impl UpdateProviderRequest {
if let Some(ref display) = self.display_name { if let Some(ref display) = self.display_name {
validate_display_name(display)?; validate_display_name(display)?;
} }
if let Some(ref models) = self.models { if let Some(ref models) = self.models_scraping {
validate_models(models)?;
}
if let Some(ref models) = self.models_websearch {
validate_models(models)?; validate_models(models)?;
} }
Ok(()) Ok(())
@ -143,7 +150,8 @@ fn validate_models(models: &[ProviderModel]) -> Result<(), String> {
pub struct ProviderConfigResponse { pub struct ProviderConfigResponse {
pub provider_name: String, pub provider_name: String,
pub display_name: String, pub display_name: String,
pub models: Vec<PublicModelInfo>, pub models_scraping: Vec<PublicModelInfo>,
pub models_websearch: Vec<PublicModelInfo>,
} }
/// Public model info (subset of `ProviderModel`). /// Public model info (subset of `ProviderModel`).
@ -170,7 +178,8 @@ pub struct AdminProviderResponse {
pub id: Uuid, pub id: Uuid,
pub provider_name: String, pub provider_name: String,
pub display_name: String, pub display_name: String,
pub models: Vec<ProviderModel>, pub models_scraping: Vec<ProviderModel>,
pub models_websearch: Vec<ProviderModel>,
pub is_enabled: bool, pub is_enabled: bool,
pub created_at: DateTime<Utc>, pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>, pub updated_at: DateTime<Utc>,
@ -182,7 +191,8 @@ impl From<AdminProvider> for AdminProviderResponse {
id: p.id, id: p.id,
provider_name: p.provider_name, provider_name: p.provider_name,
display_name: p.display_name, display_name: p.display_name,
models: p.models, models_scraping: p.models_scraping,
models_websearch: p.models_websearch,
is_enabled: p.is_enabled, is_enabled: p.is_enabled,
created_at: p.created_at, created_at: p.created_at,
updated_at: p.updated_at, updated_at: p.updated_at,
@ -194,12 +204,26 @@ impl From<AdminProvider> for AdminProviderResponse {
mod tests { mod tests {
use super::*; use super::*;
/// Test fixture: returns a list holding exactly one model (`m1`, shown as
/// "Model 1") that is flagged as the default.
fn sample_models() -> Vec<ProviderModel> {
    let only_model = ProviderModel {
        model_id: "m1".into(),
        display_name: "Model 1".into(),
        is_default: true,
    };
    vec![only_model]
}
#[test] #[test]
fn test_valid_create_request() { fn test_valid_create_request() {
let req = CreateProviderRequest { let req = CreateProviderRequest {
provider_name: "gemini".into(), provider_name: "gemini".into(),
display_name: "Google Gemini".into(), display_name: "Google Gemini".into(),
models: vec![ProviderModel { models_scraping: vec![ProviderModel {
model_id: "gemini-2.5-pro".into(),
display_name: "Gemini 2.5 Pro".into(),
is_default: true,
}],
models_websearch: vec![ProviderModel {
model_id: "gemini-2.5-pro".into(), model_id: "gemini-2.5-pro".into(),
display_name: "Gemini 2.5 Pro".into(), display_name: "Gemini 2.5 Pro".into(),
is_default: true, is_default: true,
@ -214,11 +238,8 @@ mod tests {
let req = CreateProviderRequest { let req = CreateProviderRequest {
provider_name: "unknown_provider".into(), provider_name: "unknown_provider".into(),
display_name: "Unknown".into(), display_name: "Unknown".into(),
models: vec![ProviderModel { models_scraping: sample_models(),
model_id: "m1".into(), models_websearch: sample_models(),
display_name: "Model 1".into(),
is_default: false,
}],
is_enabled: true, is_enabled: true,
}; };
let err = req.validate().unwrap_err(); let err = req.validate().unwrap_err();
@ -230,11 +251,8 @@ mod tests {
let req = CreateProviderRequest { let req = CreateProviderRequest {
provider_name: " ".into(), provider_name: " ".into(),
display_name: "Some Provider".into(), display_name: "Some Provider".into(),
models: vec![ProviderModel { models_scraping: sample_models(),
model_id: "m1".into(), models_websearch: sample_models(),
display_name: "Model 1".into(),
is_default: false,
}],
is_enabled: true, is_enabled: true,
}; };
let err = req.validate().unwrap_err(); let err = req.validate().unwrap_err();
@ -242,11 +260,25 @@ mod tests {
} }
#[test] #[test]
fn test_empty_models_list() { fn test_empty_models_scraping_list() {
let req = CreateProviderRequest {
provider_name: "openai".into(),
display_name: "OpenAI".into(),
models_scraping: vec![],
models_websearch: sample_models(),
is_enabled: true,
};
let err = req.validate().unwrap_err();
assert!(err.contains("At least one model"));
}
#[test]
fn test_empty_models_websearch_list() {
let req = CreateProviderRequest { let req = CreateProviderRequest {
provider_name: "openai".into(), provider_name: "openai".into(),
display_name: "OpenAI".into(), display_name: "OpenAI".into(),
models: vec![], models_scraping: sample_models(),
models_websearch: vec![],
is_enabled: true, is_enabled: true,
}; };
let err = req.validate().unwrap_err(); let err = req.validate().unwrap_err();
@ -258,7 +290,7 @@ mod tests {
let req = CreateProviderRequest { let req = CreateProviderRequest {
provider_name: "openai".into(), provider_name: "openai".into(),
display_name: "OpenAI".into(), display_name: "OpenAI".into(),
models: vec![ models_scraping: vec![
ProviderModel { ProviderModel {
model_id: "gpt-4o".into(), model_id: "gpt-4o".into(),
display_name: "GPT-4o".into(), display_name: "GPT-4o".into(),
@ -270,6 +302,7 @@ mod tests {
is_default: true, is_default: true,
}, },
], ],
models_websearch: sample_models(),
is_enabled: true, is_enabled: true,
}; };
let err = req.validate().unwrap_err(); let err = req.validate().unwrap_err();
@ -281,11 +314,12 @@ mod tests {
let req = CreateProviderRequest { let req = CreateProviderRequest {
provider_name: "anthropic".into(), provider_name: "anthropic".into(),
display_name: "Anthropic".into(), display_name: "Anthropic".into(),
models: vec![ProviderModel { models_scraping: vec![ProviderModel {
model_id: "".into(), model_id: "".into(),
display_name: "Claude".into(), display_name: "Claude".into(),
is_default: false, is_default: false,
}], }],
models_websearch: sample_models(),
is_enabled: true, is_enabled: true,
}; };
let err = req.validate().unwrap_err(); let err = req.validate().unwrap_err();
@ -296,7 +330,8 @@ mod tests {
fn test_update_request_all_none() { fn test_update_request_all_none() {
let req = UpdateProviderRequest { let req = UpdateProviderRequest {
display_name: None, display_name: None,
models: None, models_scraping: None,
models_websearch: None,
is_enabled: None, is_enabled: None,
}; };
assert!(req.validate().is_ok()); assert!(req.validate().is_ok());
@ -306,7 +341,8 @@ mod tests {
fn test_update_request_empty_display_name() { fn test_update_request_empty_display_name() {
let req = UpdateProviderRequest { let req = UpdateProviderRequest {
display_name: Some("".into()), display_name: Some("".into()),
models: None, models_scraping: None,
models_websearch: None,
is_enabled: None, is_enabled: None,
}; };
let err = req.validate().unwrap_err(); let err = req.validate().unwrap_err();

@ -18,7 +18,7 @@ pub struct UserSettings {
pub search_agent_behavior: String, pub search_agent_behavior: String,
pub ai_provider: String, pub ai_provider: String,
pub ai_model: String, pub ai_model: String,
pub ai_model_writing: String, pub ai_model_websearch: String,
pub rate_limit_max_requests: Option<i32>, pub rate_limit_max_requests: Option<i32>,
pub rate_limit_time_window_seconds: Option<i32>, pub rate_limit_time_window_seconds: Option<i32>,
pub updated_at: DateTime<Utc>, pub updated_at: DateTime<Utc>,
@ -37,7 +37,7 @@ pub struct SettingsResponse {
pub search_agent_behavior: String, pub search_agent_behavior: String,
pub ai_provider: String, pub ai_provider: String,
pub ai_model: String, pub ai_model: String,
pub ai_model_writing: String, pub ai_model_websearch: String,
pub rate_limit_max_requests: Option<i32>, pub rate_limit_max_requests: Option<i32>,
pub rate_limit_time_window_seconds: Option<i32>, pub rate_limit_time_window_seconds: Option<i32>,
} }
@ -55,7 +55,7 @@ impl From<UserSettings> for SettingsResponse {
search_agent_behavior: s.search_agent_behavior, search_agent_behavior: s.search_agent_behavior,
ai_provider: s.ai_provider, ai_provider: s.ai_provider,
ai_model: s.ai_model, ai_model: s.ai_model,
ai_model_writing: s.ai_model_writing, ai_model_websearch: s.ai_model_websearch,
rate_limit_max_requests: s.rate_limit_max_requests, rate_limit_max_requests: s.rate_limit_max_requests,
rate_limit_time_window_seconds: s.rate_limit_time_window_seconds, rate_limit_time_window_seconds: s.rate_limit_time_window_seconds,
} }
@ -75,7 +75,7 @@ pub struct UpdateSettingsRequest {
pub search_agent_behavior: String, pub search_agent_behavior: String,
pub ai_provider: String, pub ai_provider: String,
pub ai_model: String, pub ai_model: String,
pub ai_model_writing: String, pub ai_model_websearch: String,
pub rate_limit_max_requests: Option<i32>, pub rate_limit_max_requests: Option<i32>,
pub rate_limit_time_window_seconds: Option<i32>, pub rate_limit_time_window_seconds: Option<i32>,
} }
@ -130,8 +130,8 @@ impl UpdateSettingsRequest {
if self.ai_model.len() > 100 { if self.ai_model.len() > 100 {
return Err("ai_model must be at most 100 characters".into()); return Err("ai_model must be at most 100 characters".into());
} }
if self.ai_model_writing.len() > 100 { if self.ai_model_websearch.len() > 100 {
return Err("ai_model_writing must be at most 100 characters".into()); return Err("ai_model_websearch must be at most 100 characters".into());
} }
if let Some(max_req) = self.rate_limit_max_requests { if let Some(max_req) = self.rate_limit_max_requests {
if max_req < 1 { if max_req < 1 {
@ -168,7 +168,7 @@ impl Default for UserSettings {
search_agent_behavior: String::new(), search_agent_behavior: String::new(),
ai_provider: String::new(), ai_provider: String::new(),
ai_model: String::new(), ai_model: String::new(),
ai_model_writing: String::new(), ai_model_websearch: String::new(),
rate_limit_max_requests: None, rate_limit_max_requests: None,
rate_limit_time_window_seconds: None, rate_limit_time_window_seconds: None,
updated_at: Utc::now(), updated_at: Utc::now(),
@ -193,7 +193,7 @@ mod tests {
search_agent_behavior: String::new(), search_agent_behavior: String::new(),
ai_provider: String::new(), ai_provider: String::new(),
ai_model: String::new(), ai_model: String::new(),
ai_model_writing: String::new(), ai_model_websearch: String::new(),
rate_limit_max_requests: None, rate_limit_max_requests: None,
rate_limit_time_window_seconds: None, rate_limit_time_window_seconds: None,
} }
@ -329,7 +329,7 @@ mod tests {
let req = UpdateSettingsRequest { let req = UpdateSettingsRequest {
ai_provider: "google".into(), ai_provider: "google".into(),
ai_model: "gemini-2.5-pro".into(), ai_model: "gemini-2.5-pro".into(),
ai_model_writing: "gemini-2.5-flash".into(), ai_model_websearch: "gemini-2.5-flash".into(),
..valid_request() ..valid_request()
}; };
assert!(req.validate().is_ok()); assert!(req.validate().is_ok());
@ -386,13 +386,13 @@ mod tests {
} }
#[test] #[test]
fn test_validate_ai_model_writing_too_long_rejected() { fn test_validate_ai_model_websearch_too_long_rejected() {
let req = UpdateSettingsRequest { let req = UpdateSettingsRequest {
ai_model_writing: "a".repeat(101), ai_model_websearch: "a".repeat(101),
..valid_request() ..valid_request()
}; };
let err = req.validate().unwrap_err(); let err = req.validate().unwrap_err();
assert!(err.contains("ai_model_writing")); assert!(err.contains("ai_model_websearch"));
} }
} }

@ -78,6 +78,7 @@ pub fn build_search_prompt(
et un resume provisoire.\n\ et un resume provisoire.\n\
Ne retourne JAMAIS des URLs de pages d'accueil (homepage). Fournis toujours des liens \ Ne retourne JAMAIS des URLs de pages d'accueil (homepage). Fournis toujours des liens \
directs vers des articles specifiques avec un chemin complet (pas juste le nom de domaine).\n\ directs vers des articles specifiques avec un chemin complet (pas juste le nom de domaine).\n\
Ne change jamais les URLs retournees, et ne les tronque jamais. \
Retourne le resultat au format JSON en utilisant les cles category_0, category_1, etc. \ Retourne le resultat au format JSON en utilisant les cles category_0, category_1, etc. \
correspondant a l'ordre des sections ci-dessus.", correspondant a l'ordre des sections ci-dessus.",
date = current_date, date = current_date,
@ -133,8 +134,10 @@ pub fn build_link_extraction_prompt(head_html: &str, body_html: &str) -> (String
<head>\n{head}\n</head>\n\n\ <head>\n{head}\n</head>\n\n\
<body (extrait)>\n{body}\n</body>\n\n\ <body (extrait)>\n{body}\n</body>\n\n\
Extrais UNIQUEMENT les URLs qui pointent vers des articles \ Extrais UNIQUEMENT les URLs qui pointent vers des articles \
(pas les liens de navigation, tags, categories, login, pages statiques, etc.).\n\ (pas les liens de navigation, tags, categories, login, pages statiques, topics, \
Retourne les URLs completes dans le format JSON demande.", archive, companies, events, company, event, collections, etc.).\n\
Retourne les URLs completes, sans les modifier, dans le format JSON demande. \
Ne change jamais les URLs retournees, et ne les tronque jamais.",
head = head_html, head = head_html,
body = body_truncated, body = body_truncated,
); );
@ -201,7 +204,7 @@ mod tests {
search_agent_behavior: String::new(), search_agent_behavior: String::new(),
ai_provider: String::new(), ai_provider: String::new(),
ai_model: String::new(), ai_model: String::new(),
ai_model_writing: String::new(), ai_model_websearch: String::new(),
rate_limit_max_requests: None, rate_limit_max_requests: None,
rate_limit_time_window_seconds: None, rate_limit_time_window_seconds: None,
updated_at: Utc::now(), updated_at: Utc::now(),

@ -264,6 +264,7 @@ async fn run_generation_inner(
let (provider_name, api_key) = resolve_provider_and_key(state, user_id, &settings).await?; let (provider_name, api_key) = resolve_provider_and_key(state, user_id, &settings).await?;
let provider = create_provider(&provider_name, api_key)?; let provider = create_provider(&provider_name, api_key)?;
let model_research = if !settings.ai_model.is_empty() { settings.ai_model.clone() } else { resolve_model(state, &provider_name).await? }; let model_research = if !settings.ai_model.is_empty() { settings.ai_model.clone() } else { resolve_model(state, &provider_name).await? };
let model_websearch = if !settings.ai_model_websearch.is_empty() { settings.ai_model_websearch.clone() } else { model_research.clone() };
let user_rate_limiter = get_user_rate_limiter(state, &settings, user_id); let user_rate_limiter = get_user_rate_limiter(state, &settings, user_id);
// Tracking structures // Tracking structures
@ -475,9 +476,9 @@ async fn run_generation_inner(
let (sys_prompt, usr_prompt) = crate::services::prompts::build_search_prompt(&settings, &sources, &current_date, &[], Some(&category_gaps)); let (sys_prompt, usr_prompt) = crate::services::prompts::build_search_prompt(&settings, &sources, &current_date, &[], Some(&category_gaps));
let llm_start = std::time::Instant::now(); let llm_start = std::time::Instant::now();
let raw_results = provider.call_llm(&model_research, &sys_prompt, &usr_prompt, &search_schema).await?; let raw_results = provider.call_llm(&model_websearch, &sys_prompt, &usr_prompt, &search_schema).await?;
let llm_duration = llm_start.elapsed().as_millis() as u64; let llm_duration = llm_start.elapsed().as_millis() as u64;
log_llm_call(&state.pool, user_id, job_id, "search", &model_research, &sys_prompt, &usr_prompt, &raw_results, llm_duration).await; log_llm_call(&state.pool, user_id, job_id, "search", &model_websearch, &sys_prompt, &usr_prompt, &raw_results, llm_duration).await;
emit_progress(tx, "parsing", "Analyse des resultats...", 75); emit_progress(tx, "parsing", "Analyse des resultats...", 75);
let parsed = parse_llm_output(&raw_results, &user_categories)?; let parsed = parse_llm_output(&raw_results, &user_categories)?;
@ -848,11 +849,11 @@ async fn resolve_provider_and_key(
/// Looks up the first enabled model for the provider from the admin config. /// Looks up the first enabled model for the provider from the admin config.
/// Falls back to sensible defaults if no admin-configured models exist. /// Falls back to sensible defaults if no admin-configured models exist.
async fn resolve_model(state: &AppState, provider_name: &str) -> Result<String, AppError> { async fn resolve_model(state: &AppState, provider_name: &str) -> Result<String, AppError> {
// Try to get the default model from the admin_providers JSONB models array // Try to get the default model from the admin_providers JSONB models_scraping array
let model = sqlx::query_scalar::<_, String>( let model = sqlx::query_scalar::<_, String>(
r#" r#"
SELECT m->>'model_id' SELECT m->>'model_id'
FROM admin_providers, jsonb_array_elements(models) AS m FROM admin_providers, jsonb_array_elements(models_scraping) AS m
WHERE provider_name = $1 AND is_enabled = true AND (m->>'is_default')::boolean = true WHERE provider_name = $1 AND is_enabled = true AND (m->>'is_default')::boolean = true
LIMIT 1 LIMIT 1
"#, "#,

@ -630,7 +630,7 @@ async fn generate_pipeline_resolves_model_from_admin_config() {
"categories": ["Test Category"], "categories": ["Test Category"],
"ai_provider": "openai", "ai_provider": "openai",
"ai_model": "", "ai_model": "",
"ai_model_writing": "", "ai_model_websearch": "",
"use_llm_for_source_links": false, "use_llm_for_source_links": false,
"use_llm_for_article_extraction": false, "use_llm_for_article_extraction": false,
"article_history_days": 90 "article_history_days": 90

Loading…
Cancel
Save