feat: split model selection — scraping vs websearch with GPT-5 models

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
oabrivard 3 months ago
parent 97e484e03f
commit 8d232c1ade

@ -117,7 +117,7 @@ cd frontend && npx tsc --noEmit
- `GET /api/v1/admin/users` — user list
- `PUT /api/v1/admin/users/:id/role` — role management
## Database (18 migrations)
## Database (19 migrations)
Tables: `users`, `sessions`, `magic_link_tokens`, `user_settings`, `sources`, `syntheses`, `admin_providers`, `admin_rate_limits`, `user_api_keys`, `audit_log`
## Environment Variables

@ -0,0 +1,30 @@
-- Migration: split each provider's model list into a "scraping" list and a
-- "websearch" list, and rename the matching per-user setting column.
--
-- Step 1: rename settings.ai_model_writing to ai_model_websearch.
-- RENAME COLUMN only changes the column name; stored values are kept.
ALTER TABLE settings RENAME COLUMN ai_model_writing TO ai_model_websearch;
-- Step 2: restructure admin_providers ('models' -> 'models_scraping', plus a
-- new 'models_websearch' column).
-- 2a. The existing 'models' column becomes 'models_scraping'.
ALTER TABLE admin_providers RENAME COLUMN models TO models_scraping;
-- 2b. Add 'models_websearch'. The '[]' default lets the NOT NULL column be
--     added to a table that already contains rows; existing rows are
--     overwritten by the copy in 2c.
ALTER TABLE admin_providers ADD COLUMN models_websearch JSONB NOT NULL DEFAULT '[]'::jsonb;
-- 2c. Seed models_websearch from models_scraping for every provider, so both
--     lists start out identical.
UPDATE admin_providers SET models_websearch = models_scraping;
-- Step 3: replace both lists for the 'openai' provider with GPT-5 generation
-- models. Each list flags exactly one entry as default: gpt-5-nano for
-- scraping and gpt-5.1 for websearch. Rows for other providers keep the
-- lists produced by step 2.
UPDATE admin_providers SET
models_scraping = '[
{"model_id": "gpt-5.4-mini", "display_name": "GPT-5.4 Mini", "is_default": false},
{"model_id": "gpt-5.4-nano", "display_name": "GPT-5.4 Nano", "is_default": false},
{"model_id": "gpt-5-mini", "display_name": "GPT-5 Mini", "is_default": false},
{"model_id": "gpt-5-nano", "display_name": "GPT-5 Nano", "is_default": true}
]'::jsonb,
models_websearch = '[
{"model_id": "gpt-5.4", "display_name": "GPT-5.4", "is_default": false},
{"model_id": "gpt-5.4-mini", "display_name": "GPT-5.4 Mini", "is_default": false},
{"model_id": "gpt-5.2", "display_name": "GPT-5.2", "is_default": false},
{"model_id": "gpt-5.1", "display_name": "GPT-5.1", "is_default": true},
{"model_id": "gpt-5-mini", "display_name": "GPT-5 Mini", "is_default": false},
{"model_id": "gpt-5", "display_name": "GPT-5", "is_default": false}
]'::jsonb
WHERE provider_name = 'openai';

@ -14,7 +14,8 @@ struct ProviderRow {
id: Uuid,
provider_name: String,
display_name: String,
models: serde_json::Value,
models_scraping: serde_json::Value,
models_websearch: serde_json::Value,
is_enabled: bool,
created_at: chrono::DateTime<chrono::Utc>,
updated_at: chrono::DateTime<chrono::Utc>,
@ -24,16 +25,21 @@ impl TryFrom<ProviderRow> for AdminProvider {
type Error = AppError;
fn try_from(row: ProviderRow) -> Result<Self, Self::Error> {
let models: Vec<ProviderModel> =
serde_json::from_value(row.models).map_err(|e| {
AppError::Internal(anyhow::anyhow!("Failed to parse provider models JSON: {}", e))
let models_scraping: Vec<ProviderModel> =
serde_json::from_value(row.models_scraping).map_err(|e| {
AppError::Internal(anyhow::anyhow!("Failed to parse provider models_scraping JSON: {}", e))
})?;
let models_websearch: Vec<ProviderModel> =
serde_json::from_value(row.models_websearch).map_err(|e| {
AppError::Internal(anyhow::anyhow!("Failed to parse provider models_websearch JSON: {}", e))
})?;
Ok(Self {
id: row.id,
provider_name: row.provider_name,
display_name: row.display_name,
models,
models_scraping,
models_websearch,
is_enabled: row.is_enabled,
created_at: row.created_at,
updated_at: row.updated_at,
@ -45,7 +51,7 @@ impl TryFrom<ProviderRow> for AdminProvider {
pub async fn list_all(pool: &PgPool) -> Result<Vec<AdminProvider>, AppError> {
let rows = sqlx::query_as::<_, ProviderRow>(
r#"
SELECT id, provider_name, display_name, models, is_enabled, created_at, updated_at
SELECT id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at
FROM admin_providers
ORDER BY provider_name
"#,
@ -60,7 +66,7 @@ pub async fn list_all(pool: &PgPool) -> Result<Vec<AdminProvider>, AppError> {
pub async fn get_by_id(pool: &PgPool, id: Uuid) -> Result<Option<AdminProvider>, AppError> {
let row = sqlx::query_as::<_, ProviderRow>(
r#"
SELECT id, provider_name, display_name, models, is_enabled, created_at, updated_at
SELECT id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at
FROM admin_providers
WHERE id = $1
"#,
@ -76,7 +82,7 @@ pub async fn get_by_id(pool: &PgPool, id: Uuid) -> Result<Option<AdminProvider>,
pub async fn get_by_name(pool: &PgPool, name: &str) -> Result<Option<AdminProvider>, AppError> {
let row = sqlx::query_as::<_, ProviderRow>(
r#"
SELECT id, provider_name, display_name, models, is_enabled, created_at, updated_at
SELECT id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at
FROM admin_providers
WHERE provider_name = $1
"#,
@ -95,23 +101,28 @@ pub async fn create(
pool: &PgPool,
provider_name: &str,
display_name: &str,
models: &[ProviderModel],
models_scraping: &[ProviderModel],
models_websearch: &[ProviderModel],
is_enabled: bool,
) -> Result<AdminProvider, AppError> {
let models_json = serde_json::to_value(models).map_err(|e| {
AppError::Internal(anyhow::anyhow!("Failed to serialize models: {}", e))
let models_scraping_json = serde_json::to_value(models_scraping).map_err(|e| {
AppError::Internal(anyhow::anyhow!("Failed to serialize models_scraping: {}", e))
})?;
let models_websearch_json = serde_json::to_value(models_websearch).map_err(|e| {
AppError::Internal(anyhow::anyhow!("Failed to serialize models_websearch: {}", e))
})?;
let row = sqlx::query_as::<_, ProviderRow>(
r#"
INSERT INTO admin_providers (provider_name, display_name, models, is_enabled)
VALUES ($1, $2, $3, $4)
RETURNING id, provider_name, display_name, models, is_enabled, created_at, updated_at
INSERT INTO admin_providers (provider_name, display_name, models_scraping, models_websearch, is_enabled)
VALUES ($1, $2, $3, $4, $5)
RETURNING id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at
"#,
)
.bind(provider_name)
.bind(display_name)
.bind(&models_json)
.bind(&models_scraping_json)
.bind(&models_websearch_json)
.bind(is_enabled)
.fetch_one(pool)
.await?;
@ -127,13 +138,20 @@ pub async fn update(
pool: &PgPool,
id: Uuid,
display_name: Option<&str>,
models: Option<&[ProviderModel]>,
models_scraping: Option<&[ProviderModel]>,
models_websearch: Option<&[ProviderModel]>,
is_enabled: Option<bool>,
) -> Result<Option<AdminProvider>, AppError> {
let models_json = models
let models_scraping_json = models_scraping
.map(|m| {
serde_json::to_value(m)
.map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to serialize models_scraping: {}", e)))
})
.transpose()?;
let models_websearch_json = models_websearch
.map(|m| {
serde_json::to_value(m)
.map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to serialize models: {}", e)))
.map_err(|e| AppError::Internal(anyhow::anyhow!("Failed to serialize models_websearch: {}", e)))
})
.transpose()?;
@ -141,16 +159,18 @@ pub async fn update(
r#"
UPDATE admin_providers SET
display_name = COALESCE($2, display_name),
models = COALESCE($3, models),
is_enabled = COALESCE($4, is_enabled),
models_scraping = COALESCE($3, models_scraping),
models_websearch = COALESCE($4, models_websearch),
is_enabled = COALESCE($5, is_enabled),
updated_at = now()
WHERE id = $1
RETURNING id, provider_name, display_name, models, is_enabled, created_at, updated_at
RETURNING id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at
"#,
)
.bind(id)
.bind(display_name)
.bind(models_json)
.bind(models_scraping_json)
.bind(models_websearch_json)
.bind(is_enabled)
.fetch_optional(pool)
.await?;
@ -176,7 +196,7 @@ pub async fn delete(pool: &PgPool, id: Uuid) -> Result<bool, AppError> {
pub async fn list_enabled(pool: &PgPool) -> Result<Vec<AdminProvider>, AppError> {
let rows = sqlx::query_as::<_, ProviderRow>(
r#"
SELECT id, provider_name, display_name, models, is_enabled, created_at, updated_at
SELECT id, provider_name, display_name, models_scraping, models_websearch, is_enabled, created_at, updated_at
FROM admin_providers
WHERE is_enabled = true
ORDER BY provider_name

@ -23,7 +23,7 @@ struct SettingsRow {
search_agent_behavior: String,
ai_provider: String,
ai_model: String,
ai_model_writing: String,
ai_model_websearch: String,
rate_limit_max_requests: Option<i32>,
rate_limit_time_window_seconds: Option<i32>,
updated_at: chrono::DateTime<chrono::Utc>,
@ -49,7 +49,7 @@ impl TryFrom<SettingsRow> for UserSettings {
search_agent_behavior: row.search_agent_behavior,
ai_provider: row.ai_provider,
ai_model: row.ai_model,
ai_model_writing: row.ai_model_writing,
ai_model_websearch: row.ai_model_websearch,
rate_limit_max_requests: row.rate_limit_max_requests,
rate_limit_time_window_seconds: row.rate_limit_time_window_seconds,
updated_at: row.updated_at,
@ -72,10 +72,10 @@ pub async fn get_or_create_default(
let row = sqlx::query_as::<_, SettingsRow>(
r#"
INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_writing, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days)
INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
ON CONFLICT (user_id) DO UPDATE SET user_id = settings.user_id
RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_writing, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at
RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at
"#,
)
.bind(user_id)
@ -86,7 +86,7 @@ pub async fn get_or_create_default(
.bind(&defaults.search_agent_behavior)
.bind(&defaults.ai_provider)
.bind(&defaults.ai_model)
.bind(&defaults.ai_model_writing)
.bind(&defaults.ai_model_websearch)
.bind(defaults.rate_limit_max_requests)
.bind(defaults.rate_limit_time_window_seconds)
.bind(defaults.max_articles_per_source)
@ -110,7 +110,7 @@ pub async fn upsert(
let row = sqlx::query_as::<_, SettingsRow>(
r#"
INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_writing, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days)
INSERT INTO settings (user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
ON CONFLICT (user_id) DO UPDATE SET
theme = EXCLUDED.theme,
@ -120,14 +120,14 @@ pub async fn upsert(
search_agent_behavior = EXCLUDED.search_agent_behavior,
ai_provider = EXCLUDED.ai_provider,
ai_model = EXCLUDED.ai_model,
ai_model_writing = EXCLUDED.ai_model_writing,
ai_model_websearch = EXCLUDED.ai_model_websearch,
rate_limit_max_requests = EXCLUDED.rate_limit_max_requests,
rate_limit_time_window_seconds = EXCLUDED.rate_limit_time_window_seconds,
max_articles_per_source = EXCLUDED.max_articles_per_source,
use_llm_for_source_links = EXCLUDED.use_llm_for_source_links,
article_history_days = EXCLUDED.article_history_days,
updated_at = now()
RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_writing, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at
RETURNING user_id, theme, max_age_days, categories, max_items_per_category, search_agent_behavior, ai_provider, ai_model, ai_model_websearch, rate_limit_max_requests, rate_limit_time_window_seconds, max_articles_per_source, use_llm_for_source_links, article_history_days, updated_at
"#,
)
.bind(user_id)
@ -138,7 +138,7 @@ pub async fn upsert(
.bind(&req.search_agent_behavior)
.bind(&req.ai_provider)
.bind(&req.ai_model)
.bind(&req.ai_model_writing)
.bind(&req.ai_model_websearch)
.bind(req.rate_limit_max_requests)
.bind(req.rate_limit_time_window_seconds)
.bind(req.max_articles_per_source)

@ -70,7 +70,8 @@ pub async fn create_provider(
&state.pool,
&body.provider_name,
&body.display_name,
&body.models,
&body.models_scraping,
&body.models_websearch,
body.is_enabled,
)
.await?;
@ -86,7 +87,8 @@ pub async fn create_provider(
details: Some(serde_json::json!({
"provider_name": provider.provider_name,
"display_name": provider.display_name,
"model_count": provider.models.len(),
"model_scraping_count": provider.models_scraping.len(),
"model_websearch_count": provider.models_websearch.len(),
"is_enabled": provider.is_enabled,
})),
},
@ -121,7 +123,8 @@ pub async fn update_provider(
&state.pool,
id,
body.display_name.as_deref(),
body.models.as_deref(),
body.models_scraping.as_deref(),
body.models_websearch.as_deref(),
body.is_enabled,
)
.await?;
@ -140,7 +143,8 @@ pub async fn update_provider(
"provider_name": provider.provider_name,
"updated_fields": {
"display_name": body.display_name.is_some(),
"models": body.models.is_some(),
"models_scraping": body.models_scraping.is_some(),
"models_websearch": body.models_websearch.is_some(),
"is_enabled": body.is_enabled.is_some(),
},
})),

@ -218,10 +218,10 @@ async fn get_default_model_for_provider(
Some(p) => {
// Find the default model, or use the first one
let model = p
.models
.models_scraping
.iter()
.find(|m| m.is_default)
.or_else(|| p.models.first())
.or_else(|| p.models_scraping.first())
.ok_or_else(|| {
AppError::BadRequest(format!(
"No models configured for provider '{}'",

@ -31,7 +31,8 @@ pub async fn list_enabled_providers(
.map(|p| ProviderConfigResponse {
provider_name: p.provider_name,
display_name: p.display_name,
models: p.models.into_iter().map(PublicModelInfo::from).collect(),
models_scraping: p.models_scraping.into_iter().map(PublicModelInfo::from).collect(),
models_websearch: p.models_websearch.into_iter().map(PublicModelInfo::from).collect(),
})
.collect();

@ -22,7 +22,8 @@ pub struct AdminProvider {
pub id: Uuid,
pub provider_name: String,
pub display_name: String,
pub models: Vec<ProviderModel>,
pub models_scraping: Vec<ProviderModel>,
pub models_websearch: Vec<ProviderModel>,
pub is_enabled: bool,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
@ -33,7 +34,8 @@ pub struct AdminProvider {
pub struct CreateProviderRequest {
pub provider_name: String,
pub display_name: String,
pub models: Vec<ProviderModel>,
pub models_scraping: Vec<ProviderModel>,
pub models_websearch: Vec<ProviderModel>,
#[serde(default = "default_true")]
pub is_enabled: bool,
}
@ -69,7 +71,8 @@ impl CreateProviderRequest {
}
validate_display_name(&self.display_name)?;
validate_models(&self.models)?;
validate_models(&self.models_scraping)?;
validate_models(&self.models_websearch)?;
Ok(())
}
@ -79,7 +82,8 @@ impl CreateProviderRequest {
#[derive(Debug, Deserialize)]
pub struct UpdateProviderRequest {
pub display_name: Option<String>,
pub models: Option<Vec<ProviderModel>>,
pub models_scraping: Option<Vec<ProviderModel>>,
pub models_websearch: Option<Vec<ProviderModel>>,
pub is_enabled: Option<bool>,
}
@ -89,7 +93,10 @@ impl UpdateProviderRequest {
if let Some(ref display) = self.display_name {
validate_display_name(display)?;
}
if let Some(ref models) = self.models {
if let Some(ref models) = self.models_scraping {
validate_models(models)?;
}
if let Some(ref models) = self.models_websearch {
validate_models(models)?;
}
Ok(())
@ -143,7 +150,8 @@ fn validate_models(models: &[ProviderModel]) -> Result<(), String> {
pub struct ProviderConfigResponse {
pub provider_name: String,
pub display_name: String,
pub models: Vec<PublicModelInfo>,
pub models_scraping: Vec<PublicModelInfo>,
pub models_websearch: Vec<PublicModelInfo>,
}
/// Public model info (subset of `ProviderModel`).
@ -170,7 +178,8 @@ pub struct AdminProviderResponse {
pub id: Uuid,
pub provider_name: String,
pub display_name: String,
pub models: Vec<ProviderModel>,
pub models_scraping: Vec<ProviderModel>,
pub models_websearch: Vec<ProviderModel>,
pub is_enabled: bool,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
@ -182,7 +191,8 @@ impl From<AdminProvider> for AdminProviderResponse {
id: p.id,
provider_name: p.provider_name,
display_name: p.display_name,
models: p.models,
models_scraping: p.models_scraping,
models_websearch: p.models_websearch,
is_enabled: p.is_enabled,
created_at: p.created_at,
updated_at: p.updated_at,
@ -194,12 +204,26 @@ impl From<AdminProvider> for AdminProviderResponse {
mod tests {
use super::*;
/// Test fixture: a one-element model list whose only entry (`m1`, "Model 1")
/// is flagged as the default.
fn sample_models() -> Vec<ProviderModel> {
    let only_model = ProviderModel {
        model_id: "m1".into(),
        display_name: "Model 1".into(),
        is_default: true,
    };
    vec![only_model]
}
#[test]
fn test_valid_create_request() {
let req = CreateProviderRequest {
provider_name: "gemini".into(),
display_name: "Google Gemini".into(),
models: vec![ProviderModel {
models_scraping: vec![ProviderModel {
model_id: "gemini-2.5-pro".into(),
display_name: "Gemini 2.5 Pro".into(),
is_default: true,
}],
models_websearch: vec![ProviderModel {
model_id: "gemini-2.5-pro".into(),
display_name: "Gemini 2.5 Pro".into(),
is_default: true,
@ -214,11 +238,8 @@ mod tests {
let req = CreateProviderRequest {
provider_name: "unknown_provider".into(),
display_name: "Unknown".into(),
models: vec![ProviderModel {
model_id: "m1".into(),
display_name: "Model 1".into(),
is_default: false,
}],
models_scraping: sample_models(),
models_websearch: sample_models(),
is_enabled: true,
};
let err = req.validate().unwrap_err();
@ -230,11 +251,8 @@ mod tests {
let req = CreateProviderRequest {
provider_name: " ".into(),
display_name: "Some Provider".into(),
models: vec![ProviderModel {
model_id: "m1".into(),
display_name: "Model 1".into(),
is_default: false,
}],
models_scraping: sample_models(),
models_websearch: sample_models(),
is_enabled: true,
};
let err = req.validate().unwrap_err();
@ -242,11 +260,25 @@ mod tests {
}
#[test]
fn test_empty_models_list() {
fn test_empty_models_scraping_list() {
let req = CreateProviderRequest {
provider_name: "openai".into(),
display_name: "OpenAI".into(),
models_scraping: vec![],
models_websearch: sample_models(),
is_enabled: true,
};
let err = req.validate().unwrap_err();
assert!(err.contains("At least one model"));
}
#[test]
fn test_empty_models_websearch_list() {
let req = CreateProviderRequest {
provider_name: "openai".into(),
display_name: "OpenAI".into(),
models: vec![],
models_scraping: sample_models(),
models_websearch: vec![],
is_enabled: true,
};
let err = req.validate().unwrap_err();
@ -258,7 +290,7 @@ mod tests {
let req = CreateProviderRequest {
provider_name: "openai".into(),
display_name: "OpenAI".into(),
models: vec![
models_scraping: vec![
ProviderModel {
model_id: "gpt-4o".into(),
display_name: "GPT-4o".into(),
@ -270,6 +302,7 @@ mod tests {
is_default: true,
},
],
models_websearch: sample_models(),
is_enabled: true,
};
let err = req.validate().unwrap_err();
@ -281,11 +314,12 @@ mod tests {
let req = CreateProviderRequest {
provider_name: "anthropic".into(),
display_name: "Anthropic".into(),
models: vec![ProviderModel {
models_scraping: vec![ProviderModel {
model_id: "".into(),
display_name: "Claude".into(),
is_default: false,
}],
models_websearch: sample_models(),
is_enabled: true,
};
let err = req.validate().unwrap_err();
@ -296,7 +330,8 @@ mod tests {
fn test_update_request_all_none() {
let req = UpdateProviderRequest {
display_name: None,
models: None,
models_scraping: None,
models_websearch: None,
is_enabled: None,
};
assert!(req.validate().is_ok());
@ -306,7 +341,8 @@ mod tests {
fn test_update_request_empty_display_name() {
let req = UpdateProviderRequest {
display_name: Some("".into()),
models: None,
models_scraping: None,
models_websearch: None,
is_enabled: None,
};
let err = req.validate().unwrap_err();

@ -18,7 +18,7 @@ pub struct UserSettings {
pub search_agent_behavior: String,
pub ai_provider: String,
pub ai_model: String,
pub ai_model_writing: String,
pub ai_model_websearch: String,
pub rate_limit_max_requests: Option<i32>,
pub rate_limit_time_window_seconds: Option<i32>,
pub updated_at: DateTime<Utc>,
@ -37,7 +37,7 @@ pub struct SettingsResponse {
pub search_agent_behavior: String,
pub ai_provider: String,
pub ai_model: String,
pub ai_model_writing: String,
pub ai_model_websearch: String,
pub rate_limit_max_requests: Option<i32>,
pub rate_limit_time_window_seconds: Option<i32>,
}
@ -55,7 +55,7 @@ impl From<UserSettings> for SettingsResponse {
search_agent_behavior: s.search_agent_behavior,
ai_provider: s.ai_provider,
ai_model: s.ai_model,
ai_model_writing: s.ai_model_writing,
ai_model_websearch: s.ai_model_websearch,
rate_limit_max_requests: s.rate_limit_max_requests,
rate_limit_time_window_seconds: s.rate_limit_time_window_seconds,
}
@ -75,7 +75,7 @@ pub struct UpdateSettingsRequest {
pub search_agent_behavior: String,
pub ai_provider: String,
pub ai_model: String,
pub ai_model_writing: String,
pub ai_model_websearch: String,
pub rate_limit_max_requests: Option<i32>,
pub rate_limit_time_window_seconds: Option<i32>,
}
@ -130,8 +130,8 @@ impl UpdateSettingsRequest {
if self.ai_model.len() > 100 {
return Err("ai_model must be at most 100 characters".into());
}
if self.ai_model_writing.len() > 100 {
return Err("ai_model_writing must be at most 100 characters".into());
if self.ai_model_websearch.len() > 100 {
return Err("ai_model_websearch must be at most 100 characters".into());
}
if let Some(max_req) = self.rate_limit_max_requests {
if max_req < 1 {
@ -168,7 +168,7 @@ impl Default for UserSettings {
search_agent_behavior: String::new(),
ai_provider: String::new(),
ai_model: String::new(),
ai_model_writing: String::new(),
ai_model_websearch: String::new(),
rate_limit_max_requests: None,
rate_limit_time_window_seconds: None,
updated_at: Utc::now(),
@ -193,7 +193,7 @@ mod tests {
search_agent_behavior: String::new(),
ai_provider: String::new(),
ai_model: String::new(),
ai_model_writing: String::new(),
ai_model_websearch: String::new(),
rate_limit_max_requests: None,
rate_limit_time_window_seconds: None,
}
@ -329,7 +329,7 @@ mod tests {
let req = UpdateSettingsRequest {
ai_provider: "google".into(),
ai_model: "gemini-2.5-pro".into(),
ai_model_writing: "gemini-2.5-flash".into(),
ai_model_websearch: "gemini-2.5-flash".into(),
..valid_request()
};
assert!(req.validate().is_ok());
@ -386,13 +386,13 @@ mod tests {
}
#[test]
fn test_validate_ai_model_writing_too_long_rejected() {
fn test_validate_ai_model_websearch_too_long_rejected() {
let req = UpdateSettingsRequest {
ai_model_writing: "a".repeat(101),
ai_model_websearch: "a".repeat(101),
..valid_request()
};
let err = req.validate().unwrap_err();
assert!(err.contains("ai_model_writing"));
assert!(err.contains("ai_model_websearch"));
}
}

@ -78,6 +78,7 @@ pub fn build_search_prompt(
et un resume provisoire.\n\
Ne retourne JAMAIS des URLs de pages d'accueil (homepage). Fournis toujours des liens \
directs vers des articles specifiques avec un chemin complet (pas juste le nom de domaine).\n\
Ne change jamais les URLs retournees, et ne les tronque jamais. \
Retourne le resultat au format JSON en utilisant les cles category_0, category_1, etc. \
correspondant a l'ordre des sections ci-dessus.",
date = current_date,
@ -133,8 +134,10 @@ pub fn build_link_extraction_prompt(head_html: &str, body_html: &str) -> (String
<head>\n{head}\n</head>\n\n\
<body (extrait)>\n{body}\n</body>\n\n\
Extrais UNIQUEMENT les URLs qui pointent vers des articles \
(pas les liens de navigation, tags, categories, login, pages statiques, etc.).\n\
Retourne les URLs completes dans le format JSON demande.",
(pas les liens de navigation, tags, categories, login, pages statiques, topics, \
archive, companies, events, company, event, collections, etc.).\n\
Retourne les URLs completes, sans les modifier, dans le format JSON demande. \
Ne change jamais les URLs retournees, et ne les tronque jamais.",
head = head_html,
body = body_truncated,
);
@ -201,7 +204,7 @@ mod tests {
search_agent_behavior: String::new(),
ai_provider: String::new(),
ai_model: String::new(),
ai_model_writing: String::new(),
ai_model_websearch: String::new(),
rate_limit_max_requests: None,
rate_limit_time_window_seconds: None,
updated_at: Utc::now(),

@ -264,6 +264,7 @@ async fn run_generation_inner(
let (provider_name, api_key) = resolve_provider_and_key(state, user_id, &settings).await?;
let provider = create_provider(&provider_name, api_key)?;
let model_research = if !settings.ai_model.is_empty() { settings.ai_model.clone() } else { resolve_model(state, &provider_name).await? };
let model_websearch = if !settings.ai_model_websearch.is_empty() { settings.ai_model_websearch.clone() } else { model_research.clone() };
let user_rate_limiter = get_user_rate_limiter(state, &settings, user_id);
// Tracking structures
@ -475,9 +476,9 @@ async fn run_generation_inner(
let (sys_prompt, usr_prompt) = crate::services::prompts::build_search_prompt(&settings, &sources, &current_date, &[], Some(&category_gaps));
let llm_start = std::time::Instant::now();
let raw_results = provider.call_llm(&model_research, &sys_prompt, &usr_prompt, &search_schema).await?;
let raw_results = provider.call_llm(&model_websearch, &sys_prompt, &usr_prompt, &search_schema).await?;
let llm_duration = llm_start.elapsed().as_millis() as u64;
log_llm_call(&state.pool, user_id, job_id, "search", &model_research, &sys_prompt, &usr_prompt, &raw_results, llm_duration).await;
log_llm_call(&state.pool, user_id, job_id, "search", &model_websearch, &sys_prompt, &usr_prompt, &raw_results, llm_duration).await;
emit_progress(tx, "parsing", "Analyse des resultats...", 75);
let parsed = parse_llm_output(&raw_results, &user_categories)?;
@ -848,11 +849,11 @@ async fn resolve_provider_and_key(
/// Looks up the provider's default model (the entry flagged `is_default`) from the admin config; the provider must be enabled.
/// Falls back to sensible defaults if no admin-configured models exist.
async fn resolve_model(state: &AppState, provider_name: &str) -> Result<String, AppError> {
// Try to get the default model from the admin_providers JSONB models array
// Try to get the default model from the admin_providers JSONB models_scraping array
let model = sqlx::query_scalar::<_, String>(
r#"
SELECT m->>'model_id'
FROM admin_providers, jsonb_array_elements(models) AS m
FROM admin_providers, jsonb_array_elements(models_scraping) AS m
WHERE provider_name = $1 AND is_enabled = true AND (m->>'is_default')::boolean = true
LIMIT 1
"#,

@ -630,7 +630,7 @@ async fn generate_pipeline_resolves_model_from_admin_config() {
"categories": ["Test Category"],
"ai_provider": "openai",
"ai_model": "",
"ai_model_writing": "",
"ai_model_websearch": "",
"use_llm_for_source_links": false,
"use_llm_for_article_extraction": false,
"article_history_days": 90

Loading…
Cancel
Save