fix: enforce max_items_per_category in JSON schema and prompt

The LLM was returning only 1 article per category despite the user setting max_items_per_category to 4.
- Added minItems/maxItems (both set to max_items_per_category) to each category array schema (enforced by OpenAI strict mode)
- Changed prompt from "au maximum N actualites" to "exactement N actualites"
- Schema builder (build_category_schema) now takes a max_items_per_category parameter

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
oabrivard 3 months ago
parent 0b0702de39
commit 45c9e71589

@ -35,7 +35,7 @@ use serde_json::Value;
/// "required": ["category_0", "category_1"] /// "required": ["category_0", "category_1"]
/// } /// }
/// ``` /// ```
pub fn build_category_schema(categories: &[String]) -> Value { pub fn build_category_schema(categories: &[String], max_items_per_category: i32) -> Value {
let news_item_schema = serde_json::json!({ let news_item_schema = serde_json::json!({
"type": "object", "type": "object",
"properties": { "properties": {
@ -66,7 +66,9 @@ pub fn build_category_schema(categories: &[String]) -> Value {
serde_json::json!({ serde_json::json!({
"type": "array", "type": "array",
"description": category_name, "description": category_name,
"items": news_item_schema "items": news_item_schema,
"minItems": max_items_per_category,
"maxItems": max_items_per_category
}), }),
); );
required.push(Value::String(key)); required.push(Value::String(key));
@ -87,7 +89,7 @@ mod tests {
#[test] #[test]
fn schema_with_one_category() { fn schema_with_one_category() {
let categories = vec!["AI News".to_string()]; let categories = vec!["AI News".to_string()];
let schema = build_category_schema(&categories); let schema = build_category_schema(&categories, 5);
assert_eq!(schema["type"], "object"); assert_eq!(schema["type"], "object");
@ -124,7 +126,7 @@ mod tests {
"Recherche".to_string(), "Recherche".to_string(),
"Secteur public".to_string(), "Secteur public".to_string(),
]; ];
let schema = build_category_schema(&categories); let schema = build_category_schema(&categories, 5);
let props = schema["properties"].as_object().unwrap(); let props = schema["properties"].as_object().unwrap();
assert_eq!(props.len(), 3); assert_eq!(props.len(), 3);
@ -141,7 +143,7 @@ mod tests {
let categories: Vec<String> = (0..5) let categories: Vec<String> = (0..5)
.map(|i| format!("Category {}", i)) .map(|i| format!("Category {}", i))
.collect(); .collect();
let schema = build_category_schema(&categories); let schema = build_category_schema(&categories, 5);
let props = schema["properties"].as_object().unwrap(); let props = schema["properties"].as_object().unwrap();
assert_eq!(props.len(), 5); assert_eq!(props.len(), 5);
@ -162,7 +164,7 @@ mod tests {
#[test] #[test]
fn schema_with_empty_categories() { fn schema_with_empty_categories() {
let categories: Vec<String> = vec![]; let categories: Vec<String> = vec![];
let schema = build_category_schema(&categories); let schema = build_category_schema(&categories, 5);
let props = schema["properties"].as_object().unwrap(); let props = schema["properties"].as_object().unwrap();
assert_eq!(props.len(), 0); assert_eq!(props.len(), 0);
@ -174,7 +176,7 @@ mod tests {
#[test] #[test]
fn schema_news_item_has_required_fields() { fn schema_news_item_has_required_fields() {
let categories = vec!["Test".to_string()]; let categories = vec!["Test".to_string()];
let schema = build_category_schema(&categories); let schema = build_category_schema(&categories, 5);
let items = &schema["properties"]["category_0"]["items"]; let items = &schema["properties"]["category_0"]["items"];
let item_required = items["required"].as_array().unwrap(); let item_required = items["required"].as_array().unwrap();
@ -191,7 +193,7 @@ mod tests {
#[test] #[test]
fn schema_meets_openai_strict_mode_requirements() { fn schema_meets_openai_strict_mode_requirements() {
let categories = vec!["Test".to_string(), "Other".to_string()]; let categories = vec!["Test".to_string(), "Other".to_string()];
let schema = build_category_schema(&categories); let schema = build_category_schema(&categories, 5);
// Every "type": "object" must have "additionalProperties": false // Every "type": "object" must have "additionalProperties": false
assert_eq!( assert_eq!(
@ -253,7 +255,7 @@ mod tests {
"AI & Machine Learning".to_string(), "AI & Machine Learning".to_string(),
"R&D / Innovation".to_string(), "R&D / Innovation".to_string(),
]; ];
let schema = build_category_schema(&categories); let schema = build_category_schema(&categories, 5);
let props = schema["properties"].as_object().unwrap(); let props = schema["properties"].as_object().unwrap();
assert_eq!(props["category_0"]["description"], "AI & Machine Learning"); assert_eq!(props["category_0"]["description"], "AI & Machine Learning");

@ -71,7 +71,7 @@ pub fn build_search_prompt(
{behavior}\n\n\ {behavior}\n\n\
La synthese doit etre divisee en {count} grandes sections :\n\ La synthese doit etre divisee en {count} grandes sections :\n\
{categories}\n\n\ {categories}\n\n\
Pour chaque categorie, fournis au maximum {max_items} actualites.\n\ Pour chaque categorie, fournis exactement {max_items} actualites.\n\
Pour chaque actualite, fournis un titre provisoire, l'URL source exacte et complete, \ Pour chaque actualite, fournis un titre provisoire, l'URL source exacte et complete, \
et un resume provisoire.\n\ et un resume provisoire.\n\
Ne retourne JAMAIS des URLs de pages d'accueil (homepage). Fournis toujours des liens \ Ne retourne JAMAIS des URLs de pages d'accueil (homepage). Fournis toujours des liens \

@ -275,7 +275,7 @@ async fn run_generation_inner(
let provider = create_provider(&provider_name, api_key)?; let provider = create_provider(&provider_name, api_key)?;
// Step 4: Build schema from categories // Step 4: Build schema from categories
let schema = build_category_schema(&settings.categories); let schema = build_category_schema(&settings.categories, settings.max_items_per_category);
// Step 4b: Resolve models — user overrides take priority over admin config // Step 4b: Resolve models — user overrides take priority over admin config
let model_research = if !settings.ai_model.is_empty() { let model_research = if !settings.ai_model.is_empty() {

Loading…
Cancel
Save