//! JSON Schema builder for structured LLM output. //! //! Constructs the JSON Schema that is passed to the LLM provider //! to enforce structured output matching the user's categories. use serde_json::Value; /// Build a JSON Schema for structured output based on user categories. /// /// Each category is mapped to a property named `category_0`, `category_1`, etc. /// Each property is an array of news items with `title`, `url`, and `summary` fields. /// /// # Example /// /// For categories `["Major Announcements", "Research"]`, produces: /// ```json /// { /// "type": "object", /// "properties": { /// "category_0": { /// "type": "array", /// "description": "Major Announcements", /// "items": { /// "type": "object", /// "properties": { /// "title": { "type": "string" }, /// "url": { "type": "string" }, /// "summary": { "type": "string" } /// }, /// "required": ["title", "url", "summary"] /// } /// }, /// "category_1": { ... } /// }, /// "required": ["category_0", "category_1"] /// } /// ``` pub fn build_category_schema(categories: &[String], max_items_per_category: i32) -> Value { let news_item_schema = serde_json::json!({ "type": "object", "properties": { "title": { "type": "string", "description": "The title of the news article" }, "url": { "type": "string", "description": "The URL of the source article" }, "summary": { "type": "string", "description": "A concise summary of the article" } }, "required": ["title", "url", "summary"], "additionalProperties": false }); let mut properties = serde_json::Map::new(); let mut required = Vec::new(); for (i, category_name) in categories.iter().enumerate() { let key = format!("category_{}", i); properties.insert( key.clone(), serde_json::json!({ "type": "array", "description": category_name, "items": news_item_schema, "minItems": max_items_per_category, "maxItems": max_items_per_category }), ); required.push(Value::String(key)); } serde_json::json!({ "type": "object", "properties": properties, "required": required, "additionalProperties": false }) } /// Build a JSON Schema for the article classification response. /// /// The LLM returns an array of assignments mapping article indices to category names. pub fn build_classification_schema() -> Value { serde_json::json!({ "type": "object", "properties": { "assignments": { "type": "array", "items": { "type": "object", "properties": { "index": { "type": "integer", "description": "Article index from the input list" }, "category": { "type": "string", "description": "Category name to assign this article to" } }, "required": ["index", "category"], "additionalProperties": false } } }, "required": ["assignments"], "additionalProperties": false }) } #[cfg(test)] mod tests { use super::*; #[test] fn schema_with_one_category() { let categories = vec!["AI News".to_string()]; let schema = build_category_schema(&categories, 5); assert_eq!(schema["type"], "object"); // One property let props = schema["properties"].as_object().unwrap(); assert_eq!(props.len(), 1); assert!(props.contains_key("category_0")); // Category description assert_eq!(props["category_0"]["description"], "AI News"); // Array type with items assert_eq!(props["category_0"]["type"], "array"); let items = &props["category_0"]["items"]; assert_eq!(items["type"], "object"); assert!(items["properties"].get("title").is_some()); assert!(items["properties"].get("url").is_some()); assert!(items["properties"].get("summary").is_some()); // Required fields let required = schema["required"].as_array().unwrap(); assert_eq!(required.len(), 1); assert_eq!(required[0], "category_0"); // OpenAI strict mode: additionalProperties must be false on all objects assert_eq!(schema["additionalProperties"], false); assert_eq!(items["additionalProperties"], false); } #[test] fn schema_with_three_categories() { let categories = vec![ "Annonces majeures".to_string(), "Recherche".to_string(), "Secteur public".to_string(), ]; let schema = build_category_schema(&categories, 5); let props = schema["properties"].as_object().unwrap(); assert_eq!(props.len(), 3); assert_eq!(props["category_0"]["description"], "Annonces majeures"); assert_eq!(props["category_1"]["description"], "Recherche"); assert_eq!(props["category_2"]["description"], "Secteur public"); let required = schema["required"].as_array().unwrap(); assert_eq!(required.len(), 3); } #[test] fn schema_with_five_categories() { let categories: Vec = (0..5) .map(|i| format!("Category {}", i)) .collect(); let schema = build_category_schema(&categories, 5); let props = schema["properties"].as_object().unwrap(); assert_eq!(props.len(), 5); for i in 0..5 { let key = format!("category_{}", i); assert!(props.contains_key(&key)); assert_eq!( props[&key]["description"].as_str().unwrap(), format!("Category {}", i) ); } let required = schema["required"].as_array().unwrap(); assert_eq!(required.len(), 5); } #[test] fn schema_with_empty_categories() { let categories: Vec = vec![]; let schema = build_category_schema(&categories, 5); let props = schema["properties"].as_object().unwrap(); assert_eq!(props.len(), 0); let required = schema["required"].as_array().unwrap(); assert_eq!(required.len(), 0); } #[test] fn schema_news_item_has_required_fields() { let categories = vec!["Test".to_string()]; let schema = build_category_schema(&categories, 5); let items = &schema["properties"]["category_0"]["items"]; let item_required = items["required"].as_array().unwrap(); let item_required_strs: Vec<&str> = item_required .iter() .map(|v| v.as_str().unwrap()) .collect(); assert!(item_required_strs.contains(&"title")); assert!(item_required_strs.contains(&"url")); assert!(item_required_strs.contains(&"summary")); } #[test] fn schema_meets_openai_strict_mode_requirements() { let categories = vec!["Test".to_string(), "Other".to_string()]; let schema = build_category_schema(&categories, 5); // Every "type": "object" must have "additionalProperties": false assert_eq!( schema["additionalProperties"], false, "Root object must have additionalProperties: false" ); let items = &schema["properties"]["category_0"]["items"]; assert_eq!( items["additionalProperties"], false, "News item object must have additionalProperties: false" ); // All properties must be listed in required let props: Vec<&str> = schema["properties"] .as_object() .unwrap() .keys() .map(|k| k.as_str()) .collect(); let required: Vec<&str> = schema["required"] .as_array() .unwrap() .iter() .map(|v| v.as_str().unwrap()) .collect(); for prop in &props { assert!( required.contains(prop), "Property '{}' must be in required array", prop ); } // News item required fields must match properties let item_props: Vec<&str> = items["properties"] .as_object() .unwrap() .keys() .map(|k| k.as_str()) .collect(); let item_required: Vec<&str> = items["required"] .as_array() .unwrap() .iter() .map(|v| v.as_str().unwrap()) .collect(); for prop in &item_props { assert!( item_required.contains(prop), "News item property '{}' must be in required array", prop ); } } #[test] fn schema_with_special_characters_in_category_name() { let categories = vec![ "AI & Machine Learning".to_string(), "R&D / Innovation".to_string(), ]; let schema = build_category_schema(&categories, 5); let props = schema["properties"].as_object().unwrap(); assert_eq!(props["category_0"]["description"], "AI & Machine Learning"); assert_eq!(props["category_1"]["description"], "R&D / Innovation"); } #[test] fn classification_schema_has_assignments_array() { let schema = build_classification_schema(); assert_eq!(schema["type"], "object"); let assignments = &schema["properties"]["assignments"]; assert_eq!(assignments["type"], "array"); let item_props = &assignments["items"]["properties"]; assert!(item_props.get("index").is_some()); assert!(item_props.get("category").is_some()); assert_eq!(assignments["items"]["additionalProperties"], false); assert_eq!(schema["additionalProperties"], false); } }