diff --git a/backend/src/services/llm/schema.rs b/backend/src/services/llm/schema.rs
index 17a244d..21d84dc 100644
--- a/backend/src/services/llm/schema.rs
+++ b/backend/src/services/llm/schema.rs
@@ -107,6 +107,23 @@ pub fn build_classification_schema() -> Value {
     })
 }
 
+/// Build a JSON Schema for per-article classification and summarization.
+///
+/// The schema describes an object with three required string properties
+/// (`title`, `summary`, `category`) and rejects any additional properties.
+pub fn build_article_classify_schema() -> Value {
+    serde_json::json!({
+        "type": "object",
+        "properties": {
+            "title": { "type": "string", "description": "Article title" },
+            "summary": { "type": "string", "description": "4-5 line summary of the article" },
+            "category": { "type": "string", "description": "Category name from the provided list" }
+        },
+        "required": ["title", "summary", "category"],
+        "additionalProperties": false
+    })
+}
+
 /// Build a JSON Schema for LLM link extraction response.
 pub fn build_link_extraction_schema() -> Value {
     serde_json::json!({
@@ -330,6 +347,16 @@ mod tests {
         assert_eq!(schema["additionalProperties"], false);
     }
 
+    #[test]
+    fn article_classify_schema_has_all_fields() {
+        let schema = build_article_classify_schema();
+        let props = schema["properties"].as_object().unwrap();
+        assert!(props.contains_key("title"));
+        assert!(props.contains_key("summary"));
+        assert!(props.contains_key("category"));
+        assert_eq!(schema["additionalProperties"], false);
+    }
+
     #[test]
     fn link_extraction_schema_has_urls_array() {
         let schema = build_link_extraction_schema();
diff --git a/backend/src/services/prompts.rs b/backend/src/services/prompts.rs
index 607c511..8a1603d 100644
--- a/backend/src/services/prompts.rs
+++ b/backend/src/services/prompts.rs
@@ -205,6 +205,55 @@ pub fn build_article_extraction_prompt(head_html: &str, body_text: &str) -> (Str
     (system_prompt, user_prompt)
 }
 
+/// Build a prompt for per-article classification and summarization.
+///
+/// The LLM classifies the article into a category and generates a title + summary.
+///
+/// # Arguments
+///
+/// * `title` - Article title; an empty string is shown to the LLM as "(pas de titre)"
+/// * `body_snippet` - Excerpt of the article body, inserted into the prompt as-is
+/// * `categories` - Category names offered to the LLM, rendered one per line
+///
+/// # Returns
+///
+/// A `(system_prompt, user_prompt)` tuple.
+pub fn build_article_classify_prompt(
+    title: &str,
+    body_snippet: &str,
+    categories: &[String],
+) -> (String, String) {
+    let system_prompt =
+        "Tu es un assistant qui analyse des articles d'actualite. \
+         Tu dois classer l'article dans une categorie et generer un titre et un resume. \
+         Reponds uniquement au format JSON demande."
+            .to_string();
+
+    // One quoted bullet per category, joined with newlines.
+    let categories_list = categories
+        .iter()
+        .map(|c| format!("- \"{}\"", c))
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    let user_prompt = format!(
+        "Voici un article d'actualite.\n\n\
+         Titre : {title}\n\n\
+         Contenu (extrait) :\n{body}\n\n\
+         Categories disponibles :\n{categories}\n\n\
+         Classe cet article dans la categorie la plus appropriee.\n\
+         Si aucune categorie ne correspond, utilise \"Autre\".\n\
+         Genere un titre clair et un resume de 4 a 5 lignes.\n\
+         Si le titre fourni est vide, genere un titre a partir du contenu.",
+        // Placeholder keeps the "Titre :" line meaningful when no title was supplied.
+        title = if title.is_empty() { "(pas de titre)" } else { title },
+        body = body_snippet,
+        categories = categories_list,
+    );
+
+    (system_prompt, user_prompt)
+}
+
 /// Build a prompt for classifying scraped articles into categories.
 ///
 /// # Arguments
@@ -528,6 +577,25 @@ mod tests {
         assert!(user.len() < 15000);
     }
 
+    #[test]
+    fn article_classify_prompt_includes_content() {
+        let (sys, user) = build_article_classify_prompt(
+            "GPT-5 Released",
+            "OpenAI released GPT-5 today",
+            &["AI News".into(), "Autre".into()],
+        );
+        assert!(user.contains("GPT-5 Released"));
+        assert!(user.contains("AI News"));
+        assert!(user.contains("Autre"));
+        assert!(sys.contains("classer"));
+    }
+
+    #[test]
+    fn article_classify_prompt_handles_empty_title() {
+        let (_, user) = build_article_classify_prompt("", "Some content", &["Tech".into(), "Autre".into()]);
+        assert!(user.contains("(pas de titre)"));
+    }
+
     #[test]
     fn article_extraction_prompt_includes_content() {
         let (_, user) = build_article_extraction_prompt("", "Article body here");