//! Prompt construction for the two-pass LLM generation pipeline. //! //! Builds system and user prompts for: //! - **Search pass** (Pass 1): web search and initial article discovery //! - **Rewrite pass** (Pass 2): rewrite summaries using scraped content //! //! Prompts are provider-agnostic and parameterized by user settings. use crate::models::settings::UserSettings; use crate::models::source::Source; use crate::models::synthesis::ScrapedNewsItem; /// Build the system prompt and user prompt for the search pass (Pass 1). /// /// The search pass instructs the LLM to find recent news articles /// matching the user's theme and categories, using web search grounding. /// /// # Arguments /// * `settings` — User's configured settings (theme, categories, etc.) /// * `sources` — User's custom sources to prioritize /// * `current_date` — Formatted date string for the prompt pub fn build_search_prompt( settings: &UserSettings, sources: &[Source], current_date: &str, ) -> (String, String) { let sources_text = if sources.is_empty() { String::new() } else { let list = sources .iter() .map(|s| format!("- {} ({})", s.title, s.url)) .collect::>() .join("\n"); format!( "\nEn plus des sources par defaut, tu DOIS imperativement consulter \ et integrer les informations provenant de ces sources personnalisees :\n{}\n", list ) }; let categories_text = settings .categories .iter() .enumerate() .map(|(i, cat)| format!("{}. {}", i + 1, cat)) .collect::>() .join("\n"); let behavior = if settings.search_agent_behavior.is_empty() { "Tu peux egalement utiliser d'autres sources pertinentes trouvees via la recherche Google." .to_string() } else { settings.search_agent_behavior.clone() }; let system_prompt = format!( "Tu es un assistant IA precis. Tu dois TOUJOURS fournir des URLs completes et exactes. \ Ne tronque jamais les URLs. Tu dois te concentrer UNIQUEMENT sur les actualites des {} \ derniers jours.", settings.max_age_days ); let user_prompt = format!( "Aujourd'hui, nous sommes le {date}.\n\ Tu es un expert en analyse de l'actualite sur le theme : \"{theme}\".\n\ Ta tache est de rechercher les actualites STRICTEMENT des {days} derniers jours.\n\ Ne retourne AUCUNE actualite datant de plus de {days} jours.\n\n\ Tu DOIS imperativement t'appuyer sur le contenu des sites web pertinents pour ce theme.\ {sources}\ {behavior}\n\n\ La synthese doit etre divisee en {count} grandes sections :\n\ {categories}\n\n\ Pour chaque categorie, fournis au maximum {max_items} actualites.\n\ Pour chaque actualite, fournis un titre provisoire, l'URL source exacte et complete, \ et un resume provisoire.\n\ Retourne le resultat au format JSON en utilisant les cles category_0, category_1, etc. \ correspondant a l'ordre des sections ci-dessus.", date = current_date, theme = settings.theme, days = settings.max_age_days, sources = sources_text, behavior = behavior, count = settings.categories.len(), categories = categories_text, max_items = settings.max_items_per_category, ); (system_prompt, user_prompt) } /// Build the system prompt and user prompt for the rewrite pass (Pass 2). /// /// The rewrite pass takes scraped article content and asks the LLM to /// rewrite titles and summaries to faithfully reflect the actual content. /// /// # Arguments /// * `scraped_data` — Map of category key to scraped news items with content pub fn build_rewrite_prompt( scraped_data: &std::collections::HashMap>, ) -> (String, String) { let system_prompt = "Tu es un assistant IA precis. Tu dois generer des titres et resumes fideles \ au contenu fourni." .to_string(); let data_json = serde_json::to_string_pretty(scraped_data).unwrap_or_default(); let user_prompt = format!( "Tu es un expert en analyse de l'actualite.\n\ Voici une liste d'articles d'actualite classes par categorie, avec leur contenu textuel \ brut extrait des sites web ('scrapedContent').\n\ Ta tache est de reecrire le 'title' et le 'summary' (4 ou 5 lignes) pour chaque article \ afin qu'ils refletent EXACTEMENT et FIDELEMENT le contenu textuel fourni.\n\ Si le 'scrapedContent' est vide ou insuffisant, utilise le titre et le resume originaux \ pour faire au mieux.\n\ Conserve EXACTEMENT les memes URLs. Ne supprime aucun article de cette liste.\n\n\ Donnees des articles :\n{data}", data = data_json, ); (system_prompt, user_prompt) } #[cfg(test)] mod tests { use super::*; use chrono::Utc; use uuid::Uuid; fn test_settings() -> UserSettings { UserSettings { user_id: Uuid::nil(), theme: "Intelligence Artificielle".to_string(), max_age_days: 7, categories: vec![ "Annonces majeures".to_string(), "Recherche et innovation".to_string(), ], max_items_per_category: 4, search_agent_behavior: String::new(), ai_provider: String::new(), ai_model: String::new(), ai_model_writing: String::new(), rate_limit_max_requests: None, rate_limit_time_window_seconds: None, updated_at: Utc::now(), } } #[test] fn search_prompt_includes_theme() { let settings = test_settings(); let (_, user_prompt) = build_search_prompt(&settings, &[], "lundi 21 mars 2026"); assert!(user_prompt.contains("Intelligence Artificielle")); } #[test] fn search_prompt_includes_date() { let settings = test_settings(); let (_, user_prompt) = build_search_prompt(&settings, &[], "lundi 21 mars 2026"); assert!(user_prompt.contains("lundi 21 mars 2026")); } #[test] fn search_prompt_includes_max_age() { let settings = test_settings(); let (system, user_prompt) = build_search_prompt(&settings, &[], "lundi 21 mars 2026"); assert!(user_prompt.contains("7 derniers jours")); assert!(system.contains("7")); } #[test] fn search_prompt_includes_categories() { let settings = test_settings(); let (_, user_prompt) = build_search_prompt(&settings, &[], "lundi 21 mars 2026"); assert!(user_prompt.contains("1. Annonces majeures")); assert!(user_prompt.contains("2. Recherche et innovation")); assert!(user_prompt.contains("2 grandes sections")); } #[test] fn search_prompt_includes_max_items() { let settings = test_settings(); let (_, user_prompt) = build_search_prompt(&settings, &[], "lundi 21 mars 2026"); assert!(user_prompt.contains("4 actualites")); } #[test] fn search_prompt_includes_custom_sources() { let settings = test_settings(); let sources = vec![ Source { id: Uuid::nil(), user_id: Uuid::nil(), title: "TechCrunch".into(), url: "https://techcrunch.com".into(), created_at: Utc::now(), }, Source { id: Uuid::nil(), user_id: Uuid::nil(), title: "The Verge".into(), url: "https://theverge.com".into(), created_at: Utc::now(), }, ]; let (_, user_prompt) = build_search_prompt(&settings, &sources, "lundi 21 mars 2026"); assert!(user_prompt.contains("TechCrunch (https://techcrunch.com)")); assert!(user_prompt.contains("The Verge (https://theverge.com)")); assert!(user_prompt.contains("sources personnalisees")); } #[test] fn search_prompt_no_sources_no_section() { let settings = test_settings(); let (_, user_prompt) = build_search_prompt(&settings, &[], "lundi 21 mars 2026"); assert!(!user_prompt.contains("sources personnalisees")); } #[test] fn search_prompt_custom_behavior() { let mut settings = test_settings(); settings.search_agent_behavior = "Concentre-toi sur les sources europeennes.".to_string(); let (_, user_prompt) = build_search_prompt(&settings, &[], "lundi 21 mars 2026"); assert!(user_prompt.contains("Concentre-toi sur les sources europeennes.")); assert!(!user_prompt.contains("recherche Google")); } #[test] fn search_prompt_default_behavior_when_empty() { let settings = test_settings(); let (_, user_prompt) = build_search_prompt(&settings, &[], "lundi 21 mars 2026"); assert!(user_prompt.contains("recherche Google")); } #[test] fn rewrite_prompt_includes_instructions() { let mut data = std::collections::HashMap::new(); data.insert( "category_0".to_string(), vec![ScrapedNewsItem { title: "Test Article".into(), url: "https://example.com".into(), summary: "A summary".into(), scraped_content: "Full article text here...".into(), }], ); let (system, user_prompt) = build_rewrite_prompt(&data); assert!(system.contains("fideles")); assert!(user_prompt.contains("scrapedContent")); assert!(user_prompt.contains("Test Article")); assert!(user_prompt.contains("https://example.com")); assert!(user_prompt.contains("Ne supprime aucun article")); } #[test] fn rewrite_prompt_with_empty_data() { let data = std::collections::HashMap::new(); let (_, user_prompt) = build_rewrite_prompt(&data); // Should still produce a valid prompt with empty data assert!(user_prompt.contains("Donnees des articles")); } }