You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
269 lines
10 KiB
Rust
269 lines
10 KiB
Rust
//! Prompt construction for the two-pass LLM generation pipeline.
//!
//! Builds system and user prompts for:
//! - **Search pass** (Pass 1): web search and initial article discovery
//! - **Rewrite pass** (Pass 2): rewrite summaries using scraped content
//!
//! Prompts are provider-agnostic; the search prompt is parameterized by user settings.
|
|
|
|
use crate::models::settings::UserSettings;
|
|
use crate::models::source::Source;
|
|
use crate::models::synthesis::ScrapedNewsItem;
|
|
|
|
/// Build the system prompt and user prompt for the search pass (Pass 1).
|
|
///
|
|
/// The search pass instructs the LLM to find recent news articles
|
|
/// matching the user's theme and categories, using web search grounding.
|
|
///
|
|
/// # Arguments
|
|
/// * `settings` — User's configured settings (theme, categories, etc.)
|
|
/// * `sources` — User's custom sources to prioritize
|
|
/// * `current_date` — Formatted date string for the prompt
|
|
pub fn build_search_prompt(
|
|
settings: &UserSettings,
|
|
sources: &[Source],
|
|
current_date: &str,
|
|
) -> (String, String) {
|
|
let sources_text = if sources.is_empty() {
|
|
String::new()
|
|
} else {
|
|
let list = sources
|
|
.iter()
|
|
.map(|s| format!("- {} ({})", s.title, s.url))
|
|
.collect::<Vec<_>>()
|
|
.join("\n");
|
|
format!(
|
|
"\nEn plus des sources par defaut, tu DOIS imperativement consulter \
|
|
et integrer les informations provenant de ces sources personnalisees :\n{}\n",
|
|
list
|
|
)
|
|
};
|
|
|
|
let categories_text = settings
|
|
.categories
|
|
.iter()
|
|
.enumerate()
|
|
.map(|(i, cat)| format!("{}. {}", i + 1, cat))
|
|
.collect::<Vec<_>>()
|
|
.join("\n");
|
|
|
|
let behavior = if settings.search_agent_behavior.is_empty() {
|
|
"Tu peux egalement utiliser d'autres sources pertinentes trouvees via la recherche Google."
|
|
.to_string()
|
|
} else {
|
|
settings.search_agent_behavior.clone()
|
|
};
|
|
|
|
let system_prompt = format!(
|
|
"Tu es un assistant IA precis. Tu dois TOUJOURS fournir des URLs completes et exactes. \
|
|
Ne tronque jamais les URLs. Tu dois te concentrer UNIQUEMENT sur les actualites des {} \
|
|
derniers jours.",
|
|
settings.max_age_days
|
|
);
|
|
|
|
let user_prompt = format!(
|
|
"Aujourd'hui, nous sommes le {date}.\n\
|
|
Tu es un expert en analyse de l'actualite sur le theme : \"{theme}\".\n\
|
|
Ta tache est de rechercher les actualites STRICTEMENT des {days} derniers jours.\n\
|
|
Ne retourne AUCUNE actualite datant de plus de {days} jours.\n\n\
|
|
Tu DOIS imperativement t'appuyer sur le contenu des sites web pertinents pour ce theme.\
|
|
{sources}\
|
|
{behavior}\n\n\
|
|
La synthese doit etre divisee en {count} grandes sections :\n\
|
|
{categories}\n\n\
|
|
Pour chaque categorie, fournis au maximum {max_items} actualites.\n\
|
|
Pour chaque actualite, fournis un titre provisoire, l'URL source exacte et complete, \
|
|
et un resume provisoire.\n\
|
|
Retourne le resultat au format JSON en utilisant les cles category_0, category_1, etc. \
|
|
correspondant a l'ordre des sections ci-dessus.",
|
|
date = current_date,
|
|
theme = settings.theme,
|
|
days = settings.max_age_days,
|
|
sources = sources_text,
|
|
behavior = behavior,
|
|
count = settings.categories.len(),
|
|
categories = categories_text,
|
|
max_items = settings.max_items_per_category,
|
|
);
|
|
|
|
(system_prompt, user_prompt)
|
|
}
|
|
|
|
/// Build the system prompt and user prompt for the rewrite pass (Pass 2).
|
|
///
|
|
/// The rewrite pass takes scraped article content and asks the LLM to
|
|
/// rewrite titles and summaries to faithfully reflect the actual content.
|
|
///
|
|
/// # Arguments
|
|
/// * `scraped_data` — Map of category key to scraped news items with content
|
|
pub fn build_rewrite_prompt(
|
|
scraped_data: &std::collections::HashMap<String, Vec<ScrapedNewsItem>>,
|
|
) -> (String, String) {
|
|
let system_prompt =
|
|
"Tu es un assistant IA precis. Tu dois generer des titres et resumes fideles \
|
|
au contenu fourni."
|
|
.to_string();
|
|
|
|
let data_json = serde_json::to_string_pretty(scraped_data).unwrap_or_default();
|
|
|
|
let user_prompt = format!(
|
|
"Tu es un expert en analyse de l'actualite.\n\
|
|
Voici une liste d'articles d'actualite classes par categorie, avec leur contenu textuel \
|
|
brut extrait des sites web ('scrapedContent').\n\
|
|
Ta tache est de reecrire le 'title' et le 'summary' (4 ou 5 lignes) pour chaque article \
|
|
afin qu'ils refletent EXACTEMENT et FIDELEMENT le contenu textuel fourni.\n\
|
|
Si le 'scrapedContent' est vide ou insuffisant, utilise le titre et le resume originaux \
|
|
pour faire au mieux.\n\
|
|
Conserve EXACTEMENT les memes URLs. Ne supprime aucun article de cette liste.\n\n\
|
|
Donnees des articles :\n{data}",
|
|
data = data_json,
|
|
);
|
|
|
|
(system_prompt, user_prompt)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;
    use uuid::Uuid;

    /// Date string handed to every search-prompt test; the builder only echoes it.
    const TEST_DATE: &str = "lundi 21 mars 2026";

    /// Baseline settings: two categories, 7-day window, 4 items per category,
    /// and no custom search behavior.
    fn test_settings() -> UserSettings {
        UserSettings {
            user_id: Uuid::nil(),
            theme: "Intelligence Artificielle".to_string(),
            max_age_days: 7,
            categories: vec![
                "Annonces majeures".to_string(),
                "Recherche et innovation".to_string(),
            ],
            max_items_per_category: 4,
            search_agent_behavior: String::new(),
            ai_provider: String::new(),
            ai_model: String::new(),
            ai_model_writing: String::new(),
            rate_limit_max_requests: None,
            rate_limit_time_window_seconds: None,
            updated_at: Utc::now(),
        }
    }

    /// Build a custom source with nil ids and the given title and URL.
    fn test_source(title: &str, url: &str) -> Source {
        Source {
            id: Uuid::nil(),
            user_id: Uuid::nil(),
            title: title.into(),
            url: url.into(),
            created_at: Utc::now(),
        }
    }

    /// Run the search-prompt builder with the fixed test date and return only
    /// the user prompt.
    fn search_user_prompt(settings: &UserSettings, sources: &[Source]) -> String {
        let (_, user_prompt) = build_search_prompt(settings, sources, TEST_DATE);
        user_prompt
    }

    #[test]
    fn search_prompt_includes_theme() {
        let user_prompt = search_user_prompt(&test_settings(), &[]);
        assert!(user_prompt.contains("Intelligence Artificielle"));
    }

    #[test]
    fn search_prompt_includes_date() {
        let user_prompt = search_user_prompt(&test_settings(), &[]);
        assert!(user_prompt.contains("lundi 21 mars 2026"));
    }

    #[test]
    fn search_prompt_includes_max_age() {
        let settings = test_settings();
        let (system, user_prompt) = build_search_prompt(&settings, &[], TEST_DATE);
        assert!(user_prompt.contains("7 derniers jours"));
        // Match the whole phrase: a bare "7" would be satisfied by any stray digit.
        assert!(system.contains("7 derniers jours"));
    }

    #[test]
    fn search_prompt_includes_categories() {
        let user_prompt = search_user_prompt(&test_settings(), &[]);
        assert!(user_prompt.contains("1. Annonces majeures"));
        assert!(user_prompt.contains("2. Recherche et innovation"));
        assert!(user_prompt.contains("2 grandes sections"));
    }

    #[test]
    fn search_prompt_includes_max_items() {
        let user_prompt = search_user_prompt(&test_settings(), &[]);
        assert!(user_prompt.contains("4 actualites"));
    }

    #[test]
    fn search_prompt_includes_custom_sources() {
        let sources = vec![
            test_source("TechCrunch", "https://techcrunch.com"),
            test_source("The Verge", "https://theverge.com"),
        ];

        let user_prompt = search_user_prompt(&test_settings(), &sources);
        assert!(user_prompt.contains("TechCrunch (https://techcrunch.com)"));
        assert!(user_prompt.contains("The Verge (https://theverge.com)"));
        assert!(user_prompt.contains("sources personnalisees"));
    }

    #[test]
    fn search_prompt_no_sources_no_section() {
        let user_prompt = search_user_prompt(&test_settings(), &[]);
        assert!(!user_prompt.contains("sources personnalisees"));
    }

    #[test]
    fn search_prompt_custom_behavior() {
        let mut settings = test_settings();
        settings.search_agent_behavior =
            "Concentre-toi sur les sources europeennes.".to_string();

        let user_prompt = search_user_prompt(&settings, &[]);
        assert!(user_prompt.contains("Concentre-toi sur les sources europeennes."));
        // The default sentence must be replaced, not appended to.
        assert!(!user_prompt.contains("recherche Google"));
    }

    #[test]
    fn search_prompt_default_behavior_when_empty() {
        let user_prompt = search_user_prompt(&test_settings(), &[]);
        assert!(user_prompt.contains("recherche Google"));
    }

    #[test]
    fn rewrite_prompt_includes_instructions() {
        let mut data = std::collections::HashMap::new();
        data.insert(
            "category_0".to_string(),
            vec![ScrapedNewsItem {
                title: "Test Article".into(),
                url: "https://example.com".into(),
                summary: "A summary".into(),
                scraped_content: "Full article text here...".into(),
            }],
        );

        let (system, user_prompt) = build_rewrite_prompt(&data);
        assert!(system.contains("fideles"));
        assert!(user_prompt.contains("scrapedContent"));
        assert!(user_prompt.contains("Test Article"));
        assert!(user_prompt.contains("https://example.com"));
        assert!(user_prompt.contains("Ne supprime aucun article"));
    }

    #[test]
    fn rewrite_prompt_with_empty_data() {
        let data = std::collections::HashMap::new();
        let (_, user_prompt) = build_rewrite_prompt(&data);
        // Should still produce a valid prompt with empty data
        assert!(user_prompt.contains("Donnees des articles"));
    }
}
|