|
|
|
|
@ -205,6 +205,43 @@ pub fn build_article_extraction_prompt(head_html: &str, body_text: &str) -> (Str
|
|
|
|
|
(system_prompt, user_prompt)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Build a prompt for per-article classification and summarization.
///
/// The LLM classifies the article into a category and generates a title + summary.
///
/// # Arguments
/// * `title` - Title of the article. When it is empty or contains only
///   whitespace, the placeholder `(pas de titre)` is inserted instead;
///   any other title is inserted verbatim.
/// * `body_snippet` - Excerpt of the article body, inserted verbatim.
/// * `categories` - Category names offered to the model; each one is rendered
///   as a quoted bullet on its own line.
///
/// # Returns
/// A `(system_prompt, user_prompt)` tuple.
pub fn build_article_classify_prompt(
    title: &str,
    body_snippet: &str,
    categories: &[String],
) -> (String, String) {
    let system_prompt =
        "Tu es un assistant qui analyse des articles d'actualite. \
         Tu dois classer l'article dans une categorie et generer un titre et un resume. \
         Reponds uniquement au format JSON demande."
            .to_string();

    // One `- "name"` bullet per category, newline-separated.
    let categories_list = categories
        .iter()
        .map(|c| format!("- \"{}\"", c))
        .collect::<Vec<_>>()
        .join("\n");

    // A whitespace-only title carries no information, so it gets the same
    // placeholder as an empty one rather than a blank "Titre :" line.
    let display_title = if title.trim().is_empty() {
        "(pas de titre)"
    } else {
        title
    };

    let user_prompt = format!(
        "Voici un article d'actualite.\n\n\
         Titre : {title}\n\n\
         Contenu (extrait) :\n{body}\n\n\
         Categories disponibles :\n{categories}\n\n\
         Classe cet article dans la categorie la plus appropriee.\n\
         Si aucune categorie ne correspond, utilise \"Autre\".\n\
         Genere un titre clair et un resume de 4 a 5 lignes.\n\
         Si le titre fourni est vide, genere un titre a partir du contenu.",
        title = display_title,
        body = body_snippet,
        categories = categories_list,
    );

    (system_prompt, user_prompt)
}
|
|
|
|
|
|
|
|
|
|
/// Build a prompt for classifying scraped articles into categories.
|
|
|
|
|
///
|
|
|
|
|
/// # Arguments
|
|
|
|
|
@ -528,6 +565,25 @@ mod tests {
|
|
|
|
|
assert!(user.len() < 15000);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// The classify prompt must carry the article title and every category name
/// in the user message, and the classification instruction in the system message.
#[test]
fn article_classify_prompt_includes_content() {
    let categories = [String::from("AI News"), String::from("Autre")];
    let (system_prompt, user_prompt) = build_article_classify_prompt(
        "GPT-5 Released",
        "OpenAI released GPT-5 today",
        &categories,
    );

    // Title and category names are expected somewhere in the user prompt.
    for expected in ["GPT-5 Released", "AI News", "Autre"] {
        assert!(user_prompt.contains(expected));
    }
    assert!(system_prompt.contains("classer"));
}
|
|
|
|
|
|
|
|
|
|
/// An empty title must be replaced by the `(pas de titre)` placeholder
/// in the user prompt.
#[test]
fn article_classify_prompt_handles_empty_title() {
    let categories = vec![String::from("Tech"), String::from("Autre")];
    let (_system_prompt, user_prompt) =
        build_article_classify_prompt("", "Some content", &categories);

    assert!(user_prompt.contains("(pas de titre)"));
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn article_extraction_prompt_includes_content() {
|
|
|
|
|
let (_, user) = build_article_extraction_prompt("<meta name='date'>", "Article body here");
|
|
|
|
|
|