feat: add per-article classify/summarize prompt and schema

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
master
oabrivard 3 months ago
parent 825b793387
commit b2dbc3847a

@ -107,6 +107,20 @@ pub fn build_classification_schema() -> Value {
}) })
} }
/// Returns the JSON Schema describing the per-article classification and
/// summarization response.
///
/// The schema is an object with three required string properties (`title`,
/// `summary`, `category`) and rejects any additional property.
pub fn build_article_classify_schema() -> Value {
    // Every property is a described string, so build them through one helper.
    let string_field = |description: &str| {
        serde_json::json!({ "type": "string", "description": description })
    };

    let properties = serde_json::json!({
        "title": string_field("Article title"),
        "summary": string_field("4-5 line summary of the article"),
        "category": string_field("Category name from the provided list")
    });

    serde_json::json!({
        "type": "object",
        "properties": properties,
        "required": ["title", "summary", "category"],
        "additionalProperties": false
    })
}
/// Build a JSON Schema for LLM link extraction response. /// Build a JSON Schema for LLM link extraction response.
pub fn build_link_extraction_schema() -> Value { pub fn build_link_extraction_schema() -> Value {
serde_json::json!({ serde_json::json!({
@ -330,6 +344,16 @@ mod tests {
assert_eq!(schema["additionalProperties"], false); assert_eq!(schema["additionalProperties"], false);
} }
/// The article classify schema must expose the `title`, `summary` and
/// `category` properties and forbid additional properties.
#[test]
fn article_classify_schema_has_all_fields() {
    let schema = build_article_classify_schema();
    let properties = schema["properties"].as_object().unwrap();

    for field in ["title", "summary", "category"] {
        assert!(properties.contains_key(field));
    }

    assert_eq!(schema["additionalProperties"], false);
}
#[test] #[test]
fn link_extraction_schema_has_urls_array() { fn link_extraction_schema_has_urls_array() {
let schema = build_link_extraction_schema(); let schema = build_link_extraction_schema();

@ -205,6 +205,43 @@ pub fn build_article_extraction_prompt(head_html: &str, body_text: &str) -> (Str
(system_prompt, user_prompt) (system_prompt, user_prompt)
} }
/// Build the prompts for per-article classification and summarization.
///
/// The LLM is asked to classify the article into one of the given categories
/// and to generate a title and a summary. Both prompts are written in French.
///
/// # Arguments
///
/// * `title` - Title of the article. When it is empty or contains only
///   whitespace, the placeholder `(pas de titre)` is sent instead; the prompt
///   also asks the model to generate a title from the content in that case.
/// * `body_snippet` - Excerpt of the article body, inserted verbatim.
/// * `categories` - Category names, rendered as a quoted bullet list
///   (one `- "name"` entry per line).
///
/// # Returns
///
/// A `(system_prompt, user_prompt)` tuple.
pub fn build_article_classify_prompt(
    title: &str,
    body_snippet: &str,
    categories: &[String],
) -> (String, String) {
    let system_prompt = "Tu es un assistant qui analyse des articles d'actualite. \
        Tu dois classer l'article dans une categorie et generer un titre et un resume. \
        Reponds uniquement au format JSON demande."
        .to_string();

    let categories_list = categories
        .iter()
        .map(|c| format!("- \"{}\"", c))
        .collect::<Vec<_>>()
        .join("\n");

    // A whitespace-only title carries no more information than an empty one,
    // so it gets the same placeholder instead of a blank "Titre :" line.
    // Non-blank titles are passed through untouched.
    let displayed_title = if title.trim().is_empty() {
        "(pas de titre)"
    } else {
        title
    };

    let user_prompt = format!(
        "Voici un article d'actualite.\n\n\
         Titre : {title}\n\n\
         Contenu (extrait) :\n{body}\n\n\
         Categories disponibles :\n{categories}\n\n\
         Classe cet article dans la categorie la plus appropriee.\n\
         Si aucune categorie ne correspond, utilise \"Autre\".\n\
         Genere un titre clair et un resume de 4 a 5 lignes.\n\
         Si le titre fourni est vide, genere un titre a partir du contenu.",
        title = displayed_title,
        body = body_snippet,
        categories = categories_list,
    );

    (system_prompt, user_prompt)
}
/// Build a prompt for classifying scraped articles into categories. /// Build a prompt for classifying scraped articles into categories.
/// ///
/// # Arguments /// # Arguments
@ -528,6 +565,25 @@ mod tests {
assert!(user.len() < 15000); assert!(user.len() < 15000);
} }
/// The user prompt must embed the article title and every category name, and
/// the system prompt must carry the classification instruction.
#[test]
fn article_classify_prompt_includes_content() {
    let categories = vec![String::from("AI News"), String::from("Autre")];

    let (system_prompt, user_prompt) = build_article_classify_prompt(
        "GPT-5 Released",
        "OpenAI released GPT-5 today",
        &categories,
    );

    for expected in ["GPT-5 Released", "AI News", "Autre"] {
        assert!(user_prompt.contains(expected));
    }
    assert!(system_prompt.contains("classer"));
}
/// An empty title must be replaced by the "(pas de titre)" placeholder in the
/// user prompt.
#[test]
fn article_classify_prompt_handles_empty_title() {
    let categories = [String::from("Tech"), String::from("Autre")];

    let prompts = build_article_classify_prompt("", "Some content", &categories);
    let user_prompt = prompts.1;

    assert!(user_prompt.contains("(pas de titre)"));
}
#[test] #[test]
fn article_extraction_prompt_includes_content() { fn article_extraction_prompt_includes_content() {
let (_, user) = build_article_extraction_prompt("<meta name='date'>", "Article body here"); let (_, user) = build_article_extraction_prompt("<meta name='date'>", "Article body here");

Loading…
Cancel
Save