feat: dynamic summary length and body snippet size based on setting

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
master
oabrivard 3 months ago
parent bf07b049f3
commit 91272ddfc4

@ -150,6 +150,7 @@ pub fn build_article_classify_prompt(
title: &str, title: &str,
body_snippet: &str, body_snippet: &str,
categories: &[String], categories: &[String],
summary_length: i32,
) -> (String, String) { ) -> (String, String) {
let system_prompt = let system_prompt =
"Tu es un assistant qui analyse des articles d'actualite. \ "Tu es un assistant qui analyse des articles d'actualite. \
@ -163,6 +164,12 @@ pub fn build_article_classify_prompt(
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join("\n"); .join("\n");
let summary_instruction = match summary_length {
1 => "Genere un titre clair et un resume de 3 a 4 lignes.",
2 => "Genere un titre clair et un resume de 6 a 8 lignes.",
_ => "Genere un titre clair et un resume detaille de 12 a 15 lignes.",
};
let user_prompt = format!( let user_prompt = format!(
"Voici un article d'actualite.\n\n\ "Voici un article d'actualite.\n\n\
Titre : {title}\n\n\ Titre : {title}\n\n\
@ -170,11 +177,12 @@ pub fn build_article_classify_prompt(
Categories disponibles :\n{categories}\n\n\ Categories disponibles :\n{categories}\n\n\
Classe cet article dans la categorie la plus appropriee.\n\ Classe cet article dans la categorie la plus appropriee.\n\
Si aucune categorie ne correspond, utilise \"Autre\".\n\ Si aucune categorie ne correspond, utilise \"Autre\".\n\
Genere un titre clair et un resume de 4 a 5 lignes.\n\ {summary_instruction}\n\
Si le titre fourni est vide, genere un titre a partir du contenu.", Si le titre fourni est vide, genere un titre a partir du contenu.",
title = if title.is_empty() { "(pas de titre)" } else { title }, title = if title.is_empty() { "(pas de titre)" } else { title },
body = body_snippet, body = body_snippet,
categories = categories_list, categories = categories_list,
summary_instruction = summary_instruction,
); );
(system_prompt, user_prompt) (system_prompt, user_prompt)
@ -375,6 +383,7 @@ mod tests {
"GPT-5 Released", "GPT-5 Released",
"OpenAI released GPT-5 today", "OpenAI released GPT-5 today",
&["AI News".into(), "Autre".into()], &["AI News".into(), "Autre".into()],
3,
); );
assert!(user.contains("GPT-5 Released")); assert!(user.contains("GPT-5 Released"));
assert!(user.contains("AI News")); assert!(user.contains("AI News"));
@ -384,8 +393,20 @@ mod tests {
#[test] #[test]
fn article_classify_prompt_handles_empty_title() { fn article_classify_prompt_handles_empty_title() {
let (_, user) = build_article_classify_prompt("", "Some content", &["Tech".into(), "Autre".into()]); let (_, user) = build_article_classify_prompt("", "Some content", &["Tech".into(), "Autre".into()], 3);
assert!(user.contains("(pas de titre)")); assert!(user.contains("(pas de titre)"));
} }
/// A `summary_length` of 1 must place the short-summary instruction
/// ("3 a 4 lignes") in the generated user prompt.
#[test]
fn article_classify_prompt_short_summary() {
    let categories: [String; 1] = ["AI".into()];
    // Only the user prompt is inspected; the system prompt is ignored here.
    let (_system_prompt, user_prompt) =
        build_article_classify_prompt("Title", "Content", &categories, 1);
    assert!(user_prompt.contains("3 a 4 lignes"));
}
/// A `summary_length` of 3 must place the detailed-summary instruction
/// ("12 a 15 lignes") in the generated user prompt.
#[test]
fn article_classify_prompt_detailed_summary() {
    let categories: [String; 1] = ["AI".into()];
    // Only the user prompt is inspected; the system prompt is ignored here.
    let (_system_prompt, user_prompt) =
        build_article_classify_prompt("Title", "Content", &categories, 3);
    assert!(user_prompt.contains("12 a 15 lignes"));
}
} }

@ -484,7 +484,12 @@ pub async fn run_generation_inner(
let model = Arc::clone(&model_research); let model = Arc::clone(&model_research);
let schema = Arc::clone(&classify_schema); let schema = Arc::clone(&classify_schema);
let cats = Arc::clone(&classification_categories); let cats = Arc::clone(&classification_categories);
let body_snippet: String = body_text.chars().take(500).collect(); let snippet_size = match settings.summary_length {
1 => 500,
2 => 2000,
_ => 4000,
};
let body_snippet: String = body_text.chars().take(snippet_size).collect();
let title = page_title.clone(); let title = page_title.clone();
let url = final_url.clone(); let url = final_url.clone();
let su = source_url.clone(); let su = source_url.clone();
@ -492,7 +497,7 @@ pub async fn run_generation_inner(
let uid = user_id; let uid = user_id;
let jid = job_id; let jid = job_id;
let (sys, usr) = crate::services::prompts::build_article_classify_prompt(&title, &body_snippet, &cats); let (sys, usr) = crate::services::prompts::build_article_classify_prompt(&title, &body_snippet, &cats, settings.summary_length);
classify_set.spawn(async move { classify_set.spawn(async move {
let llm_start = std::time::Instant::now(); let llm_start = std::time::Instant::now();
@ -662,14 +667,19 @@ pub async fn run_generation_inner(
let model = Arc::clone(&model_research); let model = Arc::clone(&model_research);
let schema = Arc::clone(&classify_schema); let schema = Arc::clone(&classify_schema);
let cats = Arc::clone(&classification_categories); let cats = Arc::clone(&classification_categories);
let body_snippet: String = body_text.chars().take(500).collect(); let snippet_size = match settings.summary_length {
1 => 500,
2 => 2000,
_ => 4000,
};
let body_snippet: String = body_text.chars().take(snippet_size).collect();
let title = page_title.clone(); let title = page_title.clone();
let url = final_url.clone(); let url = final_url.clone();
let pool = state.pool.clone(); let pool = state.pool.clone();
let uid = user_id; let uid = user_id;
let jid = job_id; let jid = job_id;
let (sys, usr) = crate::services::prompts::build_article_classify_prompt(&title, &body_snippet, &cats); let (sys, usr) = crate::services::prompts::build_article_classify_prompt(&title, &body_snippet, &cats, settings.summary_length);
classify_set.spawn(async move { classify_set.spawn(async move {
let llm_start = std::time::Instant::now(); let llm_start = std::time::Instant::now();

Loading…
Cancel
Save