@ -18,9 +18,12 @@ use serde::Serialize;
use tokio ::sync ::watch ;
use tokio ::sync ::watch ;
use uuid ::Uuid ;
use uuid ::Uuid ;
use url ::Url ;
use crate ::app_state ::AppState ;
use crate ::app_state ::AppState ;
use crate ::db ;
use crate ::db ;
use crate ::errors ::AppError ;
use crate ::errors ::AppError ;
use crate ::models ::settings ::UserSettings ;
use crate ::models ::synthesis ::{
use crate ::models ::synthesis ::{
get_iso_week_string , NewsItem , NewsSection , ScrapedNewsItem ,
get_iso_week_string , NewsItem , NewsSection , ScrapedNewsItem ,
} ;
} ;
@ -267,19 +270,28 @@ async fn run_generation_inner(
// Step 3: Resolve provider + decrypt API key
// Step 3: Resolve provider + decrypt API key
emit_progress ( tx , "provider" , "Configuration du fournisseur IA..." , 15 ) ;
emit_progress ( tx , "provider" , "Configuration du fournisseur IA..." , 15 ) ;
let ( provider_name , api_key ) = resolve_provider_and_key ( state , user_id ). await ? ;
let ( provider_name , api_key ) = resolve_provider_and_key ( state , user_id , & settings ). await ? ;
let provider = create_provider ( & provider_name , api_key , & state . http_client ) ? ;
let provider = create_provider ( & provider_name , api_key , & state . http_client ) ? ;
// Step 4: Build schema from categories
// Step 4: Build schema from categories
let schema = build_category_schema ( & settings . categories ) ;
let schema = build_category_schema ( & settings . categories ) ;
// Step 4b: Resolve models — user overrides take priority over admin config
let model_research = if ! settings . ai_model . is_empty ( ) {
settings . ai_model . clone ( )
} else {
resolve_model ( state , & provider_name ) . await ?
} ;
let model_writing = if ! settings . ai_model_writing . is_empty ( ) {
settings . ai_model_writing . clone ( )
} else {
model_research . clone ( )
} ;
// Step 5: Rate limit check (pass 1)
// Step 5: Rate limit check (pass 1)
if ! state . provider_rate_limiter . check ( & provider_name ) {
// User overrides take priority over global rate limiter
return Err ( AppError ::RateLimited (
check_rate_limit ( state , & settings , & provider_name ) ? ;
"Limite de requetes atteinte. Veuillez reessayer dans quelques instants." . into ( ) ,
) ) ;
}
// Step 6: LLM search pass
// Step 6: LLM search pass
emit_progress ( tx , "search" , "Recherche d'actualites en cours..." , 30 ) ;
emit_progress ( tx , "search" , "Recherche d'actualites en cours..." , 30 ) ;
@ -289,16 +301,17 @@ async fn run_generation_inner(
let ( system_prompt , user_prompt ) =
let ( system_prompt , user_prompt ) =
prompts ::build_search_prompt ( & settings , & sources , & current_date ) ;
prompts ::build_search_prompt ( & settings , & sources , & current_date ) ;
let model = resolve_model ( state , & provider_name ) . await ? ;
let raw_results = provider
let raw_results = provider
. generate_search_pass ( & model , & system_prompt , & user_prompt , & schema )
. generate_search_pass ( & model_research , & system_prompt , & user_prompt , & schema )
. await ? ;
. await ? ;
// Step 7: Parse structured output into (category_key, Vec<NewsItem>)
// Step 7: Parse structured output into (category_key, Vec<NewsItem>)
emit_progress ( tx , "parsing" , "Analyse des resultats..." , 40 ) ;
emit_progress ( tx , "parsing" , "Analyse des resultats..." , 40 ) ;
let parsed = parse_llm_output ( & raw_results , & settings . categories ) ? ;
let parsed = parse_llm_output ( & raw_results , & settings . categories ) ? ;
// Step 7b: Filter out homepage URLs (path == "/" or empty)
let parsed = filter_homepage_urls ( parsed ) ;
// Step 8: Adaptive pipeline — decide whether to scrape+rewrite or use search results directly
// Step 8: Adaptive pipeline — decide whether to scrape+rewrite or use search results directly
//
//
// If the provider supports native web search and the search pass produced high-quality
// If the provider supports native web search and the search pass produced high-quality
@ -322,19 +335,14 @@ async fn run_generation_inner(
let scraped = scrape_articles ( state , & parsed , settings . max_age_days as i64 , tx ) . await ;
let scraped = scrape_articles ( state , & parsed , settings . max_age_days as i64 , tx ) . await ;
// Rate limit check (pass 2)
// Rate limit check (pass 2)
if ! state . provider_rate_limiter . check ( & provider_name ) {
check_rate_limit ( state , & settings , & provider_name ) ? ;
return Err ( AppError ::RateLimited (
"Limite de requetes atteinte pour la passe de reecriture. Veuillez reessayer."
. into ( ) ,
) ) ;
}
// LLM rewrite pass
// LLM rewrite pass
emit_progress ( tx , "rewrite" , "Redaction des resumes..." , 80 ) ;
emit_progress ( tx , "rewrite" , "Redaction des resumes..." , 80 ) ;
let ( rewrite_system , rewrite_user ) = prompts ::build_rewrite_prompt ( & scraped ) ;
let ( rewrite_system , rewrite_user ) = prompts ::build_rewrite_prompt ( & scraped ) ;
let final_results = provider
let final_results = provider
. generate_rewrite_pass ( & model , & rewrite_system , & rewrite_user , & schema )
. generate_rewrite_pass ( & model _writing , & rewrite_system , & rewrite_user , & schema )
. await ? ;
. await ? ;
emit_progress ( tx , "finalizing" , "Finalisation..." , 90 ) ;
emit_progress ( tx , "finalizing" , "Finalisation..." , 90 ) ;
@ -368,13 +376,126 @@ fn emit_progress(tx: &watch::Sender<ProgressEvent>, step: &str, message: &str, p
. ok ( ) ;
. ok ( ) ;
}
}
/// Check rate limits, using user overrides if configured, otherwise the global limiter.
///
/// When the user has both `rate_limit_max_requests` and `rate_limit_time_window_seconds`
/// set, a temporary per-user rate limiter is created with those values. Otherwise the
/// global provider rate limiter is used.
fn check_rate_limit (
state : & AppState ,
settings : & UserSettings ,
provider_name : & str ,
) -> Result < ( ) , AppError > {
match (
settings . rate_limit_max_requests ,
settings . rate_limit_time_window_seconds ,
) {
( Some ( max_req ) , Some ( window_sec ) ) = > {
// Create a temporary rate limiter with user's config
let user_limiter = crate ::services ::rate_limiter ::RateLimiter ::new (
max_req as usize ,
Duration ::from_secs ( window_sec as u64 ) ,
) ;
let key = format! ( "user_gen_{}" , provider_name ) ;
if ! user_limiter . check ( & key ) {
return Err ( AppError ::RateLimited (
"Limite de requetes personnalisee atteinte. Veuillez reessayer dans quelques instants." . into ( ) ,
) ) ;
}
Ok ( ( ) )
}
_ = > {
if ! state . provider_rate_limiter . check ( provider_name ) {
return Err ( AppError ::RateLimited (
"Limite de requetes atteinte. Veuillez reessayer dans quelques instants."
. into ( ) ,
) ) ;
}
Ok ( ( ) )
}
}
}
/// Filter out articles whose URL is a homepage (path is "/" or empty).
///
/// Homepage URLs are typically not useful as article sources and indicate
/// the LLM returned a domain root rather than a specific article.
fn filter_homepage_urls (
parsed : Vec < ( String , Vec < NewsItem > ) > ,
) -> Vec < ( String , Vec < NewsItem > ) > {
let mut total_filtered = 0 usize ;
let result : Vec < ( String , Vec < NewsItem > ) > = parsed
. into_iter ( )
. map ( | ( cat_key , items ) | {
let filtered : Vec < NewsItem > = items
. into_iter ( )
. filter ( | item | {
match Url ::parse ( & item . url ) {
Ok ( parsed_url ) = > {
let path = parsed_url . path ( ) ;
if path = = "/" | | path . is_empty ( ) {
total_filtered + = 1 ;
false
} else {
true
}
}
Err ( _ ) = > true , // Keep items with unparseable URLs (handled elsewhere)
}
} )
. collect ( ) ;
( cat_key , filtered )
} )
. collect ( ) ;
if total_filtered > 0 {
tracing ::warn ! (
count = total_filtered ,
"Filtered out homepage URLs from search results"
) ;
}
result
}
/// Resolve the LLM provider and decrypt the user's API key.
/// Resolve the LLM provider and decrypt the user's API key.
///
///
/// Looks up the user's API key for the first available provider.
/// If the user has a preferred provider in settings, looks for a key matching
/// that provider specifically. Otherwise falls back to the first available key.
async fn resolve_provider_and_key (
async fn resolve_provider_and_key (
state : & AppState ,
state : & AppState ,
user_id : Uuid ,
user_id : Uuid ,
settings : & UserSettings ,
) -> Result < ( String , String ) , AppError > {
) -> Result < ( String , String ) , AppError > {
let master_key = encryption ::MasterKey ::from_hex ( & state . config . master_encryption_key ) ? ;
// If the user has a preferred provider, look for that specific key
if ! settings . ai_provider . is_empty ( ) {
let key_record = db ::api_keys ::get_for_user_and_provider (
& state . pool ,
user_id ,
& settings . ai_provider ,
)
. await ? ;
match key_record {
Some ( record ) = > {
let api_key =
encryption ::decrypt ( & master_key , & record . encrypted_key , & record . nonce ) ? ;
return Ok ( ( record . provider_name . clone ( ) , api_key ) ) ;
}
None = > {
return Err ( AppError ::BadRequest ( format! (
" Aucune cle API configuree pour le fournisseur ' { } ' . \
Veuillez ajouter une cle API pour ce fournisseur dans vos parametres . " ,
settings . ai_provider
) ) ) ;
}
}
}
// Fall back to first available key
let keys = db ::api_keys ::list_for_user ( & state . pool , user_id ) . await ? ;
let keys = db ::api_keys ::list_for_user ( & state . pool , user_id ) . await ? ;
if keys . is_empty ( ) {
if keys . is_empty ( ) {
@ -383,9 +504,7 @@ async fn resolve_provider_and_key(
) ) ;
) ) ;
}
}
// Use the first available key
let key_record = & keys [ 0 ] ;
let key_record = & keys [ 0 ] ;
let master_key = encryption ::MasterKey ::from_hex ( & state . config . master_encryption_key ) ? ;
let api_key = encryption ::decrypt (
let api_key = encryption ::decrypt (
& master_key ,
& master_key ,
& key_record . encrypted_key ,
& key_record . encrypted_key ,
@ -509,11 +628,12 @@ async fn scrape_articles(
pct as u8 ,
pct as u8 ,
) ;
) ;
if let Ok ( ( cat_key , item , scraped_content ) ) = join_result {
if let Ok ( ( cat_key , item , ( scraped_content , page_title ) ) ) = join_result {
let scraped_item = ScrapedNewsItem {
let scraped_item = ScrapedNewsItem {
title : item . title ,
title : item . title ,
url : item . url ,
url : item . url ,
summary : item . summary ,
summary : item . summary ,
original_title : page_title ,
scraped_content ,
scraped_content ,
} ;
} ;
@ -538,7 +658,7 @@ async fn scrape_articles(
result
result
}
}
/// Scrape a single article URL, returning the body text or an empty string on failure.
/// Scrape a single article URL, returning (body_text, page_title) or empty strings on failure.
///
///
/// Handles all failure modes gracefully:
/// Handles all failure modes gracefully:
/// - Network errors → empty content (article kept)
/// - Network errors → empty content (article kept)
@ -548,24 +668,25 @@ async fn scrape_single_article(
http_client : & reqwest ::Client ,
http_client : & reqwest ::Client ,
url : & str ,
url : & str ,
max_age_days : i64 ,
max_age_days : i64 ,
) -> String {
) -> ( String , String ) {
match scraper ::scrape_url ( http_client , url ) . await {
match scraper ::scrape_url ( http_client , url ) . await {
Ok ( content ) = > {
Ok ( content ) = > {
if ! content . ok | | content . is_soft_404 {
if ! content . ok | | content . is_soft_404 {
tracing ::warn ! ( url = url , "Soft 404 or error page detected, skipping content" ) ;
tracing ::warn ! ( url = url , "Soft 404 or error page detected, skipping content" ) ;
return String ::new ( ) ;
return ( String ::new ( ) , String ::new ( ) ) ;
}
}
if scraper ::is_article_too_old ( content . published_date , max_age_days ) {
if scraper ::is_article_too_old ( content . published_date , max_age_days ) {
tracing ::warn ! ( url = url , "Article too old, skipping content" ) ;
tracing ::warn ! ( url = url , "Article too old, skipping content" ) ;
return String ::new ( ) ;
return ( String ::new ( ) , String ::new ( ) ) ;
}
}
content . body_text
let title = content . title . unwrap_or_default ( ) ;
( content . body_text , title )
}
}
Err ( e ) = > {
Err ( e ) = > {
tracing ::warn ! ( url = url , error = % e , "Failed to scrape URL, keeping article with empty content" ) ;
tracing ::warn ! ( url = url , error = % e , "Failed to scrape URL, keeping article with empty content" ) ;
String ::new ( )
( String ::new ( ) , String ::new ( ) )
}
}
}
}
}
}
@ -1062,4 +1183,71 @@ mod tests {
let parsed : Vec < ( String , Vec < NewsItem > ) > = vec! [ ] ;
let parsed : Vec < ( String , Vec < NewsItem > ) > = vec! [ ] ;
assert! ( ! url_quality_sufficient ( & parsed ) ) ;
assert! ( ! url_quality_sufficient ( & parsed ) ) ;
}
}
// ── filter_homepage_urls tests ──────────────────────────────
/// Site-root URLs, with or without a trailing slash, are removed while a URL
/// with a real path is kept.
#[test]
fn test_homepage_url_filtered() {
    let make = |title: &str, url: &str| NewsItem {
        title: title.into(),
        url: url.into(),
        summary: "Sum".into(),
    };
    let items = vec![
        make("Homepage", "https://example.com/"),
        make("Homepage no slash", "https://example.com"),
        make("Real article", "https://example.com/article/123"),
    ];

    let result = filter_homepage_urls(vec![("category_0".into(), items)]);

    // Only the article with a non-root path should survive.
    let (_, kept) = &result[0];
    assert_eq!(kept.len(), 1);
    assert_eq!(kept[0].title, "Real article");
}
/// URLs that carry a non-root path pass through the filter untouched.
#[test]
fn test_article_url_not_filtered() {
    let fixtures = [
        ("Article 1", "https://example.com/news/article-1", "Sum 1"),
        ("Article 2", "https://blog.example.org/2026/03/post", "Sum 2"),
    ];
    let items: Vec<NewsItem> = fixtures
        .iter()
        .map(|&(title, url, summary)| NewsItem {
            title: title.into(),
            url: url.into(),
            summary: summary.into(),
        })
        .collect();

    let result = filter_homepage_urls(vec![("category_0".into(), items)]);

    // Both items must still be present.
    assert_eq!(result[0].1.len(), 2);
}
/// An item whose URL string is not expected to parse must be kept by the
/// filter rather than dropped.
#[test]
fn test_homepage_filter_keeps_unparseable_urls() {
    let unparseable = NewsItem {
        title: "Bad URL".into(),
        url: "not-a-url".into(),
        summary: "Sum".into(),
    };

    let result = filter_homepage_urls(vec![("category_0".into(), vec![unparseable])]);

    assert_eq!(result[0].1.len(), 1);
}
}
}