From f44aa44c48bbf0192401a0cdcb976f587322ebfa Mon Sep 17 00:00:00 2001 From: oabrivard Date: Thu, 26 Mar 2026 01:19:05 +0100 Subject: [PATCH] refactor: replace trace_article 11 parameters with ArticleTrace struct Co-Authored-By: Claude Sonnet 4.6 --- backend/src/services/synthesis.rs | 92 +++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 29 deletions(-) diff --git a/backend/src/services/synthesis.rs b/backend/src/services/synthesis.rs index 16a426b..1a71780 100644 --- a/backend/src/services/synthesis.rs +++ b/backend/src/services/synthesis.rs @@ -363,7 +363,11 @@ async fn run_generation_inner( if !existing.is_empty() { for (url, source_url) in &candidate_urls { if existing.contains(&hash_article_url(url)) { - trace_article(&state.pool, user_id, job_id, url, "", "personalized_source", Some(source_url), None, None, "filtered_history", false).await; + trace_article(&state.pool, user_id, job_id, &ArticleTrace { + url, title: "", source_type: "personalized_source", + source_url: Some(source_url), category: None, synthesis_id: None, + status: "filtered_history", scraped_ok: false, + }).await; } } candidate_urls.retain(|(url, _)| !existing.contains(&hash_article_url(url))); @@ -397,7 +401,11 @@ async fn run_generation_inner( let source_domain = extract_domain(&source_url).unwrap_or_default(); let source_count = source_counts.get(&source_domain).copied().unwrap_or(0); if source_count >= settings.max_articles_per_source as usize { - trace_article(&state.pool, user_id, job_id, &url, "", "personalized_source", Some(&source_url), None, None, "filtered_diversity", false).await; + trace_article(&state.pool, user_id, job_id, &ArticleTrace { + url: &url, title: "", source_type: "personalized_source", + source_url: Some(&source_url), category: None, synthesis_id: None, + status: "filtered_diversity", scraped_ok: false, + }).await; continue; } batch.push((url, source_url)); @@ -427,7 +435,11 @@ async fn run_generation_inner( while let Some(join_result) = 
scrape_set.join_next().await { if let Ok((_url, source_url, (body_text, page_title, final_url, drop_reason))) = join_result { if let Some(reason) = drop_reason { - trace_article(&state.pool, user_id, job_id, &final_url, &page_title, "personalized_source", Some(&source_url), None, None, reason, false).await; + trace_article(&state.pool, user_id, job_id, &ArticleTrace { + url: &final_url, title: &page_title, source_type: "personalized_source", + source_url: Some(&source_url), category: None, synthesis_id: None, + status: reason, scraped_ok: false, + }).await; } else { scraped_articles.push((final_url, source_url, body_text, page_title)); } @@ -542,7 +554,11 @@ async fn run_generation_inner( &state.pool, user_id, &result.url, &seen_urls, &source_counts, settings.article_history_days, settings.max_articles_per_source as usize, ).await { - trace_article(&state.pool, user_id, job_id, &result.url, &result.title, "brave_search", None, None, None, reason, false).await; + trace_article(&state.pool, user_id, job_id, &ArticleTrace { + url: &result.url, title: &result.title, source_type: "brave_search", + source_url: None, category: None, synthesis_id: None, + status: reason, scraped_ok: false, + }).await; continue; } @@ -588,7 +604,11 @@ async fn run_generation_inner( while let Some(join_result) = scrape_set.join_next().await { if let Ok((_url, (body_text, page_title, final_url, drop_reason))) = join_result { if let Some(reason) = drop_reason { - trace_article(&state.pool, user_id, job_id, &final_url, &page_title, "brave_search", None, None, None, reason, false).await; + trace_article(&state.pool, user_id, job_id, &ArticleTrace { + url: &final_url, title: &page_title, source_type: "brave_search", + source_url: None, category: None, synthesis_id: None, + status: reason, scraped_ok: false, + }).await; } else { scraped_articles.push((final_url, body_text, page_title)); } @@ -699,7 +719,11 @@ async fn run_generation_inner( &state.pool, user_id, &item.url, &seen_urls, 
&source_counts, settings.article_history_days, settings.max_articles_per_source as usize, ).await { - trace_article(&state.pool, user_id, job_id, &item.url, &item.title, "web_search", None, None, None, reason, false).await; + trace_article(&state.pool, user_id, job_id, &ArticleTrace { + url: &item.url, title: &item.title, source_type: "web_search", + source_url: None, category: None, synthesis_id: None, + status: reason, scraped_ok: false, + }).await; continue; } @@ -714,7 +738,11 @@ async fn run_generation_inner( let (_body_text, _, final_url, drop_reason) = scrape_single_article(&state.http_client, &item.url, settings.max_age_days as i64).await; if let Some(reason) = drop_reason { - trace_article(&state.pool, user_id, job_id, &final_url, &item.title, "web_search", None, None, None, reason, false).await; + trace_article(&state.pool, user_id, job_id, &ArticleTrace { + url: &final_url, title: &item.title, source_type: "web_search", + source_url: None, category: None, synthesis_id: None, + status: reason, scraped_ok: false, + }).await; continue; } @@ -766,10 +794,12 @@ async fn run_generation_inner( Some(_) => "personalized_source", None => "web_search", }; - trace_article(&state.pool, user_id, job_id, &item.url, &item.title, - source_type, - if source_type == "personalized_source" { url_source.get(&item.url).map(|s| s.as_str()) } else { None }, - Some(&section.title), Some(synthesis.id), "used", true).await; + trace_article(&state.pool, user_id, job_id, &ArticleTrace { + url: &item.url, title: &item.title, source_type, + source_url: if source_type == "personalized_source" { url_source.get(&item.url).map(|s| s.as_str()) } else { None }, + category: Some(&section.title), synthesis_id: Some(synthesis.id), + status: "used", scraped_ok: true, + }).await; } } } @@ -810,32 +840,36 @@ fn emit_progress(tx: &watch::Sender, step: &str, message: &str, p .ok(); } +/// Structured parameters for article history tracing. 
+struct ArticleTrace<'a> { + url: &'a str, + title: &'a str, + source_type: &'a str, + source_url: Option<&'a str>, + category: Option<&'a str>, + synthesis_id: Option, + status: &'a str, + scraped_ok: bool, +} + /// Insert a trace entry into article_history for debugging pipeline behavior. -#[allow(clippy::too_many_arguments)] async fn trace_article( pool: &sqlx::PgPool, user_id: Uuid, job_id: Uuid, - url: &str, - title: &str, - source_type: &str, - source_url: Option<&str>, - category: Option<&str>, - synthesis_id: Option, - status: &str, - scraped_ok: bool, + trace: &ArticleTrace<'_>, ) { let entry = db::article_history::ArticleHistoryEntry { user_id, - url: url.to_string(), - url_hash: hash_article_url(url), - title: title.to_string(), - source_type: source_type.to_string(), - source_url: source_url.map(|s| s.to_string()), - category: category.map(|s| s.to_string()), - synthesis_id, - status: status.to_string(), - scraped_ok, + url: trace.url.to_string(), + url_hash: hash_article_url(trace.url), + title: trace.title.to_string(), + source_type: trace.source_type.to_string(), + source_url: trace.source_url.map(|s| s.to_string()), + category: trace.category.map(|s| s.to_string()), + synthesis_id: trace.synthesis_id, + status: trace.status.to_string(), + scraped_ok: trace.scraped_ok, job_id, }; db::article_history::insert_entry(pool, &entry).await.ok();