refactor: bundle 8 of trace_article's 11 parameters into an ArticleTrace struct

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
master
oabrivard 3 months ago
parent f5466a6bd5
commit f44aa44c48

@ -363,7 +363,11 @@ async fn run_generation_inner(
if !existing.is_empty() {
for (url, source_url) in &candidate_urls {
if existing.contains(&hash_article_url(url)) {
trace_article(&state.pool, user_id, job_id, url, "", "personalized_source", Some(source_url), None, None, "filtered_history", false).await;
trace_article(&state.pool, user_id, job_id, &ArticleTrace {
url, title: "", source_type: "personalized_source",
source_url: Some(source_url), category: None, synthesis_id: None,
status: "filtered_history", scraped_ok: false,
}).await;
}
}
candidate_urls.retain(|(url, _)| !existing.contains(&hash_article_url(url)));
@ -397,7 +401,11 @@ async fn run_generation_inner(
let source_domain = extract_domain(&source_url).unwrap_or_default();
let source_count = source_counts.get(&source_domain).copied().unwrap_or(0);
if source_count >= settings.max_articles_per_source as usize {
trace_article(&state.pool, user_id, job_id, &url, "", "personalized_source", Some(&source_url), None, None, "filtered_diversity", false).await;
trace_article(&state.pool, user_id, job_id, &ArticleTrace {
url: &url, title: "", source_type: "personalized_source",
source_url: Some(&source_url), category: None, synthesis_id: None,
status: "filtered_diversity", scraped_ok: false,
}).await;
continue;
}
batch.push((url, source_url));
@ -427,7 +435,11 @@ async fn run_generation_inner(
while let Some(join_result) = scrape_set.join_next().await {
if let Ok((_url, source_url, (body_text, page_title, final_url, drop_reason))) = join_result {
if let Some(reason) = drop_reason {
trace_article(&state.pool, user_id, job_id, &final_url, &page_title, "personalized_source", Some(&source_url), None, None, reason, false).await;
trace_article(&state.pool, user_id, job_id, &ArticleTrace {
url: &final_url, title: &page_title, source_type: "personalized_source",
source_url: Some(&source_url), category: None, synthesis_id: None,
status: reason, scraped_ok: false,
}).await;
} else {
scraped_articles.push((final_url, source_url, body_text, page_title));
}
@ -542,7 +554,11 @@ async fn run_generation_inner(
&state.pool, user_id, &result.url, &seen_urls, &source_counts,
settings.article_history_days, settings.max_articles_per_source as usize,
).await {
trace_article(&state.pool, user_id, job_id, &result.url, &result.title, "brave_search", None, None, None, reason, false).await;
trace_article(&state.pool, user_id, job_id, &ArticleTrace {
url: &result.url, title: &result.title, source_type: "brave_search",
source_url: None, category: None, synthesis_id: None,
status: reason, scraped_ok: false,
}).await;
continue;
}
@ -588,7 +604,11 @@ async fn run_generation_inner(
while let Some(join_result) = scrape_set.join_next().await {
if let Ok((_url, (body_text, page_title, final_url, drop_reason))) = join_result {
if let Some(reason) = drop_reason {
trace_article(&state.pool, user_id, job_id, &final_url, &page_title, "brave_search", None, None, None, reason, false).await;
trace_article(&state.pool, user_id, job_id, &ArticleTrace {
url: &final_url, title: &page_title, source_type: "brave_search",
source_url: None, category: None, synthesis_id: None,
status: reason, scraped_ok: false,
}).await;
} else {
scraped_articles.push((final_url, body_text, page_title));
}
@ -699,7 +719,11 @@ async fn run_generation_inner(
&state.pool, user_id, &item.url, &seen_urls, &source_counts,
settings.article_history_days, settings.max_articles_per_source as usize,
).await {
trace_article(&state.pool, user_id, job_id, &item.url, &item.title, "web_search", None, None, None, reason, false).await;
trace_article(&state.pool, user_id, job_id, &ArticleTrace {
url: &item.url, title: &item.title, source_type: "web_search",
source_url: None, category: None, synthesis_id: None,
status: reason, scraped_ok: false,
}).await;
continue;
}
@ -714,7 +738,11 @@ async fn run_generation_inner(
let (_body_text, _, final_url, drop_reason) = scrape_single_article(&state.http_client, &item.url, settings.max_age_days as i64).await;
if let Some(reason) = drop_reason {
trace_article(&state.pool, user_id, job_id, &final_url, &item.title, "web_search", None, None, None, reason, false).await;
trace_article(&state.pool, user_id, job_id, &ArticleTrace {
url: &final_url, title: &item.title, source_type: "web_search",
source_url: None, category: None, synthesis_id: None,
status: reason, scraped_ok: false,
}).await;
continue;
}
@ -766,10 +794,12 @@ async fn run_generation_inner(
Some(_) => "personalized_source",
None => "web_search",
};
trace_article(&state.pool, user_id, job_id, &item.url, &item.title,
source_type,
if source_type == "personalized_source" { url_source.get(&item.url).map(|s| s.as_str()) } else { None },
Some(&section.title), Some(synthesis.id), "used", true).await;
trace_article(&state.pool, user_id, job_id, &ArticleTrace {
url: &item.url, title: &item.title, source_type,
source_url: if source_type == "personalized_source" { url_source.get(&item.url).map(|s| s.as_str()) } else { None },
category: Some(&section.title), synthesis_id: Some(synthesis.id),
status: "used", scraped_ok: true,
}).await;
}
}
}
@ -810,32 +840,36 @@ fn emit_progress(tx: &watch::Sender<ProgressEvent>, step: &str, message: &str, p
.ok();
}
/// Structured parameters for article history tracing.
///
/// Groups the per-article values that `trace_article` copies into an
/// `ArticleHistoryEntry`. Every field is either a borrow or a small value,
/// so the struct is cheap to build at each call site and is `Copy`.
#[derive(Debug, Clone, Copy)]
struct ArticleTrace<'a> {
    /// URL of the article being traced; `trace_article` stores it and also
    /// derives the entry's `url_hash` from it.
    url: &'a str,
    /// Article or page title; call sites pass `""` when no title is known yet.
    title: &'a str,
    /// Origin of the article. Visible call sites use `"personalized_source"`,
    /// `"brave_search"` and `"web_search"`.
    source_type: &'a str,
    /// URL of the source the article was discovered from. Visible call sites
    /// only set it for personalized sources and pass `None` otherwise.
    source_url: Option<&'a str>,
    /// Category label; the `"used"` call site passes the section title.
    category: Option<&'a str>,
    /// Id of the synthesis that consumed the article, when there is one.
    synthesis_id: Option<Uuid>,
    /// Pipeline outcome, e.g. `"filtered_history"`, `"filtered_diversity"`,
    /// `"used"`, or a drop reason reported by filtering/scraping.
    status: &'a str,
    /// Visible call sites pass `false` for filtered/dropped articles and
    /// `true` for articles recorded as `"used"`.
    scraped_ok: bool,
}
/// Insert a trace entry into article_history for debugging pipeline behavior.
#[allow(clippy::too_many_arguments)]
async fn trace_article(
pool: &sqlx::PgPool,
user_id: Uuid,
job_id: Uuid,
url: &str,
title: &str,
source_type: &str,
source_url: Option<&str>,
category: Option<&str>,
synthesis_id: Option<Uuid>,
status: &str,
scraped_ok: bool,
trace: &ArticleTrace<'_>,
) {
let entry = db::article_history::ArticleHistoryEntry {
user_id,
url: url.to_string(),
url_hash: hash_article_url(url),
title: title.to_string(),
source_type: source_type.to_string(),
source_url: source_url.map(|s| s.to_string()),
category: category.map(|s| s.to_string()),
synthesis_id,
status: status.to_string(),
scraped_ok,
url: trace.url.to_string(),
url_hash: hash_article_url(trace.url),
title: trace.title.to_string(),
source_type: trace.source_type.to_string(),
source_url: trace.source_url.map(|s| s.to_string()),
category: trace.category.map(|s| s.to_string()),
synthesis_id: trace.synthesis_id,
status: trace.status.to_string(),
scraped_ok: trace.scraped_ok,
job_id,
};
db::article_history::insert_entry(pool, &entry).await.ok();

Loading…
Cancel
Save