From a760220d442590df91907bcabb543cc7d1996d71 Mon Sep 17 00:00:00 2001 From: oabrivard Date: Wed, 25 Mar 2026 09:06:41 +0100 Subject: [PATCH] fix: log LLM calls for source link extraction in llm_call_log --- backend/src/services/source_scraper.rs | 26 +++++++++++++++++++++++--- backend/src/services/synthesis.rs | 8 ++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/backend/src/services/source_scraper.rs b/backend/src/services/source_scraper.rs index 9e8f547..e2e3d8f 100644 --- a/backend/src/services/source_scraper.rs +++ b/backend/src/services/source_scraper.rs @@ -13,8 +13,9 @@ use url::Url; /// Patterns in URL paths that indicate non-article pages. const EXCLUDED_PATH_PATTERNS: &[&str] = &[ - "/tag/", "/category/", "/author/", "/page/", "/login", "/signup", - "/privacy", "/terms", "/search", "/contact", "/about", + "/tag", "/category", "/author", "/page", "/login", "/signup", + "/privacy", "/terms", "/search", "/contact", "/about", "/topics", + "/archive", "/companies", "/events", "/company", "/event", "/collections", ]; /// File extensions that indicate static assets, not articles. @@ -137,6 +138,9 @@ pub async fn extract_article_links_with_llm( max_links: usize, provider: &Arc, model: &str, + pool: Option<&sqlx::PgPool>, + user_id: Option, + job_id: Option, ) -> Result, AppError> { let base_url = Url::parse(source_url) .map_err(|e| AppError::BadRequest(format!("Invalid source URL: {}", e)))?; @@ -160,7 +164,23 @@ pub async fn extract_article_links_with_llm( let (system, user) = build_link_extraction_prompt(&head_html, &body_html); let schema = build_link_extraction_schema(); - match provider.call_llm(model, &system, &user, &schema).await { + let llm_start = std::time::Instant::now(); + let llm_result = provider.call_llm(model, &system, &user, &schema).await; + let llm_duration = llm_start.elapsed().as_millis() as u64; + + // Log the LLM call if pool/user_id/job_id are provided + if let (Some(pool), Some(uid), Some(jid)) = (pool, user_id, job_id) { + let response_str = match &llm_result { + Ok(resp) => serde_json::to_string_pretty(resp).unwrap_or_default(), + Err(e) => format!("Error: {}", e), + }; + crate::db::llm_call_log::insert( + pool, uid, jid, "link_extraction", model, + &system, &user, &response_str, llm_duration as i32, + ).await.ok(); + } + + match llm_result { Ok(llm_response) => { let urls: Vec = llm_response .get("urls") diff --git a/backend/src/services/synthesis.rs b/backend/src/services/synthesis.rs index 998f5f8..0d35373 100644 --- a/backend/src/services/synthesis.rs +++ b/backend/src/services/synthesis.rs @@ -301,10 +301,14 @@ async fn run_generation_inner( let provider_clone = std::sync::Arc::clone(&provider); let model = model_research.clone(); let max_l = max_links; + let pool = state.pool.clone(); + let uid = user_id; + let jid = job_id; join_set.spawn(async move { let links = if use_llm { source_scraper::extract_article_links_with_llm( &client, &source_url, max_l, &provider_clone, &model, + Some(&pool), Some(uid), Some(jid), ).await } else { source_scraper::extract_article_links( @@ -342,10 +346,14 @@ async fn run_generation_inner( let provider_clone = std::sync::Arc::clone(&provider); let model = model_research.clone(); let max_l = max_links; + let pool = state.pool.clone(); + let uid = user_id; + let jid = job_id; join_set.spawn(async move { let links = if use_llm { source_scraper::extract_article_links_with_llm( &client, &source_url, max_l, &provider_clone, &model, + Some(&pool), Some(uid), Some(jid), ).await } else { source_scraper::extract_article_links(