|
|
|
@ -435,7 +435,7 @@ async fn run_generation_inner(
|
|
|
|
|
|
|
|
|
|
|
|
let mut scraped_articles: Vec<(String, String, String, String)> = Vec::new(); // (url, source_url, body_text, page_title)
|
|
|
|
let mut scraped_articles: Vec<(String, String, String, String)> = Vec::new(); // (url, source_url, body_text, page_title)
|
|
|
|
while let Some(join_result) = scrape_set.join_next().await {
|
|
|
|
while let Some(join_result) = scrape_set.join_next().await {
|
|
|
|
if let Ok((url, source_url, (body_text, page_title, final_url, drop_reason))) = join_result {
|
|
|
|
if let Ok((_url, source_url, (body_text, page_title, final_url, drop_reason))) = join_result {
|
|
|
|
if let Some(reason) = drop_reason {
|
|
|
|
if let Some(reason) = drop_reason {
|
|
|
|
trace_article(&state.pool, user_id, job_id, &final_url, &page_title, "personalized_source", Some(&source_url), None, None, reason, false).await;
|
|
|
|
trace_article(&state.pool, user_id, job_id, &final_url, &page_title, "personalized_source", Some(&source_url), None, None, reason, false).await;
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
@ -617,7 +617,7 @@ async fn run_generation_inner(
|
|
|
|
// Scrape Phase 2 for validation
|
|
|
|
// Scrape Phase 2 for validation
|
|
|
|
emit_progress(tx, "scraping", "Verification des sources web...", 80);
|
|
|
|
emit_progress(tx, "scraping", "Verification des sources web...", 80);
|
|
|
|
for (cat_key, item) in phase2_items {
|
|
|
|
for (cat_key, item) in phase2_items {
|
|
|
|
let (body_text, _, final_url, drop_reason) = scrape_single_article(&state.http_client, &item.url, settings.max_age_days as i64).await;
|
|
|
|
let (_body_text, _, final_url, drop_reason) = scrape_single_article(&state.http_client, &item.url, settings.max_age_days as i64).await;
|
|
|
|
|
|
|
|
|
|
|
|
if let Some(reason) = drop_reason {
|
|
|
|
if let Some(reason) = drop_reason {
|
|
|
|
trace_article(&state.pool, user_id, job_id, &final_url, &item.title, "web_search", None, None, None, reason, false).await;
|
|
|
|
trace_article(&state.pool, user_id, job_id, &final_url, &item.title, "web_search", None, None, None, reason, false).await;
|
|
|
|
@ -837,8 +837,6 @@ async fn check_rate_limit(
|
|
|
|
tracing::info!(wait_ms = wait.as_millis() as u64, "Rate limited, waiting...");
|
|
|
|
tracing::info!(wait_ms = wait.as_millis() as u64, "Rate limited, waiting...");
|
|
|
|
tokio::time::sleep(wait).await;
|
|
|
|
tokio::time::sleep(wait).await;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Extract the domain (host) from a URL, or None if unparseable.
|
|
|
|
/// Extract the domain (host) from a URL, or None if unparseable.
|
|
|
|
|