diff --git a/backend/tests/pipeline_test.rs b/backend/tests/pipeline_test.rs index 46d4988..9723469 100644 --- a/backend/tests/pipeline_test.rs +++ b/backend/tests/pipeline_test.rs @@ -869,3 +869,82 @@ async fn phase1_rss_feed_extraction_persists_rss_url() { assert!(rss_row2.0.is_some(), "rss_url should still be set after second generation"); } + +// ── Site search fallback ───────────────────────────────────────────── + +#[tokio::test] +async fn phase1_site_search_fallback_when_source_returns_no_links() { + let app = common::TestApp::new().await; + let server = MockServer::start().await; + let base = server.uri(); + + // Source page that returns NO article links (simulates Cloudflare block / empty page): it answers 200 with only a human-verification notice and no links to the /article-N pages mocked below, so those pages are reachable only through the search URLs handed to the mock LLM provider + Mock::given(method("GET")) + .and(path("/blocked-site")) + .respond_with(ResponseTemplate::new(200).set_body_string( + "
Please verify you are human.
" + )) + .mount(&server) + .await; + + // Article pages (discovered via LLM site search fallback) + for i in 1..=3 { + Mock::given(method("GET")) + .and(path(format!("/article-{}", i))) + .respond_with(ResponseTemplate::new(200).set_body_string(format!( + r#" +This is a fallback article {i} about artificial intelligence.
+ "# + ))) + .mount(&server) + .await; + } + + let (user_id, session, theme_id) = setup_user_with_settings(&app, vec!["AI News"], 4).await; + + // Add a source pointing to the blocked page + let source_url = format!("{}/blocked-site", base); + let source = serde_json::json!({ + "title": "Blocked Source", + "url": source_url, + "theme_id": theme_id.to_string() + }); + let (status, _) = app.post_with_session("/api/v1/sources", &source, &session).await; + assert!(status.is_success(), "Source creation should succeed"); + + // MockLlmProvider with search_urls simulates the LLM site search returning articles + let mock_provider = MockLlmProvider::new() + .with_default_category("AI News") + .with_search_urls(vec![ + format!("{}/article-1", base), + format!("{}/article-2", base), + format!("{}/article-3", base), + ]) + .into_arc(); + + let job_id = uuid::Uuid::new_v4(); + let (tx, _rx) = make_progress_channel(); + + let state = ai_synth_backend::app_state::AppState::new( + app.config.clone(), app.pool.clone(), reqwest::Client::new(), + ); + + let result = synthesis::run_generation_inner( + job_id, &state, user_id, theme_id, &tx, Some(mock_provider), &AtomicBool::new(false), + ).await; + + assert!(result.is_ok(), "Generation should succeed: {:?}", result.err()); + + // Verify article history has entries — either from site_search or Phase 2 + let history_count: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2" + ) + .bind(user_id) + .bind(job_id) + .fetch_one(&app.pool) + .await + .unwrap(); + + assert!(history_count.0 > 0, "Should have article history entries from fallback"); +}