diff --git a/backend/src/services/scraper.rs b/backend/src/services/scraper.rs
index 0c138bc..e03788b 100644
--- a/backend/src/services/scraper.rs
+++ b/backend/src/services/scraper.rs
@@ -245,7 +245,13 @@ fn validate_scheme(url: &url::Url) -> Result<(), AppError> {
 /// Perform SSRF checks by resolving the URL's hostname and verifying
 /// that none of the resolved IP addresses are private, loopback,
 /// or link-local.
+///
+/// Skipped only when `SKIP_SSRF_CHECK` is exactly `1` (integration tests with wiremock).
+/// Test-only escape hatch: any other value (`0`, empty, ...) keeps the check enabled.
 pub async fn check_ssrf(url: &url::Url) -> Result<(), AppError> {
+    if matches!(std::env::var("SKIP_SSRF_CHECK").as_deref(), Ok("1")) {
+        return Ok(());
+    }
     let host = url
         .host_str()
         .ok_or_else(|| AppError::BadRequest("URL has no host".into()))?;
diff --git a/backend/tests/api_syntheses_test.rs b/backend/tests/api_syntheses_test.rs
index 964814e..bb9bc77 100644
--- a/backend/tests/api_syntheses_test.rs
+++ b/backend/tests/api_syntheses_test.rs
@@ -13,7 +13,6 @@
 
 mod common;
 
-use axum::body::Body;
 use axum::http::StatusCode;
 
 fn require_test_db() -> bool {
diff --git a/backend/tests/pipeline_test.rs b/backend/tests/pipeline_test.rs
index e9eba57..449cf89 100644
--- a/backend/tests/pipeline_test.rs
+++ b/backend/tests/pipeline_test.rs
@@ -10,6 +10,20 @@ use wiremock::{Mock, MockServer, ResponseTemplate};
 async fn setup_mock_server() -> MockServer {
     let server = MockServer::start().await;
 
+    // Source page with links to articles (for Phase 1 heuristic extraction)
+    let base = server.uri();
+    Mock::given(method("GET"))
+        .and(path("/blog"))
+        .respond_with(ResponseTemplate::new(200).set_body_string(format!(
+            r#"
+            <a href="{base}/article-1">Article One</a>
+            <a href="{base}/article-2">Article Two</a>
+            <a href="{base}/article-3">Article Three</a>
+            "#
+        )))
+        .mount(&server)
+        .await;
+
     // Article pages
     for i in 1..=5 {
         Mock::given(method("GET"))
@@ -77,12 +91,13 @@ async fn phase1_with_llm_link_extraction_classifies_articles() {
     // Use LLM link extraction to bypass SSRF on source page
     let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 4, true).await;
 
-    // Add a source (URL doesn't matter much — LLM mock will return article URLs)
-    let source = serde_json::json!({"title": "Test Source", "url": "https://example.com/blog"});
+    // Add a source pointing to wiremock (same host as article URLs)
+    let source_url = format!("{}/blog", mock_server.uri());
+    let source = serde_json::json!({"title": "Test Source", "url": source_url});
     let (status, _) = app.post_with_session("/api/v1/sources", &source, &session).await;
     assert!(status.is_success());
 
-    // Mock provider: LLM link extraction returns wiremock article URLs
+    // Mock provider: LLM link extraction returns wiremock article URLs (same domain)
     let article_urls: Vec<String> = (1..=3)
         .map(|i| format!("{}/article-{}", mock_server.uri(), i))
         .collect();
@@ -187,7 +202,8 @@ async fn category_overflow_spills_to_autre() {
     // max_items_per_category=1, but LLM classifies all articles to "AI News"
     let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 1, true).await;
 
-    let source = serde_json::json!({"title": "Test Source", "url": "https://example.com/blog"});
+    let source_url = format!("{}/blog", mock_server.uri());
+    let source = serde_json::json!({"title": "Test Source", "url": source_url});
     app.post_with_session("/api/v1/sources", &source, &session).await;
 
     let article_urls: Vec<String> = (1..=3)
diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh
index 07a05d1..2570934 100755
--- a/scripts/run-integration-tests.sh
+++ b/scripts/run-integration-tests.sh
@@ -26,6 +26,7 @@ PG_PASS="testpassword"
 PG_DB="ai_synth_test"
 
 export TEST_DATABASE_URL="postgres://${PG_USER}:${PG_PASS}@${PG_HOST}:${PG_PORT}/${PG_DB}"
+export SKIP_SSRF_CHECK=1  # Allow wiremock on localhost for pipeline tests
 
 # ── DB check mode ──────────────────────────────────────────────────
 if [ "${1:-}" = "--db-check" ]; then