From 24d53a01d1abd8ccdfcf2d469c1b1fb19a6a1cb8 Mon Sep 17 00:00:00 2001 From: oabrivard Date: Thu, 26 Mar 2026 00:45:45 +0100 Subject: [PATCH] fix: block SSRF via IPv4-mapped IPv6 and add check to source page fetching Co-Authored-By: Claude Sonnet 4.6 --- backend/src/services/scraper.rs | 39 +++++++++++++++++++++++++- backend/src/services/source_scraper.rs | 14 +++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/backend/src/services/scraper.rs b/backend/src/services/scraper.rs index 9e61aa2..1ac6fe0 100644 --- a/backend/src/services/scraper.rs +++ b/backend/src/services/scraper.rs @@ -240,7 +240,7 @@ fn validate_scheme(url: &url::Url) -> Result<(), AppError> { /// Perform SSRF checks by resolving the URL's hostname and verifying /// that none of the resolved IP addresses are private, loopback, /// or link-local. -async fn check_ssrf(url: &url::Url) -> Result<(), AppError> { +pub async fn check_ssrf(url: &url::Url) -> Result<(), AppError> { let host = url .host_str() .ok_or_else(|| AppError::BadRequest("URL has no host".into()))?; @@ -300,6 +300,13 @@ fn is_private_ip(ip: IpAddr) -> bool { || v4.is_unspecified() // 0.0.0.0 } IpAddr::V6(v6) => { + // Check for IPv4-mapped IPv6 addresses (::ffff:x.x.x.x) + if let Some(mapped_v4) = v6.to_ipv4_mapped() { + return mapped_v4.is_loopback() + || mapped_v4.is_private() + || mapped_v4.is_link_local() + || mapped_v4.is_unspecified(); + } let segments = v6.segments(); v6.is_loopback() // ::1 || v6.is_unspecified() // :: @@ -781,6 +788,36 @@ mod tests { assert!(!is_private_ip(ip)); } + #[test] + fn rejects_ipv4_mapped_ipv6_loopback() { + let ip: IpAddr = "::ffff:127.0.0.1".parse().unwrap(); + assert!(is_private_ip(ip)); + } + + #[test] + fn rejects_ipv4_mapped_ipv6_private_10() { + let ip: IpAddr = "::ffff:10.0.0.1".parse().unwrap(); + assert!(is_private_ip(ip)); + } + + #[test] + fn rejects_ipv4_mapped_ipv6_private_192() { + let ip: IpAddr = "::ffff:192.168.1.1".parse().unwrap(); + assert!(is_private_ip(ip)); + } + + #[test] + fn rejects_ipv4_mapped_ipv6_link_local() { + let ip: IpAddr = "::ffff:169.254.1.1".parse().unwrap(); + assert!(is_private_ip(ip)); + } + + #[test] + fn allows_ipv4_mapped_ipv6_public() { + let ip: IpAddr = "::ffff:8.8.8.8".parse().unwrap(); + assert!(!is_private_ip(ip)); + } + // ── Soft-404 Detection ────────────────────────────────────────── #[test] diff --git a/backend/src/services/source_scraper.rs b/backend/src/services/source_scraper.rs index 35e9320..5caa0e7 100644 --- a/backend/src/services/source_scraper.rs +++ b/backend/src/services/source_scraper.rs @@ -36,6 +36,13 @@ pub async fn extract_article_links( ) -> Result, AppError> { let base_url = Url::parse(source_url) .map_err(|e| AppError::BadRequest(format!("Invalid source URL: {}", e)))?; + + // SSRF check before fetching + if let Err(e) = crate::services::scraper::check_ssrf(&base_url).await { + tracing::warn!(url = source_url, error = %e, "Source URL failed SSRF check"); + return Ok(Vec::new()); + } + let base_domain = base_url.host_str().unwrap_or("").to_lowercase(); let response = http_client @@ -194,6 +201,13 @@ pub async fn extract_article_links_with_llm( ) -> Result, AppError> { let base_url = Url::parse(source_url) .map_err(|e| AppError::BadRequest(format!("Invalid source URL: {}", e)))?; + + // SSRF check before fetching + if let Err(e) = crate::services::scraper::check_ssrf(&base_url).await { + tracing::warn!(url = source_url, error = %e, "Source URL failed SSRF check"); + return Ok(Vec::new()); + } + let base_domain = base_url.host_str().unwrap_or("").to_lowercase(); let response = http_client.get(source_url).send().await.map_err(|e| {