fix: pipeline tests use wiremock URLs + skip SSRF for localhost

- Add SKIP_SSRF_CHECK env var to bypass the SSRF check in test environments
- Use wiremock server as source URL (same domain as article URLs)
- Add source page mock to wiremock setup
- Set SKIP_SSRF_CHECK=1 in integration test script
- Fix unused import warning

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
oabrivard 3 months ago
parent a158f14311
commit 0874650a7f

@@ -245,7 +245,12 @@ fn validate_scheme(url: &url::Url) -> Result<(), AppError> {
/// Perform SSRF checks by resolving the URL's hostname and verifying /// Perform SSRF checks by resolving the URL's hostname and verifying
/// that none of the resolved IP addresses are private, loopback, /// that none of the resolved IP addresses are private, loopback,
/// or link-local. /// or link-local.
///
/// Skipped whenever the `SKIP_SSRF_CHECK` environment variable is set — any (valid Unicode) value, not only `1`, disables the check (used by integration tests with wiremock).
pub async fn check_ssrf(url: &url::Url) -> Result<(), AppError> { pub async fn check_ssrf(url: &url::Url) -> Result<(), AppError> {
if std::env::var("SKIP_SSRF_CHECK").is_ok() {
return Ok(());
}
let host = url let host = url
.host_str() .host_str()
.ok_or_else(|| AppError::BadRequest("URL has no host".into()))?; .ok_or_else(|| AppError::BadRequest("URL has no host".into()))?;

@@ -13,7 +13,6 @@
mod common; mod common;
use axum::body::Body;
use axum::http::StatusCode; use axum::http::StatusCode;
fn require_test_db() -> bool { fn require_test_db() -> bool {

@@ -10,6 +10,20 @@ use wiremock::{Mock, MockServer, ResponseTemplate};
async fn setup_mock_server() -> MockServer { async fn setup_mock_server() -> MockServer {
let server = MockServer::start().await; let server = MockServer::start().await;
// Source page with links to articles (for Phase 1 heuristic extraction)
let base = server.uri();
Mock::given(method("GET"))
.and(path("/blog"))
.respond_with(ResponseTemplate::new(200).set_body_string(format!(
r#"<html><body>
<a href="{base}/article-1">Article One</a>
<a href="{base}/article-2">Article Two</a>
<a href="{base}/article-3">Article Three</a>
</body></html>"#
)))
.mount(&server)
.await;
// Article pages // Article pages
for i in 1..=5 { for i in 1..=5 {
Mock::given(method("GET")) Mock::given(method("GET"))
@@ -77,12 +91,13 @@ async fn phase1_with_llm_link_extraction_classifies_articles() {
// Use LLM link extraction to bypass SSRF on source page // Use LLM link extraction to bypass SSRF on source page
let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 4, true).await; let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 4, true).await;
// Add a source (URL doesn't matter much — LLM mock will return article URLs) // Add a source pointing to wiremock (same host as article URLs)
let source = serde_json::json!({"title": "Test Source", "url": "https://example.com/blog"}); let source_url = format!("{}/blog", mock_server.uri());
let source = serde_json::json!({"title": "Test Source", "url": source_url});
let (status, _) = app.post_with_session("/api/v1/sources", &source, &session).await; let (status, _) = app.post_with_session("/api/v1/sources", &source, &session).await;
assert!(status.is_success()); assert!(status.is_success());
// Mock provider: LLM link extraction returns wiremock article URLs // Mock provider: LLM link extraction returns wiremock article URLs (same domain)
let article_urls: Vec<String> = (1..=3) let article_urls: Vec<String> = (1..=3)
.map(|i| format!("{}/article-{}", mock_server.uri(), i)) .map(|i| format!("{}/article-{}", mock_server.uri(), i))
.collect(); .collect();
@@ -187,7 +202,8 @@ async fn category_overflow_spills_to_autre() {
// max_items_per_category=1, but LLM classifies all articles to "AI News" // max_items_per_category=1, but LLM classifies all articles to "AI News"
let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 1, true).await; let (user_id, session) = setup_user_with_settings(&app, vec!["AI News"], 1, true).await;
let source = serde_json::json!({"title": "Test Source", "url": "https://example.com/blog"}); let source_url = format!("{}/blog", mock_server.uri());
let source = serde_json::json!({"title": "Test Source", "url": source_url});
app.post_with_session("/api/v1/sources", &source, &session).await; app.post_with_session("/api/v1/sources", &source, &session).await;
let article_urls: Vec<String> = (1..=3) let article_urls: Vec<String> = (1..=3)

@@ -26,6 +26,7 @@ PG_PASS="testpassword"
PG_DB="ai_synth_test" PG_DB="ai_synth_test"
export TEST_DATABASE_URL="postgres://${PG_USER}:${PG_PASS}@${PG_HOST}:${PG_PORT}/${PG_DB}" export TEST_DATABASE_URL="postgres://${PG_USER}:${PG_PASS}@${PG_HOST}:${PG_PORT}/${PG_DB}"
export SKIP_SSRF_CHECK=1 # Allow wiremock on localhost for pipeline tests
# ── DB check mode ────────────────────────────────────────────────── # ── DB check mode ──────────────────────────────────────────────────
if [ "${1:-}" = "--db-check" ]; then if [ "${1:-}" = "--db-check" ]; then

Loading…
Cancel
Save