diff --git a/backend/tests/api_llm_logs_test.rs b/backend/tests/api_llm_logs_test.rs
new file mode 100644
index 0000000..a98dfae
--- /dev/null
+++ b/backend/tests/api_llm_logs_test.rs
@@ -0,0 +1,141 @@
+//! Integration tests for the LLM logs endpoint (GAP-2).
+//!
+//! Tests:
+//! - GET /api/v1/llm-logs/:job_id
+//!
+//! The handler first checks that a synthesis with the given job_id exists and
+//! belongs to the authenticated user, then returns the associated log entries.
+//! A random/unknown job_id therefore returns 404 (not an empty array).
+//!
+//! Requires a running Postgres instance. Set `TEST_DATABASE_URL` to run.
+
+mod common;
+
+use axum::http::StatusCode;
+
+/// Reports whether `TEST_DATABASE_URL` can be read from the environment.
+/// Every test in this file returns early (skips) when this is `false`.
+fn require_test_db() -> bool {
+    let database_url = std::env::var("TEST_DATABASE_URL");
+    database_url.is_ok()
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Auth (1 test)
+// ═══════════════════════════════════════════════════════════════════════════
+
+/// A request sent with the placeholder token `invalid-session-token` must be
+/// answered with 401 and an `unauthorized` error body.
+#[tokio::test]
+async fn get_llm_logs_without_auth_returns_401() {
+    if !require_test_db() {
+        eprintln!("SKIPPED: TEST_DATABASE_URL not set");
+        return;
+    }
+
+    let app = common::TestApp::new().await;
+    // Any job id will do: the assertions below only look at the 401 response.
+    let logs_url = format!("/api/v1/llm-logs/{}", uuid::Uuid::new_v4());
+
+    let (status, body) = app
+        .get_with_session(&logs_url, "invalid-session-token")
+        .await;
+
+    assert_eq!(
+        status,
+        StatusCode::UNAUTHORIZED,
+        "GET /llm-logs/:job_id without auth should return 401"
+    );
+    assert_eq!(body["error"], "unauthorized");
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Not found (1 test)
+// ═══════════════════════════════════════════════════════════════════════════
+
+/// The handler first verifies the job_id maps to a synthesis owned by the
+/// authenticated user. A random UUID that has no matching synthesis in the DB
+/// returns 404. This is intentional — it prevents enumeration of job IDs.
+#[tokio::test] +async fn get_llm_logs_returns_404_for_unknown_job() { + if !require_test_db() { + eprintln!("SKIPPED: TEST_DATABASE_URL not set"); + return; + } + + let app = common::TestApp::new().await; + let (_user_id, session) = app + .create_authenticated_user("llm-logs-404@example.com") + .await; + + let fake_job_id = uuid::Uuid::new_v4(); + let (status, body) = app + .get_with_session(&format!("/api/v1/llm-logs/{}", fake_job_id), &session) + .await; + + assert_eq!( + status, + StatusCode::NOT_FOUND, + "GET /llm-logs/:job_id for an unknown job_id should return 404" + ); + assert_eq!(body["error"], "not_found"); +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Happy path (1 test) +// ═══════════════════════════════════════════════════════════════════════════ + +/// Verify that when a synthesis exists for the given job_id, the endpoint +/// returns 200 with a JSON array (the log entries, which may be empty if no +/// LLM calls were recorded for the synthesis created directly via helper). +#[tokio::test] +async fn get_llm_logs_returns_array_for_known_job() { + if !require_test_db() { + eprintln!("SKIPPED: TEST_DATABASE_URL not set"); + return; + } + + let app = common::TestApp::new().await; + let (user_id, session) = app + .create_authenticated_user("llm-logs-array@example.com") + .await; + + // Insert a test synthesis with a known job_id directly into the database. + // The `insert_test_synthesis` helper uses a random job_id internally; we + // need to insert our own to control the job_id used for the log lookup. 
+ let job_id = uuid::Uuid::new_v4(); + let sections = serde_json::json!([{ + "title": "AI News", + "items": [{"title": "Article 1", "url": "https://example.com/1", "summary": "Summary"}] + }]); + let synthesis_id: (uuid::Uuid,) = sqlx::query_as( + "INSERT INTO syntheses (user_id, week, sections, status, job_id) + VALUES ($1, $2, $3, 'completed', $4) + RETURNING id", + ) + .bind(user_id) + .bind("2026-W13") + .bind(§ions) + .bind(job_id) + .fetch_one(&app.pool) + .await + .expect("Failed to insert test synthesis"); + let _ = synthesis_id; // verify it was inserted; we only need the job_id for the request + + let (status, body) = app + .get_with_session(&format!("/api/v1/llm-logs/{}", job_id), &session) + .await; + + assert_eq!( + status, + StatusCode::OK, + "GET /llm-logs/:job_id for a known synthesis should return 200" + ); + assert!( + body.as_array().is_some(), + "Response should be a JSON array, got: {}", body + ); + // No LLM calls were made for this synthesis (inserted directly), so the + // array is empty — but the important thing is it's a valid array. + assert!( + body.as_array().unwrap().is_empty(), + "Log array should be empty for a synthesis with no recorded LLM calls" + ); +} diff --git a/backend/tests/api_stop_generation_test.rs b/backend/tests/api_stop_generation_test.rs new file mode 100644 index 0000000..b1e88c0 --- /dev/null +++ b/backend/tests/api_stop_generation_test.rs @@ -0,0 +1,292 @@ +//! Integration tests for the stop generation endpoint (GAP-1). +//! +//! Tests: +//! - POST /api/v1/syntheses/generate/:job_id/stop — stop a running job +//! +//! Covers authentication, ownership isolation, non-existent jobs, +//! and stopping an active job. +//! +//! Requires a running Postgres instance. Set `TEST_DATABASE_URL` to run. 
+
+mod common;
+
+use axum::http::StatusCode;
+
+/// Reports whether `TEST_DATABASE_URL` can be read from the environment.
+/// Every test in this file returns early (skips) when this is `false`.
+fn require_test_db() -> bool {
+    let database_url = std::env::var("TEST_DATABASE_URL");
+    database_url.is_ok()
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Auth (1 test)
+// ═══════════════════════════════════════════════════════════════════════════
+
+/// A stop request sent with the placeholder token `invalid-session-token`
+/// must be answered with 401 and an `unauthorized` error body.
+#[tokio::test]
+async fn stop_without_auth_returns_401() {
+    if !require_test_db() {
+        eprintln!("SKIPPED: TEST_DATABASE_URL not set");
+        return;
+    }
+
+    let app = common::TestApp::new().await;
+    // Any job id will do: the assertions below only look at the 401 response.
+    let stop_url = format!(
+        "/api/v1/syntheses/generate/{}/stop",
+        uuid::Uuid::new_v4()
+    );
+    let empty_payload = serde_json::json!({});
+
+    let (status, body) = app
+        .post_with_session(&stop_url, &empty_payload, "invalid-session-token")
+        .await;
+
+    assert_eq!(
+        status,
+        StatusCode::UNAUTHORIZED,
+        "POST /syntheses/generate/:id/stop without auth should return 401"
+    );
+    assert_eq!(body["error"], "unauthorized");
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Not found (1 test)
+// ═══════════════════════════════════════════════════════════════════════════
+
+/// An authenticated user stopping a job id that was never triggered gets 404
+/// with a `not_found` error body.
+#[tokio::test]
+async fn stop_nonexistent_job_returns_404() {
+    if !require_test_db() {
+        eprintln!("SKIPPED: TEST_DATABASE_URL not set");
+        return;
+    }
+
+    let app = common::TestApp::new().await;
+    let (_user_id, session) = app
+        .create_authenticated_user("stop-404@example.com")
+        .await;
+
+    // A freshly generated UUID was never handed out by the generate endpoint.
+    let stop_url = format!(
+        "/api/v1/syntheses/generate/{}/stop",
+        uuid::Uuid::new_v4()
+    );
+    let empty_payload = serde_json::json!({});
+
+    let (status, body) = app
+        .post_with_session(&stop_url, &empty_payload, &session)
+        .await;
+
+    assert_eq!(
+        status,
+        StatusCode::NOT_FOUND,
+        "Stopping a non-existent job should return 404"
+    );
+    assert_eq!(body["error"], "not_found");
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Stop active job (1 test)
+// ═══════════════════════════════════════════════════════════════════════════
+
+/// Verify that stopping an active generation job returns 200.
+///
+/// The generation will fail at the LLM call (fake API key), but the job_id
+/// is registered in the job store immediately on trigger, so the stop
+/// endpoint should find it and return 200.
+#[tokio::test]
+async fn stop_active_job_returns_200() {
+    if !require_test_db() {
+        eprintln!("SKIPPED: TEST_DATABASE_URL not set");
+        return;
+    }
+
+    let app = common::TestApp::new().await;
+    let (_user_id, session) = app
+        .create_authenticated_user("stop-active@example.com")
+        .await;
+
+    // Step 1: save provider settings for the user.
+    let provider_settings = serde_json::json!({
+        "max_articles_per_source": 3,
+        "max_links_per_source": 8,
+        "use_brave_search": false,
+        "article_history_days": 90,
+        "batch_size": 5,
+        "source_extraction_window": 3,
+        "search_agent_behavior": "",
+        "ai_provider": "openai",
+        "ai_model": "",
+        "ai_model_websearch": "",
+        "rate_limit_max_requests": null,
+        "rate_limit_time_window_seconds": null
+    });
+    let (settings_status, _) = app
+        .put_with_session("/api/v1/settings", &provider_settings, &session)
+        .await;
+    assert_eq!(settings_status, StatusCode::OK, "Settings save should succeed");
+
+    // Step 2: create the theme the generation will run for.
+    let theme_payload = serde_json::json!({
+        "name": "Stop Test Theme",
+        "theme": "Intelligence Artificielle",
+        "categories": ["AI News"],
+        "max_items_per_category": 4,
+        "max_age_days": 7,
+        "summary_length": 3
+    });
+    let (theme_status, theme_json) = app
+        .post_with_session("/api/v1/themes", &theme_payload, &session)
+        .await;
+    assert_eq!(theme_status, StatusCode::CREATED, "Theme creation should succeed");
+    let theme_id = theme_json["id"].as_str().expect("Theme should have an id");
+
+    // Step 3: store a fake API key so the pipeline can start.
+    let key_payload = serde_json::json!({
+        "provider_name": "openai",
+        "api_key": "sk-fake-test-key-for-stop-test"
+    });
+    let (key_status, _) = app
+        .post_with_session("/api/v1/user/api-keys", &key_payload, &session)
+        .await;
+    assert_eq!(key_status, StatusCode::OK, "API key store should succeed");
+
+    // Step 4: add a source so the pipeline has something to process.
+    let source_payload = serde_json::json!({
+        "title": "Stop Test Source",
+        "url": "https://example.com/blog",
+        "theme_id": theme_id
+    });
+    let (source_status, _) = app
+        .post_with_session("/api/v1/sources", &source_payload, &session)
+        .await;
+    assert_eq!(source_status, StatusCode::CREATED, "Source creation should succeed");
+
+    // Step 5: trigger generation — returns 202 immediately, job runs async.
+    let generate_payload = serde_json::json!({ "theme_id": theme_id });
+    let (gen_status, gen_json) = app
+        .post_with_session("/api/v1/syntheses/generate", &generate_payload, &session)
+        .await;
+    assert_eq!(
+        gen_status,
+        StatusCode::ACCEPTED,
+        "Generation trigger should return 202"
+    );
+    let job_id = gen_json["job_id"].as_str().expect("should have job_id");
+
+    // Step 6: stop the job right away — it's registered in the job store at
+    // trigger time, so this should succeed even if generation hasn't finished.
+    let stop_url = format!("/api/v1/syntheses/generate/{}/stop", job_id);
+    let empty_payload = serde_json::json!({});
+    let (stop_status, _) = app
+        .post_with_session(&stop_url, &empty_payload, &session)
+        .await;
+
+    assert_eq!(
+        stop_status,
+        StatusCode::OK,
+        "Stopping an active job should return 200"
+    );
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Ownership isolation (1 test)
+// ═══════════════════════════════════════════════════════════════════════════
+
+/// Verify that User B cannot stop User A's generation job.
+///
+/// The stop endpoint uses `cancel_job(job_id, user_id)` which checks ownership,
+/// so it returns 404 if the job exists but belongs to a different user.
+#[tokio::test] +async fn stop_other_users_job_returns_404() { + if !require_test_db() { + eprintln!("SKIPPED: TEST_DATABASE_URL not set"); + return; + } + + let app = common::TestApp::new().await; + + // User A: create user + settings + theme + api key, then trigger generation + let (_user_a_id, session_a) = app + .create_authenticated_user("stop-owner-a@example.com") + .await; + let (_user_b_id, session_b) = app + .create_authenticated_user("stop-owner-b@example.com") + .await; + + // Configure User A's settings + let settings = serde_json::json!({ + "max_articles_per_source": 3, + "max_links_per_source": 8, + "use_brave_search": false, + "article_history_days": 90, + "batch_size": 5, + "source_extraction_window": 3, + "search_agent_behavior": "", + "ai_provider": "openai", + "ai_model": "", + "ai_model_websearch": "", + "rate_limit_max_requests": null, + "rate_limit_time_window_seconds": null + }); + let (settings_status, _) = app + .put_with_session("/api/v1/settings", &settings, &session_a) + .await; + assert_eq!(settings_status, StatusCode::OK, "User A settings save should succeed"); + + // Create theme for User A + let theme_body = serde_json::json!({ + "name": "Owner A Theme", + "theme": "Intelligence Artificielle", + "categories": ["AI News"], + "max_items_per_category": 4, + "max_age_days": 7, + "summary_length": 3 + }); + let (theme_status, theme_resp) = app + .post_with_session("/api/v1/themes", &theme_body, &session_a) + .await; + assert_eq!(theme_status.as_u16(), 201, "User A theme creation should succeed"); + let theme_id_a = theme_resp["id"].as_str().expect("Theme should have an id"); + + // Store fake API key for User A + let key_body = serde_json::json!({ + "provider_name": "openai", + "api_key": "sk-fake-test-key-for-ownership-test" + }); + let (key_status, _) = app + .post_with_session("/api/v1/user/api-keys", &key_body, &session_a) + .await; + assert_eq!(key_status, StatusCode::OK, "User A API key store should succeed"); + + // Add source for 
User A + let source_body = serde_json::json!({ + "title": "Owner A Source", + "url": "https://example-a.com/blog", + "theme_id": theme_id_a + }); + let (source_status, _) = app + .post_with_session("/api/v1/sources", &source_body, &session_a) + .await; + assert_eq!(source_status, StatusCode::CREATED, "User A source creation should succeed"); + + // User A triggers generation + let gen_body = serde_json::json!({ "theme_id": theme_id_a }); + let (gen_status, gen_resp) = app + .post_with_session("/api/v1/syntheses/generate", &gen_body, &session_a) + .await; + assert_eq!(gen_status, StatusCode::ACCEPTED, "User A generation trigger should return 202"); + let job_id_a = gen_resp["job_id"].as_str().expect("should have job_id"); + + // User B tries to stop User A's job — should return 404 (ownership check) + let (stop_status, stop_body) = app + .post_with_session( + &format!("/api/v1/syntheses/generate/{}/stop", job_id_a), + &serde_json::json!({}), + &session_b, + ) + .await; + + assert_eq!( + stop_status, + StatusCode::NOT_FOUND, + "User B should not be able to stop User A's job (expected 404)" + ); + assert_eq!(stop_body["error"], "not_found"); +} diff --git a/backend/tests/pipeline_test.rs b/backend/tests/pipeline_test.rs index 7b68f8f..6301b88 100644 --- a/backend/tests/pipeline_test.rs +++ b/backend/tests/pipeline_test.rs @@ -500,3 +500,212 @@ async fn article_history_dedup_prevents_repeat_articles() { "Second run should have history-deduped articles (got 0)" ); } + +// ── GAP-08: Preferred source ordering ───────────────────────────────── +// +// The pipeline places preferred sources before non-preferred ones in the +// processing order (`ordered_sources = [preferred, non_preferred].concat()`). +// Within each wave, articles from preferred source URLs are also placed first +// before being shuffled independently within their group (preferred_urls and +// other_urls are each shuffled separately). 
+// +// Deterministic ordering at the individual article level cannot be guaranteed +// because articles within the preferred group are randomly shuffled. +// +// What we can verify deterministically: +// - With a preferred source and a non-preferred source, both contribute +// articles to the synthesis (i.e., preferred ordering does not prevent +// non-preferred sources from being processed). +// - The article_history table records entries from both sources. +// +// A test that tries to assert "article from preferred source appears before +// article from non-preferred source in the synthesis" would be flaky due to +// the intentional shuffle within each group. + +#[tokio::test] +async fn preferred_sources_processed_first() { + let app = common::TestApp::new().await; + + // Set up two source pages on the mock server: + // /blog-a (preferred) with /article-pref + // /blog-b (non-preferred) with /article-norm + let server = wiremock::MockServer::start().await; + let base = server.uri(); + + // Preferred source page + Mock::given(method("GET")) + .and(path("/blog-a")) + .respond_with(ResponseTemplate::new(200).set_body_string(format!( + r#"Preferred Article"# + ))) + .mount(&server) + .await; + + // Non-preferred source page + Mock::given(method("GET")) + .and(path("/blog-b")) + .respond_with(ResponseTemplate::new(200).set_body_string(format!( + r#"Normal Article"# + ))) + .mount(&server) + .await; + + // Article pages + Mock::given(method("GET")) + .and(path("/article-pref")) + .respond_with(ResponseTemplate::new(200).set_body_string( + r#"Preferred Article +

This is a preferred article about artificial intelligence research.

"# + .to_string(), + )) + .mount(&server) + .await; + + Mock::given(method("GET")) + .and(path("/article-norm")) + .respond_with(ResponseTemplate::new(200).set_body_string( + r#"Normal Article +

This is a normal article about machine learning and AI systems.

"# + .to_string(), + )) + .mount(&server) + .await; + + let email = format!("preferred-order-{}@test.com", uuid::Uuid::new_v4()); + let (user_id, session) = app.create_authenticated_user(&email).await; + + // Create theme + let theme_body = serde_json::json!({ + "name": "Preferred Test Theme", + "theme": "Intelligence Artificielle", + "categories": ["AI News"], + "max_items_per_category": 10, + "max_age_days": 365, + "summary_length": 1 + }); + let (theme_status, theme_resp) = app + .post_with_session("/api/v1/themes", &theme_body, &session) + .await; + assert_eq!(theme_status.as_u16(), 201, "Theme creation should succeed"); + let theme_id: uuid::Uuid = theme_resp["id"].as_str().unwrap().parse().unwrap(); + + // Settings: batch_size=1, source_extraction_window=10 so both sources + // are processed in a single wave, article_history_days=0 to disable dedup + let settings = serde_json::json!({ + "max_articles_per_source": 10, + "max_links_per_source": 8, + "use_brave_search": false, + "article_history_days": 0, + "batch_size": 1, + "source_extraction_window": 10, + "search_agent_behavior": "", + "ai_provider": "", + "ai_model": "", + "ai_model_websearch": "", + "rate_limit_max_requests": null, + "rate_limit_time_window_seconds": null + }); + let (settings_status, _) = app + .put_with_session("/api/v1/settings", &settings, &session) + .await; + assert_eq!(settings_status.as_u16(), 200, "Settings save should succeed"); + + // Create source A (will be marked preferred) + let source_a_body = serde_json::json!({ + "title": "Source A (preferred)", + "url": format!("{}/blog-a", base), + "theme_id": theme_id.to_string() + }); + let (status_a, resp_a) = app + .post_with_session("/api/v1/sources", &source_a_body, &session) + .await; + assert!(status_a.is_success(), "Source A creation should succeed"); + let source_a_id = resp_a["id"].as_str().expect("Source A should have an id"); + + // Create source B (non-preferred) + let source_b_body = serde_json::json!({ + "title": "Source 
B (normal)", + "url": format!("{}/blog-b", base), + "theme_id": theme_id.to_string() + }); + let (status_b, _) = app + .post_with_session("/api/v1/sources", &source_b_body, &session) + .await; + assert!(status_b.is_success(), "Source B creation should succeed"); + + // Mark source A as preferred + let pref_body = serde_json::json!({ "source_ids": [source_a_id] }); + let (pref_status, _) = app + .put_with_session("/api/v1/sources/preferred", &pref_body, &session) + .await; + assert_eq!(pref_status.as_u16(), 200, "Setting preferred sources should succeed"); + + // Run the pipeline + let mock_provider = MockLlmProvider::new() + .with_default_category("AI News") + .into_arc(); + + let job_id = uuid::Uuid::new_v4(); + let (tx, _rx) = make_progress_channel(); + + let state = ai_synth_backend::app_state::AppState::new( + app.config.clone(), + app.pool.clone(), + reqwest::Client::new(), + ); + + let result = synthesis::run_generation_inner( + job_id, + &state, + user_id, + theme_id, + &tx, + Some(mock_provider), + &AtomicBool::new(false), + ) + .await; + + assert!(result.is_ok(), "Generation should succeed: {:?}", result.err()); + + // Verify both sources contributed articles to article_history. + // This confirms the preferred-first ordering does not prevent non-preferred + // sources from being processed. Asserting the exact order within the + // synthesis is not done here because articles within each group are + // randomly shuffled by the pipeline. 
+ let used_count: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM article_history WHERE user_id = $1 AND job_id = $2 AND status = 'used'", + ) + .bind(user_id) + .bind(job_id) + .fetch_one(&app.pool) + .await + .unwrap(); + + assert!( + used_count.0 >= 2, + "Both preferred and non-preferred source articles should appear in history (got {})", + used_count.0 + ); + + // Verify the synthesis has articles from both sources + let synthesis_id = result.unwrap(); + let row: (serde_json::Value,) = + sqlx::query_as("SELECT sections FROM syntheses WHERE id = $1") + .bind(synthesis_id) + .fetch_one(&app.pool) + .await + .unwrap(); + + let sections: Vec = serde_json::from_value(row.0).unwrap(); + let total_items: usize = sections + .iter() + .filter_map(|s| s["items"].as_array()) + .map(|items| items.len()) + .sum(); + + assert!( + total_items >= 2, + "Synthesis should contain articles from both sources (got {})", + total_items + ); +}