// ai_synth/e2e/tests/generation-live.spec.ts

/**
 * E2E UAT: Synthesis generation with a real OpenAI API key.
 *
 * Exercises the full pipeline: settings, API key encryption, model resolution,
 * LLM call (search + rewrite), response parsing, and synthesis storage.
 *
 * Gated by OPENAI_TEST_API_KEY in e2e/.env.test — skips if not set.
 * Uses gpt-4o-mini to keep cost under $0.01 per run.
 */

import { test, expect } from '@playwright/test';
import type { Page } from '@playwright/test';
import * as dotenv from 'dotenv';
import * as path from 'path';
import { fileURLToPath } from 'url';
import { loginAsUser } from '../helpers/auth';

// ESM modules have no built-in __dirname, so derive it from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// .env.test sits one level above this file's directory; load it into process.env.
const envTestPath = path.resolve(__dirname, '..', '.env.test');
dotenv.config({ path: envTestPath });

// Real OpenAI key used by the live test; the suite below is skipped when unset.
const OPENAI_KEY = process.env.OPENAI_TEST_API_KEY;

/**
 * Perform a JSON API request from inside the page, so the browser's own
 * session cookies are sent with it (`credentials: 'same-origin'`).
 *
 * @param page   Playwright page whose browser context issues the request.
 * @param method HTTP method passed straight to `fetch`.
 * @param url    Request URL, resolved by the browser against the current page.
 * @param body   Optional payload; JSON-stringified when provided.
 * @returns The HTTP status plus the parsed JSON body (`null` when the
 *          response body is empty).
 * @throws Error (raised inside the page) when a non-empty response body is
 *         not valid JSON; the message carries the first 200 characters.
 */
async function apiCall(
  page: Page,
  method: string,
  url: string,
  body?: object,
): Promise<{ status: number; data: any }> {
  // The callback is serialized and executed in the browser, so it must stay
  // self-contained: everything it needs arrives through the `req` argument.
  return page.evaluate(
    async (req: { method: string; url: string; body?: object }) => {
      const init: RequestInit = {
        method: req.method,
        headers: {
          'Content-Type': 'application/json',
          'X-Requested-With': 'XMLHttpRequest',
        },
        credentials: 'same-origin',
      };
      if (req.body) {
        init.body = JSON.stringify(req.body);
      }

      const response = await fetch(req.url, init);
      const raw = await response.text();

      // Empty bodies (e.g. from a DELETE) are reported as null data.
      if (!raw) {
        return { status: response.status, data: null };
      }

      let parsed: unknown;
      try {
        parsed = JSON.parse(raw);
      } catch {
        throw new Error(
          `Expected JSON from ${req.method} ${req.url} (status ${response.status}), got: ${raw.slice(0, 200)}`
        );
      }
      return { status: response.status, data: parsed };
    },
    { method, url, body },
  );
}

/**
 * Wait for a generation job's SSE progress stream to finish.
 *
 * Runs inside the page: opens an EventSource on
 * `/api/v1/syntheses/generate/${jobId}/progress` and settles on the first of:
 *  - a `complete` event: resolves with `synthesis_id` from its JSON payload;
 *  - a server-sent `error` event: rejects with `Generation failed: <message>`
 *    (or a generic message when the payload is not valid JSON);
 *  - a transport-level `error` (no payload): rejects with
 *    `SSE connection error`;
 *  - `timeoutMs` elapsing: rejects with `Generation timed out`.
 *
 * The timer is cleared and the EventSource closed on every path, including
 * the timeout, so no stream is left open in the page.
 *
 * @param page      Playwright page in whose context the EventSource is opened.
 * @param jobId     Job identifier interpolated into the progress URL.
 * @param timeoutMs Maximum time to wait for a terminal event (default 120 s).
 * @returns The `synthesis_id` carried by the `complete` event.
 */
async function waitForGenerationComplete(
  page: Page,
  jobId: string,
  timeoutMs = 120_000,
): Promise<string> {
  // The callback is serialized into the browser, so all helpers live inside it.
  return page.evaluate(
    async ({ jobId, timeoutMs }: { jobId: string; timeoutMs: number }) => {
      return new Promise<string>((resolve, reject) => {
        const es = new EventSource(
          `/api/v1/syntheses/generate/${jobId}/progress`,
        );

        const timer = setTimeout(() => {
          // Close the stream on timeout too; otherwise it stays open in the page.
          es.close();
          reject(new Error('Generation timed out'));
        }, timeoutMs);

        // Shared teardown for every terminal event.
        const finish = (): void => {
          clearTimeout(timer);
          es.close();
        };

        es.addEventListener('complete', (event: MessageEvent) => {
          finish();
          try {
            const parsed = JSON.parse(event.data);
            resolve(parsed.synthesis_id);
          } catch {
            reject(new Error(`Invalid JSON in complete event: ${event.data}`));
          }
        });

        // One handler for both kinds of 'error': a server-sent event named
        // "error" arrives as a MessageEvent with data, while a connection
        // failure is a plain Event. Registering a second handler via
        // `onerror` would fire for the same events and could never win.
        es.addEventListener('error', (event: Event) => {
          finish();
          if (!(event instanceof MessageEvent)) {
            reject(new Error('SSE connection error'));
            return;
          }
          try {
            const parsed = JSON.parse(event.data);
            reject(new Error(`Generation failed: ${parsed.message}`));
          } catch {
            reject(new Error('Generation failed with unknown error'));
          }
        });
      });
    },
    { jobId, timeoutMs },
  );
}

/**
 * Live end-to-end suite: drives the real generation pipeline through the HTTP
 * API and validates the stored synthesis, its provenance and its LLM call
 * logs. Skipped entirely when OPENAI_TEST_API_KEY is not configured.
 */
test.describe('Live generation with OpenAI', () => {
  test.skip(!OPENAI_KEY, 'OPENAI_TEST_API_KEY not set in e2e/.env.test');

  test('full generation pipeline produces valid synthesis', async ({
    page,
    request,
  }) => {
    test.setTimeout(180_000);

    // Limits sent in the settings payload and re-checked against the output,
    // kept in one place so the request and the assertions cannot drift apart.
    const maxItemsPerCategory = 4;
    const maxArticlesPerSource = 3;

    // Step 1: Set session cookie, then navigate to a stable page
    // OpenAI provider is already enabled from the migration seed data
    await loginAsUser(page);
    await page.goto('/', { waitUntil: 'domcontentloaded' });
    await page.waitForLoadState('domcontentloaded');

    // Step 2: Configure settings
    const settingsResp = await apiCall(page, 'PUT', '/api/v1/settings', {
      theme: 'Intelligence Artificielle',
      max_age_days: 30,
      categories: ['AI News'],
      max_items_per_category: maxItemsPerCategory,
      max_articles_per_source: maxArticlesPerSource,
      search_agent_behavior: '',
      ai_provider: 'openai',
      ai_model: 'gpt-4o-mini',
      ai_model_websearch: 'gpt-4o-mini',
      use_llm_for_source_links: false,
      use_brave_search: false,
      article_history_days: 90,
      batch_size: 5,
    });
    expect(settingsResp.status).toBe(200);

    // Step 3: Store the real OpenAI API key
    const keyResp = await apiCall(page, 'POST', '/api/v1/user/api-keys', {
      provider_name: 'openai',
      api_key: OPENAI_KEY,
    });
    expect(keyResp.status).toBe(200);

    // Step 4: Clean up existing sources, then add a fresh one
    const existingSources = await apiCall(page, 'GET', '/api/v1/sources');
    if (existingSources.data && Array.isArray(existingSources.data)) {
      for (const source of existingSources.data) {
        await apiCall(page, 'DELETE', `/api/v1/sources/${source.id}`);
      }
    }

    const sourceResp = await apiCall(page, 'POST', '/api/v1/sources', {
      title: 'OpenAI Blog',
      url: 'https://openai.com/blog',
    });
    expect(sourceResp.status).toBe(201);

    // Step 5: Trigger generation
    const genResp = await apiCall(
      page,
      'POST',
      '/api/v1/syntheses/generate',
    );
    expect(genResp.status).toBe(202);
    const jobId = genResp.data.job_id;
    expect(jobId).toBeTruthy();

    // Step 6: Wait for SSE completion
    const synthesisId = await waitForGenerationComplete(page, jobId);
    expect(synthesisId).toBeTruthy();

    // Step 7: Fetch the full synthesis
    const synthResp = await apiCall(
      page,
      'GET',
      `/api/v1/syntheses/${synthesisId}`,
    );
    expect(synthResp.status).toBe(200);
    const synthesis = synthResp.data;

    // Step 8: Validate structure
    expect(synthesis.status).toBe('completed');
    expect(synthesis.sections).toBeDefined();
    expect(synthesis.sections.length).toBeGreaterThanOrEqual(1);

    for (const section of synthesis.sections) {
      // Section has a title (category name)
      expect(section.title).toBeTruthy();
      expect(typeof section.title).toBe('string');

      // Section has items
      expect(section.items).toBeDefined();
      expect(section.items.length).toBeGreaterThanOrEqual(1);

      for (const item of section.items) {
        // Each item has a non-empty title
        expect(item.title).toBeTruthy();
        expect(typeof item.title).toBe('string');

        // Each item URL starts with http
        expect(item.url).toBeTruthy();
        expect(item.url.startsWith('http')).toBe(true);

        // No hallucinated URLs: should not point to Wikipedia
        expect(item.url).not.toContain('wikipedia.org');

        // Each item summary is non-trivial (> 50 chars once surrounding
        // whitespace is removed) — no empty or padding-only articles
        expect(item.summary).toBeTruthy();
        expect(item.summary.trim().length).toBeGreaterThan(50);
      }
    }

    // ═══════════════════════════════════════════════════════════════
    // Comprehensive synthesis validation
    // ═══════════════════════════════════════════════════════════════
    const allUrls: string[] = [];
    const domainCounts: Record<string, number> = {};

    for (const section of synthesis.sections) {
      for (const item of section.items) {
        allUrls.push(item.url);
        try {
          const domain = new URL(item.url).hostname;
          domainCounts[domain] = (domainCounts[domain] || 0) + 1;
        } catch {
          // Unparseable URL — left out of the per-domain tally only
        }
      }

      // Category article count check (including Autre)
      expect(section.items.length).toBeLessThanOrEqual(maxItemsPerCategory);
    }

    // No duplicate URLs across all sections
    const uniqueUrls = new Set(allUrls);
    expect(uniqueUrls.size).toBe(allUrls.length);

    // No domain exceeds max_articles_per_source
    for (const [domain, count] of Object.entries(domainCounts)) {
      expect(
        count,
        `domain ${domain} appears ${count} times`,
      ).toBeLessThanOrEqual(maxArticlesPerSource);
    }

    // Verify a sample of article links actually work (Playwright request API,
    // no CORS issues). Only the request itself is guarded: wrapping the
    // assertions in the same try/catch would swallow their failures and turn
    // this check into a no-op.
    const headRefusedStatuses = new Set([403, 405, 429]);
    for (const articleUrl of allUrls.slice(0, 3)) {
      let status: number;
      try {
        status = (await request.head(articleUrl)).status();
      } catch {
        // Network-level failure (DNS, TLS, timeout) — skip rather than fail
        continue;
      }

      // Some sites block HEAD requests or bots — that says nothing about
      // whether the article exists, so skip rather than fail
      if (headRefusedStatuses.has(status)) {
        continue;
      }

      expect(
        status,
        `HEAD ${articleUrl} returned HTTP ${status}`,
      ).toBeGreaterThanOrEqual(200);
      expect(
        status,
        `HEAD ${articleUrl} returned HTTP ${status}`,
      ).toBeLessThan(400);
    }

    // ═══════════════════════════════════════════════════════════════
    // Provenance verification
    // ═══════════════════════════════════════════════════════════════
    const provResp = await apiCall(page, 'GET', `/api/v1/syntheses/${synthesisId}/provenance`);
    expect(provResp.status).toBe(200);
    const provenance = provResp.data;
    expect(Array.isArray(provenance)).toBe(true);
    expect(provenance.length).toBeGreaterThan(0);

    // At least some entries should be 'used'
    const usedEntries = provenance.filter((e: any) => e.status === 'used');
    expect(usedEntries.length).toBeGreaterThan(0);

    // Every used entry should have a synthesis_id matching this synthesis
    for (const entry of usedEntries) {
      expect(entry.synthesis_id).toBe(synthesisId);
      expect(entry.job_id).toBeTruthy();
    }

    // ═══════════════════════════════════════════════════════════════
    // LLM call logs verification
    // ═══════════════════════════════════════════════════════════════
    const logJobId = usedEntries[0]?.job_id;
    if (logJobId) {
      const logsResp = await apiCall(page, 'GET', `/api/v1/llm-logs/${logJobId}`);
      expect(logsResp.status).toBe(200);
      const logs = logsResp.data;
      expect(Array.isArray(logs)).toBe(true);
      expect(logs.length).toBeGreaterThan(0);

      // Should have at least a search call (classify_summarize may also
      // appear when the pipeline scrapes and classifies individual articles,
      // but native web grounding pipelines may only log 'search').
      const callTypes = logs.map((l: any) => l.call_type);
      expect(callTypes).toContain('search');

      // Each log entry should have model and timing
      for (const log of logs) {
        expect(log.model).toBeTruthy();
        expect(log.duration_ms).toBeGreaterThanOrEqual(0);
      }
    }
  });
});