You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
316 lines
11 KiB
TypeScript
316 lines
11 KiB
TypeScript
/**
 * E2E UAT: Synthesis generation with a real OpenAI API key.
 *
 * Exercises the full pipeline: settings, API key encryption, model resolution,
 * LLM call (search + rewrite), response parsing, and synthesis storage.
 *
 * Gated by OPENAI_TEST_API_KEY in e2e/.env.test — skips if not set.
 * Uses gpt-4o-mini to keep cost under $0.01 per run.
 */
|
|
|
|
import { test, expect } from '@playwright/test';
|
|
import type { Page } from '@playwright/test';
|
|
import * as dotenv from 'dotenv';
|
|
import * as path from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
import { loginAsUser } from '../helpers/auth';
|
|
|
|
// ESM modules have no built-in __dirname, so derive it from import.meta.url,
// then load the .env.test file that sits one directory above this spec.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const envFilePath = path.resolve(__dirname, '..', '.env.test');
dotenv.config({ path: envFilePath });

// Read after dotenv has populated process.env; undefined when the key is absent.
const { OPENAI_TEST_API_KEY: OPENAI_KEY } = process.env;
|
|
|
|
/**
 * Issue a JSON API request from inside the page's browser context.
 *
 * The request is made with `fetch` via `page.evaluate`, using
 * `credentials: 'same-origin'` so it carries the cookies the page already holds.
 *
 * @param page - Playwright page whose browser context performs the fetch.
 * @param method - HTTP method handed to `fetch`.
 * @param url - Request URL, passed to `fetch` unchanged.
 * @param body - Optional payload; serialized with `JSON.stringify` when truthy.
 * @returns The HTTP status plus the parsed JSON body (`null` for an empty body).
 * @throws If the response body is non-empty but is not valid JSON.
 */
async function apiCall(
  page: Page,
  method: string,
  url: string,
  body?: object,
): Promise<{ status: number; data: any }> {
  type FetchArgs = { method: string; url: string; body?: object };

  // Runs in the browser: must be self-contained, since only `args` crosses over.
  const runInBrowser = async (args: FetchArgs) => {
    const init: RequestInit = {
      method: args.method,
      headers: {
        'Content-Type': 'application/json',
        'X-Requested-With': 'XMLHttpRequest',
      },
      credentials: 'same-origin',
    };
    if (args.body) {
      init.body = JSON.stringify(args.body);
    }

    const response = await fetch(args.url, init);
    const raw = await response.text();

    // An empty body is reported as `data: null` rather than a parse failure.
    if (!raw) {
      return { status: response.status, data: null as unknown };
    }

    let payload: unknown;
    try {
      payload = JSON.parse(raw);
    } catch {
      throw new Error(
        `Expected JSON from ${args.method} ${args.url} (status ${response.status}), got: ${raw.slice(0, 200)}`
      );
    }
    return { status: response.status, data: payload };
  };

  return page.evaluate(runInBrowser, { method, url, body });
}
|
|
|
|
/**
 * Wait for a generation job to finish by listening to its SSE progress stream
 * from inside the page, and return the resulting synthesis id.
 *
 * The EventSource is always closed and the timeout always cleared before the
 * promise settles, whichever of completion, failure, or timeout happens first.
 *
 * @param page - Playwright page whose browser context opens the EventSource.
 * @param jobId - Job id interpolated into the progress endpoint URL.
 * @param timeoutMs - Maximum time to wait before giving up (default 120 s).
 * @returns The `synthesis_id` field of the `complete` event payload.
 * @throws 'Generation timed out' when no terminal event arrives in time;
 *   'Invalid JSON in complete event: …' when the `complete` payload is not JSON;
 *   'Generation failed: …' for a server-sent `error` event with a JSON payload;
 *   'Generation failed with unknown error' when that payload cannot be read;
 *   'SSE connection error' for a transport-level error (no payload).
 */
async function waitForGenerationComplete(
  page: Page,
  jobId: string,
  timeoutMs = 120_000,
): Promise<string> {
  return page.evaluate(
    ({ jobId, timeoutMs }: { jobId: string; timeoutMs: number }) =>
      new Promise<string>((resolve, reject) => {
        // Single teardown path: stop the timer and close the stream, then settle.
        // Used by every outcome so the connection never outlives the promise.
        const finish = (settle: () => void): void => {
          clearTimeout(timer);
          es.close();
          settle();
        };

        const timer = setTimeout(
          () => finish(() => reject(new Error('Generation timed out'))),
          timeoutMs,
        );

        const es = new EventSource(
          `/api/v1/syntheses/generate/${jobId}/progress`,
        );

        es.addEventListener('complete', (event: MessageEvent) => {
          finish(() => {
            try {
              const parsed = JSON.parse(event.data);
              resolve(parsed.synthesis_id);
            } catch {
              reject(new Error(`Invalid JSON in complete event: ${event.data}`));
            }
          });
        });

        // 'error' is both a server-sent event name used by this endpoint and the
        // native EventSource connection-error event. Only the former carries a
        // string `data` payload, so that is what tells them apart.
        es.addEventListener('error', (event: Event) => {
          const data: unknown = (event as MessageEvent).data;
          finish(() => {
            if (typeof data !== 'string') {
              reject(new Error('SSE connection error'));
              return;
            }
            try {
              const parsed = JSON.parse(data);
              reject(new Error(`Generation failed: ${parsed.message}`));
            } catch {
              reject(new Error('Generation failed with unknown error'));
            }
          });
        });
      }),
    { jobId, timeoutMs },
  );
}
|
|
|
|
/**
 * Live end-to-end run against the real OpenAI API: configures settings, stores
 * the key, seeds one source, triggers generation, then validates the stored
 * synthesis, its provenance records, and the LLM call logs.
 */
test.describe('Live generation with OpenAI', () => {
  // The whole suite is skipped when no real key is configured.
  test.skip(!OPENAI_KEY, 'OPENAI_TEST_API_KEY not set in e2e/.env.test');

  test('full generation pipeline produces valid synthesis', async ({
    page,
    request,
  }) => {
    test.setTimeout(180_000);

    // Limits sent in the settings payload and asserted against the result, kept
    // in one place so the two cannot drift apart.
    const maxItemsPerCategory = 4;
    const maxArticlesPerSource = 3;
    const minSummaryLength = 50;
    // Statuses treated as "the site refused this HEAD/bot request", which says
    // nothing about whether the article exists — NOTE(review): tune as needed.
    const headRefusedStatuses = [401, 403, 405, 429, 501];

    // Step 1: Set session cookie, then navigate to a stable page.
    // OpenAI provider is already enabled from the migration seed data.
    await loginAsUser(page);
    await page.goto('/', { waitUntil: 'domcontentloaded' });

    // Step 2: Configure settings
    const settingsResp = await apiCall(page, 'PUT', '/api/v1/settings', {
      theme: 'Intelligence Artificielle',
      max_age_days: 30,
      categories: ['AI News'],
      max_items_per_category: maxItemsPerCategory,
      max_articles_per_source: maxArticlesPerSource,
      search_agent_behavior: '',
      ai_provider: 'openai',
      ai_model: 'gpt-4o-mini',
      ai_model_websearch: 'gpt-4o-mini',
      use_llm_for_source_links: false,
      use_brave_search: false,
      article_history_days: 90,
      batch_size: 5,
    });
    expect(settingsResp.status).toBe(200);

    // Step 3: Store the real OpenAI API key
    const keyResp = await apiCall(page, 'POST', '/api/v1/user/api-keys', {
      provider_name: 'openai',
      api_key: OPENAI_KEY,
    });
    expect(keyResp.status).toBe(200);

    // Step 4: Clean up existing sources, then add a fresh one
    const existingSources = await apiCall(page, 'GET', '/api/v1/sources');
    if (Array.isArray(existingSources.data)) {
      for (const source of existingSources.data) {
        await apiCall(page, 'DELETE', `/api/v1/sources/${source.id}`);
      }
    }

    const sourceResp = await apiCall(page, 'POST', '/api/v1/sources', {
      title: 'OpenAI Blog',
      url: 'https://openai.com/blog',
    });
    expect(sourceResp.status).toBe(201);

    // Step 5: Trigger generation
    const genResp = await apiCall(page, 'POST', '/api/v1/syntheses/generate');
    expect(genResp.status).toBe(202);
    const jobId = genResp.data.job_id;
    expect(jobId).toBeTruthy();

    // Step 6: Wait for SSE completion
    const synthesisId = await waitForGenerationComplete(page, jobId);
    expect(synthesisId).toBeTruthy();

    // Step 7: Fetch the full synthesis
    const synthResp = await apiCall(
      page,
      'GET',
      `/api/v1/syntheses/${synthesisId}`,
    );
    expect(synthResp.status).toBe(200);
    const synthesis = synthResp.data;

    // Step 8: Validate structure
    expect(synthesis.status).toBe('completed');
    expect(synthesis.sections).toBeDefined();
    expect(synthesis.sections.length).toBeGreaterThanOrEqual(1);

    for (const section of synthesis.sections) {
      // Section has a title (category name)
      expect(section.title).toBeTruthy();
      expect(typeof section.title).toBe('string');

      // Section has items
      expect(section.items).toBeDefined();
      expect(section.items.length).toBeGreaterThanOrEqual(1);

      for (const item of section.items) {
        // Each item has a non-empty title
        expect(item.title).toBeTruthy();
        expect(typeof item.title).toBe('string');

        // Each item URL starts with http
        expect(item.url).toBeTruthy();
        expect(item.url.startsWith('http')).toBe(true);

        // Guard against one known hallucination pattern: Wikipedia links
        expect(item.url).not.toContain('wikipedia.org');

        // Summary is non-trivial once surrounding whitespace is ignored
        // (the trimmed check also covers the untrimmed length).
        expect(item.summary).toBeTruthy();
        expect(item.summary.trim().length).toBeGreaterThan(minSummaryLength);
      }
    }

    // ═══════════════════════════════════════════════════════════════
    // Comprehensive synthesis validation
    // ═══════════════════════════════════════════════════════════════
    const allUrls: string[] = [];
    const domainCounts: Record<string, number> = {};

    for (const section of synthesis.sections) {
      for (const item of section.items) {
        allUrls.push(item.url);
        try {
          const domain = new URL(item.url).hostname;
          domainCounts[domain] = (domainCounts[domain] || 0) + 1;
        } catch {
          // Unparseable URL: leave it out of the per-domain tally.
        }
      }

      // Category article count check (including Autre)
      expect(
        section.items.length,
        `items in section "${section.title}"`,
      ).toBeLessThanOrEqual(maxItemsPerCategory);
    }

    // No duplicate URLs across all sections
    const uniqueUrls = new Set(allUrls);
    expect(uniqueUrls.size).toBe(allUrls.length);

    // No domain exceeds max_articles_per_source
    for (const [domain, count] of Object.entries(domainCounts)) {
      expect(count, `articles from ${domain}`).toBeLessThanOrEqual(
        maxArticlesPerSource,
      );
    }

    // Verify a sample of article links actually work (Playwright request API, no CORS issues)
    for (const articleUrl of allUrls.slice(0, 3)) {
      let status: number;
      try {
        status = (await request.head(articleUrl)).status();
      } catch {
        // Network-level failure (DNS, TLS, timeout): best-effort check, skip.
        continue;
      }
      // Some sites block HEAD requests — skip rather than fail
      if (headRefusedStatuses.includes(status)) {
        continue;
      }
      // Asserted outside the try so a failing expectation is not swallowed.
      expect(status, `HEAD ${articleUrl}`).toBeGreaterThanOrEqual(200);
      expect(status, `HEAD ${articleUrl}`).toBeLessThan(400);
    }

    // ═══════════════════════════════════════════════════════════════
    // Provenance verification
    // ═══════════════════════════════════════════════════════════════
    const provResp = await apiCall(
      page,
      'GET',
      `/api/v1/syntheses/${synthesisId}/provenance`,
    );
    expect(provResp.status).toBe(200);
    const provenance = provResp.data;
    expect(Array.isArray(provenance)).toBe(true);
    expect(provenance.length).toBeGreaterThan(0);

    // At least some entries should be 'used'
    const usedEntries = provenance.filter((e: any) => e.status === 'used');
    expect(usedEntries.length).toBeGreaterThan(0);

    // Every used entry should have a synthesis_id matching this synthesis
    for (const entry of usedEntries) {
      expect(entry.synthesis_id).toBe(synthesisId);
      expect(entry.job_id).toBeTruthy();
    }

    // ═══════════════════════════════════════════════════════════════
    // LLM call logs verification
    // ═══════════════════════════════════════════════════════════════
    // The assertions above guarantee a first used entry with a truthy job_id,
    // so the log checks always run instead of being silently skipped.
    const logJobId = usedEntries[0].job_id;
    const logsResp = await apiCall(page, 'GET', `/api/v1/llm-logs/${logJobId}`);
    expect(logsResp.status).toBe(200);
    const logs = logsResp.data;
    expect(Array.isArray(logs)).toBe(true);
    expect(logs.length).toBeGreaterThan(0);

    // Should have at least a search call (classify_summarize may also
    // appear when the pipeline scrapes and classifies individual articles,
    // but native web grounding pipelines may only log 'search').
    const callTypes = logs.map((l: any) => l.call_type);
    expect(callTypes).toContain('search');

    // Each log entry should have model and timing
    for (const log of logs) {
      expect(log.model).toBeTruthy();
      expect(log.duration_ms).toBeGreaterThanOrEqual(0);
    }
  });
});
|