From 0b0702de396bc1de5a4af80169fa8c45c102d8e1 Mon Sep 17 00:00:00 2001 From: oabrivard Date: Mon, 23 Mar 2026 11:54:32 +0100 Subject: [PATCH] fix: strip null bytes from LLM output before saving to PostgreSQL JSONB LLM output occasionally contains \u0000 null bytes (e.g., "annonc\u0000...") which PostgreSQL rejects in JSONB columns. Added sanitize_json_null_bytes() that recursively strips null bytes from all string values before DB insert. Co-Authored-By: Claude Opus 4.6 (1M context) --- backend/src/services/synthesis.rs | 46 +++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/backend/src/services/synthesis.rs b/backend/src/services/synthesis.rs index c38b41d..d95b95a 100644 --- a/backend/src/services/synthesis.rs +++ b/backend/src/services/synthesis.rs @@ -358,6 +358,10 @@ async fn run_generation_inner( AppError::Internal(anyhow::anyhow!("Failed to serialize sections: {}", e)) })?; + // Strip \u0000 null bytes — LLM output occasionally contains them and + // PostgreSQL rejects them in JSONB columns. + let sections_json = sanitize_json_null_bytes(sections_json); + let synthesis = db::syntheses::create(&state.pool, user_id, &week, &sections_json).await?; @@ -368,6 +372,25 @@ async fn run_generation_inner( // Helper Functions // ─────────────────────────────────────────────────────────────────── +/// Recursively strip `\u0000` null bytes from JSON values. +/// +/// PostgreSQL rejects null bytes in JSONB text. LLM output occasionally +/// contains them (e.g., `Meta AI a annonc\u0000...`).
+///
+/// Object keys are scrubbed as well as string values, because PostgreSQL
+/// rejects `\u0000` in any JSONB text, keys included.
+fn sanitize_json_null_bytes(value: serde_json::Value) -> serde_json::Value {
+    match value {
+        // Scrub in place: `retain` reuses the existing allocation.
+        serde_json::Value::String(mut s) => {
+            s.retain(|c| c != '\0');
+            serde_json::Value::String(s)
+        }
+        serde_json::Value::Array(arr) => {
+            serde_json::Value::Array(arr.into_iter().map(sanitize_json_null_bytes).collect())
+        }
+        // Keys are JSONB text too, so a null byte in a key would still make
+        // the insert fail. If two keys become equal after stripping, the
+        // later entry wins when the map is rebuilt.
+        serde_json::Value::Object(map) => serde_json::Value::Object(
+            map.into_iter()
+                .map(|(mut k, v)| {
+                    k.retain(|c| c != '\0');
+                    (k, sanitize_json_null_bytes(v))
+                })
+                .collect(),
+        ),
+        // Null, Bool and Number carry no text and pass through unchanged.
+        other => other,
+    }
+}
+
 /// Emit a progress event via the watch channel.
 fn emit_progress(tx: &watch::Sender, step: &str, message: &str, percent: u8) {
     tx.send(ProgressEvent::Progress {
@@ -1265,4 +1288,27 @@ mod tests {
         let result = filter_homepage_urls(parsed);
         assert_eq!(result[0].1.len(), 1);
     }
+
+    #[test]
+    fn sanitize_null_bytes_in_json_strings() {
+        let json = serde_json::json!({
+            "title": "Hello\u{0000}World",
+            "items": [{"summary": "Text\u{0000}with\u{0000}nulls"}]
+        });
+        let sanitized = sanitize_json_null_bytes(json);
+        assert_eq!(sanitized["title"], "HelloWorld");
+        assert_eq!(sanitized["items"][0]["summary"], "Textwithnulls");
+    }
+
+    #[test]
+    fn sanitize_null_bytes_in_object_keys() {
+        let json = serde_json::json!({
+            "ti\u{0000}tle": "Hello",
+            "items": [{"sum\u{0000}mary": "Text"}]
+        });
+        let sanitized = sanitize_json_null_bytes(json);
+        assert_eq!(sanitized["title"], "Hello");
+        assert_eq!(sanitized["items"][0]["summary"], "Text");
+        assert!(sanitized.get("ti\u{0000}tle").is_none());
+    }
+
+    #[test]
+    fn sanitize_preserves_clean_json() {
+        let json = serde_json::json!({
+            "title": "Clean text",
+            "count": 42,
+            "active": true,
+            "items": [{"url": "https://example.com"}]
+        });
+        let sanitized = sanitize_json_null_bytes(json.clone());
+        assert_eq!(sanitized, json);
+    }
 }