fix: strip null bytes from LLM output before saving to PostgreSQL JSONB

LLM output occasionally contains \u0000 null bytes (e.g., "annonc\u0000...")
which PostgreSQL rejects in JSONB columns. Added sanitize_json_null_bytes()
that recursively strips null bytes from all string values before DB insert.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
oabrivard 3 months ago
parent 3fe667591d
commit 0b0702de39

@ -358,6 +358,10 @@ async fn run_generation_inner(
AppError::Internal(anyhow::anyhow!("Failed to serialize sections: {}", e)) AppError::Internal(anyhow::anyhow!("Failed to serialize sections: {}", e))
})?; })?;
// Strip \u0000 null bytes — LLM output occasionally contains them and
// PostgreSQL rejects them in JSONB columns.
let sections_json = sanitize_json_null_bytes(sections_json);
let synthesis = let synthesis =
db::syntheses::create(&state.pool, user_id, &week, &sections_json).await?; db::syntheses::create(&state.pool, user_id, &week, &sections_json).await?;
@ -368,6 +372,25 @@ async fn run_generation_inner(
// Helper Functions // Helper Functions
// ─────────────────────────────────────────────────────────────────── // ───────────────────────────────────────────────────────────────────
/// Recursively strip `\u0000` null bytes from JSON values.
///
/// PostgreSQL rejects null bytes in JSONB text. LLM output occasionally
/// contains them (e.g., `Meta AI a annonc\u0000...`).
fn sanitize_json_null_bytes(value: serde_json::Value) -> serde_json::Value {
match value {
serde_json::Value::String(s) => serde_json::Value::String(s.replace('\0', "")),
serde_json::Value::Array(arr) => {
serde_json::Value::Array(arr.into_iter().map(sanitize_json_null_bytes).collect())
}
serde_json::Value::Object(map) => serde_json::Value::Object(
map.into_iter()
.map(|(k, v)| (k, sanitize_json_null_bytes(v)))
.collect(),
),
other => other,
}
}
/// Emit a progress event via the watch channel. /// Emit a progress event via the watch channel.
fn emit_progress(tx: &watch::Sender<ProgressEvent>, step: &str, message: &str, percent: u8) { fn emit_progress(tx: &watch::Sender<ProgressEvent>, step: &str, message: &str, percent: u8) {
tx.send(ProgressEvent::Progress { tx.send(ProgressEvent::Progress {
@ -1265,4 +1288,27 @@ mod tests {
let result = filter_homepage_urls(parsed); let result = filter_homepage_urls(parsed);
assert_eq!(result[0].1.len(), 1); assert_eq!(result[0].1.len(), 1);
} }
#[test]
fn sanitize_null_bytes_in_json_strings() {
    // Null characters appear both in a top-level string and in a string
    // nested inside an array of objects.
    let dirty = serde_json::json!({
        "title": "Hello\u{0000}World",
        "items": [{"summary": "Text\u{0000}with\u{0000}nulls"}]
    });

    let cleaned = sanitize_json_null_bytes(dirty);

    let title = &cleaned["title"];
    let nested_summary = &cleaned["items"][0]["summary"];
    assert_eq!(*title, "HelloWorld");
    assert_eq!(*nested_summary, "Textwithnulls");
}
#[test]
fn sanitize_preserves_clean_json() {
    // A document without null characters, covering string, number, boolean,
    // array and nested-object values, must come back unchanged.
    let expected = serde_json::json!({
        "title": "Clean text",
        "count": 42,
        "active": true,
        "items": [{"url": "https://example.com"}]
    });
    let input = expected.clone();

    let actual = sanitize_json_null_bytes(input);

    assert_eq!(actual, expected);
}
} }

Loading…
Cancel
Save