From 71aaf22573508835c11766c2f8af8390496dc3a1 Mon Sep 17 00:00:00 2001 From: triton6564685 Date: Thu, 7 May 2026 22:08:02 +0800 Subject: [PATCH] =?UTF-8?q?fix(v0.18.2.fork.1):=20v26=20=E2=80=94=20heal?= =?UTF-8?q?=20string-encoded=20source=20configs=20before=20jsonb=5Fset?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prod LXC 107 deploy of v26 (2026-05-07) failed with SQLSTATE 22023 "cannot set path in scalar" because 6 of 7 sources had jsonb_typeof = 'string' instead of 'object'. Root cause is a pre-existing bug in sources.ts:211: await engine.executeRaw( `INSERT INTO sources (...) VALUES (..., $4::jsonb) ...`, [..., JSON.stringify(config)], ); postgres-js's unsafe() with $::jsonb cast double-encodes the JSON string — the cast lands as a JSON STRING scalar, not the intended object. Migration- inlined inserts (e.g. v17 'default' source) work correctly because they use literal '{"key":"val"}'::jsonb at SQL level. v26 was the first migration to hit jsonb_set on these legacy configs, which is why this surfaced now (drill on D-LXC fixture missed it because the fixture was empty + sources-add via CLI hit the bug but no further jsonb_set ran on those rows). Fix: prepend a Step 0 to v26 that unwraps any string-encoded config back to its object form via (config #>> '{}')::jsonb. Idempotent on already- object configs (filtered by jsonb_typeof). Byte-equivalent contents — the JSON parse step is information-preserving. Manual prod recovery (2026-05-07 14:05 UTC): unwrap UPDATE applied to LXC 107 BEFORE this commit, then v26 re-ran and applied cleanly. Post-state verified: 203 gstack-brain pages → 155 stock-dashboard + 40 memory-dashboard + 8 default-ambiguous, gstack-brain source dropped, default-ambiguous + gstack-meta sources created. This fork commit codifies the fix so future Postgres deploys (other dev boxes, fresh prod redeploys, the in-progress gbrain-mcp:v0.18.2-fork.1 image rebuild) self-heal automatically. 
Adds a regression test (string-encoded config) in test/migration-v26.test.ts. 14/14 tests pass. Follow-up TODO: fix sources.ts:211 to either pass an object directly (let postgres-js handle JSON serialisation) OR use the postgres.json() helper. Out of scope for this commit — the unwrap heals existing data; an upstream fix prevents new corruption. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/migrate.ts | 14 ++++++++++++++ test/migration-v26.test.ts | 31 +++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/src/core/migrate.ts b/src/core/migrate.ts index 5272578..08187a4 100644 --- a/src/core/migrate.ts +++ b/src/core/migrate.ts @@ -866,6 +866,20 @@ export const MIGRATIONS: Migration[] = [ version: 26, name: 'source_taxonomy_rewrite', sql: ` + -- Step 0: heal pre-existing data corruption from sources.ts:211. + -- gbrain CLI's runAdd writes config via $4::jsonb on a JSON.stringify()'d + -- value through postgres-js unsafe(), which double-encodes the payload — + -- the cast lands as a JSON STRING scalar instead of a JSON object. + -- Verified on prod LXC 107 (2026-05-07): 6 of 7 sources had jsonb_typeof + -- = 'string' (only the migration-inlined 'default' source was a true + -- object). jsonb_set() in subsequent steps fails on scalars with + -- "cannot set path in scalar" (SQLSTATE 22023). Unwrap is byte-equivalent + -- (parses the JSON string back to its underlying object form) and + -- idempotent on already-object configs (filtered by jsonb_typeof). + UPDATE sources + SET config = (config #>> '{}')::jsonb + WHERE jsonb_typeof(config) = 'string'; + -- Step 1: ensure tombstone source exists for slug-no-match fallbacks. 
INSERT INTO sources (id, name, config) VALUES ( diff --git a/test/migration-v26.test.ts b/test/migration-v26.test.ts index 28a143b..b4d34f7 100644 --- a/test/migration-v26.test.ts +++ b/test/migration-v26.test.ts @@ -159,6 +159,37 @@ describe('v26 — page reclassification', () => { }); }); +describe('v26 — string-encoded config heal (regression)', () => { + test('migration unwraps jsonb string scalar configs to objects before jsonb_set', async () => { + // Reproduces prod LXC 107 (2026-05-07) data corruption: gbrain CLI's + // sources.ts:211 INSERT via $::jsonb on JSON.stringify() output produces + // a JSON STRING scalar, not an object. jsonb_set on a scalar throws + // SQLSTATE 22023 'cannot set path in scalar'. v26 step 0 unwraps before + // the rest of the migration touches config. + await engine.executeRaw( + `INSERT INTO sources (id, name, config) VALUES + ('regression-string-cfg', 'regression-string-cfg', + '"{\\"federated\\":true,\\"slug_prefix_rules\\":[\\"regression/\\"]}"'::jsonb) + ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`, + ); + // Sanity: confirm we set up the bug condition. + const before = await engine.executeRaw<{ type: string }>( + `SELECT jsonb_typeof(config) AS type FROM sources WHERE id = 'regression-string-cfg'`, + ); + expect(before[0].type).toBe('string'); + + // Re-run v26: step 0 should unwrap, then the remaining steps proceed cleanly. + await engine.runMigration(26, v26Sql); + + const after = await engine.executeRaw<{ type: string; rules: string[] | null }>( + `SELECT jsonb_typeof(config) AS type, config->'slug_prefix_rules' AS rules FROM sources WHERE id = 'regression-string-cfg'`, + ); + expect(after[0].type).toBe('object'); + // Contents preserved byte-for-byte after unwrap. 
+ expect(after[0].rules).toEqual(['regression/']); + }); +}); + describe('v26 — idempotency (CR-6)', () => { test('re-running migration is a no-op: source distribution unchanged', async () => { const before = await engine.executeRaw<{ source_id: string; n: bigint }>(