Files
gbrain/test/sync-up-to-date-stamping.test.ts
triton6564685 7221dad83b fix(v0.18.2.fork.1): two pre-existing bugs surfaced by PW 1 part 2 prod deploy
Item 1 — sources.ts triple INSERT/UPDATE postgres-js double-encoding (root cause):
  Sites: src/commands/sources.ts:211 (runAdd), :471 (runUpdate), :407 (runFederate)
  Pattern: `JSON.stringify(config)` + `$N::jsonb` cast via `engine.executeRaw`
  → postgres-js's `unsafe()` API auto-encodes string params on `::jsonb` cast,
  re-stringifies the JSON content as a JSON STRING literal, lands in DB as
  jsonb_typeof = 'string' (not 'object'). Subsequent `jsonb_set()` migrations
  throw SQLSTATE 22023 'cannot set path in scalar'.

  Empirical verification (D-LXC fixture 189, 2026-05-07):
    Variant 1: `JSON.stringify(o)` + `$N::jsonb`           → string ✗ (current)
    Variant 2: object `o`           + `$N::jsonb`           → object ✓
    Variant 3: `JSON.stringify(o)` + no cast               → string ✗
    Variant 4: `JSON.stringify(o)` + `($N::text)::jsonb`   → object ✓ (this fix)

  Fix: `($N::text)::jsonb` double cast forces postgres-js to send param
  verbatim as TEXT (not jsonb-typed), then SQL re-parses to object at column
  boundary. Variant 4 over Variant 2 because it's defensive across postgres-js
  versions and the `unsafe()` API contract.

  Pairs with v26 step 0 healing (fork commit 71aaf22) which recovers
  pre-existing string-encoded prod data. After this commit, NEW sources
  written by `gbrain sources add` / `sources update` / `sources federate`
  land as objects directly, no heal needed for newly created rows.

  Test: e2e jsonb-roundtrip extended with sources INSERT/UPDATE coverage +
  source-grep tripwire that flags any future regressions.

Item 2 — sync.ts up_to_date path fails to advance last_sync_at:
  Site: src/commands/sync.ts:211-221 performSync `lastCommit === headCommit`
  branch returns immediately without updating sources.last_sync_at. Quiet
  sources (read-mostly repos) keep stale last_sync_at indefinitely; drift
  monitor (gbrain-projects-drift.sh) flags them stale even though the sync
  cron is firing every tick.

  Fix: advance last_sync_at on up_to_date branch via direct UPDATE (only
  last_sync_at, not last_commit since the commit anchor is genuinely
  unchanged). Preserves drift contract: "is the sync cron alive?" not
  "did the remote add commits?".

  Surfaced 2026-05-07 PW 1 part 2 prod deploy on LXC 107 — first drift
  tick post-deploy reported stock-dashboard 'stale 6197min ago' 30 seconds
  after a successful sync tick.

  Test: tests/sync-up-to-date-stamping.test.ts (3 cases) — quiet repo
  bumps last_sync_at, last_commit anchor stable, legacy non-sourceId path
  no-throws + records sync.last_run.

Both bugs were pre-existing (not introduced by PW 1 part 2 fork patches).
Both surfaced during prod deploy because v26 was the first migration to
hit jsonb_set on long-existing string-encoded configs, and PW 1 part 2's
new drift monitor read sources.last_sync_at directly (vs sync.sh's own
audit log in the prior implementation).

88/88 tests pass across allowlist / migration-v26 / sync-walk-dispatch /
sync-up-to-date-stamping / manifest-routing / manifest-edge-cases /
source-resolver / brain-allowlist / ingest-log-source-id.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 22:22:13 +08:00

158 lines
5.7 KiB
TypeScript

/**
* v0.18.2.fork.1 — sync.ts up_to_date path advances last_sync_at.
*
* Pre-fix: when `lastCommit === headCommit` (no new commits since last sync),
* performSync early-returned without touching the source row. Quiet sources
* (read-mostly repos like claude-config / personal-knowledge / subagent-writes)
* kept stale `last_sync_at` forever; drift monitor (gbrain-projects-drift.sh)
* false-flagged them as "stale (Nmin ago, threshold 60min)" even though sync
* cron was firing every 10 min and pulling correctly.
*
* Fix: advance last_sync_at on up_to_date path so drift's contract holds:
* "is the sync cron alive?" (real behavior), not "did the remote add commits?"
* (red-herring proxy).
*
* Surfaced 2026-05-07 PW 1 part 2 prod deploy on LXC 107 — first drift tick
* post-deploy reported stock-dashboard "stale 6197min ago" 30 seconds after
* a successful sync tick.
*/
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import { execFileSync } from 'child_process';
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
import { performSync } from '../src/commands/sync.ts';
let engine: PGLiteEngine;
const cleanupDirs: string[] = [];
function git(cwd: string, ...args: string[]): string {
return execFileSync('git', ['-C', cwd, ...args], { encoding: 'utf-8' }).trim();
}
function makeFixtureRepo(): string {
const dir = mkdtempSync(join(tmpdir(), 'gbrain-syncstamp-'));
cleanupDirs.push(dir);
git(dir, 'init', '--quiet', '--initial-branch=main');
git(dir, 'config', 'user.email', 'test@example.com');
git(dir, 'config', 'user.name', 'test');
writeFileSync(
join(dir, 'note.md'),
`---\ntitle: note\ntype: note\n---\nbody\n`,
);
git(dir, 'add', '.');
git(dir, 'commit', '--quiet', '-m', 'init');
return dir;
}
beforeAll(async () => {
engine = new PGLiteEngine();
await engine.connect({ type: 'pglite' } as never);
await engine.initSchema();
});
afterAll(async () => {
await engine.disconnect();
for (const d of cleanupDirs) {
try { rmSync(d, { recursive: true, force: true }); } catch { /* best-effort */ }
}
});
describe('sync up_to_date path — advances last_sync_at', () => {
test('quiet repo (no new commits) bumps last_sync_at on subsequent sync', async () => {
const repo = makeFixtureRepo();
const headCommit = git(repo, 'rev-parse', 'HEAD');
const sourceId = 'syncstamp-quiet';
// Provision a source row anchored at HEAD with a stale last_sync_at one
// hour in the past. Mirrors the prod state for quiet sources where the
// last actual write happened well before the next idle tick.
await engine.executeRaw(
`INSERT INTO sources (id, name, local_path, last_commit, last_sync_at, config)
VALUES ($1, $1, $2, $3, now() - interval '1 hour', '{"federated": true}'::jsonb)
ON CONFLICT (id) DO UPDATE SET
local_path = EXCLUDED.local_path,
last_commit = EXCLUDED.last_commit,
last_sync_at = EXCLUDED.last_sync_at`,
[sourceId, repo, headCommit],
);
const before = await engine.executeRaw<{ last_sync_at: Date | string }>(
`SELECT last_sync_at FROM sources WHERE id = $1`,
[sourceId],
);
const beforeMs = new Date(before[0].last_sync_at as string).getTime();
// Trigger a sync where HEAD is unchanged from anchor → up_to_date branch.
const result = await performSync(engine, {
repoPath: repo,
noPull: true,
noEmbed: true,
sourceId,
});
expect(result.status).toBe('up_to_date');
const after = await engine.executeRaw<{ last_sync_at: Date | string }>(
`SELECT last_sync_at FROM sources WHERE id = $1`,
[sourceId],
);
const afterMs = new Date(after[0].last_sync_at as string).getTime();
// last_sync_at must advance — was 1h stale, should now be within seconds of now.
expect(afterMs).toBeGreaterThan(beforeMs);
expect(Date.now() - afterMs).toBeLessThan(10_000); // < 10s old
});
test('up_to_date does NOT touch last_commit (commit anchor stable)', async () => {
const repo = makeFixtureRepo();
const headCommit = git(repo, 'rev-parse', 'HEAD');
const sourceId = 'syncstamp-anchor';
await engine.executeRaw(
`INSERT INTO sources (id, name, local_path, last_commit, last_sync_at, config)
VALUES ($1, $1, $2, $3, now() - interval '1 hour', '{"federated": true}'::jsonb)
ON CONFLICT (id) DO UPDATE SET
local_path = EXCLUDED.local_path,
last_commit = EXCLUDED.last_commit,
last_sync_at = EXCLUDED.last_sync_at`,
[sourceId, repo, headCommit],
);
await performSync(engine, {
repoPath: repo,
noPull: true,
noEmbed: true,
sourceId,
});
const after = await engine.executeRaw<{ last_commit: string }>(
`SELECT last_commit FROM sources WHERE id = $1`,
[sourceId],
);
expect(after[0].last_commit).toBe(headCommit);
});
test('non-sourceId (legacy global config) path: no source UPDATE attempted', async () => {
const repo = makeFixtureRepo();
const headCommit = git(repo, 'rev-parse', 'HEAD');
// Seed legacy global config.
await engine.setConfig('sync.last_commit', headCommit);
await engine.setConfig('sync.repo_path', repo);
// No sourceId → legacy path. Should not throw, should still record sync.last_run.
const result = await performSync(engine, {
repoPath: repo,
noPull: true,
noEmbed: true,
});
expect(result.status).toBe('up_to_date');
const lastRun = await engine.getConfig('sync.last_run');
expect(lastRun).not.toBeNull();
expect(Date.now() - new Date(lastRun!).getTime()).toBeLessThan(10_000);
});
});