feat(v0.18.2.fork.1): manifest priority 5 — slug-prefix auto-routing

Adds resolveBySlugPrefix helper + 60s in-process TTL cache and slots it into the resolveSourceId chain at priority 5 (between cwd-prefix and brain-default). put_page handler now passes slug, so a Claude.ai write of `memory-dashboard/foo` (with no source_id param) routes to the memory-dashboard source automatically when that source declares slug_prefix_rules: ['memory-dashboard/'] in its sources.config jsonb.

Resolution chain (revised):
1. explicit --source / source_id param
2. GBRAIN_SOURCE env var
3. .gbrain-source dotfile (CWD walk-up)
4. registered source local_path containing CWD
5. NEW: manifest slug-prefix longest-match (caller passes slug)
6. brain-level default (sources.default config)
7. literal 'default'

Manifest semantics:
- Each source row's config.slug_prefix_rules: string[] (jsonb)
- Each rule: literal prefix ('memory-dashboard/') OR trailing-glob ('projects/*' which is normalized to literal 'projects/' since slug grammar treats '/' as a regular character, not a path separator)
- Longest literal match wins; ties break alphabetical on source.id
- Malformed jsonb safe-skip (continue, don't throw)
- 60s TTL cache; cross-process consistency comes from container restart (or future LISTEN/NOTIFY follow-up — see TODOS.md)

- source-resolver.ts: resolveBySlugPrefix + cache + __invalidateSlugPrefixCache (test helper) + extended resolveSourceId signature
- operations.ts put_page handler: passes slug into resolveSourceId

Tests:
- test/longest-prefix-match.test.ts (new): pure resolver — longest wins, alphabetical tie-break, multi-prefix per source, glob normalization, empty rules / no rules, cache hit/miss/invalidation
- test/manifest-routing.test.ts (new): end-to-end via put_page handler — slug→manifest routes, explicit source_id overrides, no-match fallback to brain-default, subagent slug carve-out (wiki/agents/), subagent escape rejection still enforced

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 22:19:30 +08:00
parent 18f2dcdbe5
commit 52092c64b1
4 changed files with 435 additions and 8 deletions
--- a/src/core/operations.ts
+++ b/src/core/operations.ts
@@ -273,13 +273,21 @@ const put_page: Operation = {

    if (ctx.dryRun) return { dry_run: true, action: 'put_page', slug: p.slug };

-    // v0.18.0 Step 5: resolve target source. Explicit param wins; otherwise
-    // walk the standard chain (env > dotfile > cwd-prefix > brain-default >
-    // 'default'). resolveSourceId throws if explicit/env/dotfile point to a
-    // non-existent source — fail-fast rather than silently writing to a
-    // wrong row.
+    // v0.18.0 Step 5 + v0.18.2.fork.1 manifest: resolve target source.
+    // Explicit param wins; otherwise walk the chain (env > dotfile >
+    // cwd-prefix > manifest slug-prefix > brain-default > 'default'). Passing
+    // `slug` activates priority 5 (manifest) so a Claude.ai put_page with no
+    // source_id param routes via slug content (e.g. `memory-dashboard/foo` →
+    // memory-dashboard source if that source declares `--slug-prefix`
+    // 'memory-dashboard/'). resolveSourceId throws if explicit/env/dotfile
+    // point to a non-existent source — fail-fast rather than writing to the wrong row.
    const { resolveSourceId } = await import('./source-resolver.ts');
-    const sourceId = await resolveSourceId(ctx.engine, (p.source_id as string | undefined) ?? null);
+    const sourceId = await resolveSourceId(
+      ctx.engine,
+      (p.source_id as string | undefined) ?? null,
+      undefined,
+      slug,
+    );

    // Skip embedding when no OpenAI key is configured. importFromContent's existing
    // try/catch around embed only catches; without a key the OpenAI client would
--- a/src/core/source-resolver.ts
+++ b/src/core/source-resolver.ts
@@ -61,6 +61,14 @@ export async function resolveSourceId(
  engine: BrainEngine,
  explicit: string | null | undefined,
  cwd: string = process.cwd(),
+  /**
+   * v0.18.2.fork.1 — when provided, priority 5 (manifest slug-prefix match)
+   * fires between cwd-prefix and brain-default. When undefined or empty (CLI
+   * commands without per-page context like plain `gbrain sync`), priority 5
+   * is skipped entirely. This lets slug-aware callers such as put_page use
+   * manifest rules without forcing slug-less callers to invent a value.
+   */
+  slug?: string,
 ): Promise<string> {
  // 1. Explicit flag wins.
  if (explicit) {
@@ -106,14 +114,24 @@ export async function resolveSourceId(
  }
  if (best) return best.id;

-  // 5. Brain-level default.
+  // 5. v0.18.2.fork.1 — manifest slug-prefix match (skipped when caller
+  //    didn't pass a slug). Server-derived inference, ranked AFTER user-
+  //    explicit signals (1-4) and BEFORE the brain-level default (6) so a
+  //    bound dotfile / env / flag still wins, but a Claude.ai put_page with
+  //    no source param routes correctly based on slug content.
+  if (slug) {
+    const manifestMatch = await resolveBySlugPrefix(engine, slug);
+    if (manifestMatch) return manifestMatch;
+  }
+
+  // 6. Brain-level default.
  const globalDefault = await engine.getConfig('sources.default');
  if (globalDefault && SOURCE_ID_RE.test(globalDefault)) {
    await assertSourceExists(engine, globalDefault);
    return globalDefault;
  }

-  // 6. Fallback: the seeded 'default' source. Always exists post-migration
+  // 7. Fallback: the seeded 'default' source. Always exists post-migration
  //    v16 so this is a safe terminal.
  return 'default';
 }
@@ -132,6 +150,103 @@ async function assertSourceExists(engine: BrainEngine, id: string): Promise<void
  }
 }

+/**
+ * v0.18.2.fork.1 — manifest convention: longest-prefix slug match.
+ *
+ * Each source row carries `config.slug_prefix_rules: string[]` (jsonb).
+ * Each rule is either a literal prefix ('memory-dashboard/') or a single-
+ * level glob ending in `*` ('projects/*' — which is functionally identical
+ * to literal prefix because slugs use '/' as a regular character, not a
+ * filesystem path separator).
+ *
+ * Resolution returns the source id whose rule literally-prefixes the slug,
+ * choosing the LONGEST literal match (glob `*` excluded from score). Ties
+ * break alphabetically on source id. No match returns null — caller falls
+ * through to the next priority in resolveSourceId's chain.
+ *
+ * Cross-process consistency: each gbrain process (CLI / MCP container / sync
+ * cron) holds its own in-process cache with 60s TTL. After
+ * `gbrain sources update --slug-prefix ...` runs from a CLI, the MCP
+ * container's cache is stale for ≤60s. To force-refresh: restart the
+ * container. Postgres LISTEN/NOTIFY would close the gap but is deferred
+ * (see TODOS.md "gbrain manifest cache cross-process invalidation").
+ */
+const SLUG_PREFIX_CACHE_TTL_MS = 60_000; // snapshot lifetime: 60 seconds
+interface SlugPrefixCache {
+  rules: Array<{ id: string; prefixes: string[] }>;
+  expiresAt: number; // Date.now() value after which the snapshot is refetched
+}
+// Keyed by engine so a snapshot loaded through one engine is never served to another.
+let slugPrefixCache = new WeakMap<BrainEngine, SlugPrefixCache>();
+
+/** Exposed for tests — drops every cached snapshot so timing-sensitive scenarios are deterministic. */
+export function __invalidateSlugPrefixCache(): void {
+  slugPrefixCache = new WeakMap<BrainEngine, SlugPrefixCache>();
+}
+
+/** Returns every source's normalized prefix rules, reusing this engine's snapshot until its TTL lapses. */
+async function loadSlugPrefixRules(
+  engine: BrainEngine,
+): Promise<Array<{ id: string; prefixes: string[] }>> {
+  const now = Date.now();
+  const cached = slugPrefixCache.get(engine);
+  if (cached && cached.expiresAt > now) return cached.rules;
+  const rows = await engine.executeRaw<{ id: string; config: string | Record<string, unknown> }>(
+    `SELECT id, config FROM sources`,
+  );
+  const rules: Array<{ id: string; prefixes: string[] }> = [];
+  for (const r of rows) {
+    let cfg: unknown;
+    try {
+      cfg = typeof r.config === 'string' ? JSON.parse(r.config) : r.config;
+    } catch {
+      continue; // Malformed jsonb — safe-skip per failure-modes table CG.
+    }
+    if (!cfg || typeof cfg !== 'object') continue;
+    const raw = (cfg as Record<string, unknown>).slug_prefix_rules;
+    if (!Array.isArray(raw)) continue;
+    const prefixes: string[] = [];
+    for (const item of raw) {
+      if (typeof item !== 'string') continue;
+      // Strip one trailing glob `*` (cosmetic only: slug grammar treats '/' as a
+      // regular character, so 'projects/*' and 'projects/' match the same slugs).
+      const normalized = item.endsWith('*') ? item.slice(0, -1) : item;
+      if (normalized.length > 0) prefixes.push(normalized);
+    }
+    if (prefixes.length > 0) rules.push({ id: r.id, prefixes });
+  }
+  slugPrefixCache.set(engine, { rules, expiresAt: now + SLUG_PREFIX_CACHE_TTL_MS });
+  return rules;
+}
+
+/**
+ * Finds the source whose slug_prefix_rules holds the longest literal prefix
+ * of `slug`. Equal-length matches go to the alphabetically smaller source
+ * id; when no rule matches, the result is null.
+ */
+export async function resolveBySlugPrefix(
+  engine: BrainEngine,
+  slug: string,
+): Promise<string | null> {
+  let winnerId: string | null = null;
+  let winnerLength = -1;
+  for (const { id, prefixes } of await loadSlugPrefixRules(engine)) {
+    for (const prefix of prefixes) {
+      if (!slug.startsWith(prefix)) continue;
+      // A strictly longer prefix always takes over; an equally long one
+      // only does so when its source id sorts first.
+      const isLonger = prefix.length > winnerLength;
+      const winsTie =
+        prefix.length === winnerLength && winnerId !== null && id < winnerId;
+      if (isLonger || winsTie) {
+        winnerId = id;
+        winnerLength = prefix.length;
+      }
+    }
+  }
+  return winnerId;
+}
+
 /** Exposed for tests. */
 export const __testing = {
  readDotfileWalk,
--- a/test/longest-prefix-match.test.ts
+++ b/test/longest-prefix-match.test.ts
@@ -0,0 +1,157 @@
+/**
+ * v0.18.2.fork.1 — manifest slug-prefix matching algorithm tests.
+ *
+ * Pure resolver tests (no put_page handler). Verifies:
+ *   - longest literal prefix wins over shorter overlapping rules
+ *   - tie-break on prefix length goes alphabetical on source id
+ *   - trailing single-level glob `*` is treated as literal prefix
+ *     (cosmetic only, scored without the `*`)
+ *   - empty rules / no rules / no match return null
+ *   - cache is hit within 60s TTL, refetched after invalidation
+ */
+
+import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'bun:test';
+import { PGLiteEngine } from '../src/core/pglite-engine.ts';
+import {
+  resolveBySlugPrefix,
+  __invalidateSlugPrefixCache,
+} from '../src/core/source-resolver.ts';
+
+let engine: PGLiteEngine;
+
+beforeAll(async () => {
+  engine = new PGLiteEngine();
+  await engine.connect({ type: 'pglite' } as never);
+  await engine.initSchema();
+
+  // Seed overlapping prefixes (alpha-/beta-design), a multi-prefix source, a glob
+  // rule, sources with missing/empty rules, and an exact tie (alpha-/zeta-tie).
+  await engine.executeRaw(
+    `INSERT INTO sources (id, name, config) VALUES
+      ('alpha-design', 'alpha-design', '{"federated": true, "slug_prefix_rules": ["design/"]}'::jsonb),
+      ('beta-design',  'beta-design',  '{"federated": true, "slug_prefix_rules": ["design/memory-dashboard/"]}'::jsonb),
+      ('multi-prefix', 'multi-prefix', '{"federated": true, "slug_prefix_rules": ["projects/", "builder-journey"]}'::jsonb),
+      ('glob-form',    'glob-form',    '{"federated": true, "slug_prefix_rules": ["wedding-planning/*"]}'::jsonb),
+      ('no-rules',     'no-rules',     '{"federated": true}'::jsonb),
+      ('empty-rules',  'empty-rules',  '{"federated": true, "slug_prefix_rules": []}'::jsonb),
+      ('alpha-tie',    'alpha-tie',    '{"federated": true, "slug_prefix_rules": ["tied/"]}'::jsonb),
+      ('zeta-tie',     'zeta-tie',     '{"federated": true, "slug_prefix_rules": ["tied/"]}'::jsonb)
+     ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`,
+  );
+});
+
+afterAll(async () => {
+  await engine.disconnect(); // tear down the engine connected in beforeAll
+});
+
+beforeEach(() => {
+  __invalidateSlugPrefixCache(); // every test starts with a cold rule cache
+});
+
+describe('Longest-prefix selection', () => {
+  test('longer rule wins over shorter overlapping rule', async () => {
+    // Both match: 'design/' (alpha-design, score 7) vs 'design/memory-dashboard/' (beta-design, score 24).
+    const result = await resolveBySlugPrefix(engine, 'design/memory-dashboard/notes/x');
+    expect(result).toBe('beta-design');
+  });
+
+  test('shorter prefix wins when longer rule does not match', async () => {
+    // 'design/stock-dashboard/x' matches alpha-design only (beta requires 'design/memory-dashboard/').
+    const result = await resolveBySlugPrefix(engine, 'design/stock-dashboard/x');
+    expect(result).toBe('alpha-design');
+  });
+
+  test('exact-prefix match (no trailing content) still routes', async () => {
+    const result = await resolveBySlugPrefix(engine, 'design/'); // slug is exactly alpha-design's rule
+    expect(result).toBe('alpha-design');
+  });
+
+  test('non-prefixed slug returns null (no match)', async () => {
+    const result = await resolveBySlugPrefix(engine, 'unrelated/random'); // no seeded rule is a prefix of this slug
+    expect(result).toBeNull();
+  });
+});
+
+describe('Multi-prefix per source', () => {
+  test('first prefix in rules array matches → routes to that source', async () => {
+    const result = await resolveBySlugPrefix(engine, 'projects/foo/bar'); // matched by the 'projects/' rule
+    expect(result).toBe('multi-prefix');
+  });
+
+  test('second prefix in rules array matches → still routes to same source', async () => {
+    const result = await resolveBySlugPrefix(engine, 'builder-journey'); // rule has no trailing '/'; slug equals it
+    expect(result).toBe('multi-prefix');
+  });
+});
+
+describe('Glob `*` form', () => {
+  test('trailing `*` is normalized to literal prefix (same matching semantics)', async () => {
+    // Rule was 'wedding-planning/*'; should match same as 'wedding-planning/'.
+    const result = await resolveBySlugPrefix(engine, 'wedding-planning/budget');
+    expect(result).toBe('glob-form');
+  });
+
+  test('`*` is not part of the match — any suffix after the literal prefix routes to glob-form', async () => {
+    // 'wedding-planning/*' normalizes to 'wedding-planning/' (score 17).
+    // No other seeded source declares a rule matching this slug → glob-form wins.
+    const result = await resolveBySlugPrefix(engine, 'wedding-planning/X');
+    expect(result).toBe('glob-form');
+  });
+});
+
+describe('Tie-break: alphabetical on source id', () => {
+  test('two sources with identical prefix → alpha-tie wins (alphabetical)', async () => {
+    const result = await resolveBySlugPrefix(engine, 'tied/some-page'); // alpha-tie and zeta-tie both declare 'tied/'
+    expect(result).toBe('alpha-tie');
+  });
+});
+
+describe('Sources without rules / empty rules', () => {
+  test('source with no slug_prefix_rules key in config does not match anything', async () => {
+    // 'no-rules' exists but declares no rules, so even a slug starting with its id is unclaimed.
+    const result = await resolveBySlugPrefix(engine, 'no-rules/x');
+    expect(result).toBeNull();
+  });
+
+  test('source with empty rules array does not match anything', async () => {
+    const result = await resolveBySlugPrefix(engine, 'empty-rules/x'); // a source id is not an implicit prefix
+    expect(result).toBeNull();
+  });
+});
+
+describe('Cache TTL behaviour', () => {
+  test('cache hit on second call within TTL — DB content change is invisible', async () => {
+    // First call populates the cache.
+    const r1 = await resolveBySlugPrefix(engine, 'design/x');
+    expect(r1).toBe('alpha-design');
+
+    // Mutate sources directly without invalidating the cache. The resolver
+    // should still answer from the cached snapshot.
+    await engine.executeRaw(
+      `UPDATE sources SET config = '{"federated": true, "slug_prefix_rules": []}'::jsonb WHERE id = 'alpha-design'`,
+    );
+    const r2 = await resolveBySlugPrefix(engine, 'design/x');
+    expect(r2).toBe('alpha-design');
+
+    // Restore for next tests. NOTE(review): skipped if an expect above throws.
+    await engine.executeRaw(
+      `UPDATE sources SET config = '{"federated": true, "slug_prefix_rules": ["design/"]}'::jsonb WHERE id = 'alpha-design'`,
+    );
+  });
+
+  test('explicit invalidation forces refetch', async () => {
+    // Prime the cache.
+    await resolveBySlugPrefix(engine, 'design/x');
+    // Mutate, then invalidate so the next call re-reads the sources table.
+    await engine.executeRaw(
+      `UPDATE sources SET config = '{"federated": true, "slug_prefix_rules": []}'::jsonb WHERE id = 'alpha-design'`,
+    );
+    __invalidateSlugPrefixCache();
+    const r = await resolveBySlugPrefix(engine, 'design/x');
+    expect(r).toBeNull(); // alpha-design's rule is gone; beta-design's longer rule does not match 'design/x'
+    // Restore. NOTE(review): skipped if the expect above throws.
+    await engine.executeRaw(
+      `UPDATE sources SET config = '{"federated": true, "slug_prefix_rules": ["design/"]}'::jsonb WHERE id = 'alpha-design'`,
+    );
+  });
+});
--- a/test/manifest-routing.test.ts
+++ b/test/manifest-routing.test.ts
@@ -0,0 +1,147 @@
+/**
+ * v0.18.2.fork.1 — manifest routing end-to-end via put_page handler.
+ *
+ * Verifies the full chain works through the operations layer:
+ *
+ *   put_page(slug='memory-dashboard/foo', no source_id)
+ *     → resolveSourceId(engine, null, cwd, 'memory-dashboard/foo')
+ *     → manifest matches 'memory-dashboard/' prefix
+ *     → page row's source_id = 'memory-dashboard'
+ *
+ *   put_page(slug='memory-dashboard/foo', source_id='stock-dashboard')
+ *     → explicit param wins, manifest skipped
+ *
+ *   put_page(slug='random-content', no source_id, no manifest match)
+ *     → falls to brain-default (config 'sources.default')
+ *
+ * Subagent slug routing is also exercised: writes from a subagent context
+ * use slug `wiki/agents/<id>/...`. When the manifest declares a rule for
+ * `wiki/agents/`, those writes get carved into a separate source.
+ */
+
+import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
+import { PGLiteEngine } from '../src/core/pglite-engine.ts';
+import { operationsByName, type OperationContext } from '../src/core/operations.ts';
+import { __invalidateSlugPrefixCache } from '../src/core/source-resolver.ts';
+
+let engine: PGLiteEngine;
+
+beforeAll(async () => {
+  engine = new PGLiteEngine();
+  await engine.connect({ type: 'pglite' } as never);
+  await engine.initSchema();
+
+  await engine.executeRaw( // three sources with one prefix rule each, plus personal-knowledge with none
+    `INSERT INTO sources (id, name, config) VALUES
+      ('memory-dashboard', 'memory-dashboard', '{"federated": true, "slug_prefix_rules": ["memory-dashboard/"]}'::jsonb),
+      ('stock-dashboard',  'stock-dashboard',  '{"federated": true, "slug_prefix_rules": ["stock-dashboard/"]}'::jsonb),
+      ('subagent-writes',  'subagent-writes',  '{"federated": true, "slug_prefix_rules": ["wiki/agents/"]}'::jsonb),
+      ('personal-knowledge', 'personal-knowledge', '{"federated": true}'::jsonb)
+     ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`,
+  );
+
+  // Brain-level fallback target — set so unmanifested writes have a clear home.
+  await engine.setConfig('sources.default', 'personal-knowledge');
+
+  __invalidateSlugPrefixCache(); // start from a cold rule cache so the sources seeded above are read
+});
+
+afterAll(async () => {
+  await engine.disconnect(); // tear down the engine connected in beforeAll
+});
+
+// Minimal OperationContext stub. Tests don't exercise config or logger
+// branches in put_page; both can be cheap stubs.
+const noopLogger = {
+  info: () => {},
+  warn: () => {},
+  error: () => {},
+  debug: () => {},
+};
+const stubConfig = {} as never; // GBrainConfig: tests don't read from it.
+
+const baseCtx = (overrides: Partial<OperationContext> = {}): OperationContext => ({
+  engine,
+  config: stubConfig,
+  logger: noopLogger,
+  remote: false,
+  dryRun: false, // must stay false: put_page returns early on dry runs
+  ...overrides, // spread last so per-test fields (e.g. viaSubagent) win
+});
+
+const md = `---
+title: Test Page
+type: note
+---
+Some body content.
+`; // markdown with frontmatter, used as the `content` of every put_page call below
+
+describe('Manifest routing — slug prefix → source', () => {
+  test('slug=memory-dashboard/foo, no source_id → routes to memory-dashboard', async () => {
+    await operationsByName.put_page.handler(baseCtx(), { slug: 'memory-dashboard/foo', content: md });
+    const rows = await engine.executeRaw<{ source_id: string }>(
+      `SELECT source_id FROM pages WHERE slug = 'memory-dashboard/foo'`,
+    );
+    expect(rows.length).toBe(1);
+    expect(rows[0].source_id).toBe('memory-dashboard');
+  });
+
+  test('slug=stock-dashboard/quote → routes to stock-dashboard', async () => {
+    await operationsByName.put_page.handler(baseCtx(), { slug: 'stock-dashboard/quote', content: md });
+    const rows = await engine.executeRaw<{ source_id: string }>(
+      `SELECT source_id FROM pages WHERE slug = 'stock-dashboard/quote'`,
+    );
+    expect(rows.map((r) => r.source_id)).toEqual(['stock-dashboard']); // exactly one row, in the right source
+  });
+});
+
+describe('Manifest routing — explicit source_id wins over manifest', () => {
+  test('slug=memory-dashboard/x with source_id=stock-dashboard → stock-dashboard', async () => {
+    await operationsByName.put_page.handler(
+      baseCtx(),
+      { slug: 'memory-dashboard/manual-override', content: md, source_id: 'stock-dashboard' },
+    );
+    const rows = await engine.executeRaw<{ source_id: string }>( // filter on slug AND source_id
+      `SELECT source_id FROM pages WHERE slug = 'memory-dashboard/manual-override' AND source_id = 'stock-dashboard'`,
+    );
+    expect(rows.length).toBe(1); // one hit means the page landed in the explicit source
+  });
+});
+
+describe('Manifest routing — no match falls to brain-default', () => {
+  test('slug=random-thought → personal-knowledge (brain-level default)', async () => {
+    await operationsByName.put_page.handler(baseCtx(), { slug: 'random-thought', content: md });
+    const rows = await engine.executeRaw<{ source_id: string }>(
+      `SELECT source_id FROM pages WHERE slug = 'random-thought'`,
+    );
+    expect(rows[0].source_id).toBe('personal-knowledge'); // the 'sources.default' value set in beforeAll
+  });
+});
+
+describe('Manifest routing — subagent slug carve-out (wiki/agents/)', () => {
+  test('subagent put_page slug=wiki/agents/3/note → routes to subagent-writes via manifest', async () => {
+    await operationsByName.put_page.handler(
+      baseCtx({ viaSubagent: true, subagentId: 3 }),
+      { slug: 'wiki/agents/3/note', content: md },
+    );
+    const rows = await engine.executeRaw<{ source_id: string }>(
+      `SELECT source_id FROM pages WHERE slug = 'wiki/agents/3/note'`,
+    );
+    expect(rows[0].source_id).toBe('subagent-writes'); // claimed by the seeded 'wiki/agents/' rule
+  });
+
+  test('subagent escape attempt (slug not under wiki/agents/<id>) is rejected', async () => {
+    let threw = false; // flipped in catch, so a handler that resolves normally fails the test
+    try {
+      await operationsByName.put_page.handler(
+        baseCtx({ viaSubagent: true, subagentId: 3 }),
+        { slug: 'wiki/random/escape', content: md },
+      );
+    } catch (e) {
+      threw = true;
+      const msg = e instanceof Error ? e.message : String(e);
+      expect(msg).toContain('wiki/agents/3/'); // error text is expected to name the allowed namespace
+    }
+    expect(threw).toBe(true);
+  });
+});