feat(v0.18.2.fork.1): manifest priority 5 — slug-prefix auto-routing

Adds resolveBySlugPrefix helper + 60s in-process TTL cache and slots it
into the resolveSourceId chain at priority 5 (between cwd-prefix and
brain-default). put_page handler now passes slug, so a Claude.ai write
of `memory-dashboard/foo` (with no source_id param) routes to the
memory-dashboard source automatically when that source declares
slug_prefix_rules: ['memory-dashboard/'] in its sources.config jsonb.

Resolution chain (revised):
  1. explicit --source / source_id param
  2. GBRAIN_SOURCE env var
  3. .gbrain-source dotfile (CWD walk-up)
  4. registered source local_path containing CWD
  5. NEW: manifest slug-prefix longest-match (caller passes slug)
  6. brain-level default (sources.default config)
  7. literal 'default'

Manifest semantics:
- Each source row's config.slug_prefix_rules: string[] (jsonb)
- Each rule: literal prefix ('memory-dashboard/') OR trailing-glob
  ('projects/*' which is normalized to literal 'projects/' since slug
  grammar treats '/' as a regular character, not a path separator)
- Longest literal match wins; ties break alphabetically on source.id
- Malformed jsonb is skipped safely (continue, don't throw)
- 60s TTL cache; cross-process consistency comes from a container
  restart (or a future LISTEN/NOTIFY follow-up — see TODOS.md)

- source-resolver.ts: resolveBySlugPrefix + cache + __invalidateSlugPrefixCache
  (test helper) + extended resolveSourceId signature
- operations.ts put_page handler: passes slug into resolveSourceId

Tests:
- test/longest-prefix-match.test.ts (new): pure resolver — longest wins,
  alphabetical tie-break, multi-prefix per source, glob normalization,
  empty rules / no rules, cache hit/miss/invalidation
- test/manifest-routing.test.ts (new): end-to-end via put_page handler —
  slug→manifest routes, explicit source_id overrides, no-match fallback
  to brain-default, subagent slug carve-out (wiki/agents/), subagent
  escape rejection still enforced

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-06 22:19:30 +08:00
parent 18f2dcdbe5
commit 52092c64b1
4 changed files with 435 additions and 8 deletions

View File

@@ -273,13 +273,21 @@ const put_page: Operation = {
if (ctx.dryRun) return { dry_run: true, action: 'put_page', slug: p.slug };
// v0.18.0 Step 5: resolve target source. Explicit param wins; otherwise
// walk the standard chain (env > dotfile > cwd-prefix > brain-default >
// 'default'). resolveSourceId throws if explicit/env/dotfile point to a
// non-existent source — fail-fast rather than silently writing to a
// wrong row.
// v0.18.0 Step 5 + v0.18.2.fork.1 manifest: resolve target source.
// Explicit param wins; otherwise walk the chain (env > dotfile >
// cwd-prefix > manifest slug-prefix > brain-default > 'default'). Passing
// `slug` activates priority 5 (manifest) so a Claude.ai put_page with no
// source_id param routes via slug content (e.g. `memory-dashboard/foo` →
// memory-dashboard source if that source declares `--slug-prefix`
// 'memory-dashboard/'). resolveSourceId throws if explicit/env/dotfile
// point to a non-existent source — fail-fast rather than writing wrong row.
const { resolveSourceId } = await import('./source-resolver.ts');
const sourceId = await resolveSourceId(ctx.engine, (p.source_id as string | undefined) ?? null);
const sourceId = await resolveSourceId(
ctx.engine,
(p.source_id as string | undefined) ?? null,
undefined,
slug,
);
// Skip embedding when no OpenAI key is configured. importFromContent's existing
// try/catch around embed only catches; without a key the OpenAI client would

View File

@@ -61,6 +61,14 @@ export async function resolveSourceId(
engine: BrainEngine,
explicit: string | null | undefined,
cwd: string = process.cwd(),
/**
* v0.18.2.fork.1 — when provided, priority 5 (manifest slug-prefix match)
* fires between cwd-prefix and brain-default. When undefined (CLI commands
* without per-page context like plain `gbrain sync`), priority 5 is
* skipped entirely. This keeps slug-aware put_page behavior aware of
* manifest rules without forcing slug-less callers to invent a value.
*/
slug?: string,
): Promise<string> {
// 1. Explicit flag wins.
if (explicit) {
@@ -106,14 +114,24 @@ export async function resolveSourceId(
}
if (best) return best.id;
// 5. Brain-level default.
// 5. v0.18.2.fork.1 — manifest slug-prefix match (skipped when caller
// didn't pass a slug). Server-derived inference, ranked AFTER user-
// explicit signals (1-4) and BEFORE the brain-level default (6) so a
// bound dotfile / env / flag still wins, but a Claude.ai put_page with
// no source param routes correctly based on slug content.
if (slug) {
const manifestMatch = await resolveBySlugPrefix(engine, slug);
if (manifestMatch) return manifestMatch;
}
// 6. Brain-level default.
const globalDefault = await engine.getConfig('sources.default');
if (globalDefault && SOURCE_ID_RE.test(globalDefault)) {
await assertSourceExists(engine, globalDefault);
return globalDefault;
}
// 6. Fallback: the seeded 'default' source. Always exists post-migration
// 7. Fallback: the seeded 'default' source. Always exists post-migration
// v16 so this is a safe terminal.
return 'default';
}
@@ -132,6 +150,103 @@ async function assertSourceExists(engine: BrainEngine, id: string): Promise<void
}
}
/**
* v0.18.2.fork.1 — manifest convention: longest-prefix slug match.
*
* Each source row carries `config.slug_prefix_rules: string[]` (jsonb).
* Each rule is either a literal prefix ('memory-dashboard/') or a single-
* level glob ending in `*` ('projects/*' — which is functionally identical
* to literal prefix because slugs use '/' as a regular character, not a
* filesystem path separator).
*
* Resolution returns the source id whose rule literally-prefixes the slug,
* choosing the LONGEST literal match (glob `*` excluded from score). Ties
* break alphabetically on source id. No match returns null — caller falls
* through to the next priority in resolveSourceId's chain.
*
* Cross-process consistency: each gbrain process (CLI / MCP container / sync
* cron) holds its own in-process cache with 60s TTL. After
* `gbrain sources update --slug-prefix ...` runs from a CLI, the MCP
* container's cache is stale for ≤60s. To force-refresh: restart the
* container. Postgres LISTEN/NOTIFY would close the gap but is deferred
* (see TODOS.md "gbrain manifest cache cross-process invalidation").
*/
/** Lifetime of a cached rule snapshot, in milliseconds. */
const SLUG_PREFIX_CACHE_TTL_MS = 60_000;

/** A rule snapshot read from the `sources` table, plus its expiry time. */
interface SlugPrefixCache {
  /** One entry per source that declares at least one usable prefix. */
  rules: Array<{ id: string; prefixes: string[] }>;
  /** Epoch milliseconds after which the snapshot must be reloaded. */
  expiresAt: number;
}

/**
 * Rule snapshots keyed by engine instance.
 *
 * A single shared slot would hand engine B the rules that were loaded for
 * engine A whenever one process holds more than one engine. Keying by the
 * engine object keeps each engine's snapshot separate; the WeakMap lets a
 * snapshot be collected together with its engine.
 */
let slugPrefixCache = new WeakMap<BrainEngine, SlugPrefixCache>();

/** Exposed for tests — clears the cache so timing-sensitive scenarios are deterministic. */
export function __invalidateSlugPrefixCache(): void {
  // Swapping in a fresh map drops the snapshot of every engine at once.
  slugPrefixCache = new WeakMap<BrainEngine, SlugPrefixCache>();
}

/**
 * Extracts the normalized literal prefixes from one source row's `config`.
 *
 * @param config The raw `config` column: either a JSON string or an
 *   already-decoded object.
 * @returns The usable prefixes, or an empty array when the config is
 *   malformed, is not an object, or carries no `slug_prefix_rules` array.
 */
function extractSlugPrefixes(config: string | Record<string, unknown>): string[] {
  let cfg: unknown;
  try {
    cfg = typeof config === 'string' ? JSON.parse(config) : config;
  } catch {
    return []; // Malformed jsonb — skip the row instead of throwing.
  }
  if (!cfg || typeof cfg !== 'object') return [];
  const raw = (cfg as Record<string, unknown>).slug_prefix_rules;
  if (!Array.isArray(raw)) return [];

  const prefixes: string[] = [];
  for (const item of raw) {
    if (typeof item !== 'string') continue;
    // Strip one trailing glob `*`; what remains is matched as a literal
    // prefix. A rule that is empty after stripping is dropped so it cannot
    // match every slug.
    const normalized = item.endsWith('*') ? item.slice(0, -1) : item;
    if (normalized.length > 0) prefixes.push(normalized);
  }
  return prefixes;
}

/**
 * Loads the slug-prefix rules of every source, served from the per-engine
 * cache while the snapshot is younger than SLUG_PREFIX_CACHE_TTL_MS.
 *
 * @param engine Engine used both to query `sources` and as the cache key.
 * @returns One `{ id, prefixes }` entry per source with at least one usable
 *   prefix, in the order the rows were returned.
 */
async function loadSlugPrefixRules(
  engine: BrainEngine,
): Promise<Array<{ id: string; prefixes: string[] }>> {
  const now = Date.now();
  const cached = slugPrefixCache.get(engine);
  if (cached && cached.expiresAt > now) {
    return cached.rules;
  }

  const rows = await engine.executeRaw<{ id: string; config: string | Record<string, unknown> }>(
    `SELECT id, config FROM sources`,
  );
  const rules: Array<{ id: string; prefixes: string[] }> = [];
  for (const r of rows) {
    const prefixes = extractSlugPrefixes(r.config);
    if (prefixes.length > 0) rules.push({ id: r.id, prefixes });
  }

  // Expiry is measured from the moment the load started.
  slugPrefixCache.set(engine, { rules, expiresAt: now + SLUG_PREFIX_CACHE_TTL_MS });
  return rules;
}
/**
 * Picks the source that should own `slug` according to the manifest rules.
 *
 * Every prefix of every source is tested with `startsWith`; the longest
 * matching prefix wins, and when two matches have the same length the
 * source id that sorts first wins.
 *
 * @param engine Engine the rules are loaded through.
 * @param slug Page slug to route.
 * @returns The winning source id, or null when no prefix matches.
 */
export async function resolveBySlugPrefix(
  engine: BrainEngine,
  slug: string,
): Promise<string | null> {
  const sources = await loadSlugPrefixRules(engine);

  let winnerId: string | null = null;
  let winnerLength = -1; // Below any real prefix length, so the first match always wins.

  for (const { id, prefixes } of sources) {
    for (const prefix of prefixes) {
      if (!slug.startsWith(prefix)) continue;

      const isLonger = prefix.length > winnerLength;
      const winsTie =
        prefix.length === winnerLength && winnerId !== null && id < winnerId;
      if (isLonger || winsTie) {
        winnerId = id;
        winnerLength = prefix.length;
      }
    }
  }

  return winnerId;
}
/** Exposed for tests. */
export const __testing = {
readDotfileWalk,

View File

@@ -0,0 +1,157 @@
/**
* v0.18.2.fork.1 — manifest slug-prefix matching algorithm tests.
*
* Pure resolver tests (no put_page handler). Verifies:
* - longest literal prefix wins over shorter overlapping rules
* - tie-break on prefix length goes alphabetical on source id
* - trailing single-level glob `*` is treated as literal prefix
* (cosmetic only, scored without the `*`)
* - empty rules / no rules / no match return null
* - cache is hit within 60s TTL, refetched after invalidation
*/
import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'bun:test';
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
import {
resolveBySlugPrefix,
__invalidateSlugPrefixCache,
} from '../src/core/source-resolver.ts';
// Engine shared by every test in this file; assigned in beforeAll.
let engine: PGLiteEngine;
// Connects the engine, creates the schema, and seeds the source rows that
// the tests below resolve against.
beforeAll(async () => {
engine = new PGLiteEngine();
await engine.connect({ type: 'pglite' } as never);
await engine.initSchema();
// Seed sources with overlapping prefix rules so all branch combinations
// are exercised: nested prefixes (alpha-design / beta-design), several
// prefixes on one source (multi-prefix), a trailing-glob rule (glob-form),
// a missing and an empty rules array (no-rules / empty-rules), and two
// sources declaring the identical prefix (alpha-tie / zeta-tie).
await engine.executeRaw(
`INSERT INTO sources (id, name, config) VALUES
('alpha-design', 'alpha-design', '{"federated": true, "slug_prefix_rules": ["design/"]}'::jsonb),
('beta-design', 'beta-design', '{"federated": true, "slug_prefix_rules": ["design/memory-dashboard/"]}'::jsonb),
('multi-prefix', 'multi-prefix', '{"federated": true, "slug_prefix_rules": ["projects/", "builder-journey"]}'::jsonb),
('glob-form', 'glob-form', '{"federated": true, "slug_prefix_rules": ["wedding-planning/*"]}'::jsonb),
('no-rules', 'no-rules', '{"federated": true}'::jsonb),
('empty-rules', 'empty-rules', '{"federated": true, "slug_prefix_rules": []}'::jsonb),
('alpha-tie', 'alpha-tie', '{"federated": true, "slug_prefix_rules": ["tied/"]}'::jsonb),
('zeta-tie', 'zeta-tie', '{"federated": true, "slug_prefix_rules": ["tied/"]}'::jsonb)
ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`,
);
});
// Releases the engine once all tests in the file have run.
afterAll(async () => {
await engine.disconnect();
});
// Clear the resolver's rule cache before each test so every test starts by
// loading rules from the database rather than from an earlier test's snapshot.
beforeEach(() => {
__invalidateSlugPrefixCache();
});
describe('Longest-prefix selection', () => {
  test('longer rule wins over shorter overlapping rule', async () => {
    // Both rules match this slug: 'design/' (alpha-design, 7 chars) and
    // 'design/memory-dashboard/' (beta-design, 24 chars). The longer one wins.
    const slug = 'design/memory-dashboard/notes/x';
    expect(await resolveBySlugPrefix(engine, slug)).toBe('beta-design');
  });

  test('shorter prefix wins when longer rule does not match', async () => {
    // Only alpha-design's 'design/' matches; beta-design needs
    // 'design/memory-dashboard/'.
    const slug = 'design/stock-dashboard/x';
    expect(await resolveBySlugPrefix(engine, slug)).toBe('alpha-design');
  });

  test('exact-prefix match (no trailing content) still routes', async () => {
    // The slug is exactly the rule text, with nothing after it.
    expect(await resolveBySlugPrefix(engine, 'design/')).toBe('alpha-design');
  });

  test('non-prefixed slug returns null (no match)', async () => {
    // No seeded rule is a prefix of this slug.
    expect(await resolveBySlugPrefix(engine, 'unrelated/random')).toBeNull();
  });
});
describe('Multi-prefix per source', () => {
  test('first prefix in rules array matches → routes to that source', async () => {
    // 'projects/' is the first of multi-prefix's two rules.
    const owner = await resolveBySlugPrefix(engine, 'projects/foo/bar');
    expect(owner).toBe('multi-prefix');
  });

  test('second prefix in rules array matches → still routes to same source', async () => {
    // 'builder-journey' is the second rule; the slug equals it exactly.
    const owner = await resolveBySlugPrefix(engine, 'builder-journey');
    expect(owner).toBe('multi-prefix');
  });
});
describe('Glob `*` form', () => {
  test('trailing `*` is normalized to literal prefix (same matching semantics)', async () => {
    // Rule was 'wedding-planning/*'; should match same as 'wedding-planning/'.
    const result = await resolveBySlugPrefix(engine, 'wedding-planning/budget');
    expect(result).toBe('glob-form');
  });

  // The previous version of this test was titled as a same-length tie check
  // but only repeated the assertion above. It now pins the boundaries of the
  // normalized prefix instead.
  test('only the `*` is stripped — normalized prefix is exactly `wedding-planning/`', async () => {
    // A slug equal to the normalized prefix matches, so the `*` is not
    // required as a literal character in the slug.
    expect(await resolveBySlugPrefix(engine, 'wedding-planning/')).toBe('glob-form');
    // A slug without the trailing '/' does not match, so normalization
    // removed the `*` only and kept the '/'.
    expect(await resolveBySlugPrefix(engine, 'wedding-planning')).toBeNull();
  });
});
describe('Tie-break: alphabetical on source id', () => {
  test('two sources with identical prefix → alpha-tie wins (alphabetical)', async () => {
    // alpha-tie and zeta-tie both declare 'tied/'; equal length, so the id
    // that sorts first is chosen.
    expect(await resolveBySlugPrefix(engine, 'tied/some-page')).toBe('alpha-tie');
  });
});
describe('Sources without rules / empty rules', () => {
  test('source with no slug_prefix_rules key in config does not match anything', async () => {
    // The 'no-rules' source exists, but with no rules it cannot claim a slug,
    // not even one that starts with its own id.
    const owner = await resolveBySlugPrefix(engine, 'no-rules/x');
    expect(owner).toBeNull();
  });

  test('source with empty rules array does not match anything', async () => {
    // Same expectation when the rules array is present but empty.
    const owner = await resolveBySlugPrefix(engine, 'empty-rules/x');
    expect(owner).toBeNull();
  });
});
describe('Cache TTL behaviour', () => {
  // Statements that empty and then restore alpha-design's rules.
  const CLEAR_ALPHA_RULES =
    `UPDATE sources SET config = '{"federated": true, "slug_prefix_rules": []}'::jsonb WHERE id = 'alpha-design'`;
  const RESTORE_ALPHA_RULES =
    `UPDATE sources SET config = '{"federated": true, "slug_prefix_rules": ["design/"]}'::jsonb WHERE id = 'alpha-design'`;

  test('cache hit on second call within TTL — DB content change is invisible', async () => {
    // First call populates cache.
    const r1 = await resolveBySlugPrefix(engine, 'design/x');
    expect(r1).toBe('alpha-design');

    // Mutate sources directly without invalidating cache. The resolver should
    // still see the cached snapshot.
    await engine.executeRaw(CLEAR_ALPHA_RULES);
    try {
      const r2 = await resolveBySlugPrefix(engine, 'design/x');
      expect(r2).toBe('alpha-design');
    } finally {
      // Restore even when the assertion fails, so a failure here does not
      // leave alpha-design without rules for the tests that follow.
      await engine.executeRaw(RESTORE_ALPHA_RULES);
    }
  });

  test('explicit invalidation forces refetch', async () => {
    // Prime cache.
    await resolveBySlugPrefix(engine, 'design/x');

    // Mutate then invalidate.
    await engine.executeRaw(CLEAR_ALPHA_RULES);
    try {
      __invalidateSlugPrefixCache();
      const r = await resolveBySlugPrefix(engine, 'design/x');
      expect(r).toBeNull();
    } finally {
      // Restore regardless of the assertion outcome.
      await engine.executeRaw(RESTORE_ALPHA_RULES);
    }
  });
});

View File

@@ -0,0 +1,147 @@
/**
* v0.18.2.fork.1 — manifest routing end-to-end via put_page handler.
*
* Verifies the full chain works through the operations layer:
*
* put_page(slug='memory-dashboard/foo', no source_id)
* → resolveSourceId(engine, null, cwd, 'memory-dashboard/foo')
* → manifest matches 'memory-dashboard/' prefix
* → page row's source_id = 'memory-dashboard'
*
* put_page(slug='memory-dashboard/foo', source_id='stock-dashboard')
* → explicit param wins, manifest skipped
*
* put_page(slug='random-content', no source_id, no manifest match)
* → falls to brain-default (config 'sources.default')
*
* Subagent slug routing is also exercised: writes from a subagent context
* use slug `wiki/agents/<id>/...`. When the manifest declares a rule for
* `wiki/agents/`, those writes get carved into a separate source.
*/
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
import { operationsByName, type OperationContext } from '../src/core/operations.ts';
import { __invalidateSlugPrefixCache } from '../src/core/source-resolver.ts';
// Engine shared by every test in this file; assigned in beforeAll.
let engine: PGLiteEngine;
// Connects the engine, creates the schema, seeds the sources used for
// routing, and sets the brain-level default source.
beforeAll(async () => {
engine = new PGLiteEngine();
await engine.connect({ type: 'pglite' } as never);
await engine.initSchema();
// Three sources declare a slug prefix; personal-knowledge declares none and
// is only reachable through the 'sources.default' config set below.
await engine.executeRaw(
`INSERT INTO sources (id, name, config) VALUES
('memory-dashboard', 'memory-dashboard', '{"federated": true, "slug_prefix_rules": ["memory-dashboard/"]}'::jsonb),
('stock-dashboard', 'stock-dashboard', '{"federated": true, "slug_prefix_rules": ["stock-dashboard/"]}'::jsonb),
('subagent-writes', 'subagent-writes', '{"federated": true, "slug_prefix_rules": ["wiki/agents/"]}'::jsonb),
('personal-knowledge', 'personal-knowledge', '{"federated": true}'::jsonb)
ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`,
);
// Brain-level fallback target — set so unmanifested writes have a clear home.
await engine.setConfig('sources.default', 'personal-knowledge');
// Drop any rule snapshot cached before the rows above were inserted.
__invalidateSlugPrefixCache();
});
// Releases the engine once all tests in the file have run.
afterAll(async () => {
await engine.disconnect();
});
// Test doubles for the OperationContext handed to put_page. The tests never
// reach the config or logger branches, so both are inert placeholders.
const noopLogger = {
  info() {},
  warn() {},
  error() {},
  debug() {},
};

// Stands in for GBrainConfig; nothing in these tests reads from it.
const stubConfig = {} as never;

/**
 * Builds an OperationContext around the shared engine.
 *
 * `engine` is read on every call rather than captured once, because it is
 * only assigned inside beforeAll.
 *
 * @param overrides Fields that replace the defaults.
 */
const baseCtx = (overrides: Partial<OperationContext> = {}): OperationContext => {
  const defaults = {
    engine,
    config: stubConfig,
    logger: noopLogger,
    remote: false,
    dryRun: false,
  };
  return { ...defaults, ...overrides };
};

// Page content written by every put_page call in this file.
const md = `---
title: Test Page
type: note
---
Some body content.
`;
describe('Manifest routing — slug prefix → source', () => {
  test('slug=memory-dashboard/foo, no source_id → routes to memory-dashboard', async () => {
    await operationsByName.put_page.handler(baseCtx(), { slug: 'memory-dashboard/foo', content: md });
    const rows = await engine.executeRaw<{ source_id: string }>(
      `SELECT source_id FROM pages WHERE slug = 'memory-dashboard/foo'`,
    );
    expect(rows.length).toBe(1);
    expect(rows[0].source_id).toBe('memory-dashboard');
  });

  // The title now names the slug the test actually writes.
  test('slug=stock-dashboard/quote → routes to stock-dashboard', async () => {
    await operationsByName.put_page.handler(baseCtx(), { slug: 'stock-dashboard/quote', content: md });
    const rows = await engine.executeRaw<{ source_id: string }>(
      `SELECT source_id FROM pages WHERE slug = 'stock-dashboard/quote'`,
    );
    // Assert the row count first so a missing page fails as an assertion
    // rather than as a TypeError on rows[0].
    expect(rows.length).toBe(1);
    expect(rows[0].source_id).toBe('stock-dashboard');
  });
});
describe('Manifest routing — explicit source_id wins over manifest', () => {
  test('slug=memory-dashboard/x with source_id=stock-dashboard → stock-dashboard', async () => {
    // The slug matches memory-dashboard's rule, but the explicit source_id
    // must take precedence.
    const params = {
      slug: 'memory-dashboard/manual-override',
      content: md,
      source_id: 'stock-dashboard',
    };
    await operationsByName.put_page.handler(baseCtx(), params);

    // The query filters on both slug and source, so one row means the page
    // was written under stock-dashboard.
    const matches = await engine.executeRaw<{ source_id: string }>(
      `SELECT source_id FROM pages WHERE slug = 'memory-dashboard/manual-override' AND source_id = 'stock-dashboard'`,
    );
    expect(matches.length).toBe(1);
  });
});
describe('Manifest routing — no match falls to brain-default', () => {
  test('slug=random-thought → personal-knowledge (brain-level default)', async () => {
    // No seeded rule is a prefix of 'random-thought', so resolution should
    // fall through to the 'sources.default' config value.
    await operationsByName.put_page.handler(baseCtx(), { slug: 'random-thought', content: md });
    const rows = await engine.executeRaw<{ source_id: string }>(
      `SELECT source_id FROM pages WHERE slug = 'random-thought'`,
    );
    // Assert the row count first so a missing page fails as an assertion
    // rather than as a TypeError on rows[0].
    expect(rows.length).toBe(1);
    expect(rows[0].source_id).toBe('personal-knowledge');
  });
});
describe('Manifest routing — subagent slug carve-out (wiki/agents/)', () => {
  test('subagent put_page slug=wiki/agents/3/note → routes to subagent-writes via manifest', async () => {
    await operationsByName.put_page.handler(
      baseCtx({ viaSubagent: true, subagentId: 3 }),
      { slug: 'wiki/agents/3/note', content: md },
    );
    const rows = await engine.executeRaw<{ source_id: string }>(
      `SELECT source_id FROM pages WHERE slug = 'wiki/agents/3/note'`,
    );
    // Assert the row count first so a missing page fails as an assertion
    // rather than as a TypeError on rows[0].
    expect(rows.length).toBe(1);
    expect(rows[0].source_id).toBe('subagent-writes');
  });

  test('subagent escape attempt (slug not under wiki/agents/<id>) is rejected', async () => {
    // Record the failure message instead of asserting inside the catch, so
    // "did it throw" and "what did it say" are reported as separate checks.
    let rejection: string | null = null;
    try {
      await operationsByName.put_page.handler(
        baseCtx({ viaSubagent: true, subagentId: 3 }),
        { slug: 'wiki/random/escape', content: md },
      );
    } catch (e) {
      rejection = e instanceof Error ? e.message : String(e);
    }
    expect(rejection).not.toBeNull();
    expect(rejection).toContain('wiki/agents/3/');
  });
});