Compare commits
12 Commits
master
...
source-id-
| Author | SHA1 | Date | |
|---|---|---|---|
| 7221dad83b | |||
| 71aaf22573 | |||
| dfdf97748e | |||
| 5df6031adc | |||
| bb125e2baa | |||
| b951a8a8b3 | |||
| 37b9e8dca3 | |||
| 888fe26c24 | |||
| 676d4283c7 | |||
| 52092c64b1 | |||
| 18f2dcdbe5 | |||
| e5d94f63d9 |
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "gbrain",
|
||||
"version": "0.18.2",
|
||||
"version": "0.18.2-fork.1",
|
||||
"description": "Postgres-native personal knowledge brain with hybrid RAG search",
|
||||
"type": "module",
|
||||
"main": "src/core/index.ts",
|
||||
|
||||
@@ -7,6 +7,11 @@ import { importFile } from '../core/import-file.ts';
|
||||
import { loadConfig } from '../core/config.ts';
|
||||
import { createProgress } from '../core/progress.ts';
|
||||
import { getCliOptions, cliOptsToProgressOptions } from '../core/cli-options.ts';
|
||||
import { checkAllowlist } from '../core/allowlist-resolver.ts';
|
||||
import { resolveBySlugPrefix } from '../core/source-resolver.ts';
|
||||
import { pathToSlug } from '../core/sync.ts';
|
||||
|
||||
const IMPORT_MANIFEST_FALLBACK_SOURCE = 'default-ambiguous';
|
||||
|
||||
function defaultWorkers(): number {
|
||||
const cpuCount = cpus().length;
|
||||
@@ -28,7 +33,7 @@ export interface RunImportResult {
|
||||
failures: Array<{ path: string; error: string }>;
|
||||
}
|
||||
|
||||
export async function runImport(engine: BrainEngine, args: string[], opts: { commit?: string } = {}): Promise<RunImportResult> {
|
||||
export async function runImport(engine: BrainEngine, args: string[], opts: { commit?: string; sourceId?: string; manifestMode?: boolean } = {}): Promise<RunImportResult> {
|
||||
const noEmbed = args.includes('--no-embed');
|
||||
const fresh = args.includes('--fresh');
|
||||
const jsonOutput = args.includes('--json');
|
||||
@@ -94,8 +99,26 @@ export async function runImport(engine: BrainEngine, args: string[], opts: { com
|
||||
|
||||
async function processFile(eng: BrainEngine, filePath: string) {
|
||||
const relativePath = relative(dir, filePath);
|
||||
// Patch #2 (Gap 4): allowlist gate. STRICT mode when `.gbrain-allowlist`
|
||||
// present at dir or any ancestor; lenient when absent (backward compat).
|
||||
const allowDecision = checkAllowlist(dir, relativePath);
|
||||
if (!allowDecision.allowed) {
|
||||
skipped++;
|
||||
processed++;
|
||||
tickProgress();
|
||||
return;
|
||||
}
|
||||
// Patch #3 (Gap 7 D13): in manifest mode, dispatch per-file via
|
||||
// `resolveBySlugPrefix(engine, slug)`. Slug-prefix no-match falls back to
|
||||
// `default-ambiguous` (tombstone), per design plan.
|
||||
let fileSourceId: string | undefined = opts.sourceId;
|
||||
if (opts.manifestMode) {
|
||||
const fileSlug = pathToSlug(relativePath);
|
||||
const matched = await resolveBySlugPrefix(eng, fileSlug);
|
||||
fileSourceId = matched ?? IMPORT_MANIFEST_FALLBACK_SOURCE;
|
||||
}
|
||||
try {
|
||||
const result = await importFile(eng, filePath, relativePath, { noEmbed });
|
||||
const result = await importFile(eng, filePath, relativePath, { noEmbed, sourceId: fileSourceId });
|
||||
if (result.status === 'imported') {
|
||||
imported++;
|
||||
chunksCreated += result.chunks;
|
||||
@@ -213,12 +236,13 @@ export async function runImport(engine: BrainEngine, args: string[], opts: { com
|
||||
console.log(` ${chunksCreated} chunks created`);
|
||||
}
|
||||
|
||||
// Log the ingest
|
||||
// Log the ingest (v0.18.2.fork.1: scope to opts.sourceId when provided)
|
||||
await engine.logIngest({
|
||||
source_type: 'directory',
|
||||
source_ref: dir,
|
||||
pages_updated: importedSlugs,
|
||||
summary: `Imported ${imported} pages, ${skipped} skipped, ${chunksCreated} chunks`,
|
||||
source_id: opts.sourceId,
|
||||
});
|
||||
|
||||
// Import → sync continuity: write sync checkpoint if this is a git repo.
|
||||
|
||||
@@ -40,6 +40,64 @@ function validateSourceId(id: string): void {
|
||||
}
|
||||
}
|
||||
|
||||
// v0.18.2.fork.1 — manifest prefix grammar.
|
||||
//
|
||||
// A slug-prefix rule is one of:
|
||||
// - Literal prefix: 'memory-dashboard/' (most common)
|
||||
// - Single-level glob: 'wedding-planning/*' (cosmetic; same as literal)
|
||||
//
|
||||
// Rules:
|
||||
// - Allowed chars: lowercase a-z, 0-9, '-' (hyphen), '_' (underscore), '/' (slash)
|
||||
// - Optional single trailing '*' (no other position)
|
||||
// - Length 1..64 (excluding the trailing '*')
|
||||
// - Reject uppercase, whitespace, mid-string '*', multi-level globs
|
||||
// ('**'), any other punctuation
|
||||
//
|
||||
// Why fail-fast at write time: a typo'd rule writes successfully into
|
||||
// config_jsonb but never matches anything at routing time — the put_page
|
||||
// silently falls to brain-default. Catching at CLI-write moment surfaces
|
||||
// the typo before bad data lands.
|
||||
//
|
||||
// Underscore note (v0.18.2.fork.1 Phase 6 fix): chezmoi-managed prefixes
|
||||
// like `dot_claude/` are legitimate slugs — chezmoi conventionally maps
|
||||
// `~/.claude/` → `dot_claude/` in source. Earlier draft of this validator
|
||||
// rejected underscores, which broke Phase 4 source taxonomy. Underscore
|
||||
// is now first-class.
|
||||
/**
 * Whole-rule grammar: a first character from [a-z0-9_], up to 63 further
 * characters from [a-z0-9_-/], then an optional single trailing '*'.
 */
const SLUG_PREFIX_RULE_RE = /^[a-z0-9_](?:[a-z0-9_\-/]{0,62}[a-z0-9_\-/])?\*?$/;

/**
 * Validate one slug-prefix rule, throwing a descriptive Error on the first
 * violation found.
 *
 * Checks run from most specific to most general so the caller gets the most
 * actionable message: empty rule, over-long prefix, '**', misplaced '*',
 * and finally the character-set regex.
 *
 * @param rule - A single rule, already trimmed (e.g. `memory-dashboard/`,
 *   `dot_claude/`, `wedding-planning/*`).
 * @throws Error when the rule is empty, longer than 64 characters (not
 *   counting a trailing '*'), contains '**', has '*' anywhere but the end, or
 *   contains characters outside the grammar.
 */
function validateSlugPrefix(rule: string): void {
  if (!rule || rule.length === 0) {
    throw new Error('Empty slug-prefix rule. Each --slug-prefix entry must be a non-empty string.');
  }

  // The 64-character limit applies to the prefix itself; a trailing '*' is
  // not counted, matching both the documented grammar and the regex above.
  const prefixLength = rule.endsWith('*') ? rule.length - 1 : rule.length;
  if (prefixLength > 64) {
    throw new Error(`Slug-prefix rule too long (${prefixLength} chars, max 64): "${rule}"`);
  }

  if (rule.includes('**')) {
    throw new Error(`Multi-level glob ('**') not supported: "${rule}". Use a literal prefix or trailing single '*'.`);
  }

  // '*' permitted only as the final character.
  const starIdx = rule.indexOf('*');
  if (starIdx !== -1 && starIdx !== rule.length - 1) {
    throw new Error(`'*' may only appear as the final character: "${rule}". For a literal prefix, drop the '*'.`);
  }

  if (!SLUG_PREFIX_RULE_RE.test(rule)) {
    // Underscores are accepted by the grammar, so the message must not list
    // them as rejected.
    throw new Error(
      `Invalid slug-prefix rule "${rule}". ` +
        `Must start with a-z, 0-9 or '_' and contain only lowercase a-z, 0-9, '-', '_', '/', optionally ending in '*'. ` +
        `Reject: uppercase, whitespace, other punctuation.`,
    );
  }
}
|
||||
|
||||
/**
 * Parse the value of a `--slug-prefix` flag into a list of validated rules.
 *
 * The value is a comma-separated list; items are trimmed and empty items are
 * dropped before each remaining rule is checked with `validateSlugPrefix`.
 *
 * @param value - Raw flag value. May be `undefined` when the flag was the
 *   last CLI argument and no value followed it.
 * @returns The trimmed, validated rules in their original order.
 * @throws Error when the value is missing, contains no rules after parsing,
 *   or any rule fails validation.
 */
function parseSlugPrefixFlag(value: string | undefined): string[] {
  // Callers pass `args[++i]`, which is undefined when the flag is the final
  // argument. Fail with a clear message instead of a TypeError on `.split`.
  if (typeof value !== 'string') {
    throw new Error('--slug-prefix requires a value. Provide one or more comma-separated rules.');
  }

  // Comma-separated list. Each item validated independently.
  const parts = value
    .split(',')
    .map(s => s.trim())
    .filter(s => s.length > 0);
  if (parts.length === 0) {
    throw new Error('--slug-prefix value is empty after parsing. Provide one or more comma-separated rules.');
  }
  for (const p of parts) validateSlugPrefix(p);
  return parts;
}
|
||||
|
||||
// ── Types ───────────────────────────────────────────────────
|
||||
|
||||
interface SourceRow {
|
||||
@@ -59,6 +117,8 @@ interface SourceListEntry {
|
||||
federated: boolean;
|
||||
page_count: number;
|
||||
last_sync_at: string | null;
|
||||
/** v0.18.2.fork.1 — surfaces config.slug_prefix_rules when set. */
|
||||
slug_prefix_rules?: string[];
|
||||
}
|
||||
|
||||
// ── Helpers ─────────────────────────────────────────────────
|
||||
@@ -98,7 +158,7 @@ async function countPages(engine: BrainEngine, sourceId: string): Promise<number
|
||||
async function runAdd(engine: BrainEngine, args: string[]): Promise<void> {
|
||||
const id = args[0];
|
||||
if (!id) {
|
||||
console.error('Usage: gbrain sources add <id> --path <path> [--name <display>] [--federated|--no-federated]');
|
||||
console.error('Usage: gbrain sources add <id> [--path <path>] [--name <display>] [--federated|--no-federated] [--slug-prefix \'<rule>,<rule>\']');
|
||||
process.exit(2);
|
||||
}
|
||||
validateSourceId(id);
|
||||
@@ -106,6 +166,7 @@ async function runAdd(engine: BrainEngine, args: string[]): Promise<void> {
|
||||
let localPath: string | null = null;
|
||||
let displayName = id;
|
||||
let federated: boolean | null = null; // null = default (false for new, opt-in via --federated)
|
||||
let slugPrefixRules: string[] | null = null;
|
||||
|
||||
for (let i = 1; i < args.length; i++) {
|
||||
const a = args[i];
|
||||
@@ -113,6 +174,7 @@ async function runAdd(engine: BrainEngine, args: string[]): Promise<void> {
|
||||
if (a === '--name') { displayName = args[++i]; continue; }
|
||||
if (a === '--federated') { federated = true; continue; }
|
||||
if (a === '--no-federated') { federated = false; continue; }
|
||||
if (a === '--slug-prefix') { slugPrefixRules = parseSlugPrefixFlag(args[++i]); continue; }
|
||||
console.error(`Unknown flag: ${a}`);
|
||||
process.exit(2);
|
||||
}
|
||||
@@ -138,10 +200,23 @@ async function runAdd(engine: BrainEngine, args: string[]): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
const config = federated === null ? {} : { federated };
|
||||
const config: Record<string, unknown> = {};
|
||||
if (federated !== null) config.federated = federated;
|
||||
if (slugPrefixRules) config.slug_prefix_rules = slugPrefixRules;
|
||||
|
||||
// Double cast `($4::text)::jsonb` is intentional: postgres-js's `unsafe()` API
|
||||
// detects `$N::jsonb` casts on string params and re-stringifies the value as a
|
||||
// JSON-encoded literal, which Postgres then stores as a JSON STRING scalar
|
||||
// (jsonb_typeof = 'string'). The text cast forces postgres-js to send the
|
||||
// param verbatim as TEXT, then SQL re-parses to a jsonb object at the column
|
||||
// boundary. PGLite is unaffected (its driver doesn't have postgres-js's
|
||||
// auto-encoding) but the double cast is a no-op there. Verified empirically
|
||||
// on D-LXC fixture 189 (2026-05-07): variant 1 (current pattern) → string,
|
||||
// variant 4 (this fix) → object. See migration v26 step 0 for the matching
|
||||
// healing path that recovers existing string-encoded prod data.
|
||||
await engine.executeRaw(
|
||||
`INSERT INTO sources (id, name, local_path, config)
|
||||
VALUES ($1, $2, $3, $4::jsonb)
|
||||
VALUES ($1, $2, $3, ($4::text)::jsonb)
|
||||
ON CONFLICT (id) DO NOTHING`,
|
||||
[id, displayName, localPath, JSON.stringify(config)],
|
||||
);
|
||||
@@ -154,6 +229,9 @@ async function runAdd(engine: BrainEngine, args: string[]): Promise<void> {
|
||||
const fed = isFederated(created.config);
|
||||
console.log(`Created source "${id}"${displayName !== id ? ` (name: ${displayName})` : ''}${localPath ? ` → ${localPath}` : ''}`);
|
||||
console.log(` federated: ${fed}${fed ? ' — appears in cross-source default search' : ' — only searched when explicitly named via --source'}`);
|
||||
if (slugPrefixRules) {
|
||||
console.log(` slug_prefix_rules: ${slugPrefixRules.join(', ')}`);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Subcommand: list ────────────────────────────────────────
|
||||
@@ -169,6 +247,10 @@ async function runList(engine: BrainEngine, args: string[]): Promise<void> {
|
||||
const entries: SourceListEntry[] = [];
|
||||
for (const r of rows) {
|
||||
const pageCount = await countPages(engine, r.id);
|
||||
const cfg = parseConfig(r.config);
|
||||
const rules = Array.isArray(cfg.slug_prefix_rules)
|
||||
? (cfg.slug_prefix_rules as unknown[]).filter(x => typeof x === 'string') as string[]
|
||||
: undefined;
|
||||
entries.push({
|
||||
id: r.id,
|
||||
name: r.name,
|
||||
@@ -176,6 +258,7 @@ async function runList(engine: BrainEngine, args: string[]): Promise<void> {
|
||||
federated: isFederated(r.config),
|
||||
page_count: pageCount,
|
||||
last_sync_at: r.last_sync_at ? new Date(r.last_sync_at).toISOString() : null,
|
||||
...(rules && rules.length > 0 ? { slug_prefix_rules: rules } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -193,6 +276,9 @@ async function runList(engine: BrainEngine, args: string[]): Promise<void> {
|
||||
const sync = e.last_sync_at ? `last sync ${e.last_sync_at}` : 'never synced';
|
||||
console.log(` ${e.id.padEnd(20)} ${fedMark.padEnd(10)} ${String(e.page_count).padStart(6)} pages ${sync}`);
|
||||
if (e.local_path) console.log(` ${' '.repeat(22)}${pathStr}`);
|
||||
if (e.slug_prefix_rules && e.slug_prefix_rules.length > 0) {
|
||||
console.log(` ${' '.repeat(22)}slug-prefix: ${e.slug_prefix_rules.join(', ')}`);
|
||||
}
|
||||
}
|
||||
if (entries.length === 0) console.log(' (no sources registered)');
|
||||
}
|
||||
@@ -317,13 +403,95 @@ async function runFederate(engine: BrainEngine, args: string[], value: boolean):
|
||||
}
|
||||
const config = parseConfig(src.config);
|
||||
config.federated = value;
|
||||
// Double cast `($1::text)::jsonb` matches the runAdd path. See comment there
|
||||
// for full rationale (postgres-js auto-jsonb-encoding bug). Without this
|
||||
// cast the UPDATE writes a JSON string scalar (jsonb_typeof = 'string')
|
||||
// and any subsequent migration using jsonb_set throws SQLSTATE 22023.
|
||||
await engine.executeRaw(
|
||||
`UPDATE sources SET config = $1::jsonb WHERE id = $2`,
|
||||
`UPDATE sources SET config = ($1::text)::jsonb WHERE id = $2`,
|
||||
[JSON.stringify(config), id],
|
||||
);
|
||||
console.log(`Source "${id}" is now ${value ? 'federated (appears in cross-source default search)' : 'isolated (only searched when explicitly named)'}.`);
|
||||
}
|
||||
|
||||
// ── Subcommand: update (v0.18.2.fork.1) ─────────────────────
|
||||
//
|
||||
// Mutates fields on an existing source in-place. Currently supports:
|
||||
// --slug-prefix '<rule>,<rule>' Replace config.slug_prefix_rules
|
||||
// --slug-prefix '' Clear all prefix rules
|
||||
//
|
||||
// Future flags can be slotted in (e.g. --read-boost / --preprocessor)
|
||||
// once the manifest projection model is implemented (PR #2+ per design
|
||||
// doc Open Q #4). Update is intentionally additive on the config_jsonb
|
||||
// blob, not destructive on other keys (federated, etc. remain).
|
||||
|
||||
/**
 * `gbrain sources update <id> --slug-prefix '<rule>,<rule>'`.
 *
 * Replaces (or, with an empty string, clears) `config.slug_prefix_rules` on
 * an existing source row while leaving every other config key untouched.
 *
 * Exit codes: 2 for usage errors (missing id, unknown flag, missing flag
 * value, nothing to update); 3 when the source does not exist.
 *
 * @param engine - Engine used to read the source row and run the UPDATE.
 * @param args - Arguments after the `update` subcommand; `args[0]` is the
 *   source id.
 */
async function runUpdate(engine: BrainEngine, args: string[]): Promise<void> {
  const id = args[0];
  if (!id) {
    console.error("Usage: gbrain sources update <id> --slug-prefix '<rule>,<rule>'");
    process.exit(2);
  }
  validateSourceId(id);

  const existing = await fetchSource(engine, id);
  if (!existing) {
    console.error(`Source "${id}" does not exist. Run 'gbrain sources add ${id}' first.`);
    process.exit(3);
  }

  let slugPrefixRules: string[] | null = null;
  let slugPrefixSet = false;

  for (let i = 1; i < args.length; i++) {
    const a = args[i];
    if (a === '--slug-prefix') {
      const value = args[++i];
      if (value === undefined) {
        // Flag was the last argument: report it as a usage error rather than
        // letting the parser crash on an undefined value.
        console.error("Missing value for --slug-prefix. Pass '<rule>,<rule>' to set rules or '' to clear them.");
        process.exit(2);
      }
      if (value === '') {
        // Explicit clear.
        slugPrefixRules = [];
      } else {
        slugPrefixRules = parseSlugPrefixFlag(value);
      }
      slugPrefixSet = true;
      continue;
    }
    console.error(`Unknown flag: ${a}`);
    process.exit(2);
  }

  if (!slugPrefixSet) {
    console.error('Nothing to update. Pass --slug-prefix to set/clear manifest rules.');
    process.exit(2);
  }

  // Merge into the existing config so unrelated keys survive the update.
  const config = parseConfig(existing.config);
  if (slugPrefixRules && slugPrefixRules.length > 0) {
    config.slug_prefix_rules = slugPrefixRules;
  } else {
    delete config.slug_prefix_rules;
  }

  // Double cast `($1::text)::jsonb` matches the runAdd path. See comment there
  // for full rationale (postgres-js auto-jsonb-encoding bug). Without this
  // cast the UPDATE writes a JSON string scalar (jsonb_typeof = 'string')
  // and any subsequent migration using jsonb_set throws SQLSTATE 22023.
  await engine.executeRaw(
    `UPDATE sources SET config = ($1::text)::jsonb WHERE id = $2`,
    [JSON.stringify(config), id],
  );

  if (slugPrefixRules && slugPrefixRules.length > 0) {
    console.log(`Updated source "${id}": slug_prefix_rules = ${slugPrefixRules.join(', ')}`);
  } else {
    console.log(`Updated source "${id}": slug_prefix_rules cleared`);
  }
  console.log(
    '  Note: cache TTL ~60s — other gbrain processes (MCP container, sync cron) ' +
      'see the new rules within 60s. Restart with `docker compose restart gbrain-mcp` ' +
      'for immediate effect.',
  );
}
|
||||
|
||||
// ── Dispatcher ──────────────────────────────────────────────
|
||||
|
||||
export async function runSources(engine: BrainEngine, args: string[]): Promise<void> {
|
||||
@@ -332,6 +500,7 @@ export async function runSources(engine: BrainEngine, args: string[]): Promise<v
|
||||
|
||||
switch (sub) {
|
||||
case 'add': return runAdd(engine, rest);
|
||||
case 'update': return runUpdate(engine, rest);
|
||||
case 'list': return runList(engine, rest);
|
||||
case 'remove': return runRemove(engine, rest);
|
||||
case 'rename': return runRename(engine, rest);
|
||||
@@ -353,12 +522,23 @@ export async function runSources(engine: BrainEngine, args: string[]): Promise<v
|
||||
}
|
||||
|
||||
function printHelp(): void {
|
||||
console.log(`gbrain sources — manage multi-source brain configuration (v0.18.0)
|
||||
console.log(`gbrain sources — manage multi-source brain configuration (v0.18.0 + v0.18.2.fork.1 manifest)
|
||||
|
||||
Subcommands:
|
||||
add <id> --path <p> [--name <n>] [--federated|--no-federated]
|
||||
Register a new source.
|
||||
list [--json] List registered sources with page counts.
|
||||
add <id> [--path <p>] [--name <n>] [--federated|--no-federated]
|
||||
[--slug-prefix '<rule>,<rule>']
|
||||
Register a new source. --slug-prefix
|
||||
enables manifest auto-routing: a put_page
|
||||
whose slug starts with one of these rules
|
||||
routes here automatically (priority 5 in
|
||||
the resolveSourceId chain).
|
||||
update <id> --slug-prefix '<rule>,<rule>'
|
||||
Replace manifest rules on an existing
|
||||
source. Pass empty string to clear.
|
||||
Other gbrain processes pick up the change
|
||||
within ~60s (cache TTL).
|
||||
list [--json] List registered sources with page counts
|
||||
and slug_prefix_rules.
|
||||
remove <id> [--yes] [--dry-run] Cascade-delete a source and its pages.
|
||||
rename <id> <new-name> Rename display name (id is immutable).
|
||||
default <id> Set the brain-level default source.
|
||||
@@ -368,5 +548,12 @@ Subcommands:
|
||||
unfederate <id> Isolate source from default search.
|
||||
|
||||
Source id: [a-z0-9-]{1,32}. Immutable citation key.
|
||||
|
||||
Slug-prefix rule grammar:
|
||||
- lowercase a-z, 0-9, '-', '/' allowed
|
||||
- optional single trailing '*'
|
||||
- max 64 chars
|
||||
- reject: underscores, uppercase, whitespace, mid-string '*', '**'
|
||||
examples: memory-dashboard/ projects/ wedding-planning/*
|
||||
`);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { existsSync } from 'fs';
|
||||
import { existsSync, readFileSync } from 'fs';
|
||||
import { execFileSync } from 'child_process';
|
||||
import { join, relative } from 'path';
|
||||
import type { BrainEngine } from '../core/engine.ts';
|
||||
@@ -14,6 +14,8 @@ import {
|
||||
import type { SyncManifest } from '../core/sync.ts';
|
||||
import { createProgress } from '../core/progress.ts';
|
||||
import { getCliOptions, cliOptsToProgressOptions } from '../core/cli-options.ts';
|
||||
import { checkAllowlist } from '../core/allowlist-resolver.ts';
|
||||
import { resolveBySlugPrefix } from '../core/source-resolver.ts';
|
||||
|
||||
export interface SyncResult {
|
||||
status: 'up_to_date' | 'synced' | 'first_sync' | 'dry_run' | 'blocked_by_failures';
|
||||
@@ -49,6 +51,44 @@ export interface SyncOpts {
|
||||
* pre-v0.17 global-config path unchanged.
|
||||
*/
|
||||
sourceId?: string;
|
||||
/**
|
||||
* v0.18.2.fork.1 Patch #3 (Gap 7 D13) — manifest mode. When true, sourceId
|
||||
* stays undefined at sync session level; per-file sourceId is resolved via
|
||||
* `resolveBySlugPrefix(engine, slug)` before each importFile call. Triggered
|
||||
* when the repo's `.gbrain-source` content is the literal sentinel
|
||||
* "MANIFEST" (case-sensitive). See runSync below for detection.
|
||||
*/
|
||||
manifestMode?: boolean;
|
||||
}
|
||||
|
||||
/** Literal `.gbrain-source` content that selects manifest mode. */
const MANIFEST_SENTINEL = 'MANIFEST';
/** Source id assigned when no slug-prefix rule matches a file. */
const MANIFEST_FALLBACK_SOURCE = 'default-ambiguous';

/**
 * Report whether `<repoPath>/.gbrain-source` declares manifest mode.
 *
 * Only the first line of the file is considered, with surrounding whitespace
 * ignored, and it must equal the sentinel exactly (case-sensitive). The raw
 * content is compared directly because the sentinel is intentionally not a
 * valid source id.
 *
 * @param repoPath - Directory expected to contain `.gbrain-source`.
 * @returns `true` only when the first line is the sentinel; `false` when the
 *   file is missing, unreadable, or holds anything else.
 */
function readManifestSentinel(repoPath: string): boolean {
  const dotfile = join(repoPath, '.gbrain-source');
  let raw: string;
  try {
    // A missing file surfaces here as a read error, which maps to `false`
    // just like any other read failure.
    raw = readFileSync(dotfile, 'utf-8').trim();
  } catch {
    return false;
  }
  const newlineAt = raw.indexOf('\n');
  const firstLine = newlineAt === -1 ? raw : raw.slice(0, newlineAt);
  return firstLine.trim() === MANIFEST_SENTINEL;
}
|
||||
|
||||
/**
 * Pick the source id for a single file while syncing in manifest mode.
 *
 * Only the slug-prefix lookup is consulted. The original design note says
 * the explicit/env/dotfile/cwd-prefix priorities were already considered at
 * session level, and repeating them per file would mis-attribute pages whose
 * cwd-prefix matches the wrong source.
 *
 * @param engine - Engine handed to `resolveBySlugPrefix`.
 * @param slug - Slug of the file being imported.
 * @returns The matched source id, or `default-ambiguous` when the lookup
 *   yields null/undefined.
 */
async function resolveManifestSource(engine: BrainEngine, slug: string): Promise<string> {
  return (await resolveBySlugPrefix(engine, slug)) ?? MANIFEST_FALLBACK_SOURCE;
}
|
||||
|
||||
function git(repoPath: string, ...args: string[]): string {
|
||||
@@ -169,6 +209,22 @@ export async function performSync(engine: BrainEngine, opts: SyncOpts): Promise<
|
||||
|
||||
// No changes
|
||||
if (lastCommit === headCommit) {
|
||||
// v0.18.2.fork.1: advance last_sync_at even on up-to-date sync. Pre-fix
|
||||
// this branch returned without touching the source row, so quiet sources
|
||||
// (no commits since last sync) kept stale last_sync_at forever and the
|
||||
// drift monitor (gbrain-projects-drift.sh) would falsely flag them as
|
||||
// stale. Drift's contract is "is the sync cron alive?", not "did the
|
||||
// remote add commits?" — record the successful sync attempt regardless.
|
||||
// Only last_sync_at advances here; last_commit is untouched (no semantic
|
||||
// change to commit anchor). engine.setConfig still records the global
|
||||
// sync.last_run for legacy non-sourceId callers.
|
||||
if (opts.sourceId) {
|
||||
await engine.executeRaw(
|
||||
`UPDATE sources SET last_sync_at = now() WHERE id = $1`,
|
||||
[opts.sourceId],
|
||||
);
|
||||
}
|
||||
await engine.setConfig('sync.last_run', new Date().toISOString());
|
||||
return {
|
||||
status: 'up_to_date',
|
||||
fromCommit: lastCommit,
|
||||
@@ -276,6 +332,15 @@ export async function performSync(engine: BrainEngine, opts: SyncOpts): Promise<
|
||||
for (const { from, to } of filtered.renamed) {
|
||||
const oldSlug = pathToSlug(from);
|
||||
const newSlug = pathToSlug(to);
|
||||
// Patch #2: allowlist gate. If newPath fails the repo's `.gbrain-allowlist`,
|
||||
// treat the rename as a delete of oldSlug — keeping a stale page under a
|
||||
// path the curator no longer wants indexed would defeat the allowlist.
|
||||
const allowDecision = checkAllowlist(repoPath, to);
|
||||
if (!allowDecision.allowed) {
|
||||
try { await engine.deletePage(oldSlug); } catch { /* may not exist */ }
|
||||
progress.tick(1, `allowlist-skipped:${to}`);
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
await engine.updateSlug(oldSlug, newSlug);
|
||||
} catch {
|
||||
@@ -284,7 +349,11 @@ export async function performSync(engine: BrainEngine, opts: SyncOpts): Promise<
|
||||
// Reimport at new path (picks up content changes)
|
||||
const filePath = join(repoPath, to);
|
||||
if (existsSync(filePath)) {
|
||||
const result = await importFile(engine, filePath, to, { noEmbed });
|
||||
// Patch #3: in manifest mode, dispatch per-file via slug-prefix.
|
||||
const fileSourceId = opts.manifestMode
|
||||
? await resolveManifestSource(engine, newSlug)
|
||||
: opts.sourceId;
|
||||
const result = await importFile(engine, filePath, to, { noEmbed, sourceId: fileSourceId });
|
||||
if (result.status === 'imported') chunksCreated += result.chunks;
|
||||
}
|
||||
pagesAffected.push(newSlug);
|
||||
@@ -317,8 +386,19 @@ export async function performSync(engine: BrainEngine, opts: SyncOpts): Promise<
|
||||
progress.tick(1, `skip:${path}`);
|
||||
continue;
|
||||
}
|
||||
// Patch #2: allowlist gate. Curator-listed knowledge surface only.
|
||||
const allowDecision = checkAllowlist(repoPath, path);
|
||||
if (!allowDecision.allowed) {
|
||||
progress.tick(1, `allowlist-skipped:${path}`);
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
const result = await importFile(engine, filePath, path, { noEmbed });
|
||||
// Patch #3: in manifest mode, dispatch per-file via slug-prefix.
|
||||
const fileSlug = pathToSlug(path);
|
||||
const fileSourceId = opts.manifestMode
|
||||
? await resolveManifestSource(engine, fileSlug)
|
||||
: opts.sourceId;
|
||||
const result = await importFile(engine, filePath, path, { noEmbed, sourceId: fileSourceId });
|
||||
if (result.status === 'imported') {
|
||||
chunksCreated += result.chunks;
|
||||
pagesAffected.push(result.slug);
|
||||
@@ -387,6 +467,7 @@ export async function performSync(engine: BrainEngine, opts: SyncOpts): Promise<
|
||||
source_ref: `${repoPath} @ ${headCommit.slice(0, 8)}`,
|
||||
pages_updated: pagesAffected,
|
||||
summary: `Sync: +${filtered.added.length} ~${filtered.modified.length} -${filtered.deleted.length} R${filtered.renamed.length}, ${chunksCreated} chunks, ${elapsed}ms`,
|
||||
source_id: opts.sourceId,
|
||||
});
|
||||
|
||||
// Auto-extract links + timeline (always, extraction is cheap CPU)
|
||||
@@ -467,7 +548,11 @@ async function performFullSync(
|
||||
const { runImport } = await import('./import.ts');
|
||||
const importArgs = [repoPath];
|
||||
if (opts.noEmbed) importArgs.push('--no-embed');
|
||||
const result = await runImport(engine, importArgs, { commit: headCommit });
|
||||
const result = await runImport(engine, importArgs, {
|
||||
commit: headCommit,
|
||||
sourceId: opts.sourceId,
|
||||
manifestMode: opts.manifestMode,
|
||||
});
|
||||
|
||||
// Bug 9 — gate the full-sync bookmark on success. runImport already
|
||||
// writes its own sync.last_commit conditionally (import.ts), but
|
||||
@@ -542,17 +627,42 @@ export async function runSync(engine: BrainEngine, args: string[]) {
|
||||
const skipFailed = args.includes('--skip-failed');
|
||||
const retryFailed = args.includes('--retry-failed');
|
||||
|
||||
// v0.18.0 Step 5: --source resolves to a sources(id) row. Falls back
|
||||
// to pre-v0.17 global config (sync.repo_path + sync.last_commit) when
|
||||
// no flag, no env, no dotfile is present.
|
||||
// v0.18.0 Step 5: --source resolves to a sources(id) row. Resolution is
|
||||
// unconditional now so .gbrain-source dotfile + cwd-prefix branches fire
|
||||
// for plain `gbrain sync` (no flag, no env). Backward compat for pre-v0.17
|
||||
// brains: when no user signal is present and the resolver returns the
|
||||
// literal 'default' fallback, drop sourceId to undefined so the legacy
|
||||
// global config path (sync.repo_path + sync.last_commit) is used instead
|
||||
// of the per-source anchor on the 'default' row (which may be NULL on
|
||||
// brains that haven't been migrated to per-source state yet).
|
||||
//
|
||||
// v0.18.2.fork.1 Patch #3 (Gap 7 D13): manifest mode. When `--source` is
|
||||
// absent AND the repo's `.gbrain-source` content is the literal sentinel
|
||||
// "MANIFEST" (case-sensitive uppercase), enter manifest mode: sourceId
|
||||
// stays undefined at session level; per-file dispatch happens in
|
||||
// performSync's import loops via `resolveBySlugPrefix`. Detection runs
|
||||
// BEFORE resolveSourceId because `MANIFEST` fails SOURCE_ID_RE and would
|
||||
// otherwise be silently skipped by the dotfile walk.
|
||||
const explicitSource = args.find((a, i) => args[i - 1] === '--source') || null;
|
||||
let sourceId: string | undefined = undefined;
|
||||
if (explicitSource || process.env.GBRAIN_SOURCE) {
|
||||
const cwdForResolve = repoPath || process.cwd();
|
||||
const manifestMode =
|
||||
!explicitSource &&
|
||||
!process.env.GBRAIN_SOURCE &&
|
||||
readManifestSentinel(cwdForResolve);
|
||||
let sourceId: string | undefined;
|
||||
if (manifestMode) {
|
||||
sourceId = undefined;
|
||||
} else {
|
||||
const { resolveSourceId } = await import('../core/source-resolver.ts');
|
||||
sourceId = await resolveSourceId(engine, explicitSource);
|
||||
if (!explicitSource && !process.env.GBRAIN_SOURCE && sourceId === 'default') {
|
||||
sourceId = undefined;
|
||||
}
|
||||
}
|
||||
|
||||
const opts: SyncOpts = { repoPath, dryRun, full, noPull, noEmbed, skipFailed, retryFailed, sourceId };
|
||||
const opts: SyncOpts = {
|
||||
repoPath, dryRun, full, noPull, noEmbed, skipFailed, retryFailed, sourceId, manifestMode,
|
||||
};
|
||||
|
||||
// Bug 9 — --retry-failed: before running normal sync, clear acknowledgment
|
||||
// flags so the sync picks them up as fresh work. The actual re-attempt
|
||||
|
||||
194
src/core/allowlist-resolver.ts
Normal file
194
src/core/allowlist-resolver.ts
Normal file
@@ -0,0 +1,194 @@
|
||||
/**
|
||||
* v0.18.2.fork.1 — Patch #2 (Gap 4): native `.gbrain-allowlist` enforcement.
|
||||
*
|
||||
* Each source-bearing repo can declare a curated knowledge surface in a
|
||||
* `.gbrain-allowlist` file at its root. Every file path the import / sync
|
||||
* pipelines consider — and every MCP put_page that resolves to a source with
|
||||
* a local_path — is checked against that allowlist before chunks + embeddings
|
||||
* are spent on it.
|
||||
*
|
||||
* Behavior:
|
||||
* - File present → STRICT mode: only paths matched by an allow rule pass.
|
||||
* No-match files are skipped (sync) or rejected (MCP put_page).
|
||||
* - File absent → LENIENT mode: allow all (backward compat, EC-9 fallback).
|
||||
*
|
||||
* Glob semantics (matches the existing gstack-brain-sync grammar):
|
||||
* `**` match zero or more chars including `/`
|
||||
* `*` match zero or more chars excluding `/`
|
||||
* `?` match exactly one char excluding `/`
|
||||
* `# ...` line comment
|
||||
* `!pattern` exclusion (rsync-style); LATER rule wins on overlap
|
||||
*
|
||||
* Cache: 60s TTL keyed by repo root. Stale entries cost up to one tick of
|
||||
* cron-driven sync; cross-process invalidation is acceptable lag (same
|
||||
* tradeoff as the manifest slug-prefix cache in source-resolver.ts).
|
||||
*/
|
||||
|
||||
import { readFileSync, existsSync } from 'fs';
|
||||
import { join, dirname, resolve } from 'path';
|
||||
|
||||
const ALLOWLIST_FILE = '.gbrain-allowlist';
|
||||
const CACHE_TTL_MS = 60_000;
|
||||
|
||||
interface CompiledRule {
|
||||
pattern: string;
|
||||
regex: RegExp;
|
||||
negate: boolean;
|
||||
}
|
||||
|
||||
interface CacheEntry {
|
||||
/** null = no allowlist file found at this root; lenient mode applies. */
|
||||
rules: CompiledRule[] | null;
|
||||
expiresAt: number;
|
||||
}
|
||||
|
||||
const cache = new Map<string, CacheEntry>();
|
||||
|
||||
/**
 * Test-only hook: empties the module-level allowlist cache so each scenario
 * starts from a cold state.
 */
export function __invalidateAllowlistCache(): void {
  cache.clear();
}
|
||||
|
||||
/**
 * Locate the nearest `.gbrain-allowlist`, looking in `startDir` first and
 * then in each ancestor directory.
 *
 * The climb stops at the filesystem root (where `dirname` returns its input)
 * or after 50 directories, whichever comes first.
 *
 * @param startDir - Directory to start from; resolved to an absolute path.
 * @returns The allowlist file path and the directory containing it, or
 *   `null` when none is found.
 */
function findAllowlistFile(startDir: string): { path: string; root: string } | null {
  let current = resolve(startDir);
  let remaining = 50;
  while (remaining-- > 0) {
    const file = join(current, ALLOWLIST_FILE);
    if (existsSync(file)) {
      return { path: file, root: current };
    }
    const up = dirname(current);
    if (up === current) {
      // Reached the root without finding a file.
      return null;
    }
    current = up;
  }
  return null;
}
|
||||
|
||||
/**
 * Compile an allowlist glob into an anchored regular expression.
 *
 * Translation, applied left to right in a single pass:
 *   - `**` followed by `/` becomes zero or more whole directory segments,
 *     so `docs/**` + `/*.md` matches both `docs/foo.md` and `docs/sub/foo.md`.
 *   - any other `**` (trailing or mid-pattern) matches anything, `/` included.
 *   - `*` matches any run of characters other than `/`.
 *   - `?` matches exactly one character other than `/`.
 *   - regex metacharacters, brackets included, are escaped as literals;
 *     character classes are not supported.
 *
 * @param pattern - Glob text without any leading `!` negation marker.
 * @returns A RegExp that must match the entire path.
 */
function globToRegex(pattern: string): RegExp {
  const regexMeta = '.+^${}()|\\[]';
  let body = '';
  let pos = 0;

  while (pos < pattern.length) {
    const ch = pattern[pos];
    const doubleStar = ch === '*' && pattern[pos + 1] === '*';

    if (doubleStar && pattern[pos + 2] === '/') {
      // Consume the slash too; an empty match leaves the path at top level.
      body += '(?:[^/]+/)*';
      pos += 3;
      continue;
    }
    if (doubleStar) {
      // Trailing `/**` and bare `**` translate identically.
      body += '.*';
      pos += 2;
      continue;
    }

    if (ch === '*') {
      body += '[^/]*';
    } else if (ch === '?') {
      body += '[^/]';
    } else if (regexMeta.includes(ch)) {
      body += '\\' + ch;
    } else {
      body += ch;
    }
    pos += 1;
  }

  return new RegExp('^' + body + '$');
}
|
||||
|
||||
/**
 * Return the compiled allowlist rules that apply to `repoRoot`, using the
 * in-process cache while the entry is still fresh.
 *
 * On a cache miss the allowlist file is located via `findAllowlistFile`, read
 * as UTF-8 and parsed line by line: blank lines and lines starting with `#`
 * are ignored, a leading `!` marks a negated rule, and a pattern that fails to
 * compile is logged and skipped. The result is cached for `CACHE_TTL_MS`,
 * including the null result.
 *
 * @param repoRoot Directory whose allowlist should be loaded.
 * @returns The parsed rules (possibly an empty array), or null when no
 *          allowlist file exists or it could not be read.
 */
function loadAllowlistRules(repoRoot: string): CompiledRule[] | null {
  const startedAt = Date.now();
  const rootKey = resolve(repoRoot);

  const hit = cache.get(rootKey);
  if (hit !== undefined && hit.expiresAt > startedAt) return hit.rules;

  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let rules: CompiledRule[] | null = null;
  const located = findAllowlistFile(rootKey);
  if (located !== null) {
    try {
      const compiled: CompiledRule[] = [];
      for (const rawLine of readFileSync(located.path, 'utf-8').split('\n')) {
        const entry = rawLine.trim();
        if (entry === '' || entry.startsWith('#')) continue;

        const negate = entry.startsWith('!');
        const glob = negate ? entry.slice(1).trim() : entry;
        if (glob === '') continue;

        try {
          compiled.push({ pattern: glob, regex: globToRegex(glob), negate });
        } catch (e: unknown) {
          // A single bad pattern must not disable the rest of the file.
          console.warn(
            `[gbrain allowlist] Skipping malformed pattern "${glob}" in ${located.path}: ` + describe(e),
          );
        }
      }
      rules = compiled;
    } catch (e: unknown) {
      // Unreadable file: behave as if no allowlist were present.
      console.warn(
        `[gbrain allowlist] Failed to read ${located.path}: ` +
          describe(e) +
          '. Falling back to lenient (allow all).',
      );
      rules = null;
    }
  }

  cache.set(rootKey, { rules, expiresAt: startedAt + CACHE_TTL_MS });
  return rules;
}
|
||||
|
||||
/** Outcome of an allowlist check for a single path. */
export interface AllowlistDecision {
  /** Whether the path may be ingested. */
  allowed: boolean;
  /**
   * Why the decision was reached:
   * - `no-allowlist`: no rules were loaded, so everything is allowed
   * - `matched`: the last matching rule was an allow rule
   * - `excluded`: the last matching rule was a negated (`!`) rule
   * - `no-match`: rules exist but none matched the path
   */
  reason: 'no-allowlist' | 'matched' | 'excluded' | 'no-match';
  /** Deciding pattern, prefixed with `!` for exclusions; set only for `matched` and `excluded`. */
  matchedPattern?: string;
}
|
||||
|
||||
/**
 * Decide whether `relativePath` may be ingested under the allowlist that
 * applies to `repoRoot`.
 *
 * Lenient mode: when no allowlist file is found at `repoRoot` or any ancestor
 * (the upward search is capped at 50 levels), every path is allowed.
 *
 * Strict mode: when an allowlist is present, the path is denied unless a rule
 * matches it. Every rule is tested in file order and the LAST matching rule
 * decides, so a negated rule (`!pattern`) overrides an earlier allow rule and
 * a later allow rule overrides an earlier negation.
 *
 * @param repoRoot Directory used to locate (and cache) the allowlist.
 * @param relativePath Path to test against the compiled patterns.
 * @returns The decision, with the deciding pattern when a rule matched.
 */
export function checkAllowlist(repoRoot: string, relativePath: string): AllowlistDecision {
  const rules = loadAllowlistRules(repoRoot);
  if (rules === null) return { allowed: true, reason: 'no-allowlist' };

  // Keep only the last rule whose regex matches; earlier matches are overridden.
  let winner: CompiledRule | undefined;
  for (const rule of rules) {
    if (rule.regex.test(relativePath)) winner = rule;
  }

  // Allowlist present but nothing fired → default deny.
  if (winner === undefined) return { allowed: false, reason: 'no-match' };

  return winner.negate
    ? { allowed: false, reason: 'excluded', matchedPattern: '!' + winner.pattern }
    : { allowed: true, reason: 'matched', matchedPattern: winner.pattern };
}
|
||||
|
||||
/** Test-only exports: module-private helpers bundled so tests can call them directly. */
export const __testing = { globToRegex, loadAllowlistRules, findAllowlistFile };
|
||||
@@ -130,9 +130,13 @@ export interface BrainEngine {
|
||||
getEmbeddingsByChunkIds(ids: number[]): Promise<Map<number, Float32Array>>;
|
||||
|
||||
// Chunks
|
||||
upsertChunks(slug: string, chunks: ChunkInput[]): Promise<void>;
|
||||
// v0.18.0 Step 5: optional sourceId scopes the slug→page_id lookup to a
|
||||
// specific source so multi-source same-slug data doesn't return ambiguous
|
||||
// matches. When omitted, falls back to slug-only (legacy single-source
|
||||
// semantics, breaks under multi-source same-slug).
|
||||
upsertChunks(slug: string, chunks: ChunkInput[], sourceId?: string): Promise<void>;
|
||||
getChunks(slug: string): Promise<Chunk[]>;
|
||||
deleteChunks(slug: string): Promise<void>;
|
||||
deleteChunks(slug: string, sourceId?: string): Promise<void>;
|
||||
|
||||
// Links
|
||||
/**
|
||||
@@ -213,9 +217,10 @@ export interface BrainEngine {
|
||||
findOrphanPages(): Promise<Array<{ slug: string; title: string; domain: string | null }>>;
|
||||
|
||||
// Tags
|
||||
addTag(slug: string, tag: string): Promise<void>;
|
||||
removeTag(slug: string, tag: string): Promise<void>;
|
||||
getTags(slug: string): Promise<string[]>;
|
||||
// v0.18.0 Step 5: optional sourceId scopes the slug→page_id lookup.
|
||||
addTag(slug: string, tag: string, sourceId?: string): Promise<void>;
|
||||
removeTag(slug: string, tag: string, sourceId?: string): Promise<void>;
|
||||
getTags(slug: string, sourceId?: string): Promise<string[]>;
|
||||
|
||||
// Timeline
|
||||
/**
|
||||
@@ -243,7 +248,9 @@ export interface BrainEngine {
|
||||
getRawData(slug: string, source?: string): Promise<RawData[]>;
|
||||
|
||||
// Versions
|
||||
createVersion(slug: string): Promise<PageVersion>;
|
||||
// v0.18.0 Step 5: optional sourceId scopes the slug→page lookup so multi-
|
||||
// source same-slug doesn't snapshot the wrong page.
|
||||
createVersion(slug: string, sourceId?: string): Promise<PageVersion>;
|
||||
getVersions(slug: string): Promise<PageVersion[]>;
|
||||
revertToVersion(slug: string, versionId: number): Promise<void>;
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ export async function importFromContent(
|
||||
engine: BrainEngine,
|
||||
slug: string,
|
||||
content: string,
|
||||
opts: { noEmbed?: boolean } = {},
|
||||
opts: { noEmbed?: boolean; sourceId?: string } = {},
|
||||
): Promise<ImportResult> {
|
||||
// Reject oversized payloads before any parsing, chunking, or embedding happens.
|
||||
// Uses Buffer.byteLength to count UTF-8 bytes the same way disk size would,
|
||||
@@ -91,7 +91,19 @@ export async function importFromContent(
|
||||
tags: parsed.tags,
|
||||
};
|
||||
|
||||
const existing = await engine.getPage(slug);
|
||||
// v0.18.0 Step 5: idempotency check must be source-scoped. engine.getPage
|
||||
// matches by slug only, which collides with the composite UNIQUE
|
||||
// (source_id, slug) when the same slug exists across sources — without
|
||||
// this scope, a legitimate cross-source write of identical content gets
|
||||
// skipped because a different source's row hashed the same.
|
||||
const existing = opts.sourceId
|
||||
? (
|
||||
await engine.executeRaw<{ content_hash: string | null }>(
|
||||
`SELECT content_hash FROM pages WHERE source_id = $1 AND slug = $2`,
|
||||
[opts.sourceId, slug],
|
||||
)
|
||||
)[0] ?? null
|
||||
: await engine.getPage(slug);
|
||||
if (existing?.content_hash === hash) {
|
||||
return { slug, status: 'skipped', chunks: 0, parsedPage };
|
||||
}
|
||||
@@ -123,8 +135,12 @@ export async function importFromContent(
|
||||
}
|
||||
|
||||
// Transaction wraps all DB writes
|
||||
// v0.18.0 Step 5: thread opts.sourceId through all slug-keyed engine
|
||||
// methods so multi-source same-slug data doesn't trigger ambiguous
|
||||
// lookups (subquery uniqueness violations) on the slug→page_id path.
|
||||
const sid = opts.sourceId;
|
||||
await engine.transaction(async (tx) => {
|
||||
if (existing) await tx.createVersion(slug);
|
||||
if (existing) await tx.createVersion(slug, sid);
|
||||
|
||||
await tx.putPage(slug, {
|
||||
type: parsed.type,
|
||||
@@ -133,23 +149,24 @@ export async function importFromContent(
|
||||
timeline: parsed.timeline || '',
|
||||
frontmatter: parsed.frontmatter,
|
||||
content_hash: hash,
|
||||
source_id: sid,
|
||||
});
|
||||
|
||||
// Tag reconciliation: remove stale, add current
|
||||
const existingTags = await tx.getTags(slug);
|
||||
const existingTags = await tx.getTags(slug, sid);
|
||||
const newTags = new Set(parsed.tags);
|
||||
for (const old of existingTags) {
|
||||
if (!newTags.has(old)) await tx.removeTag(slug, old);
|
||||
if (!newTags.has(old)) await tx.removeTag(slug, old, sid);
|
||||
}
|
||||
for (const tag of parsed.tags) {
|
||||
await tx.addTag(slug, tag);
|
||||
await tx.addTag(slug, tag, sid);
|
||||
}
|
||||
|
||||
if (chunks.length > 0) {
|
||||
await tx.upsertChunks(slug, chunks);
|
||||
await tx.upsertChunks(slug, chunks, sid);
|
||||
} else {
|
||||
// Content is empty — delete stale chunks so they don't ghost in search results
|
||||
await tx.deleteChunks(slug);
|
||||
await tx.deleteChunks(slug, sid);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -170,7 +187,7 @@ export async function importFromFile(
|
||||
engine: BrainEngine,
|
||||
filePath: string,
|
||||
relativePath: string,
|
||||
opts: { noEmbed?: boolean } = {},
|
||||
opts: { noEmbed?: boolean; sourceId?: string } = {},
|
||||
): Promise<ImportResult> {
|
||||
// Defense-in-depth: reject symlinks before reading content.
|
||||
const lstat = lstatSync(filePath);
|
||||
|
||||
@@ -812,6 +812,150 @@ export const MIGRATIONS: Migration[] = [
|
||||
END $$;
|
||||
`,
|
||||
},
|
||||
{
|
||||
// v0.18.2.fork.1 — Step 5 follow-up. The v0.18.0 schema-embedded.ts
|
||||
// comment at the ingest_log block reads:
|
||||
//
|
||||
// "ingest_log.source_id is NOT added yet — lands in v17 alongside
|
||||
// the sync rewrite (Step 5)"
|
||||
//
|
||||
// Upstream's v17 (= MIGRATIONS version 17 here, name=
|
||||
// pages_source_id_composite_unique) only addressed pages.source_id.
|
||||
// The ingest_log half was deferred without ever shipping. This fork
|
||||
// closes the gap so per-source ingest history isn't permanently
|
||||
// blind: dashboards / dream-cron / audits that JOIN ingest_log to
|
||||
// sources can now scope cleanly without inferring source from
|
||||
// pages_updated payload contents.
|
||||
//
|
||||
// Backward-compat: column is NOT NULL DEFAULT 'default' so legacy
|
||||
// rows that never had a source_id get the same value the schema
|
||||
// assumes for pages.source_id (matching v0.18.0's seed).
|
||||
version: 25,
|
||||
name: 'ingest_log_source_id',
|
||||
sql: `
|
||||
ALTER TABLE ingest_log
|
||||
ADD COLUMN IF NOT EXISTS source_id TEXT NOT NULL DEFAULT 'default'
|
||||
REFERENCES sources(id) ON DELETE CASCADE;
|
||||
CREATE INDEX IF NOT EXISTS idx_ingest_log_source_id ON ingest_log(source_id);
|
||||
`,
|
||||
},
|
||||
{
|
||||
// v0.18.2.fork.1 — Patch #4 (Gap 0 D4 + D9): source taxonomy rewrite.
|
||||
// Renames gstack-brain (overly-broad slug-prefix `projects/, builder-journey`)
|
||||
// → gstack-meta (narrow slug-prefix `retros/, analytics/`) via the
|
||||
// create + migrate pages + drop pattern (id is immutable, can't UPDATE).
|
||||
// Also installs longer per-project slug-prefix rules so cross-prefix
|
||||
// routing dispatches `projects/triton6564685-stock-dashboard/...` to
|
||||
// stock-dashboard rather than the legacy `projects/` catch-all.
|
||||
//
|
||||
// Idempotent design (ON CONFLICT, IF EXISTS, UPDATE-where-source-matches):
|
||||
// running twice is a no-op. Composite UNIQUE (source_id, slug) on pages
|
||||
// prevents page duplication even if the migration is interrupted and
|
||||
// retried. ROLLBACK on any error within initSchema's transaction wrapper.
|
||||
//
|
||||
// Resulting taxonomy (per design plan § "Source taxonomy after v26"):
|
||||
// default — kept; ambiguous fallback only
|
||||
// default-ambiguous — created if absent; tombstone for slug-no-match writes
|
||||
// gstack-meta — created with rules [retros/, analytics/]
|
||||
// memory-dashboard — adds rule projects/triton6564685-memory-dashboard/
|
||||
// stock-dashboard — adds rule projects/triton6564685-stock-dashboard/
|
||||
// personal-knowledge — unchanged
|
||||
// subagent-writes — unchanged
|
||||
// claude-config — unchanged
|
||||
// gstack-brain — DELETED if no remaining pages reference it
|
||||
version: 26,
|
||||
name: 'source_taxonomy_rewrite',
|
||||
sql: `
|
||||
-- Step 0: heal pre-existing data corruption from sources.ts:211.
|
||||
-- gbrain CLI's runAdd writes config via $4::jsonb on a JSON.stringify()'d
|
||||
-- value through postgres-js unsafe(), which double-encodes the payload —
|
||||
-- the cast lands as a JSON STRING scalar instead of a JSON object.
|
||||
-- Verified on prod LXC 107 (2026-05-07): 6 of 7 sources had jsonb_typeof
|
||||
-- = 'string' (only the migration-inlined 'default' source was a true
|
||||
-- object). jsonb_set() in subsequent steps fails on scalars with
|
||||
-- "cannot set path in scalar" (SQLSTATE 22023). Unwrap is byte-equivalent
|
||||
-- (parses the JSON string back to its underlying object form) and
|
||||
-- idempotent on already-object configs (filtered by jsonb_typeof).
|
||||
UPDATE sources
|
||||
SET config = (config #>> '{}')::jsonb
|
||||
WHERE jsonb_typeof(config) = 'string';
|
||||
|
||||
-- Step 1: ensure tombstone source exists for slug-no-match fallbacks.
|
||||
INSERT INTO sources (id, name, config)
|
||||
VALUES (
|
||||
'default-ambiguous',
|
||||
'default-ambiguous',
|
||||
'{"federated": false, "description": "tombstone for pages whose slug does not match any source slug-prefix rule"}'::jsonb
|
||||
)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- Step 2: create gstack-meta with the narrowed slug-prefix rules.
|
||||
INSERT INTO sources (id, name, config)
|
||||
VALUES (
|
||||
'gstack-meta',
|
||||
'gstack-meta',
|
||||
'{"federated": true, "slug_prefix_rules": ["retros/", "analytics/"]}'::jsonb
|
||||
)
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
config = jsonb_set(
|
||||
COALESCE(sources.config, '{}'::jsonb),
|
||||
'{slug_prefix_rules}',
|
||||
'["retros/", "analytics/"]'::jsonb,
|
||||
true
|
||||
);
|
||||
|
||||
-- Step 3: install longer per-project slug-prefix rules. Existing per-project
|
||||
-- sources are common but not guaranteed; UPDATE-where-exists is idempotent
|
||||
-- and skips brains that never created them.
|
||||
UPDATE sources
|
||||
SET config = jsonb_set(
|
||||
COALESCE(config, '{}'::jsonb),
|
||||
'{slug_prefix_rules}',
|
||||
'["memory-dashboard/", "projects/triton6564685-memory-dashboard/"]'::jsonb,
|
||||
true
|
||||
)
|
||||
WHERE id = 'memory-dashboard';
|
||||
|
||||
UPDATE sources
|
||||
SET config = jsonb_set(
|
||||
COALESCE(config, '{}'::jsonb),
|
||||
'{slug_prefix_rules}',
|
||||
'["stock-dashboard/", "projects/triton6564685-stock-dashboard/"]'::jsonb,
|
||||
true
|
||||
)
|
||||
WHERE id = 'stock-dashboard';
|
||||
|
||||
-- Step 4: reclassify pages currently on gstack-brain. Order matters:
|
||||
-- per-project longer prefixes first, then gstack-meta, then tombstone.
|
||||
-- Each UPDATE is guarded by source_id = 'gstack-brain' so re-running
|
||||
-- after a successful migration is a no-op (no rows match).
|
||||
UPDATE pages
|
||||
SET source_id = 'stock-dashboard'
|
||||
WHERE source_id = 'gstack-brain'
|
||||
AND (slug LIKE 'stock-dashboard/%' OR slug LIKE 'projects/triton6564685-stock-dashboard/%');
|
||||
|
||||
UPDATE pages
|
||||
SET source_id = 'memory-dashboard'
|
||||
WHERE source_id = 'gstack-brain'
|
||||
AND (slug LIKE 'memory-dashboard/%' OR slug LIKE 'projects/triton6564685-memory-dashboard/%');
|
||||
|
||||
UPDATE pages
|
||||
SET source_id = 'gstack-meta'
|
||||
WHERE source_id = 'gstack-brain'
|
||||
AND (slug LIKE 'retros/%' OR slug LIKE 'analytics/%');
|
||||
|
||||
UPDATE pages
|
||||
SET source_id = 'default-ambiguous'
|
||||
WHERE source_id = 'gstack-brain';
|
||||
|
||||
-- Step 5: drop gstack-brain if empty. Conditional DELETE — if any page
|
||||
-- still references it (shouldn't happen after Step 4, but defensive),
|
||||
-- the FK constraint blocks cascade and the migration aborts cleanly.
|
||||
DELETE FROM sources
|
||||
WHERE id = 'gstack-brain'
|
||||
AND NOT EXISTS (SELECT 1 FROM pages WHERE source_id = 'gstack-brain');
|
||||
`,
|
||||
},
|
||||
];
|
||||
|
||||
export const LATEST_VERSION = MIGRATIONS.length > 0
|
||||
|
||||
@@ -10,6 +10,7 @@ import { clampSearchLimit } from './engine.ts';
|
||||
import type { GBrainConfig } from './config.ts';
|
||||
import type { PageType } from './types.ts';
|
||||
import { importFromContent } from './import-file.ts';
|
||||
import { checkAllowlist } from './allowlist-resolver.ts';
|
||||
import { hybridSearch } from './search/hybrid.ts';
|
||||
import { expandQuery } from './search/expansion.ts';
|
||||
import { dedupResults } from './search/dedup.ts';
|
||||
@@ -242,10 +243,11 @@ const get_page: Operation = {
|
||||
|
||||
const put_page: Operation = {
|
||||
name: 'put_page',
|
||||
description: 'Write/update a page (markdown with frontmatter). Chunks, embeds, reconciles tags, and (when auto_link/auto_timeline are enabled) extracts + reconciles graph links and timeline entries.',
|
||||
description: 'Write/update a page (markdown with frontmatter). Chunks, embeds, reconciles tags, and (when auto_link/auto_timeline are enabled) extracts + reconciles graph links and timeline entries. Multi-source: pass `source_id` to write to a specific source; otherwise resolves via env/dotfile/cwd-prefix/brain-default chain.',
|
||||
params: {
|
||||
slug: { type: 'string', required: true, description: 'Page slug' },
|
||||
content: { type: 'string', required: true, description: 'Full markdown content with YAML frontmatter' },
|
||||
source_id: { type: 'string', required: false, description: 'Optional target source. When omitted, resolves via the standard chain (env > dotfile > cwd-prefix > brain-default).' },
|
||||
},
|
||||
mutating: true,
|
||||
handler: async (ctx, p) => {
|
||||
@@ -271,12 +273,52 @@ const put_page: Operation = {
|
||||
}
|
||||
|
||||
if (ctx.dryRun) return { dry_run: true, action: 'put_page', slug: p.slug };
|
||||
|
||||
// v0.18.0 Step 5 + v0.18.2.fork.1 manifest: resolve target source.
|
||||
// Explicit param wins; otherwise walk the chain (env > dotfile >
|
||||
// cwd-prefix > manifest slug-prefix > brain-default > 'default'). Passing
|
||||
// `slug` activates priority 5 (manifest) so a Claude.ai put_page with no
|
||||
// source_id param routes via slug content (e.g. `memory-dashboard/foo` →
|
||||
// memory-dashboard source if that source declares `--slug-prefix`
|
||||
// 'memory-dashboard/'). resolveSourceId throws if explicit/env/dotfile
|
||||
// point to a non-existent source — fail-fast rather than writing wrong row.
|
||||
const { resolveSourceId } = await import('./source-resolver.ts');
|
||||
const sourceId = await resolveSourceId(
|
||||
ctx.engine,
|
||||
(p.source_id as string | undefined) ?? null,
|
||||
undefined,
|
||||
slug,
|
||||
);
|
||||
|
||||
// Patch #2 (Gap 4): allowlist gate for MCP writes. When the resolved
|
||||
// source has a `local_path`, treat that as the repo root and consult
|
||||
// its `.gbrain-allowlist`. Slugs lack file extensions so we synthesize
|
||||
// `<slug>.md` for matching — this is the same shape allowlists use
|
||||
// when filtering on-disk imports. No local_path → no enforcement
|
||||
// surface; lenient by definition (matches existing behavior).
|
||||
const sourceRows = await ctx.engine.executeRaw<{ local_path: string | null }>(
|
||||
`SELECT local_path FROM sources WHERE id = $1`,
|
||||
[sourceId],
|
||||
);
|
||||
const localPath = sourceRows[0]?.local_path ?? null;
|
||||
if (localPath) {
|
||||
const candidatePath = `${slug}.md`;
|
||||
const decision = checkAllowlist(localPath, candidatePath);
|
||||
if (!decision.allowed) {
|
||||
throw new OperationError(
|
||||
'permission_denied',
|
||||
`Slug "${slug}" not allowed by source "${sourceId}" allowlist (.gbrain-allowlist at ${localPath}). ` +
|
||||
`Add a matching rule or write under a permitted prefix.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Skip embedding when no OpenAI key is configured. importFromContent's existing
|
||||
// try/catch around embed only catches; without a key the OpenAI client would
|
||||
// attempt 5 retries with exponential backoff (up to ~2 minutes total) before
|
||||
// giving up. Detect early.
|
||||
const noEmbed = !process.env.OPENAI_API_KEY;
|
||||
const result = await importFromContent(ctx.engine, slug, p.content as string, { noEmbed });
|
||||
const result = await importFromContent(ctx.engine, slug, p.content as string, { noEmbed, sourceId });
|
||||
|
||||
// Auto-link post-hook: runs AFTER importFromContent (which is its own
|
||||
// transaction). Runs even on status='skipped' so reconciliation catches drift
|
||||
|
||||
@@ -129,25 +129,40 @@ export class PGLiteEngine implements BrainEngine {
|
||||
const hash = page.content_hash || contentHash(page);
|
||||
const frontmatter = page.frontmatter || {};
|
||||
|
||||
// v0.18.0 Step 2: source_id relies on the schema DEFAULT 'default' so
|
||||
// existing callers still target the default source without threading
|
||||
// a parameter. ON CONFLICT target becomes (source_id, slug) since the
|
||||
// global UNIQUE(slug) was dropped in migration v17. Step 5+ will
|
||||
// surface an explicit sourceId param on putPage for multi-source sync.
|
||||
const { rows } = await this.db.query(
|
||||
`INSERT INTO pages (slug, type, title, compiled_truth, timeline, frontmatter, content_hash, updated_at)
|
||||
VALUES ($1, $2, $3, $4, $5, $6::jsonb, $7, now())
|
||||
ON CONFLICT (source_id, slug) DO UPDATE SET
|
||||
type = EXCLUDED.type,
|
||||
title = EXCLUDED.title,
|
||||
compiled_truth = EXCLUDED.compiled_truth,
|
||||
timeline = EXCLUDED.timeline,
|
||||
frontmatter = EXCLUDED.frontmatter,
|
||||
content_hash = EXCLUDED.content_hash,
|
||||
updated_at = now()
|
||||
RETURNING id, slug, type, title, compiled_truth, timeline, frontmatter, content_hash, created_at, updated_at`,
|
||||
[slug, page.type, page.title, page.compiled_truth, page.timeline || '', JSON.stringify(frontmatter), hash]
|
||||
);
|
||||
// v0.18.0 Step 5: when source_id is provided, write to that source.
|
||||
// When omitted, falls back to schema DEFAULT 'default' (pre-Step-5
|
||||
// single-source behaviour). ON CONFLICT target is (source_id, slug)
|
||||
// since global UNIQUE(slug) was dropped in migration v17.
|
||||
const sourceId = page.source_id;
|
||||
const { rows } = sourceId
|
||||
? await this.db.query(
|
||||
`INSERT INTO pages (source_id, slug, type, title, compiled_truth, timeline, frontmatter, content_hash, updated_at)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb, $8, now())
|
||||
ON CONFLICT (source_id, slug) DO UPDATE SET
|
||||
type = EXCLUDED.type,
|
||||
title = EXCLUDED.title,
|
||||
compiled_truth = EXCLUDED.compiled_truth,
|
||||
timeline = EXCLUDED.timeline,
|
||||
frontmatter = EXCLUDED.frontmatter,
|
||||
content_hash = EXCLUDED.content_hash,
|
||||
updated_at = now()
|
||||
RETURNING id, slug, type, title, compiled_truth, timeline, frontmatter, content_hash, created_at, updated_at`,
|
||||
[sourceId, slug, page.type, page.title, page.compiled_truth, page.timeline || '', JSON.stringify(frontmatter), hash]
|
||||
)
|
||||
: await this.db.query(
|
||||
`INSERT INTO pages (slug, type, title, compiled_truth, timeline, frontmatter, content_hash, updated_at)
|
||||
VALUES ($1, $2, $3, $4, $5, $6::jsonb, $7, now())
|
||||
ON CONFLICT (source_id, slug) DO UPDATE SET
|
||||
type = EXCLUDED.type,
|
||||
title = EXCLUDED.title,
|
||||
compiled_truth = EXCLUDED.compiled_truth,
|
||||
timeline = EXCLUDED.timeline,
|
||||
frontmatter = EXCLUDED.frontmatter,
|
||||
content_hash = EXCLUDED.content_hash,
|
||||
updated_at = now()
|
||||
RETURNING id, slug, type, title, compiled_truth, timeline, frontmatter, content_hash, created_at, updated_at`,
|
||||
[slug, page.type, page.title, page.compiled_truth, page.timeline || '', JSON.stringify(frontmatter), hash]
|
||||
);
|
||||
return rowToPage(rows[0] as Record<string, unknown>);
|
||||
}
|
||||
|
||||
@@ -290,9 +305,11 @@ export class PGLiteEngine implements BrainEngine {
|
||||
}
|
||||
|
||||
// Chunks
|
||||
async upsertChunks(slug: string, chunks: ChunkInput[]): Promise<void> {
|
||||
// Get page_id
|
||||
const pageResult = await this.db.query('SELECT id FROM pages WHERE slug = $1', [slug]);
|
||||
async upsertChunks(slug: string, chunks: ChunkInput[], sourceId?: string): Promise<void> {
|
||||
// Get page_id (v0.18.0 Step 5: source-scoped lookup when sourceId provided)
|
||||
const pageResult = sourceId
|
||||
? await this.db.query('SELECT id FROM pages WHERE slug = $1 AND source_id = $2', [slug, sourceId])
|
||||
: await this.db.query('SELECT id FROM pages WHERE slug = $1', [slug]);
|
||||
if (pageResult.rows.length === 0) throw new Error(`Page not found: ${slug}`);
|
||||
const pageId = (pageResult.rows[0] as { id: number }).id;
|
||||
|
||||
@@ -353,12 +370,20 @@ export class PGLiteEngine implements BrainEngine {
|
||||
return (rows as Record<string, unknown>[]).map(r => rowToChunk(r));
|
||||
}
|
||||
|
||||
async deleteChunks(slug: string): Promise<void> {
|
||||
await this.db.query(
|
||||
`DELETE FROM content_chunks
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = $1)`,
|
||||
[slug]
|
||||
);
|
||||
async deleteChunks(slug: string, sourceId?: string): Promise<void> {
|
||||
if (sourceId) {
|
||||
await this.db.query(
|
||||
`DELETE FROM content_chunks
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = $1 AND source_id = $2)`,
|
||||
[slug, sourceId]
|
||||
);
|
||||
} else {
|
||||
await this.db.query(
|
||||
`DELETE FROM content_chunks
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = $1)`,
|
||||
[slug]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Links
|
||||
@@ -698,31 +723,58 @@ export class PGLiteEngine implements BrainEngine {
|
||||
}
|
||||
|
||||
// Tags
|
||||
async addTag(slug: string, tag: string): Promise<void> {
|
||||
await this.db.query(
|
||||
`INSERT INTO tags (page_id, tag)
|
||||
SELECT id, $2 FROM pages WHERE slug = $1
|
||||
ON CONFLICT (page_id, tag) DO NOTHING`,
|
||||
[slug, tag]
|
||||
);
|
||||
// v0.18.0 Step 5: optional sourceId scopes the slug→page_id lookup so
|
||||
// multi-source same-slug data doesn't trigger subquery uniqueness errors.
|
||||
async addTag(slug: string, tag: string, sourceId?: string): Promise<void> {
|
||||
if (sourceId) {
|
||||
await this.db.query(
|
||||
`INSERT INTO tags (page_id, tag)
|
||||
SELECT id, $2 FROM pages WHERE slug = $1 AND source_id = $3
|
||||
ON CONFLICT (page_id, tag) DO NOTHING`,
|
||||
[slug, tag, sourceId]
|
||||
);
|
||||
} else {
|
||||
await this.db.query(
|
||||
`INSERT INTO tags (page_id, tag)
|
||||
SELECT id, $2 FROM pages WHERE slug = $1
|
||||
ON CONFLICT (page_id, tag) DO NOTHING`,
|
||||
[slug, tag]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async removeTag(slug: string, tag: string): Promise<void> {
|
||||
await this.db.query(
|
||||
`DELETE FROM tags
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = $1)
|
||||
AND tag = $2`,
|
||||
[slug, tag]
|
||||
);
|
||||
async removeTag(slug: string, tag: string, sourceId?: string): Promise<void> {
|
||||
if (sourceId) {
|
||||
await this.db.query(
|
||||
`DELETE FROM tags
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = $1 AND source_id = $3)
|
||||
AND tag = $2`,
|
||||
[slug, tag, sourceId]
|
||||
);
|
||||
} else {
|
||||
await this.db.query(
|
||||
`DELETE FROM tags
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = $1)
|
||||
AND tag = $2`,
|
||||
[slug, tag]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async getTags(slug: string): Promise<string[]> {
|
||||
const { rows } = await this.db.query(
|
||||
`SELECT tag FROM tags
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = $1)
|
||||
ORDER BY tag`,
|
||||
[slug]
|
||||
);
|
||||
async getTags(slug: string, sourceId?: string): Promise<string[]> {
|
||||
const { rows } = sourceId
|
||||
? await this.db.query(
|
||||
`SELECT tag FROM tags
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = $1 AND source_id = $2)
|
||||
ORDER BY tag`,
|
||||
[slug, sourceId]
|
||||
)
|
||||
: await this.db.query(
|
||||
`SELECT tag FROM tags
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = $1)
|
||||
ORDER BY tag`,
|
||||
[slug]
|
||||
);
|
||||
return (rows as { tag: string }[]).map(r => r.tag);
|
||||
}
|
||||
|
||||
@@ -837,14 +889,23 @@ export class PGLiteEngine implements BrainEngine {
|
||||
}
|
||||
|
||||
// Versions
|
||||
async createVersion(slug: string): Promise<PageVersion> {
|
||||
const { rows } = await this.db.query(
|
||||
`INSERT INTO page_versions (page_id, compiled_truth, frontmatter)
|
||||
SELECT id, compiled_truth, frontmatter
|
||||
FROM pages WHERE slug = $1
|
||||
RETURNING *`,
|
||||
[slug]
|
||||
);
|
||||
// v0.18.0 Step 5: optional sourceId scopes the slug→page lookup.
|
||||
async createVersion(slug: string, sourceId?: string): Promise<PageVersion> {
|
||||
const { rows } = sourceId
|
||||
? await this.db.query(
|
||||
`INSERT INTO page_versions (page_id, compiled_truth, frontmatter)
|
||||
SELECT id, compiled_truth, frontmatter
|
||||
FROM pages WHERE slug = $1 AND source_id = $2
|
||||
RETURNING *`,
|
||||
[slug, sourceId]
|
||||
)
|
||||
: await this.db.query(
|
||||
`INSERT INTO page_versions (page_id, compiled_truth, frontmatter)
|
||||
SELECT id, compiled_truth, frontmatter
|
||||
FROM pages WHERE slug = $1
|
||||
RETURNING *`,
|
||||
[slug]
|
||||
);
|
||||
return rows[0] as unknown as PageVersion;
|
||||
}
|
||||
|
||||
@@ -994,11 +1055,20 @@ export class PGLiteEngine implements BrainEngine {
|
||||
|
||||
// Ingest log
|
||||
async logIngest(entry: IngestLogInput): Promise<void> {
|
||||
await this.db.query(
|
||||
`INSERT INTO ingest_log (source_type, source_ref, pages_updated, summary)
|
||||
VALUES ($1, $2, $3::jsonb, $4)`,
|
||||
[entry.source_type, entry.source_ref, JSON.stringify(entry.pages_updated), entry.summary]
|
||||
);
|
||||
// v0.18.2.fork.1: source_id explicit when provided, schema DEFAULT 'default' otherwise.
|
||||
if (entry.source_id) {
|
||||
await this.db.query(
|
||||
`INSERT INTO ingest_log (source_id, source_type, source_ref, pages_updated, summary)
|
||||
VALUES ($1, $2, $3, $4::jsonb, $5)`,
|
||||
[entry.source_id, entry.source_type, entry.source_ref, JSON.stringify(entry.pages_updated), entry.summary]
|
||||
);
|
||||
} else {
|
||||
await this.db.query(
|
||||
`INSERT INTO ingest_log (source_type, source_ref, pages_updated, summary)
|
||||
VALUES ($1, $2, $3::jsonb, $4)`,
|
||||
[entry.source_type, entry.source_ref, JSON.stringify(entry.pages_updated), entry.summary]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async getIngestLog(opts?: { limit?: number }): Promise<IngestLogEntry[]> {
|
||||
|
||||
@@ -134,23 +134,38 @@ export class PostgresEngine implements BrainEngine {
|
||||
const hash = page.content_hash || contentHash(page);
|
||||
const frontmatter = page.frontmatter || {};
|
||||
|
||||
// v0.18.0 Step 2: source_id relies on schema DEFAULT 'default'. ON
|
||||
// CONFLICT target becomes (source_id, slug) since global UNIQUE(slug)
|
||||
// was dropped in migration v17. See pglite-engine.ts for matching
|
||||
// notes; multi-source sync (Step 5) will surface an explicit sourceId.
|
||||
const rows = await sql`
|
||||
INSERT INTO pages (slug, type, title, compiled_truth, timeline, frontmatter, content_hash, updated_at)
|
||||
VALUES (${slug}, ${page.type}, ${page.title}, ${page.compiled_truth}, ${page.timeline || ''}, ${sql.json(frontmatter as Parameters<typeof sql.json>[0])}, ${hash}, now())
|
||||
ON CONFLICT (source_id, slug) DO UPDATE SET
|
||||
type = EXCLUDED.type,
|
||||
title = EXCLUDED.title,
|
||||
compiled_truth = EXCLUDED.compiled_truth,
|
||||
timeline = EXCLUDED.timeline,
|
||||
frontmatter = EXCLUDED.frontmatter,
|
||||
content_hash = EXCLUDED.content_hash,
|
||||
updated_at = now()
|
||||
RETURNING id, slug, type, title, compiled_truth, timeline, frontmatter, content_hash, created_at, updated_at
|
||||
`;
|
||||
// v0.18.0 Step 5: when source_id is provided, write to that source.
|
||||
// When omitted, falls back to schema DEFAULT 'default' (pre-Step-5
|
||||
// single-source behaviour). ON CONFLICT target is (source_id, slug)
|
||||
// since global UNIQUE(slug) was dropped in migration v17.
|
||||
const sourceId = page.source_id;
|
||||
const rows = sourceId
|
||||
? await sql`
|
||||
INSERT INTO pages (source_id, slug, type, title, compiled_truth, timeline, frontmatter, content_hash, updated_at)
|
||||
VALUES (${sourceId}, ${slug}, ${page.type}, ${page.title}, ${page.compiled_truth}, ${page.timeline || ''}, ${sql.json(frontmatter as Parameters<typeof sql.json>[0])}, ${hash}, now())
|
||||
ON CONFLICT (source_id, slug) DO UPDATE SET
|
||||
type = EXCLUDED.type,
|
||||
title = EXCLUDED.title,
|
||||
compiled_truth = EXCLUDED.compiled_truth,
|
||||
timeline = EXCLUDED.timeline,
|
||||
frontmatter = EXCLUDED.frontmatter,
|
||||
content_hash = EXCLUDED.content_hash,
|
||||
updated_at = now()
|
||||
RETURNING id, slug, type, title, compiled_truth, timeline, frontmatter, content_hash, created_at, updated_at
|
||||
`
|
||||
: await sql`
|
||||
INSERT INTO pages (slug, type, title, compiled_truth, timeline, frontmatter, content_hash, updated_at)
|
||||
VALUES (${slug}, ${page.type}, ${page.title}, ${page.compiled_truth}, ${page.timeline || ''}, ${sql.json(frontmatter as Parameters<typeof sql.json>[0])}, ${hash}, now())
|
||||
ON CONFLICT (source_id, slug) DO UPDATE SET
|
||||
type = EXCLUDED.type,
|
||||
title = EXCLUDED.title,
|
||||
compiled_truth = EXCLUDED.compiled_truth,
|
||||
timeline = EXCLUDED.timeline,
|
||||
frontmatter = EXCLUDED.frontmatter,
|
||||
content_hash = EXCLUDED.content_hash,
|
||||
updated_at = now()
|
||||
RETURNING id, slug, type, title, compiled_truth, timeline, frontmatter, content_hash, created_at, updated_at
|
||||
`;
|
||||
return rowToPage(rows[0]);
|
||||
}
|
||||
|
||||
@@ -319,11 +334,13 @@ export class PostgresEngine implements BrainEngine {
|
||||
}
|
||||
|
||||
// Chunks
|
||||
async upsertChunks(slug: string, chunks: ChunkInput[]): Promise<void> {
|
||||
async upsertChunks(slug: string, chunks: ChunkInput[], sourceId?: string): Promise<void> {
|
||||
const sql = this.sql;
|
||||
|
||||
// Get page_id
|
||||
const pages = await sql`SELECT id FROM pages WHERE slug = ${slug}`;
|
||||
// Get page_id (v0.18.0 Step 5: source-scoped lookup when sourceId provided)
|
||||
const pages = sourceId
|
||||
? await sql`SELECT id FROM pages WHERE slug = ${slug} AND source_id = ${sourceId}`
|
||||
: await sql`SELECT id FROM pages WHERE slug = ${slug}`;
|
||||
if (pages.length === 0) throw new Error(`Page not found: ${slug}`);
|
||||
const pageId = pages[0].id;
|
||||
|
||||
@@ -382,12 +399,19 @@ export class PostgresEngine implements BrainEngine {
|
||||
return rows.map((r) => rowToChunk(r as Record<string, unknown>));
|
||||
}
|
||||
|
||||
async deleteChunks(slug: string): Promise<void> {
|
||||
async deleteChunks(slug: string, sourceId?: string): Promise<void> {
|
||||
const sql = this.sql;
|
||||
await sql`
|
||||
DELETE FROM content_chunks
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = ${slug})
|
||||
`;
|
||||
if (sourceId) {
|
||||
await sql`
|
||||
DELETE FROM content_chunks
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = ${slug} AND source_id = ${sourceId})
|
||||
`;
|
||||
} else {
|
||||
await sql`
|
||||
DELETE FROM content_chunks
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = ${slug})
|
||||
`;
|
||||
}
|
||||
}
|
||||
|
||||
// Links
|
||||
@@ -743,11 +767,15 @@ export class PostgresEngine implements BrainEngine {
|
||||
}
|
||||
|
||||
// Tags
|
||||
async addTag(slug: string, tag: string): Promise<void> {
|
||||
// v0.18.0 Step 5: optional sourceId scopes the slug→page_id lookup so
|
||||
// multi-source same-slug data doesn't trigger subquery uniqueness errors.
|
||||
async addTag(slug: string, tag: string, sourceId?: string): Promise<void> {
|
||||
const sql = this.sql;
|
||||
// Verify page exists before attempting insert (ON CONFLICT DO NOTHING
|
||||
// swallows the "already tagged" case, but we still need to detect missing pages)
|
||||
const page = await sql`SELECT id FROM pages WHERE slug = ${slug}`;
|
||||
const page = sourceId
|
||||
? await sql`SELECT id FROM pages WHERE slug = ${slug} AND source_id = ${sourceId}`
|
||||
: await sql`SELECT id FROM pages WHERE slug = ${slug}`;
|
||||
if (page.length === 0) throw new Error(`addTag failed: page "${slug}" not found`);
|
||||
await sql`
|
||||
INSERT INTO tags (page_id, tag)
|
||||
@@ -756,22 +784,36 @@ export class PostgresEngine implements BrainEngine {
|
||||
`;
|
||||
}
|
||||
|
||||
async removeTag(slug: string, tag: string): Promise<void> {
|
||||
async removeTag(slug: string, tag: string, sourceId?: string): Promise<void> {
|
||||
const sql = this.sql;
|
||||
await sql`
|
||||
DELETE FROM tags
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = ${slug})
|
||||
AND tag = ${tag}
|
||||
`;
|
||||
if (sourceId) {
|
||||
await sql`
|
||||
DELETE FROM tags
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = ${slug} AND source_id = ${sourceId})
|
||||
AND tag = ${tag}
|
||||
`;
|
||||
} else {
|
||||
await sql`
|
||||
DELETE FROM tags
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = ${slug})
|
||||
AND tag = ${tag}
|
||||
`;
|
||||
}
|
||||
}
|
||||
|
||||
async getTags(slug: string): Promise<string[]> {
|
||||
async getTags(slug: string, sourceId?: string): Promise<string[]> {
|
||||
const sql = this.sql;
|
||||
const rows = await sql`
|
||||
SELECT tag FROM tags
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = ${slug})
|
||||
ORDER BY tag
|
||||
`;
|
||||
const rows = sourceId
|
||||
? await sql`
|
||||
SELECT tag FROM tags
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = ${slug} AND source_id = ${sourceId})
|
||||
ORDER BY tag
|
||||
`
|
||||
: await sql`
|
||||
SELECT tag FROM tags
|
||||
WHERE page_id = (SELECT id FROM pages WHERE slug = ${slug})
|
||||
ORDER BY tag
|
||||
`;
|
||||
return rows.map((r) => r.tag as string);
|
||||
}
|
||||
|
||||
@@ -886,14 +928,22 @@ export class PostgresEngine implements BrainEngine {
|
||||
}
|
||||
|
||||
// Versions
|
||||
async createVersion(slug: string): Promise<PageVersion> {
|
||||
// v0.18.0 Step 5: optional sourceId scopes the slug→page lookup.
|
||||
async createVersion(slug: string, sourceId?: string): Promise<PageVersion> {
|
||||
const sql = this.sql;
|
||||
const rows = await sql`
|
||||
INSERT INTO page_versions (page_id, compiled_truth, frontmatter)
|
||||
SELECT id, compiled_truth, frontmatter
|
||||
FROM pages WHERE slug = ${slug}
|
||||
RETURNING *
|
||||
`;
|
||||
const rows = sourceId
|
||||
? await sql`
|
||||
INSERT INTO page_versions (page_id, compiled_truth, frontmatter)
|
||||
SELECT id, compiled_truth, frontmatter
|
||||
FROM pages WHERE slug = ${slug} AND source_id = ${sourceId}
|
||||
RETURNING *
|
||||
`
|
||||
: await sql`
|
||||
INSERT INTO page_versions (page_id, compiled_truth, frontmatter)
|
||||
SELECT id, compiled_truth, frontmatter
|
||||
FROM pages WHERE slug = ${slug}
|
||||
RETURNING *
|
||||
`;
|
||||
if (rows.length === 0) throw new Error(`createVersion failed: page "${slug}" not found`);
|
||||
return rows[0] as unknown as PageVersion;
|
||||
}
|
||||
@@ -1044,10 +1094,18 @@ export class PostgresEngine implements BrainEngine {
|
||||
// Ingest log
|
||||
async logIngest(entry: IngestLogInput): Promise<void> {
|
||||
const sql = this.sql;
|
||||
await sql`
|
||||
INSERT INTO ingest_log (source_type, source_ref, pages_updated, summary)
|
||||
VALUES (${entry.source_type}, ${entry.source_ref}, ${sql.json(entry.pages_updated)}, ${entry.summary})
|
||||
`;
|
||||
// v0.18.2.fork.1: source_id explicit when provided, schema DEFAULT 'default' otherwise.
|
||||
if (entry.source_id) {
|
||||
await sql`
|
||||
INSERT INTO ingest_log (source_id, source_type, source_ref, pages_updated, summary)
|
||||
VALUES (${entry.source_id}, ${entry.source_type}, ${entry.source_ref}, ${sql.json(entry.pages_updated)}, ${entry.summary})
|
||||
`;
|
||||
} else {
|
||||
await sql`
|
||||
INSERT INTO ingest_log (source_type, source_ref, pages_updated, summary)
|
||||
VALUES (${entry.source_type}, ${entry.source_ref}, ${sql.json(entry.pages_updated)}, ${entry.summary})
|
||||
`;
|
||||
}
|
||||
}
|
||||
|
||||
async getIngestLog(opts?: { limit?: number }): Promise<IngestLogEntry[]> {
|
||||
|
||||
@@ -199,11 +199,23 @@ CREATE INDEX IF NOT EXISTS idx_versions_page ON page_versions(page_id);
|
||||
-- ============================================================
|
||||
-- ingest_log
|
||||
-- ============================================================
|
||||
-- NOTE (v0.18.0 Step 1): ingest_log.source_id is NOT added yet — lands
|
||||
-- in v17 alongside the sync rewrite (Step 5), which starts writing
|
||||
-- source-scoped entries.
|
||||
-- v0.18.2.fork.1 (migration v25): source_id added per fork's Step 5
|
||||
-- closure. Fresh installs get the column inline; existing brains pick
|
||||
-- it up via the v25 ALTER TABLE migration (NOT NULL DEFAULT 'default').
|
||||
-- v0.18.2.fork.1: source_id added to fresh-install schema; migration v25
|
||||
-- handles the ALTER TABLE for existing brains that pre-date this column.
|
||||
-- The idx_ingest_log_source_id index is owned ENTIRELY by migration v25
|
||||
-- (NOT here as a standalone CREATE INDEX), because schema-embedded reruns
|
||||
-- on every gbrain start and an unconditional CREATE INDEX referencing
|
||||
-- source_id would fail on pre-v25 brains where the table existed before
|
||||
-- the column. Migration v25 creates col + index in one transaction; for
|
||||
-- fresh installs the col is in the CREATE TABLE here and v25's
|
||||
-- ADD COLUMN IF NOT EXISTS is a no-op while CREATE INDEX IF NOT EXISTS
|
||||
-- still installs the index.
|
||||
CREATE TABLE IF NOT EXISTS ingest_log (
|
||||
id SERIAL PRIMARY KEY,
|
||||
source_id TEXT NOT NULL DEFAULT 'default'
|
||||
REFERENCES sources(id) ON DELETE CASCADE,
|
||||
source_type TEXT NOT NULL,
|
||||
source_ref TEXT NOT NULL,
|
||||
pages_updated JSONB NOT NULL DEFAULT '[]',
|
||||
|
||||
@@ -61,6 +61,14 @@ export async function resolveSourceId(
|
||||
engine: BrainEngine,
|
||||
explicit: string | null | undefined,
|
||||
cwd: string = process.cwd(),
|
||||
/**
|
||||
* v0.18.2.fork.1 — when provided, priority 5 (manifest slug-prefix match)
|
||||
* fires between cwd-prefix and brain-default. When undefined (CLI commands
|
||||
* without per-page context like plain `gbrain sync`), priority 5 is
|
||||
* skipped entirely. This keeps slug-aware put_page behavior aware of
|
||||
* manifest rules without forcing slug-less callers to invent a value.
|
||||
*/
|
||||
slug?: string,
|
||||
): Promise<string> {
|
||||
// 1. Explicit flag wins.
|
||||
if (explicit) {
|
||||
@@ -106,14 +114,24 @@ export async function resolveSourceId(
|
||||
}
|
||||
if (best) return best.id;
|
||||
|
||||
// 5. Brain-level default.
|
||||
// 5. v0.18.2.fork.1 — manifest slug-prefix match (skipped when caller
|
||||
// didn't pass a slug). Server-derived inference, ranked AFTER user-
|
||||
// explicit signals (1-4) and BEFORE the brain-level default (6) so a
|
||||
// bound dotfile / env / flag still wins, but a Claude.ai put_page with
|
||||
// no source param routes correctly based on slug content.
|
||||
if (slug) {
|
||||
const manifestMatch = await resolveBySlugPrefix(engine, slug);
|
||||
if (manifestMatch) return manifestMatch;
|
||||
}
|
||||
|
||||
// 6. Brain-level default.
|
||||
const globalDefault = await engine.getConfig('sources.default');
|
||||
if (globalDefault && SOURCE_ID_RE.test(globalDefault)) {
|
||||
await assertSourceExists(engine, globalDefault);
|
||||
return globalDefault;
|
||||
}
|
||||
|
||||
// 6. Fallback: the seeded 'default' source. Always exists post-migration
|
||||
// 7. Fallback: the seeded 'default' source. Always exists post-migration
|
||||
// v16 so this is a safe terminal.
|
||||
return 'default';
|
||||
}
|
||||
@@ -132,6 +150,103 @@ async function assertSourceExists(engine: BrainEngine, id: string): Promise<void
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* v0.18.2.fork.1 — manifest convention: longest-prefix slug match.
|
||||
*
|
||||
* Each source row carries `config.slug_prefix_rules: string[]` (jsonb).
|
||||
* Each rule is either a literal prefix ('memory-dashboard/') or a single-
|
||||
* level glob ending in `*` ('projects/*' — which is functionally identical
|
||||
* to literal prefix because slugs use '/' as a regular character, not a
|
||||
* filesystem path separator).
|
||||
*
|
||||
* Resolution returns the source id whose rule literally-prefixes the slug,
|
||||
* choosing the LONGEST literal match (glob `*` excluded from score). Ties
|
||||
* break alphabetically on source id. No match returns null — caller falls
|
||||
* through to the next priority in resolveSourceId's chain.
|
||||
*
|
||||
* Cross-process consistency: each gbrain process (CLI / MCP container / sync
|
||||
* cron) holds its own in-process cache with 60s TTL. After
|
||||
* `gbrain sources update --slug-prefix ...` runs from a CLI, the MCP
|
||||
* container's cache is stale for ≤60s. To force-refresh: restart the
|
||||
* container. Postgres LISTEN/NOTIFY would close the gap but is deferred
|
||||
* (see TODOS.md "gbrain manifest cache cross-process invalidation").
|
||||
*/
|
||||
// Lifetime of one cached rule snapshot, in milliseconds (60 seconds).
const SLUG_PREFIX_CACHE_TTL_MS = 60_000;
|
||||
// Shape of the module-level snapshot filled by loadSlugPrefixRules().
interface SlugPrefixCache {
|
||||
// One entry per source that declared at least one usable prefix.
rules: Array<{ id: string; prefixes: string[] }>;
|
||||
// Date.now()-style timestamp; the snapshot is reused only while this is still in the future.
expiresAt: number;
|
||||
}
|
||||
// null until the first load, and again after __invalidateSlugPrefixCache().
let slugPrefixCache: SlugPrefixCache | null = null;
|
||||
|
||||
/** Exposed for tests — clears the cache so timing-sensitive scenarios are deterministic. */
|
||||
export function __invalidateSlugPrefixCache(): void {
|
||||
// Dropping the snapshot makes the next loadSlugPrefixRules() call re-query the sources table.
slugPrefixCache = null;
|
||||
}
|
||||
|
||||
/**
 * Loads every source's `config.slug_prefix_rules` and returns them as
 * `{ id, prefixes }` entries, one per source that has at least one usable
 * prefix. Results are served from the module-level cache until it expires.
 *
 * @param engine Engine used to read the `sources` table.
 * @returns The (possibly cached) per-source prefix lists.
 */
async function loadSlugPrefixRules(
  engine: BrainEngine,
): Promise<Array<{ id: string; prefixes: string[] }>> {
  const fetchedAt = Date.now();
  if (slugPrefixCache !== null && fetchedAt < slugPrefixCache.expiresAt) {
    return slugPrefixCache.rules;
  }

  const sourceRows = await engine.executeRaw<{ id: string; config: string | Record<string, unknown> }>(
    `SELECT id, config FROM sources`,
  );

  const collected: Array<{ id: string; prefixes: string[] }> = [];
  for (const row of sourceRows) {
    // The config column may arrive as JSON text or as an already-decoded value.
    let config: unknown = row.config;
    if (typeof config === 'string') {
      try {
        config = JSON.parse(config);
      } catch {
        // Unparseable config: ignore this source instead of failing resolution.
        continue;
      }
    }
    if (typeof config !== 'object' || config === null) continue;

    const declared = (config as Record<string, unknown>).slug_prefix_rules;
    if (!Array.isArray(declared)) continue;

    const prefixes = declared
      .filter((entry): entry is string => typeof entry === 'string')
      // A single trailing `*` is cosmetic; matching is plain literal-prefix.
      .map((entry) => (entry.endsWith('*') ? entry.slice(0, -1) : entry))
      .filter((prefix) => prefix.length > 0);

    if (prefixes.length === 0) continue;
    collected.push({ id: row.id, prefixes });
  }

  slugPrefixCache = { rules: collected, expiresAt: fetchedAt + SLUG_PREFIX_CACHE_TTL_MS };
  return collected;
}
|
||||
|
||||
/**
|
||||
* Returns the source id whose slug_prefix_rules has the longest literal
|
||||
* prefix matching `slug`. Alphabetical tie-break on source id. Returns
|
||||
* null when no rule matches.
|
||||
*/
|
||||
export async function resolveBySlugPrefix(
  engine: BrainEngine,
  slug: string,
): Promise<string | null> {
  const sources = await loadSlugPrefixRules(engine);

  let winnerId: string | null = null;
  let winnerLength = -1;

  for (const { id, prefixes } of sources) {
    // Longest prefix of this source that the slug starts with (-1 = none).
    const longest = prefixes.reduce(
      (acc, prefix) => (slug.startsWith(prefix) && prefix.length > acc ? prefix.length : acc),
      -1,
    );
    if (longest < 0) continue;

    // A longer match always wins; on equal length the smaller source id wins.
    const isLonger = longest > winnerLength;
    const winsTie = longest === winnerLength && winnerId !== null && id < winnerId;
    if (isLonger || winsTie) {
      winnerId = id;
      winnerLength = longest;
    }
  }

  return winnerId;
}
|
||||
|
||||
/** Exposed for tests. */
|
||||
export const __testing = {
|
||||
readDotfileWalk,
|
||||
|
||||
@@ -21,6 +21,8 @@ export interface PageInput {
|
||||
timeline?: string;
|
||||
frontmatter?: Record<string, unknown>;
|
||||
content_hash?: string;
|
||||
/** v0.18.0 Step 5: target source. When omitted, INSERT uses schema DEFAULT 'default'. */
|
||||
source_id?: string;
|
||||
}
|
||||
|
||||
export interface PageFilters {
|
||||
@@ -244,6 +246,8 @@ export interface IngestLogInput {
|
||||
source_ref: string;
|
||||
pages_updated: string[];
|
||||
summary: string;
|
||||
/** v0.18.2.fork.1 — per-source ingest history. Falls back to schema DEFAULT 'default' when omitted. */
|
||||
source_id?: string;
|
||||
}
|
||||
|
||||
// Config
|
||||
|
||||
232
test/allowlist-resolver.test.ts
Normal file
232
test/allowlist-resolver.test.ts
Normal file
@@ -0,0 +1,232 @@
|
||||
/**
|
||||
* v0.18.2.fork.1 — Patch #2 (Gap 4) unit tests for `.gbrain-allowlist`
|
||||
* resolver. Pure FS + glob logic; no engine / DB needed.
|
||||
*
|
||||
* Coverage:
|
||||
* - Lenient default when no allowlist file present (T4 EC-9)
|
||||
* - Strict mode when allowlist file IS present (T4 base)
|
||||
* - Glob semantics: *, **, ? (T4 globs)
|
||||
* - Exclusion rules with `!` prefix, last-match-wins (T4 negation)
|
||||
* - Comments (`#`) and blank lines ignored (T4 parser)
|
||||
* - Malformed glob: log + skip rule, lenient EC-2 fallback (T4 EC-2)
|
||||
* - Cache: 60s TTL, invalidate-on-demand for tests (T4 cache)
|
||||
* - findAllowlistFile walks up to 50 ancestors (T4 walk)
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterAll } from 'bun:test';
|
||||
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
import {
|
||||
checkAllowlist,
|
||||
__invalidateAllowlistCache,
|
||||
__testing,
|
||||
} from '../src/core/allowlist-resolver.ts';
|
||||
|
||||
const fixtures: string[] = [];
|
||||
|
||||
/**
 * Creates a fresh temporary directory for one test and records it in
 * `fixtures` so the suite-level teardown can remove it.
 */
function mkFixture(): string {
  const tempRoot = mkdtempSync(join(tmpdir(), 'gbrain-allowlist-'));
  fixtures.push(tempRoot);
  return tempRoot;
}
|
||||
|
||||
// Each test starts with an empty allowlist cache.
beforeEach(() => {
  __invalidateAllowlistCache();
});

// Remove every temp directory handed out by mkFixture(); failures are ignored.
afterAll(() => {
  fixtures.forEach((dir) => {
    try {
      rmSync(dir, { recursive: true, force: true });
    } catch {
      /* best-effort */
    }
  });
});
|
||||
|
||||
describe('Lenient default — no allowlist file', () => {
  test('any path is allowed when no allowlist exists at root or ancestors', () => {
    // An empty fixture directory contains no .gbrain-allowlist file.
    const { allowed, reason } = checkAllowlist(mkFixture(), 'src/foo.md');
    expect(allowed).toBe(true);
    expect(reason).toBe('no-allowlist');
  });
});
|
||||
|
||||
describe('Strict mode — allowlist present', () => {
  // Creates a fixture whose .gbrain-allowlist has exactly `body` as content.
  const fixtureWith = (body: string): string => {
    const dir = mkFixture();
    writeFileSync(join(dir, '.gbrain-allowlist'), body);
    return dir;
  };

  test('allow rule matches → allowed=true with matched pattern', () => {
    const { allowed, reason, matchedPattern } = checkAllowlist(fixtureWith('docs/*.md\n'), 'docs/intro.md');
    expect(allowed).toBe(true);
    expect(reason).toBe('matched');
    expect(matchedPattern).toBe('docs/*.md');
  });

  test('no rule matches → allowed=false reason=no-match (default deny)', () => {
    const { allowed, reason } = checkAllowlist(fixtureWith('docs/*.md\n'), 'src/foo.md');
    expect(allowed).toBe(false);
    expect(reason).toBe('no-match');
  });

  test('comments and blank lines ignored', () => {
    const dir = fixtureWith('# header comment\n\n \ndocs/*.md\n# trailing\n');
    const inside = checkAllowlist(dir, 'docs/x.md');
    expect(inside.allowed).toBe(true);
    const outside = checkAllowlist(dir, 'src/x.md');
    expect(outside.allowed).toBe(false);
  });
});
|
||||
|
||||
describe('Glob semantics', () => {
  // One entry per test: the allowlist body and the [path, expected allowed] checks, in order.
  const scenarios: Array<{ title: string; allowlist: string; checks: Array<[string, boolean]> }> = [
    {
      title: '* matches single path segment, not slash',
      allowlist: 'docs/*.md\n',
      checks: [
        ['docs/a.md', true],
        ['docs/sub/a.md', false],
      ],
    },
    {
      title: '** matches across slashes',
      allowlist: 'docs/**/*.md\n',
      checks: [
        ['docs/a.md', true],
        ['docs/sub/a.md', true],
        ['docs/sub/deeper/a.md', true],
      ],
    },
    {
      title: '? matches exactly one non-slash char',
      allowlist: 'log-?.md\n',
      checks: [
        ['log-1.md', true],
        ['log-12.md', false],
        ['log-/.md', false],
      ],
    },
    {
      title: 'exact literal pattern matches anchored',
      allowlist: 'TODOS.md\n',
      checks: [
        ['TODOS.md', true],
        ['docs/TODOS.md', false],
        ['TODOS.md.bak', false],
      ],
    },
    {
      title: 'multi-segment globs (rsync-style)',
      allowlist: 'projects/*/learnings.jsonl\n',
      checks: [
        ['projects/foo/learnings.jsonl', true],
        ['projects/foo/bar/learnings.jsonl', false],
      ],
    },
  ];

  for (const { title, allowlist, checks } of scenarios) {
    test(title, () => {
      const root = mkFixture();
      writeFileSync(join(root, '.gbrain-allowlist'), allowlist);
      for (const [relativePath, expected] of checks) {
        expect(checkAllowlist(root, relativePath).allowed).toBe(expected);
      }
    });
  }
});
|
||||
|
||||
describe('Negation (! prefix) — last-match-wins', () => {
  test('exclusion appearing after allow rule wins', () => {
    const root = mkFixture();
    const rules = 'docs/**/*.md\n!docs/secret.md\n';
    writeFileSync(join(root, '.gbrain-allowlist'), rules);

    const intro = checkAllowlist(root, 'docs/intro.md');
    expect(intro.allowed).toBe(true);
    const secret = checkAllowlist(root, 'docs/secret.md');
    expect(secret.allowed).toBe(false);

    // Query again to inspect why the path was denied.
    const { reason, matchedPattern } = checkAllowlist(root, 'docs/secret.md');
    expect(reason).toBe('excluded');
    expect(matchedPattern).toBe('!docs/secret.md');
  });

  test('exclusion before re-allow: re-allow wins (last-match)', () => {
    const root = mkFixture();
    const rules = '!docs/secret.md\ndocs/secret.md\n';
    writeFileSync(join(root, '.gbrain-allowlist'), rules);

    // The allow rule comes last, so it decides the outcome.
    const verdict = checkAllowlist(root, 'docs/secret.md');
    expect(verdict.allowed).toBe(true);
  });
});
|
||||
|
||||
describe('EC-2 — malformed glob', () => {
  // Writes `body` as the allowlist and asserts that docs/clean.md is still allowed.
  const expectCleanDocAllowed = (body: string): void => {
    const root = mkFixture();
    writeFileSync(join(root, '.gbrain-allowlist'), body);
    expect(checkAllowlist(root, 'docs/clean.md').allowed).toBe(true);
  };

  test('skipped with warn; remaining rules still applied', () => {
    // Resilience check: an unbalanced `[` in one rule must not crash the
    // loader or stop the following valid rule from being applied. Whether the
    // odd rule is dropped or turned into a harmless regex is not asserted here.
    expectCleanDocAllowed('docs/[unbalanced.md\ndocs/clean.md\n');
  });

  test('empty pattern after ! is ignored', () => {
    expectCleanDocAllowed('!\ndocs/clean.md\n');
  });
});
|
||||
|
||||
describe('findAllowlistFile walk-up', () => {
  // Creates <fixture>/<segments...> and returns both the fixture root and the nested directory.
  const nestedFixture = (...segments: string[]): { root: string; nested: string } => {
    const root = mkFixture();
    const nested = join(root, ...segments);
    mkdirSync(nested, { recursive: true });
    return { root, nested };
  };

  test('finds allowlist in ancestor directory, treating that as root', () => {
    const { root, nested } = nestedFixture('a', 'b', 'c');
    writeFileSync(join(root, '.gbrain-allowlist'), 'a/**/*.md\n');

    // The walk starts at `nested` and finds the file at `root`, so the path
    // passed in is relative to that ancestor, not to `nested`.
    const { allowed } = checkAllowlist(nested, 'a/b/c/x.md');
    expect(allowed).toBe(true);

    const located = __testing.findAllowlistFile(nested);
    expect(located).not.toBeNull();
    expect(located!.root).toBe(root);
  });

  test('no allowlist anywhere → returns null', () => {
    const { nested } = nestedFixture('a', 'b');
    const located = __testing.findAllowlistFile(nested);
    expect(located).toBeNull();
    const { allowed } = checkAllowlist(nested, 'anything.md');
    expect(allowed).toBe(true);
  });
});
|
||||
|
||||
describe('globToRegex — direct unit tests', () => {
  // Compiles `glob` once and checks each [input, expected] pair in order against that one regex.
  const expectMatches = (glob: string, samples: Array<[string, boolean]>): void => {
    const compiled = __testing.globToRegex(glob);
    for (const [input, expected] of samples) {
      expect(compiled.test(input)).toBe(expected);
    }
  };

  test('star matches non-slash any-length', () => {
    expectMatches('a/*.md', [
      ['a/foo.md', true],
      ['a/foo/bar.md', false],
      ['a/.md', true],
    ]);
  });

  test('double-star matches across slashes', () => {
    expectMatches('**/*.md', [
      ['foo.md', true],
      ['a/b/c/foo.md', true],
    ]);
  });

  test('regex meta in literal pattern is escaped', () => {
    expectMatches('a.b+c.md', [
      ['a.b+c.md', true],
      ['aXbXc.md', false],
    ]);
  });
});
|
||||
|
||||
describe('Real-world memory-dashboard allowlist parity', () => {
  test('TODOS.md / docs/*.md / docs/**/*.md pattern set', () => {
    const root = mkFixture();
    // Four allow rules, one per line, with a trailing newline.
    const allowlistBody = 'TODOS.md\nCLAUDE.md\ndocs/*.md\ndocs/**/*.md\n';
    writeFileSync(join(root, '.gbrain-allowlist'), allowlistBody);

    const expectations: Array<[string, boolean]> = [
      ['TODOS.md', true],
      ['docs/intro.md', true],
      ['docs/sub/deep.md', true],
      ['src/middleware.ts', false],
      ['random.md', false],
    ];
    for (const [relativePath, expected] of expectations) {
      expect(checkAllowlist(root, relativePath).allowed).toBe(expected);
    }
  });
});
|
||||
@@ -126,4 +126,61 @@ describeE2E('E2E: JSONB roundtrip — v0.12.1 reliability wave', () => {
|
||||
expect(source.match(bad)?.[0] ?? null).toBeNull();
|
||||
}
|
||||
});
|
||||
|
||||
// v0.18.2.fork.1: sources.ts triple INSERT/UPDATE missed in v0.12.1 wave.
|
||||
// Different fix variant — the unsafe()-API path uses `$N::jsonb` cast on a
|
||||
// JSON.stringify'd param (not template-tag `${..}::jsonb`). postgres-js's
|
||||
// unsafe() detects the cast and re-stringifies the param, landing as a
|
||||
// JSON STRING scalar (jsonb_typeof = 'string'). v26 migration's jsonb_set
|
||||
// then throws SQLSTATE 22023 "cannot set path in scalar".
|
||||
// Fix: `($N::text)::jsonb` double cast forces postgres-js to send param
|
||||
// verbatim as TEXT, then SQL re-parses to object at column boundary.
|
||||
// Verified empirically on D-LXC fixture 189 (2026-05-07).
|
||||
test('sources INSERT writes config as object, not double-encoded string', async () => {
  const sql = getConn();
  const { runAdd } = await import('../../src/commands/sources.ts') as any;
  const engine = getEngine();
  const testId = 'jsonb-sources-add-' + Math.floor(Math.random() * 1e6);
  try {
    await runAdd(engine, [testId, '--federated', '--slug-prefix', 'test-prefix/']);
    const [row] = await sql`
      SELECT jsonb_typeof(config) AS t,
             config -> 'federated' AS federated,
             config -> 'slug_prefix_rules' AS rules
      FROM sources WHERE id = ${testId}
    `;
    // 'object' proves the config was not stored as a JSON string scalar.
    expect(row.t).toBe('object');
    expect(row.federated).toBe(true);
    expect(row.rules).toEqual(['test-prefix/']);
  } finally {
    // Always remove the fixture row, even when runAdd or an assertion throws,
    // so a failing run does not leave it behind in the shared database.
    await sql`DELETE FROM sources WHERE id = ${testId}`;
  }
});
|
||||
|
||||
test('sources UPDATE (federate/unfederate) preserves config as object', async () => {
  const sql = getConn();
  // One dynamic import provides both commands.
  const { runAdd, runFederate } = await import('../../src/commands/sources.ts') as any;
  const engine = getEngine();
  const testId = 'jsonb-sources-update-' + Math.floor(Math.random() * 1e6);
  try {
    await runAdd(engine, [testId, '--federated']);
    // Toggle to isolated — exercises the runFederate UPDATE path. The guard
    // keeps the test passing on builds where runFederate is not exported.
    if (runFederate) {
      await runFederate(engine, [testId], false);
      const [row] = await sql`
        SELECT jsonb_typeof(config) AS t, config -> 'federated' AS federated
        FROM sources WHERE id = ${testId}
      `;
      expect(row.t).toBe('object');
      expect(row.federated).toBe(false);
    }
  } finally {
    // Always remove the fixture row, even when a step above throws.
    await sql`DELETE FROM sources WHERE id = ${testId}`;
  }
});
|
||||
|
||||
test('no $N::jsonb pattern (without ::text intermediate) remains in sources.ts', async () => {
  const sourcesFile = new URL('../../src/commands/sources.ts', import.meta.url);
  const contents = await Bun.file(sourcesFile).text();

  // Mask every accepted `($N::text)::jsonb` double cast first, so that any
  // `$N::jsonb` still present afterwards is a bare (unsafe) cast.
  const masked = contents.replace(/\(\$\d+::text\)::jsonb/g, '<SAFE_DOUBLE_CAST>');
  const offender = masked.match(/\$\d+::jsonb/);

  expect(offender?.[0] ?? null).toBeNull();
});
|
||||
});
|
||||
|
||||
98
test/ingest-log-source-id.test.ts
Normal file
98
test/ingest-log-source-id.test.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
/**
|
||||
* v0.18.2.fork.1 — migration v25 ingest_log.source_id.
|
||||
*
|
||||
* Closes the upstream Step 5 deferral noted at schema-embedded.ts:202-204:
|
||||
*
|
||||
* "ingest_log.source_id is NOT added yet — lands in v17 alongside the
|
||||
* sync rewrite (Step 5)"
|
||||
*
|
||||
* Verifies:
|
||||
* - migration v25 adds the column with NOT NULL DEFAULT 'default'
|
||||
* - existing rows backfill to 'default' (the schema seed exists)
|
||||
* - new rows can be written with explicit source_id
|
||||
* - logIngest signature accepts entry.source_id and threads it through
|
||||
* - omitting source_id falls back to schema DEFAULT 'default'
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
|
||||
|
||||
let engine: PGLiteEngine;
|
||||
|
||||
beforeAll(async () => {
|
||||
engine = new PGLiteEngine();
|
||||
await engine.connect({ type: 'pglite' } as never);
|
||||
await engine.initSchema();
|
||||
await engine.executeRaw(
|
||||
`INSERT INTO sources (id, name, config) VALUES
|
||||
('memory-dashboard', 'memory-dashboard', '{"federated": true}'::jsonb)
|
||||
ON CONFLICT (id) DO NOTHING`,
|
||||
);
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await engine.disconnect();
|
||||
});
|
||||
|
||||
describe('v25 — ingest_log.source_id schema', () => {
|
||||
test('source_id column exists with NOT NULL DEFAULT default', async () => {
|
||||
const rows = await engine.executeRaw<{ column_default: string | null; is_nullable: string }>(
|
||||
`SELECT column_default, is_nullable FROM information_schema.columns
|
||||
WHERE table_name = 'ingest_log' AND column_name = 'source_id'`,
|
||||
);
|
||||
expect(rows.length).toBe(1);
|
||||
expect(rows[0].is_nullable).toBe('NO');
|
||||
expect(rows[0].column_default).toContain('default');
|
||||
});
|
||||
|
||||
test('idx_ingest_log_source_id index exists', async () => {
|
||||
const rows = await engine.executeRaw<{ indexname: string }>(
|
||||
`SELECT indexname FROM pg_indexes WHERE indexname = 'idx_ingest_log_source_id'`,
|
||||
);
|
||||
expect(rows.length).toBe(1);
|
||||
});
|
||||
|
||||
test('FK to sources(id) is enforced (insert with bogus source rejected)', async () => {
|
||||
let threw = false;
|
||||
try {
|
||||
await engine.executeRaw(
|
||||
`INSERT INTO ingest_log (source_id, source_type, source_ref, summary)
|
||||
VALUES ('does-not-exist', 'directory', '/tmp', '')`,
|
||||
);
|
||||
} catch {
|
||||
threw = true;
|
||||
}
|
||||
expect(threw).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('v25 — logIngest write-through', () => {
|
||||
test('logIngest with source_id writes to that source', async () => {
|
||||
await engine.logIngest({
|
||||
source_type: 'directory',
|
||||
source_ref: '/tmp/md',
|
||||
pages_updated: ['a', 'b'],
|
||||
summary: 'test ingest md',
|
||||
source_id: 'memory-dashboard',
|
||||
});
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM ingest_log WHERE source_ref = '/tmp/md'`,
|
||||
);
|
||||
expect(rows.length).toBe(1);
|
||||
expect(rows[0].source_id).toBe('memory-dashboard');
|
||||
});
|
||||
|
||||
test('logIngest without source_id falls back to schema DEFAULT default', async () => {
|
||||
await engine.logIngest({
|
||||
source_type: 'directory',
|
||||
source_ref: '/tmp/legacy',
|
||||
pages_updated: [],
|
||||
summary: 'legacy single-source caller',
|
||||
});
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM ingest_log WHERE source_ref = '/tmp/legacy'`,
|
||||
);
|
||||
expect(rows.length).toBe(1);
|
||||
expect(rows[0].source_id).toBe('default');
|
||||
});
|
||||
});
|
||||
157
test/longest-prefix-match.test.ts
Normal file
157
test/longest-prefix-match.test.ts
Normal file
@@ -0,0 +1,157 @@
|
||||
/**
|
||||
* v0.18.2.fork.1 — manifest slug-prefix matching algorithm tests.
|
||||
*
|
||||
* Pure resolver tests (no put_page handler). Verifies:
|
||||
* - longest literal prefix wins over shorter overlapping rules
|
||||
* - tie-break on prefix length goes alphabetical on source id
|
||||
* - trailing single-level glob `*` is treated as literal prefix
|
||||
* (cosmetic only, scored without the `*`)
|
||||
* - empty rules / no rules / no match return null
|
||||
* - cache is hit within 60s TTL, refetched after invalidation
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'bun:test';
|
||||
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
|
||||
import {
|
||||
resolveBySlugPrefix,
|
||||
__invalidateSlugPrefixCache,
|
||||
} from '../src/core/source-resolver.ts';
|
||||
|
||||
let engine: PGLiteEngine;
|
||||
|
||||
beforeAll(async () => {
|
||||
engine = new PGLiteEngine();
|
||||
await engine.connect({ type: 'pglite' } as never);
|
||||
await engine.initSchema();
|
||||
|
||||
// Seed sources with overlapping prefix rules so all branch combinations
|
||||
// are exercised.
|
||||
await engine.executeRaw(
|
||||
`INSERT INTO sources (id, name, config) VALUES
|
||||
('alpha-design', 'alpha-design', '{"federated": true, "slug_prefix_rules": ["design/"]}'::jsonb),
|
||||
('beta-design', 'beta-design', '{"federated": true, "slug_prefix_rules": ["design/memory-dashboard/"]}'::jsonb),
|
||||
('multi-prefix', 'multi-prefix', '{"federated": true, "slug_prefix_rules": ["projects/", "builder-journey"]}'::jsonb),
|
||||
('glob-form', 'glob-form', '{"federated": true, "slug_prefix_rules": ["wedding-planning/*"]}'::jsonb),
|
||||
('no-rules', 'no-rules', '{"federated": true}'::jsonb),
|
||||
('empty-rules', 'empty-rules', '{"federated": true, "slug_prefix_rules": []}'::jsonb),
|
||||
('alpha-tie', 'alpha-tie', '{"federated": true, "slug_prefix_rules": ["tied/"]}'::jsonb),
|
||||
('zeta-tie', 'zeta-tie', '{"federated": true, "slug_prefix_rules": ["tied/"]}'::jsonb)
|
||||
ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`,
|
||||
);
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await engine.disconnect();
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
__invalidateSlugPrefixCache();
|
||||
});
|
||||
|
||||
describe('Longest-prefix selection', () => {
|
||||
test('longer rule wins over shorter overlapping rule', async () => {
|
||||
// 'design/' (alpha-design, score 7) vs 'design/memory-dashboard/' (beta-design, score 24)
|
||||
const result = await resolveBySlugPrefix(engine, 'design/memory-dashboard/notes/x');
|
||||
expect(result).toBe('beta-design');
|
||||
});
|
||||
|
||||
test('shorter prefix wins when longer rule does not match', async () => {
|
||||
// 'design/stock-dashboard/x' matches alpha-design only (beta requires 'design/memory-dashboard/')
|
||||
const result = await resolveBySlugPrefix(engine, 'design/stock-dashboard/x');
|
||||
expect(result).toBe('alpha-design');
|
||||
});
|
||||
|
||||
test('exact-prefix match (no trailing content) still routes', async () => {
|
||||
const result = await resolveBySlugPrefix(engine, 'design/');
|
||||
expect(result).toBe('alpha-design');
|
||||
});
|
||||
|
||||
test('non-prefixed slug returns null (no match)', async () => {
|
||||
const result = await resolveBySlugPrefix(engine, 'unrelated/random');
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe('Multi-prefix per source', () => {
|
||||
test('first prefix in rules array matches → routes to that source', async () => {
|
||||
const result = await resolveBySlugPrefix(engine, 'projects/foo/bar');
|
||||
expect(result).toBe('multi-prefix');
|
||||
});
|
||||
|
||||
test('second prefix in rules array matches → still routes to same source', async () => {
|
||||
const result = await resolveBySlugPrefix(engine, 'builder-journey');
|
||||
expect(result).toBe('multi-prefix');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Glob `*` form', () => {
|
||||
test('trailing `*` is normalized to literal prefix (same matching semantics)', async () => {
|
||||
// Rule was 'wedding-planning/*'; should match same as 'wedding-planning/'.
|
||||
const result = await resolveBySlugPrefix(engine, 'wedding-planning/budget');
|
||||
expect(result).toBe('glob-form');
|
||||
});
|
||||
|
||||
test('`*` rule with no competing source still wins for slugs under its prefix', async () => {
|
||||
// 'wedding-planning/*' normalizes to 'wedding-planning/' (score 17).
|
||||
// No other source has a longer match → glob-form wins.
|
||||
// NOTE: no seeded source declares a competing literal of the same length,
|
||||
// so the tie case against a literal rule is NOT exercised by this test.
|
||||
const result = await resolveBySlugPrefix(engine, 'wedding-planning/X');
|
||||
expect(result).toBe('glob-form');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Tie-break: alphabetical on source id', () => {
|
||||
test('two sources with identical prefix → alpha-tie wins (alphabetical)', async () => {
|
||||
const result = await resolveBySlugPrefix(engine, 'tied/some-page');
|
||||
expect(result).toBe('alpha-tie');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Sources without rules / empty rules', () => {
|
||||
test('source with no slug_prefix_rules key in config does not match anything', async () => {
|
||||
// 'no-rules' source exists but has no rules → can't claim any slug.
|
||||
const result = await resolveBySlugPrefix(engine, 'no-rules/x');
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test('source with empty rules array does not match anything', async () => {
|
||||
const result = await resolveBySlugPrefix(engine, 'empty-rules/x');
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe('Cache TTL behaviour', () => {
|
||||
test('cache hit on second call within TTL — DB content change is invisible', async () => {
|
||||
// First call populates cache.
|
||||
const r1 = await resolveBySlugPrefix(engine, 'design/x');
|
||||
expect(r1).toBe('alpha-design');
|
||||
|
||||
try {
|
||||
// Mutate sources directly without invalidating cache. The resolver should
|
||||
// still see the cached snapshot.
|
||||
await engine.executeRaw(
|
||||
`UPDATE sources SET config = '{"federated": true, "slug_prefix_rules": []}'::jsonb WHERE id = 'alpha-design'`,
|
||||
);
|
||||
const r2 = await resolveBySlugPrefix(engine, 'design/x');
|
||||
expect(r2).toBe('alpha-design');
|
||||
} finally {
|
||||
// Restore for next tests — in `finally` so a failed assertion above
|
||||
// cannot leave alpha-design without its rule and break later tests.
|
||||
await engine.executeRaw(
|
||||
`UPDATE sources SET config = '{"federated": true, "slug_prefix_rules": ["design/"]}'::jsonb WHERE id = 'alpha-design'`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test('explicit invalidation forces refetch', async () => {
|
||||
// Prime cache.
|
||||
await resolveBySlugPrefix(engine, 'design/x');
|
||||
try {
|
||||
// Mutate then invalidate.
|
||||
await engine.executeRaw(
|
||||
`UPDATE sources SET config = '{"federated": true, "slug_prefix_rules": []}'::jsonb WHERE id = 'alpha-design'`,
|
||||
);
|
||||
__invalidateSlugPrefixCache();
|
||||
const r = await resolveBySlugPrefix(engine, 'design/x');
|
||||
expect(r).toBeNull();
|
||||
} finally {
|
||||
// Restore — in `finally` so a failed assertion above cannot leave
|
||||
// alpha-design without its rule and break later tests.
|
||||
await engine.executeRaw(
|
||||
`UPDATE sources SET config = '{"federated": true, "slug_prefix_rules": ["design/"]}'::jsonb WHERE id = 'alpha-design'`,
|
||||
);
|
||||
}
|
||||
});
|
||||
});
|
||||
127
test/manifest-edge-cases.test.ts
Normal file
127
test/manifest-edge-cases.test.ts
Normal file
@@ -0,0 +1,127 @@
|
||||
/**
|
||||
* v0.18.2.fork.1 — manifest edge-cases (per /plan-eng-review Issue #9).
|
||||
*
|
||||
* Verifies the resolver gracefully handles cases that could only show up
|
||||
* once real production data shape diverges from the happy path:
|
||||
*
|
||||
* - Malformed jsonb in sources.config (manually edited, partial corruption)
|
||||
* → safe-skip the bad row, continue evaluating other sources
|
||||
* - slug_prefix_rules: 'not_an_array' (string instead of string[])
|
||||
* → safe-skip
|
||||
* - slug_prefix_rules contains a non-string entry (mixed array)
|
||||
* → skip the non-string entries, keep valid ones
|
||||
* - Concurrent put_page on same slug across two distinct sources
|
||||
* → both rows succeed (composite UNIQUE allows; no race on schema-level)
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'bun:test';
|
||||
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
|
||||
import {
|
||||
resolveBySlugPrefix,
|
||||
__invalidateSlugPrefixCache,
|
||||
} from '../src/core/source-resolver.ts';
|
||||
|
||||
let engine: PGLiteEngine;
|
||||
|
||||
beforeAll(async () => {
|
||||
engine = new PGLiteEngine();
|
||||
await engine.connect({ type: 'pglite' } as never);
|
||||
await engine.initSchema();
|
||||
|
||||
await engine.executeRaw(
|
||||
`INSERT INTO sources (id, name, config) VALUES
|
||||
('valid-source', 'valid-source', '{"federated": true, "slug_prefix_rules": ["valid/"]}'::jsonb),
|
||||
('side-a', 'side-a', '{"federated": true}'::jsonb),
|
||||
('side-b', 'side-b', '{"federated": true}'::jsonb)
|
||||
ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`,
|
||||
);
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await engine.disconnect();
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
__invalidateSlugPrefixCache();
|
||||
});
|
||||
|
||||
describe('Malformed jsonb safe-skip', () => {
|
||||
test('slug_prefix_rules = "not_an_array" (string) → resolver ignores that row, valid sources still match', async () => {
|
||||
// Manually corrupt one source's config without going through CLI.
|
||||
await engine.executeRaw(
|
||||
`INSERT INTO sources (id, name, config) VALUES
|
||||
('bad-string', 'bad-string', '{"slug_prefix_rules": "not_an_array"}'::jsonb)
|
||||
ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`,
|
||||
);
|
||||
__invalidateSlugPrefixCache();
|
||||
// Valid source should still match its own prefix.
|
||||
const r1 = await resolveBySlugPrefix(engine, 'valid/page');
|
||||
expect(r1).toBe('valid-source');
|
||||
// Bad source claims nothing — no slug routes there.
|
||||
const r2 = await resolveBySlugPrefix(engine, 'not-an-array/x');
|
||||
expect(r2).toBeNull();
|
||||
});
|
||||
|
||||
test('slug_prefix_rules contains mixed-type entries → string entries kept, non-strings skipped', async () => {
|
||||
await engine.executeRaw(
|
||||
`INSERT INTO sources (id, name, config) VALUES
|
||||
('mixed-types', 'mixed-types',
|
||||
'{"slug_prefix_rules": ["good-prefix/", 42, null, "another-good/"]}'::jsonb)
|
||||
ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`,
|
||||
);
|
||||
__invalidateSlugPrefixCache();
|
||||
const r1 = await resolveBySlugPrefix(engine, 'good-prefix/x');
|
||||
expect(r1).toBe('mixed-types');
|
||||
const r2 = await resolveBySlugPrefix(engine, 'another-good/y');
|
||||
expect(r2).toBe('mixed-types');
|
||||
});
|
||||
|
||||
test('config = null jsonb → safe-skip (NOT NULL constraint prevents in practice, but defensive)', async () => {
|
||||
// PGLite's NOT NULL on sources.config will reject the literal NULL,
|
||||
// so we test the edge by writing 'null' (jsonb null literal) which
|
||||
// is allowed.
|
||||
await engine.executeRaw(
|
||||
`INSERT INTO sources (id, name, config) VALUES
|
||||
('json-null', 'json-null', 'null'::jsonb)
|
||||
ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`,
|
||||
);
|
||||
__invalidateSlugPrefixCache();
|
||||
// Resolver should skip cleanly — no slug routes to json-null.
|
||||
const r = await resolveBySlugPrefix(engine, 'anything/x');
|
||||
expect(r).toBeNull();
|
||||
// And the valid source still works.
|
||||
const r2 = await resolveBySlugPrefix(engine, 'valid/page');
|
||||
expect(r2).toBe('valid-source');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Concurrent put_page same slug across sources', () => {
|
||||
test('same slug written to two different sources → both rows persist (composite UNIQUE)', async () => {
|
||||
// Run both writes "concurrently" via Promise.all. PGLite is
|
||||
// single-process so they serialize at the engine layer, but the SQL
|
||||
// semantics still validate: composite UNIQUE on (source_id, slug)
|
||||
// means both INSERTs fit without conflicting.
|
||||
await Promise.all([
|
||||
engine.putPage('shared-slug', {
|
||||
type: 'note',
|
||||
title: 'Side A',
|
||||
compiled_truth: 'A side',
|
||||
source_id: 'side-a',
|
||||
}),
|
||||
engine.putPage('shared-slug', {
|
||||
type: 'note',
|
||||
title: 'Side B',
|
||||
compiled_truth: 'B side',
|
||||
source_id: 'side-b',
|
||||
}),
|
||||
]);
|
||||
const rows = await engine.executeRaw<{ source_id: string; title: string }>(
|
||||
`SELECT source_id, title FROM pages WHERE slug = 'shared-slug' ORDER BY source_id`,
|
||||
);
|
||||
expect(rows.length).toBe(2);
|
||||
expect(rows[0].source_id).toBe('side-a');
|
||||
expect(rows[0].title).toBe('Side A');
|
||||
expect(rows[1].source_id).toBe('side-b');
|
||||
expect(rows[1].title).toBe('Side B');
|
||||
});
|
||||
});
|
||||
147
test/manifest-routing.test.ts
Normal file
147
test/manifest-routing.test.ts
Normal file
@@ -0,0 +1,147 @@
|
||||
/**
|
||||
* v0.18.2.fork.1 — manifest routing end-to-end via put_page handler.
|
||||
*
|
||||
* Verifies the full chain works through the operations layer:
|
||||
*
|
||||
* put_page(slug='memory-dashboard/foo', no source_id)
|
||||
* → resolveSourceId(engine, null, cwd, 'memory-dashboard/foo')
|
||||
* → manifest matches 'memory-dashboard/' prefix
|
||||
* → page row's source_id = 'memory-dashboard'
|
||||
*
|
||||
* put_page(slug='memory-dashboard/foo', source_id='stock-dashboard')
|
||||
* → explicit param wins, manifest skipped
|
||||
*
|
||||
* put_page(slug='random-content', no source_id, no manifest match)
|
||||
* → falls to brain-default (config 'sources.default')
|
||||
*
|
||||
* Subagent slug routing is also exercised: writes from a subagent context
|
||||
* use slug `wiki/agents/<id>/...`. When the manifest declares a rule for
|
||||
* `wiki/agents/`, those writes get carved into a separate source.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
|
||||
import { operationsByName, type OperationContext } from '../src/core/operations.ts';
|
||||
import { __invalidateSlugPrefixCache } from '../src/core/source-resolver.ts';
|
||||
|
||||
let engine: PGLiteEngine;
|
||||
|
||||
beforeAll(async () => {
|
||||
engine = new PGLiteEngine();
|
||||
await engine.connect({ type: 'pglite' } as never);
|
||||
await engine.initSchema();
|
||||
|
||||
await engine.executeRaw(
|
||||
`INSERT INTO sources (id, name, config) VALUES
|
||||
('memory-dashboard', 'memory-dashboard', '{"federated": true, "slug_prefix_rules": ["memory-dashboard/"]}'::jsonb),
|
||||
('stock-dashboard', 'stock-dashboard', '{"federated": true, "slug_prefix_rules": ["stock-dashboard/"]}'::jsonb),
|
||||
('subagent-writes', 'subagent-writes', '{"federated": true, "slug_prefix_rules": ["wiki/agents/"]}'::jsonb),
|
||||
('personal-knowledge', 'personal-knowledge', '{"federated": true}'::jsonb)
|
||||
ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`,
|
||||
);
|
||||
|
||||
// Brain-level fallback target — set so unmanifested writes have a clear home.
|
||||
await engine.setConfig('sources.default', 'personal-knowledge');
|
||||
|
||||
__invalidateSlugPrefixCache();
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await engine.disconnect();
|
||||
});
|
||||
|
||||
// Minimal OperationContext stub. Tests don't exercise config or logger
|
||||
// branches in put_page; both can be cheap stubs.
|
||||
const noopLogger = {
|
||||
info: () => {},
|
||||
warn: () => {},
|
||||
error: () => {},
|
||||
debug: () => {},
|
||||
};
|
||||
const stubConfig = {} as never; // GBrainConfig: tests don't read from it.
|
||||
|
||||
const baseCtx = (overrides: Partial<OperationContext> = {}): OperationContext => ({
|
||||
engine,
|
||||
config: stubConfig,
|
||||
logger: noopLogger,
|
||||
remote: false,
|
||||
dryRun: false,
|
||||
...overrides,
|
||||
});
|
||||
|
||||
const md = `---
|
||||
title: Test Page
|
||||
type: note
|
||||
---
|
||||
Some body content.
|
||||
`;
|
||||
|
||||
describe('Manifest routing — slug prefix → source', () => {
|
||||
test('slug=memory-dashboard/foo, no source_id → routes to memory-dashboard', async () => {
|
||||
await operationsByName.put_page.handler(baseCtx(), { slug: 'memory-dashboard/foo', content: md });
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM pages WHERE slug = 'memory-dashboard/foo'`,
|
||||
);
|
||||
expect(rows.length).toBe(1);
|
||||
expect(rows[0].source_id).toBe('memory-dashboard');
|
||||
});
|
||||
|
||||
test('slug=stock-dashboard/quote → routes to stock-dashboard', async () => {
|
||||
await operationsByName.put_page.handler(baseCtx(), { slug: 'stock-dashboard/quote', content: md });
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM pages WHERE slug = 'stock-dashboard/quote'`,
|
||||
);
|
||||
// Assert the row exists first so a missing write fails as a clear
|
||||
// expectation mismatch rather than a TypeError on rows[0].
|
||||
expect(rows.length).toBe(1);
|
||||
expect(rows[0].source_id).toBe('stock-dashboard');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Manifest routing — explicit source_id wins over manifest', () => {
|
||||
test('slug=memory-dashboard/x with source_id=stock-dashboard → stock-dashboard', async () => {
|
||||
await operationsByName.put_page.handler(
|
||||
baseCtx(),
|
||||
{ slug: 'memory-dashboard/manual-override', content: md, source_id: 'stock-dashboard' },
|
||||
);
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM pages WHERE slug = 'memory-dashboard/manual-override' AND source_id = 'stock-dashboard'`,
|
||||
);
|
||||
expect(rows.length).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Manifest routing — no match falls to brain-default', () => {
|
||||
test('slug=random-thought → personal-knowledge (brain-level default)', async () => {
|
||||
await operationsByName.put_page.handler(baseCtx(), { slug: 'random-thought', content: md });
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM pages WHERE slug = 'random-thought'`,
|
||||
);
|
||||
expect(rows[0].source_id).toBe('personal-knowledge');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Manifest routing — subagent slug carve-out (wiki/agents/)', () => {
|
||||
test('subagent put_page slug=wiki/agents/3/note → routes to subagent-writes via manifest', async () => {
|
||||
await operationsByName.put_page.handler(
|
||||
baseCtx({ viaSubagent: true, subagentId: 3 }),
|
||||
{ slug: 'wiki/agents/3/note', content: md },
|
||||
);
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM pages WHERE slug = 'wiki/agents/3/note'`,
|
||||
);
|
||||
expect(rows[0].source_id).toBe('subagent-writes');
|
||||
});
|
||||
|
||||
test('subagent escape attempt (slug not under wiki/agents/<id>) is rejected', async () => {
|
||||
let threw = false;
|
||||
try {
|
||||
await operationsByName.put_page.handler(
|
||||
baseCtx({ viaSubagent: true, subagentId: 3 }),
|
||||
{ slug: 'wiki/random/escape', content: md },
|
||||
);
|
||||
} catch (e) {
|
||||
threw = true;
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
expect(msg).toContain('wiki/agents/3/');
|
||||
}
|
||||
expect(threw).toBe(true);
|
||||
});
|
||||
});
|
||||
215
test/migration-v26.test.ts
Normal file
215
test/migration-v26.test.ts
Normal file
@@ -0,0 +1,215 @@
|
||||
/**
|
||||
* v0.18.2.fork.1 — migration v26 source taxonomy rewrite.
|
||||
*
|
||||
* Verifies:
|
||||
* - default-ambiguous source created if absent
|
||||
* - gstack-meta source created with rules [retros/, analytics/]
|
||||
* - gstack-meta UPSERT path: existing source gets rules updated
|
||||
* - memory-dashboard / stock-dashboard get the longer per-project prefixes
|
||||
* when they pre-exist; UPDATE no-ops on brains that never created them
|
||||
* - Pages on legacy gstack-brain reclassify correctly:
|
||||
* slug `projects/triton6564685-stock-dashboard/...` → stock-dashboard
|
||||
* slug `projects/triton6564685-memory-dashboard/...` → memory-dashboard
|
||||
* slug `retros/...` or `analytics/...` → gstack-meta
|
||||
* slug not matching any of the above → default-ambiguous (tombstone)
|
||||
* - gstack-brain source DELETED after all pages move out
|
||||
* - Idempotency: running the migration SQL again is a no-op (CR-6)
|
||||
*
|
||||
* The migration runs automatically on initSchema() (it's part of MIGRATIONS).
|
||||
* We re-execute the SQL string a second time to test idempotency, mimicking
|
||||
* an apply-migrations re-run after a partial failure.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
|
||||
import { MIGRATIONS } from '../src/core/migrate.ts';
|
||||
|
||||
const v26 = MIGRATIONS.find((m) => m.version === 26);
|
||||
if (!v26) throw new Error('migration v26 missing from MIGRATIONS array');
|
||||
const v26Sql = v26.sql;
|
||||
|
||||
let engine: PGLiteEngine;
|
||||
|
||||
beforeAll(async () => {
|
||||
engine = new PGLiteEngine();
|
||||
await engine.connect({ type: 'pglite' } as never);
|
||||
await engine.initSchema();
|
||||
|
||||
// Pre-v26 fixture: install legacy gstack-brain (overly broad) + per-project
|
||||
// sources WITHOUT the new longer rules + sample pages. Simulates a brain
|
||||
// mid-upgrade where v26 needs to do real work.
|
||||
// Note: initSchema already ran v26 once (creating default-ambiguous + gstack-meta).
|
||||
// Inserting gstack-brain back + writing pages to it lets us exercise the
|
||||
// re-run path that v26 is designed to be idempotent across.
|
||||
await engine.executeRaw(
|
||||
`INSERT INTO sources (id, name, config) VALUES
|
||||
('gstack-brain', 'gstack-brain', '{"federated": true, "slug_prefix_rules": ["projects/", "builder-journey"]}'::jsonb),
|
||||
('memory-dashboard', 'memory-dashboard', '{"federated": true, "slug_prefix_rules": ["memory-dashboard/"]}'::jsonb),
|
||||
('stock-dashboard', 'stock-dashboard', '{"federated": true, "slug_prefix_rules": ["stock-dashboard/"]}'::jsonb)
|
||||
ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`,
|
||||
);
|
||||
|
||||
await engine.executeRaw(
|
||||
`INSERT INTO pages (slug, source_id, type, title, compiled_truth, content_hash) VALUES
|
||||
('projects/triton6564685-stock-dashboard/checkpoints/foo', 'gstack-brain', 'note', 'foo', 'x', 'h1'),
|
||||
('projects/triton6564685-memory-dashboard/checkpoints/bar', 'gstack-brain', 'note', 'bar', 'x', 'h2'),
|
||||
('retros/2026-05-07-week-recap', 'gstack-brain', 'note', 'recap', 'x', 'h3'),
|
||||
('analytics/skill-usage', 'gstack-brain', 'note', 'usage', 'x', 'h4'),
|
||||
('builder-journey/q1', 'gstack-brain', 'note', 'q1', 'x', 'h5'),
|
||||
('projects/some-other-project/note', 'gstack-brain', 'note', 'other', 'x', 'h6')
|
||||
ON CONFLICT (source_id, slug) DO NOTHING`,
|
||||
);
|
||||
|
||||
// Re-execute v26 SQL to reclassify the gstack-brain pages we just added.
|
||||
// Use runMigration (db.exec) — PGLite's prepared-statement query() rejects
|
||||
// multi-statement SQL with "cannot insert multiple commands into a prepared
|
||||
// statement" (42601).
|
||||
await engine.runMigration(26, v26Sql);
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await engine.disconnect();
|
||||
});
|
||||
|
||||
describe('v26 — source rows', () => {
|
||||
test('default-ambiguous source exists', async () => {
|
||||
const rows = await engine.executeRaw<{ id: string }>(
|
||||
`SELECT id FROM sources WHERE id = 'default-ambiguous'`,
|
||||
);
|
||||
expect(rows.length).toBe(1);
|
||||
});
|
||||
|
||||
test('gstack-meta source exists with rules [retros/, analytics/]', async () => {
|
||||
const rows = await engine.executeRaw<{ config: string | Record<string, unknown> }>(
|
||||
`SELECT config FROM sources WHERE id = 'gstack-meta'`,
|
||||
);
|
||||
expect(rows.length).toBe(1);
|
||||
const cfg = typeof rows[0].config === 'string' ? JSON.parse(rows[0].config) : rows[0].config;
|
||||
expect(cfg.slug_prefix_rules).toEqual(['retros/', 'analytics/']);
|
||||
});
|
||||
|
||||
test('memory-dashboard rules now include projects/triton6564685-memory-dashboard/', async () => {
|
||||
const rows = await engine.executeRaw<{ config: string | Record<string, unknown> }>(
|
||||
`SELECT config FROM sources WHERE id = 'memory-dashboard'`,
|
||||
);
|
||||
const cfg = typeof rows[0].config === 'string' ? JSON.parse(rows[0].config) : rows[0].config;
|
||||
expect(cfg.slug_prefix_rules).toContain('projects/triton6564685-memory-dashboard/');
|
||||
expect(cfg.slug_prefix_rules).toContain('memory-dashboard/');
|
||||
});
|
||||
|
||||
test('stock-dashboard rules now include projects/triton6564685-stock-dashboard/', async () => {
|
||||
const rows = await engine.executeRaw<{ config: string | Record<string, unknown> }>(
|
||||
`SELECT config FROM sources WHERE id = 'stock-dashboard'`,
|
||||
);
|
||||
const cfg = typeof rows[0].config === 'string' ? JSON.parse(rows[0].config) : rows[0].config;
|
||||
expect(cfg.slug_prefix_rules).toContain('projects/triton6564685-stock-dashboard/');
|
||||
expect(cfg.slug_prefix_rules).toContain('stock-dashboard/');
|
||||
});
|
||||
|
||||
test('gstack-brain source DELETED (all pages moved out)', async () => {
|
||||
const rows = await engine.executeRaw<{ id: string }>(
|
||||
`SELECT id FROM sources WHERE id = 'gstack-brain'`,
|
||||
);
|
||||
expect(rows.length).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('v26 — page reclassification', () => {
|
||||
test('stock-dashboard project page → stock-dashboard source', async () => {
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM pages WHERE slug = 'projects/triton6564685-stock-dashboard/checkpoints/foo'`,
|
||||
);
|
||||
expect(rows.length).toBe(1);
|
||||
expect(rows[0].source_id).toBe('stock-dashboard');
|
||||
});
|
||||
|
||||
test('memory-dashboard project page → memory-dashboard source', async () => {
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM pages WHERE slug = 'projects/triton6564685-memory-dashboard/checkpoints/bar'`,
|
||||
);
|
||||
expect(rows[0].source_id).toBe('memory-dashboard');
|
||||
});
|
||||
|
||||
test('retros/* page → gstack-meta', async () => {
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM pages WHERE slug = 'retros/2026-05-07-week-recap'`,
|
||||
);
|
||||
expect(rows[0].source_id).toBe('gstack-meta');
|
||||
});
|
||||
|
||||
test('analytics/* page → gstack-meta', async () => {
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM pages WHERE slug = 'analytics/skill-usage'`,
|
||||
);
|
||||
expect(rows[0].source_id).toBe('gstack-meta');
|
||||
});
|
||||
|
||||
test('builder-journey/* page → default-ambiguous (no new rule covers it)', async () => {
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM pages WHERE slug = 'builder-journey/q1'`,
|
||||
);
|
||||
expect(rows[0].source_id).toBe('default-ambiguous');
|
||||
});
|
||||
|
||||
test('projects/some-other-project page → default-ambiguous (catch-all tombstone)', async () => {
|
||||
const rows = await engine.executeRaw<{ source_id: string }>(
|
||||
`SELECT source_id FROM pages WHERE slug = 'projects/some-other-project/note'`,
|
||||
);
|
||||
expect(rows[0].source_id).toBe('default-ambiguous');
|
||||
});
|
||||
});
|
||||
|
||||
describe('v26 — string-encoded config heal (regression)', () => {
  test('migration unwraps jsonb string scalar configs to objects before jsonb_set', async () => {
    // Background (prod LXC 107, 2026-05-07): the gbrain CLI INSERT at
    // sources.ts:211 sent JSON.stringify() output through $::jsonb, which
    // stores a JSON *string* scalar instead of an object. jsonb_set on a
    // scalar raises SQLSTATE 22023 'cannot set path in scalar', so v26 step 0
    // must unwrap such rows before the rest of the migration touches config.
    await engine.executeRaw(
      `INSERT INTO sources (id, name, config) VALUES
         ('regression-string-cfg', 'regression-string-cfg',
          '"{\\"federated\\":true,\\"slug_prefix_rules\\":[\\"regression/\\"]}"'::jsonb)
       ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`,
    );

    // Guard: the fixture row really is in the corrupted (string scalar) state.
    const [corrupted] = await engine.executeRaw<{ type: string }>(
      `SELECT jsonb_typeof(config) AS type FROM sources WHERE id = 'regression-string-cfg'`,
    );
    expect(corrupted.type).toBe('string');

    // Run v26 again: step 0 unwraps the scalar, the later steps then run cleanly.
    await engine.runMigration(26, v26Sql);

    const [healed] = await engine.executeRaw<{ type: string; rules: string[] | null }>(
      `SELECT jsonb_typeof(config) AS type, config->'slug_prefix_rules' AS rules FROM sources WHERE id = 'regression-string-cfg'`,
    );
    expect(healed.type).toBe('object');
    // The unwrapped object still carries the original rule list.
    expect(healed.rules).toEqual(['regression/']);
  });
});
|
||||
|
||||
describe('v26 — idempotency (CR-6)', () => {
  test('re-running migration is a no-op: source distribution unchanged', async () => {
    // Per-source page counts, ordered so two snapshots line up row-for-row.
    const snapshotDistribution = () =>
      engine.executeRaw<{ source_id: string; n: bigint }>(
        `SELECT source_id, COUNT(*)::bigint AS n FROM pages GROUP BY source_id ORDER BY source_id`,
      );

    const baseline = await snapshotDistribution();
    await engine.runMigration(26, v26Sql);
    const rerun = await snapshotDistribution();

    expect(rerun.length).toBe(baseline.length);
    baseline.forEach((expected, idx) => {
      expect(rerun[idx].source_id).toBe(expected.source_id);
      expect(rerun[idx].n).toBe(expected.n);
    });
  });

  test('running on a brain with no gstack-brain source does not fail', async () => {
    // gstack-brain has already been removed at this point. Every UPDATE/DELETE
    // in v26 is guarded by source_id = 'gstack-brain', i.e. an empty set, so
    // the migration should simply complete. Not throwing is the assertion.
    await engine.runMigration(26, v26Sql);
  });
});
|
||||
232
test/multi-source-write-path.test.ts
Normal file
232
test/multi-source-write-path.test.ts
Normal file
@@ -0,0 +1,232 @@
|
||||
/**
|
||||
* v0.18.0 Step 5 — multi-source write path tests.
|
||||
*
|
||||
* Verifies that source_id threads end-to-end through every write surface:
|
||||
*
|
||||
* PageInput.source_id → putPage() INSERT (engine direct)
|
||||
* importFromContent({sourceId}) → putPage() (parse + transaction)
|
||||
* importFromFile({sourceId}) → importFromContent
|
||||
* runImport({sourceId}) → importFile loop
|
||||
*
|
||||
* Both PGLite (this file) and Postgres (parity in test/e2e/mechanical.test.ts
|
||||
* when DATABASE_URL is set) must agree on the per-row source_id outcome.
|
||||
*
|
||||
* Step-2-through-Step-4 schema invariants (default seed, composite UNIQUE,
|
||||
* source_id col exists) are already covered in multi-source-integration.test.ts;
|
||||
* this file focuses purely on the WRITE-THROUGH semantics that Step 5 introduces.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
|
||||
import { importFromContent, importFromFile } from '../src/core/import-file.ts';
|
||||
|
||||
let engine: PGLiteEngine;
|
||||
let tmpRoot: string;
|
||||
|
||||
// Suite setup: in-memory PGLite brain, two extra named sources, one scratch dir.
beforeAll(async () => {
  const freshEngine = new PGLiteEngine();
  engine = freshEngine;
  await freshEngine.connect({ type: 'pglite' } as never);
  await freshEngine.initSchema();

  // The 'default' source row comes from migration v16. The named sources the
  // tests route writes to are inserted here so resolveSourceId / explicit
  // threading always have a valid FK target.
  const seedNamedSources = `INSERT INTO sources (id, name, config) VALUES
       ('memory-dashboard', 'memory-dashboard', '{"federated": true}'::jsonb),
       ('stock-dashboard', 'stock-dashboard', '{"federated": true}'::jsonb)
     ON CONFLICT (id) DO NOTHING`;
  await freshEngine.executeRaw(seedNamedSources);

  const scratchPrefix = join(tmpdir(), 'gbrain-step5-');
  tmpRoot = mkdtempSync(scratchPrefix);
});

// Suite teardown: close the engine, then drop the scratch directory.
afterAll(async () => {
  await engine.disconnect();
  rmSync(tmpRoot, { recursive: true, force: true });
});
|
||||
|
||||
describe('Step 5 — engine.putPage threading', () => {
  test('putPage with explicit source_id writes to that source', async () => {
    await engine.putPage('write-path/explicit-md', {
      type: 'note',
      title: 'Explicit',
      compiled_truth: 'goes to memory-dashboard',
      source_id: 'memory-dashboard',
    });

    const found = await engine.executeRaw<{ source_id: string }>(
      `SELECT source_id FROM pages WHERE slug = 'write-path/explicit-md'`,
    );
    expect(found.length).toBe(1);
    expect(found[0].source_id).toBe('memory-dashboard');
  });

  test('putPage without source_id falls back to schema DEFAULT default', async () => {
    // No source_id on the input: the row is expected to land in 'default'.
    await engine.putPage('write-path/implicit-md', {
      type: 'note',
      title: 'Implicit',
      compiled_truth: 'no source_id passed',
    });

    const found = await engine.executeRaw<{ source_id: string }>(
      `SELECT source_id FROM pages WHERE slug = 'write-path/implicit-md'`,
    );
    expect(found.length).toBe(1);
    expect(found[0].source_id).toBe('default');
  });

  test('putPage twice on same (source, slug) upserts in place', async () => {
    // Two sequential writes to the same (source, slug): the second must win.
    const revisions = [
      ['First', 'v1'],
      ['Second', 'v2'],
    ] as const;
    for (const [title, compiled_truth] of revisions) {
      await engine.putPage('write-path/upsert-key', {
        type: 'note',
        title,
        compiled_truth,
        source_id: 'memory-dashboard',
      });
    }

    const found = await engine.executeRaw<{ title: string; compiled_truth: string }>(
      `SELECT title, compiled_truth FROM pages
       WHERE source_id = 'memory-dashboard' AND slug = 'write-path/upsert-key'`,
    );
    expect(found.length).toBe(1);
    const [survivor] = found;
    expect(survivor.title).toBe('Second');
    expect(survivor.compiled_truth).toBe('v2');
  });

  test('putPage with same slug across two sources keeps both rows distinct', async () => {
    // Same slug, two different sources: both rows must coexist.
    const perSource = [
      { source_id: 'memory-dashboard', title: 'In MD', compiled_truth: 'memory-dashboard side' },
      { source_id: 'stock-dashboard', title: 'In SD', compiled_truth: 'stock-dashboard side' },
    ];
    for (const page of perSource) {
      await engine.putPage('write-path/same-slug', { type: 'note', ...page });
    }

    const found = await engine.executeRaw<{ source_id: string; title: string }>(
      `SELECT source_id, title FROM pages
       WHERE slug = 'write-path/same-slug'
       ORDER BY source_id`,
    );
    expect(found.length).toBe(2);
    const [first, second] = found;
    expect(first.source_id).toBe('memory-dashboard');
    expect(second.source_id).toBe('stock-dashboard');
  });
});
|
||||
|
||||
describe('Step 5 — importFromContent threading', () => {
  test('importFromContent({sourceId}) writes via the threaded source', async () => {
    // Markdown fixture: frontmatter, a heading, a blank line, one paragraph.
    const markdown = [
      '---',
      'title: From Content',
      'type: note',
      '---',
      '# From Content',
      '',
      'Hello world.',
      '',
    ].join('\n');

    const outcome = await importFromContent(engine, 'write-path/from-content', markdown, {
      noEmbed: true,
      sourceId: 'memory-dashboard',
    });
    expect(outcome.status).toBe('imported');

    const [page] = await engine.executeRaw<{ source_id: string }>(
      `SELECT source_id FROM pages WHERE slug = 'write-path/from-content'`,
    );
    expect(page.source_id).toBe('memory-dashboard');
  });

  test('importFromContent without sourceId opt → DEFAULT default', async () => {
    const markdown = [
      '---',
      'title: From Content Default',
      'type: note',
      '---',
      'Default-targeted body.',
      '',
    ].join('\n');

    // No sourceId option: the page is expected under the 'default' source.
    const outcome = await importFromContent(engine, 'write-path/from-content-default', markdown, {
      noEmbed: true,
    });
    expect(outcome.status).toBe('imported');

    const [page] = await engine.executeRaw<{ source_id: string }>(
      `SELECT source_id FROM pages WHERE slug = 'write-path/from-content-default'`,
    );
    expect(page.source_id).toBe('default');
  });
});
|
||||
|
||||
describe('Step 5 — importFromFile threading', () => {
  test('importFromFile({sourceId}) reads disk + writes to source', async () => {
    // Put a markdown file on disk under the suite's scratch root.
    const repoRoot = join(tmpRoot, 'repo-a');
    mkdirSync(repoRoot, { recursive: true });
    const onDisk = join(repoRoot, 'write-path-from-file.md');
    const body = [
      '---',
      'title: From File',
      'type: note',
      '---',
      'On-disk content routed to stock-dashboard.',
      '',
    ].join('\n');
    writeFileSync(onDisk, body);

    const outcome = await importFromFile(engine, onDisk, 'write-path/from-file', {
      noEmbed: true,
      sourceId: 'stock-dashboard',
    });
    expect(outcome.status).toBe('imported');

    const [page] = await engine.executeRaw<{ source_id: string }>(
      `SELECT source_id FROM pages WHERE slug = 'write-path/from-file'`,
    );
    expect(page.source_id).toBe('stock-dashboard');
  });
});
|
||||
|
||||
describe('Step 5 — content_hash idempotency unaffected by source_id', () => {
  test('rewriting identical content to same source returns skipped', async () => {
    const markdown = ['---', 'title: Idempotent', 'type: note', '---', 'Stable body.', ''].join('\n');
    const importOnce = () =>
      importFromContent(engine, 'write-path/idempotent', markdown, {
        noEmbed: true,
        sourceId: 'memory-dashboard',
      });

    const firstPass = await importOnce();
    expect(firstPass.status).toBe('imported');

    // Byte-identical content for the same (source, slug) must be skipped.
    const secondPass = await importOnce();
    expect(secondPass.status).toBe('skipped');
  });

  test('same slug in different source counts as a separate page (not skip)', async () => {
    const markdown = [
      '---',
      'title: Cross-Source Slug',
      'type: note',
      '---',
      'Same body, different source.',
      '',
    ].join('\n');
    const importInto = (sourceId: string) =>
      importFromContent(engine, 'write-path/cross-slug', markdown, { noEmbed: true, sourceId });

    const inMemoryDashboard = await importInto('memory-dashboard');
    expect(inMemoryDashboard.status).toBe('imported');

    // A different (source_id, slug) pair is a different row, so this has to
    // be a fresh import rather than a content-hash skip.
    const inStockDashboard = await importInto('stock-dashboard');
    expect(inStockDashboard.status).toBe('imported');
  });
});
|
||||
156
test/sources-update-slug-prefix.test.ts
Normal file
156
test/sources-update-slug-prefix.test.ts
Normal file
@@ -0,0 +1,156 @@
|
||||
/**
|
||||
* v0.18.2.fork.1 — `gbrain sources add --slug-prefix` + new `update` subcommand.
|
||||
*
|
||||
* Verifies:
|
||||
* - runAdd accepts --slug-prefix '<rule>,<rule>' and writes config.slug_prefix_rules
|
||||
* - runUpdate replaces config.slug_prefix_rules in-place
|
||||
* - runUpdate '' clears the rules
|
||||
* - runUpdate on missing source errors out (exit 3)
|
||||
* - prefix validator rejects: uppercase, mid-string '*',
|
||||
* multi-level glob '**', empty, whitespace, oversize
|
||||
* - validator accepts: literal prefix, underscores, trailing '*', '/'-separated paths
|
||||
* - runUpdate preserves other config keys (federated stays put)
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'bun:test';
|
||||
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
|
||||
import { runSources } from '../src/commands/sources.ts';
|
||||
|
||||
let engine: PGLiteEngine;
|
||||
|
||||
// One in-memory PGLite brain is shared by the whole suite.
beforeAll(async () => {
  const freshEngine = new PGLiteEngine();
  engine = freshEngine;
  await freshEngine.connect({ type: 'pglite' } as never);
  await freshEngine.initSchema();
});

afterAll(async () => {
  await engine.disconnect();
});

// Wipe every 'test-%' fixture source before each case so a source written by
// one test cannot influence the add/update behaviour observed by the next.
beforeEach(async () => {
  const purgeFixtures = `DELETE FROM sources WHERE id LIKE 'test-%'`;
  await engine.executeRaw(purgeFixtures);
});
|
||||
|
||||
/**
 * Fetch the `config` column of one `sources` row.
 *
 * @param id - Primary key of the source row to read.
 * @returns The config as an object; `{}` when no row matches. A config that
 *   comes back from the driver as a string is JSON-parsed first.
 */
async function readConfig(id: string): Promise<Record<string, unknown>> {
  const matches = await engine.executeRaw<{ config: string | Record<string, unknown> }>(
    `SELECT config FROM sources WHERE id = $1`,
    [id],
  );
  if (matches.length === 0) {
    return {};
  }
  const raw = matches[0].config;
  if (typeof raw === 'string') {
    return JSON.parse(raw);
  }
  return raw;
}
|
||||
|
||||
describe('runAdd --slug-prefix', () => {
  test('accepts comma-separated rules and persists to config.slug_prefix_rules', async () => {
    const argv = ['add', 'test-md', '--slug-prefix', 'memory-dashboard/,builder-journey'];
    await runSources(engine, argv);

    const { slug_prefix_rules: stored } = await readConfig('test-md');
    expect(stored).toEqual(['memory-dashboard/', 'builder-journey']);
  });

  test('accepts trailing-glob form', async () => {
    const argv = ['add', 'test-glob', '--slug-prefix', 'wedding-planning/*'];
    await runSources(engine, argv);

    const { slug_prefix_rules: stored } = await readConfig('test-glob');
    expect(stored).toEqual(['wedding-planning/*']);
  });

  test('add without --slug-prefix leaves config without the key', async () => {
    await runSources(engine, ['add', 'test-bare']);

    const { slug_prefix_rules: stored } = await readConfig('test-bare');
    expect(stored).toBeUndefined();
  });
});
|
||||
|
||||
describe('runUpdate --slug-prefix', () => {
  test('replaces rules in-place on existing source', async () => {
    await runSources(engine, ['add', 'test-update', '--slug-prefix', 'old-prefix/']);
    await runSources(engine, ['update', 'test-update', '--slug-prefix', 'new-a/,new-b/']);

    const stored = await readConfig('test-update');
    expect(stored.slug_prefix_rules).toEqual(['new-a/', 'new-b/']);
  });

  test("update --slug-prefix '' clears rules entirely", async () => {
    await runSources(engine, ['add', 'test-clear', '--slug-prefix', 'foo/']);
    // An empty value is the "clear" form: the key should be gone afterwards.
    await runSources(engine, ['update', 'test-clear', '--slug-prefix', '']);

    const stored = await readConfig('test-clear');
    expect(stored.slug_prefix_rules).toBeUndefined();
  });

  test('preserves other config keys (federated)', async () => {
    await runSources(engine, ['add', 'test-fed', '--federated', '--slug-prefix', 'a/']);
    await runSources(engine, ['update', 'test-fed', '--slug-prefix', 'b/']);

    // Only slug_prefix_rules may change; the federated flag must survive.
    const { federated, slug_prefix_rules: rules } = await readConfig('test-fed');
    expect(federated).toBe(true);
    expect(rules).toEqual(['b/']);
  });
});
|
||||
|
||||
describe('Prefix grammar validator (Issue #6 — fail-fast at write time)', () => {
  // The validator runs on both the runAdd and runUpdate write paths. These
  // cases drive it through runAdd, the canonical surface.

  /**
   * Assert that `sources add --slug-prefix <rule>` throws, and, when `hint`
   * is non-empty, that the error message contains it.
   */
  const expectReject = async (rule: string, hint: string) => {
    const sourceId = `test-reject-${Math.random().toString(36).slice(2, 8)}`;
    let failure: string | null = null;
    try {
      await runSources(engine, ['add', sourceId, '--slug-prefix', rule]);
    } catch (err) {
      failure = err instanceof Error ? err.message : String(err);
    }
    expect(failure !== null).toBe(true);
    if (hint) expect(failure ?? '').toContain(hint);
  };

  /** Assert that `rule` is accepted and stored verbatim as `expected`. */
  const expectAccept = async (sourceId: string, rule: string, expected: string[]) => {
    await runSources(engine, ['add', sourceId, '--slug-prefix', rule]);
    const stored = await readConfig(sourceId);
    expect(stored.slug_prefix_rules).toEqual(expected);
  };

  test('accept underscore (chezmoi-style prefixes like dot_claude/)', async () => {
    await expectAccept('test-accept-under', 'dot_claude/,foo_bar/', ['dot_claude/', 'foo_bar/']);
  });
  test('reject uppercase', async () => {
    await expectReject('MemoryDashboard/', 'Invalid slug-prefix');
  });
  test('reject mid-string glob', async () => {
    await expectReject('foo*bar/', "'*' may only appear as the final character");
  });
  test('reject multi-level glob', async () => {
    await expectReject('a/**', "Multi-level glob");
  });
  test('reject empty after split', async () => {
    // Uses a fixed source id rather than the random one from expectReject.
    let failure: string | null = null;
    try {
      await runSources(engine, ['add', 'test-reject-empty', '--slug-prefix', ',,,']);
    } catch (err) {
      failure = err instanceof Error ? err.message : String(err);
    }
    expect(failure !== null).toBe(true);
    expect(failure ?? '').toContain('--slug-prefix value is empty');
  });
  test('reject whitespace inside rule', async () => {
    await expectReject('foo bar/', 'Invalid slug-prefix');
  });
  test('reject oversized rule', async () => {
    await expectReject('a'.repeat(65) + '/', 'too long');
  });

  test('accept literal prefix', async () => {
    await expectAccept('test-accept-lit', 'memory-dashboard/', ['memory-dashboard/']);
  });
  test('accept trailing star', async () => {
    await expectAccept('test-accept-star', 'projects/*', ['projects/*']);
  });
  test('accept hyphen-segments and slashes', async () => {
    await expectAccept('test-accept-segments', 'design/memory-dashboard/internal/', [
      'design/memory-dashboard/internal/',
    ]);
  });
});
|
||||
179
test/sync-resolveSourceId-unconditional-regression.test.ts
Normal file
179
test/sync-resolveSourceId-unconditional-regression.test.ts
Normal file
@@ -0,0 +1,179 @@
|
||||
/**
|
||||
* v0.18.0 Step 5 — REGRESSION test for sync.ts:549 unconditional resolveSourceId.
|
||||
*
|
||||
* IRON-RULE regression coverage. Pre-Step-5, sync.ts only invoked
|
||||
* resolveSourceId when --source or GBRAIN_SOURCE was set. The dotfile and
|
||||
* cwd-prefix branches of resolveSourceId were therefore dead in practice
|
||||
* for `gbrain sync` (alive only for direct `gbrain put` and similar).
|
||||
*
|
||||
* Step 5 lifts that guard so dotfile + cwd-prefix fire for plain
|
||||
* `gbrain sync`. The risk: pre-v0.17 brains (no sources.default config,
|
||||
* no .gbrain-source dotfile, no flag, no env) MUST still flow through the
|
||||
* legacy global-config sync path with `sourceId = undefined`. If we naively
|
||||
* pass the resolver's literal 'default' fallback through, the per-source
|
||||
* anchor on the 'default' row gets read instead of the legacy
|
||||
* sync.repo_path/last_commit config — which is NULL for never-migrated
|
||||
* brains and breaks sync silently.
|
||||
*
|
||||
* The safety net in sync.ts:
|
||||
*
|
||||
* let sourceId = await resolveSourceId(engine, explicitSource);
|
||||
* if (!explicitSource && !process.env.GBRAIN_SOURCE && sourceId === 'default') {
|
||||
* sourceId = undefined;
|
||||
* }
|
||||
*
|
||||
* This file verifies (a) the resolver chain returns the expected id under
|
||||
* each input scenario, and (b) the safety-net rule preserves backward compat.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'bun:test';
|
||||
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
|
||||
import { resolveSourceId } from '../src/core/source-resolver.ts';
|
||||
|
||||
let engine: PGLiteEngine;
|
||||
let tmpRoot: string;
|
||||
let originalEnv: string | undefined;
|
||||
|
||||
// Suite setup. GBRAIN_SOURCE is snapshotted exactly once, here, before any
// test has had a chance to clear it; snapshotting in beforeEach would
// overwrite the saved value with `undefined` from the second test onward and
// the caller's environment would never be restored.
beforeAll(async () => {
  originalEnv = process.env.GBRAIN_SOURCE;

  engine = new PGLiteEngine();
  await engine.connect({ type: 'pglite' } as never);
  await engine.initSchema();

  tmpRoot = mkdtempSync(join(tmpdir(), 'gbrain-step5-regr-'));

  // Register two sources with concrete local_paths so cwd-prefix matches are
  // testable. memory-dashboard owns ${tmpRoot}/proj-mem; stock-dashboard owns
  // ${tmpRoot}/proj-stock.
  mkdirSync(join(tmpRoot, 'proj-mem'), { recursive: true });
  mkdirSync(join(tmpRoot, 'proj-stock'), { recursive: true });

  await engine.executeRaw(
    `INSERT INTO sources (id, name, local_path, config) VALUES
       ('memory-dashboard', 'memory-dashboard', $1, '{"federated": true}'::jsonb),
       ('stock-dashboard', 'stock-dashboard', $2, '{"federated": true}'::jsonb)
     ON CONFLICT (id) DO UPDATE SET local_path = EXCLUDED.local_path`,
    [join(tmpRoot, 'proj-mem'), join(tmpRoot, 'proj-stock')],
  );
});

afterAll(async () => {
  await engine.disconnect();
  rmSync(tmpRoot, { recursive: true, force: true });
});

// Every test starts with GBRAIN_SOURCE unset; tests that need it set it themselves.
beforeEach(() => {
  delete process.env.GBRAIN_SOURCE;
});

// Put the environment back exactly as the suite found it: restore the saved
// value, or make sure the variable is absent if it was absent to begin with
// (a test may have left it set).
afterAll(() => {
  if (originalEnv !== undefined) {
    process.env.GBRAIN_SOURCE = originalEnv;
  } else {
    delete process.env.GBRAIN_SOURCE;
  }
});
|
||||
|
||||
describe('Resolver chain — unconditional invocation outcomes', () => {
  test('pre-v0.17 brain shape: no flag, no env, no dotfile, no cwd-prefix → returns literal default', async () => {
    // A fresh temp dir: outside every registered local_path, no dotfile in it.
    const outsideAnySource = mkdtempSync(join(tmpdir(), 'gbrain-isolated-'));
    try {
      const resolved = await resolveSourceId(engine, null, outsideAnySource);
      expect(resolved).toBe('default');
    } finally {
      rmSync(outsideAnySource, { recursive: true, force: true });
    }
  });

  test('cwd inside registered source local_path → resolves to that source', async () => {
    const nested = join(tmpRoot, 'proj-mem', 'subdir');
    mkdirSync(nested, { recursive: true });

    const resolved = await resolveSourceId(engine, null, nested);
    expect(resolved).toBe('memory-dashboard');
  });

  test('.gbrain-source dotfile pinned to source → resolves to that source even outside local_path', async () => {
    const pinnedDir = mkdtempSync(join(tmpdir(), 'gbrain-dotfile-'));
    try {
      const dotfile = join(pinnedDir, '.gbrain-source');
      writeFileSync(dotfile, 'stock-dashboard\n');

      const resolved = await resolveSourceId(engine, null, pinnedDir);
      expect(resolved).toBe('stock-dashboard');
    } finally {
      rmSync(pinnedDir, { recursive: true, force: true });
    }
  });

  test('explicit flag wins over cwd-prefix that would have matched', async () => {
    // cwd sits in memory-dashboard's tree, but the explicit id names stock-dashboard.
    const memDashboardDir = join(tmpRoot, 'proj-mem');
    const resolved = await resolveSourceId(engine, 'stock-dashboard', memDashboardDir);
    expect(resolved).toBe('stock-dashboard');
  });

  test('GBRAIN_SOURCE env var wins over cwd-prefix that would have matched', async () => {
    process.env.GBRAIN_SOURCE = 'stock-dashboard';
    try {
      const memDashboardDir = join(tmpRoot, 'proj-mem');
      const resolved = await resolveSourceId(engine, null, memDashboardDir);
      expect(resolved).toBe('stock-dashboard');
    } finally {
      delete process.env.GBRAIN_SOURCE;
    }
  });
});
|
||||
|
||||
describe('sync.ts safety net — drop literal default to undefined when no signal', () => {
  /**
   * Reproduces the sync.ts:549-560 logic so the safety-net invariant is locked
   * into a unit test. If the "drop to undefined" branch is removed in a future
   * refactor, these tests fail immediately instead of pre-v0.17 sync breaking
   * silently.
   *
   * GBRAIN_SOURCE is set (or cleared) only for the duration of the call and is
   * put back in `finally`, so one test's env value cannot leak into another.
   *
   * @param explicitSource - Value that would come from `--source`, or null.
   * @param envSource - Value to expose as GBRAIN_SOURCE, or null for unset.
   * @param cwd - Directory handed to resolveSourceId as the working directory.
   * @returns The resolved source id, or undefined when the only answer was the
   *   literal 'default' fallback with no explicit/env signal present.
   */
  async function syncResolveCwd(
    explicitSource: string | null,
    envSource: string | null,
    cwd: string,
  ): Promise<string | undefined> {
    const priorEnv = process.env.GBRAIN_SOURCE;
    if (envSource !== null) process.env.GBRAIN_SOURCE = envSource;
    else delete process.env.GBRAIN_SOURCE;

    try {
      let sourceId: string | undefined = await resolveSourceId(engine, explicitSource, cwd);
      if (!explicitSource && !process.env.GBRAIN_SOURCE && sourceId === 'default') {
        sourceId = undefined;
      }
      return sourceId;
    } finally {
      if (priorEnv === undefined) delete process.env.GBRAIN_SOURCE;
      else process.env.GBRAIN_SOURCE = priorEnv;
    }
  }

  /** Run `body` with a fresh temp directory that is removed afterwards, pass or fail. */
  async function withIsolatedCwd(prefix: string, body: (cwd: string) => Promise<void>): Promise<void> {
    const isolatedCwd = mkdtempSync(join(tmpdir(), prefix));
    try {
      await body(isolatedCwd);
    } finally {
      rmSync(isolatedCwd, { recursive: true, force: true });
    }
  }

  test('REGRESSION: pre-v0.17 brain pattern (no signal, no match) → sourceId = undefined', async () => {
    await withIsolatedCwd('gbrain-isolated-2-', async (cwd) => {
      const result = await syncResolveCwd(null, null, cwd);
      expect(result).toBeUndefined();
    });
  });

  test('cwd-prefix match returns named source (not undefined)', async () => {
    const result = await syncResolveCwd(null, null, join(tmpRoot, 'proj-mem'));
    expect(result).toBe('memory-dashboard');
  });

  test('explicit --source default still returns default (signal present, do not drop)', async () => {
    await withIsolatedCwd('gbrain-isolated-3-', async (cwd) => {
      const result = await syncResolveCwd('default', null, cwd);
      expect(result).toBe('default');
    });
  });

  test('GBRAIN_SOURCE=default still returns default (signal present, do not drop)', async () => {
    await withIsolatedCwd('gbrain-isolated-4-', async (cwd) => {
      const result = await syncResolveCwd(null, 'default', cwd);
      expect(result).toBe('default');
    });
  });
});
|
||||
157
test/sync-up-to-date-stamping.test.ts
Normal file
157
test/sync-up-to-date-stamping.test.ts
Normal file
@@ -0,0 +1,157 @@
|
||||
/**
|
||||
* v0.18.2.fork.1 — sync.ts up_to_date path advances last_sync_at.
|
||||
*
|
||||
* Pre-fix: when `lastCommit === headCommit` (no new commits since last sync),
|
||||
* performSync early-returned without touching the source row. Quiet sources
|
||||
* (read-mostly repos like claude-config / personal-knowledge / subagent-writes)
|
||||
* kept stale `last_sync_at` forever; drift monitor (gbrain-projects-drift.sh)
|
||||
* false-flagged them as "stale (Nmin ago, threshold 60min)" even though sync
|
||||
* cron was firing every 10 min and pulling correctly.
|
||||
*
|
||||
* Fix: advance last_sync_at on up_to_date path so drift's contract holds:
|
||||
* "is the sync cron alive?" (real behavior), not "did the remote add commits?"
|
||||
* (red-herring proxy).
|
||||
*
|
||||
* Surfaced 2026-05-07 PW 1 part 2 prod deploy on LXC 107 — first drift tick
|
||||
* post-deploy reported stock-dashboard "stale 6197min ago" 30 seconds after
|
||||
* a successful sync tick.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
import { execFileSync } from 'child_process';
|
||||
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
|
||||
import { performSync } from '../src/commands/sync.ts';
|
||||
|
||||
let engine: PGLiteEngine;
|
||||
const cleanupDirs: string[] = [];
|
||||
|
||||
/**
 * Run `git -C <cwd> <args...>` synchronously.
 *
 * @param cwd - Directory passed to git's `-C` option.
 * @param args - Remaining git arguments, passed through unchanged.
 * @returns The command's stdout decoded as UTF-8 with surrounding whitespace trimmed.
 */
function git(cwd: string, ...args: string[]): string {
  const argv = ['-C', cwd, ...args];
  const stdout = execFileSync('git', argv, { encoding: 'utf-8' });
  return stdout.trim();
}
|
||||
|
||||
/**
 * Create a throwaway git repository containing a single committed `note.md`.
 *
 * The directory is recorded in `cleanupDirs` right after creation so the
 * suite teardown can remove it.
 *
 * @returns Absolute path of the new repository.
 */
function makeFixtureRepo(): string {
  const repoDir = mkdtempSync(join(tmpdir(), 'gbrain-syncstamp-'));
  cleanupDirs.push(repoDir);

  // Initialise on branch 'main' and give the repo a local commit identity.
  const setupCommands: string[][] = [
    ['init', '--quiet', '--initial-branch=main'],
    ['config', 'user.email', 'test@example.com'],
    ['config', 'user.name', 'test'],
  ];
  for (const command of setupCommands) {
    git(repoDir, ...command);
  }

  const notePath = join(repoDir, 'note.md');
  writeFileSync(notePath, `---\ntitle: note\ntype: note\n---\nbody\n`);

  git(repoDir, 'add', '.');
  git(repoDir, 'commit', '--quiet', '-m', 'init');
  return repoDir;
}
|
||||
|
||||
// One in-memory PGLite brain is shared by the whole suite.
beforeAll(async () => {
  const freshEngine = new PGLiteEngine();
  engine = freshEngine;
  await freshEngine.connect({ type: 'pglite' } as never);
  await freshEngine.initSchema();
});

// Close the engine, then remove every fixture repo created along the way.
// Removal is best-effort: a directory that cannot be deleted is ignored.
afterAll(async () => {
  await engine.disconnect();
  cleanupDirs.forEach((dir) => {
    try {
      rmSync(dir, { recursive: true, force: true });
    } catch {
      /* best-effort */
    }
  });
});
|
||||
|
||||
describe('sync up_to_date path — advances last_sync_at', () => {
  // Upsert a source row anchored at `headCommit` whose last_sync_at is one
  // hour in the past: the shape of a quiet source whose last real write
  // happened well before the next idle sync tick.
  const seedStaleSource = (sourceId: string, repo: string, headCommit: string) =>
    engine.executeRaw(
      `INSERT INTO sources (id, name, local_path, last_commit, last_sync_at, config)
       VALUES ($1, $1, $2, $3, now() - interval '1 hour', '{"federated": true}'::jsonb)
       ON CONFLICT (id) DO UPDATE SET
         local_path = EXCLUDED.local_path,
         last_commit = EXCLUDED.last_commit,
         last_sync_at = EXCLUDED.last_sync_at`,
      [sourceId, repo, headCommit],
    );

  // Read last_sync_at for one source as epoch milliseconds.
  const readLastSyncMs = async (sourceId: string): Promise<number> => {
    const rows = await engine.executeRaw<{ last_sync_at: Date | string }>(
      `SELECT last_sync_at FROM sources WHERE id = $1`,
      [sourceId],
    );
    return new Date(rows[0].last_sync_at as string).getTime();
  };

  test('quiet repo (no new commits) bumps last_sync_at on subsequent sync', async () => {
    const repo = makeFixtureRepo();
    const headCommit = git(repo, 'rev-parse', 'HEAD');
    const sourceId = 'syncstamp-quiet';

    await seedStaleSource(sourceId, repo, headCommit);
    const stampBefore = await readLastSyncMs(sourceId);

    // HEAD equals the stored anchor, so this sync takes the up_to_date branch.
    const outcome = await performSync(engine, {
      repoPath: repo,
      noPull: true,
      noEmbed: true,
      sourceId,
    });
    expect(outcome.status).toBe('up_to_date');

    const stampAfter = await readLastSyncMs(sourceId);

    // The stamp was an hour stale; it must have moved forward and now be
    // within ten seconds of the current time.
    expect(stampAfter).toBeGreaterThan(stampBefore);
    expect(Date.now() - stampAfter).toBeLessThan(10_000); // < 10s old
  });

  test('up_to_date does NOT touch last_commit (commit anchor stable)', async () => {
    const repo = makeFixtureRepo();
    const headCommit = git(repo, 'rev-parse', 'HEAD');
    const sourceId = 'syncstamp-anchor';

    await seedStaleSource(sourceId, repo, headCommit);

    await performSync(engine, {
      repoPath: repo,
      noPull: true,
      noEmbed: true,
      sourceId,
    });

    const [row] = await engine.executeRaw<{ last_commit: string }>(
      `SELECT last_commit FROM sources WHERE id = $1`,
      [sourceId],
    );
    expect(row.last_commit).toBe(headCommit);
  });

  test('non-sourceId (legacy global config) path: no source UPDATE attempted', async () => {
    const repo = makeFixtureRepo();
    const headCommit = git(repo, 'rev-parse', 'HEAD');

    // Legacy anchor lives in global config keys rather than on a source row.
    await engine.setConfig('sync.last_commit', headCommit);
    await engine.setConfig('sync.repo_path', repo);

    // No sourceId is passed: the call must not throw and must still record
    // sync.last_run.
    const outcome = await performSync(engine, {
      repoPath: repo,
      noPull: true,
      noEmbed: true,
    });
    expect(outcome.status).toBe('up_to_date');

    const lastRun = await engine.getConfig('sync.last_run');
    expect(lastRun).not.toBeNull();
    const lastRunAgeMs = Date.now() - new Date(lastRun!).getTime();
    expect(lastRunAgeMs).toBeLessThan(10_000);
  });
});
|
||||
171
test/sync-walk-dispatch.test.ts
Normal file
171
test/sync-walk-dispatch.test.ts
Normal file
@@ -0,0 +1,171 @@
|
||||
/**
|
||||
* v0.18.2.fork.1 — CR-7 (MANDATORY) — Patch #3 sync-walk-dispatch.
|
||||
*
|
||||
* Without this patch `gbrain sync --repo X` (no `--source` flag) silently
|
||||
* mis-dispatches every file to source `default` (or undefined → legacy global
|
||||
* config path), because resolveSourceId skips priority 5 (manifest slug-prefix)
|
||||
* when slug is undefined. Recon-verified against the fork's source-resolver.ts
|
||||
* line 117-125 comment.
|
||||
*
|
||||
* Patch #3 makes sync.ts performSync + import.ts runImport thread per-file
|
||||
* slug to `resolveBySlugPrefix(engine, slug)` so manifest priority 5 fires
|
||||
* once per file. Slug no-match falls back to `default-ambiguous` (tombstone).
|
||||
*
|
||||
* Coverage:
|
||||
* - manifestMode=true: per-file dispatch via slug-prefix → correct source
|
||||
* - cross-prefix collision: longest-prefix wins (TEN-6)
|
||||
* - slug no-match → default-ambiguous tombstone
|
||||
* - manifestMode=false: preserves explicit sourceId for all files (no
|
||||
* regression on the existing repo-wide attribution path)
|
||||
* - Allowlist gate (Patch #2) interacts cleanly: filtered files don't
|
||||
* land at all, regardless of dispatch
|
||||
*
|
||||
* Failure mode this guards: HTTP 200 from sync, gbrain logs imported
|
||||
* successfully, but pages all land in `default`. User won't notice until
|
||||
* Stage 2 graph displays wrong groupings.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'bun:test';
|
||||
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
|
||||
import { runImport } from '../src/commands/import.ts';
|
||||
import { __invalidateSlugPrefixCache } from '../src/core/source-resolver.ts';
|
||||
import { __invalidateAllowlistCache } from '../src/core/allowlist-resolver.ts';
|
||||
|
||||
let engine: PGLiteEngine;
|
||||
const cleanupDirs: string[] = [];
|
||||
|
||||
/**
 * Creates a fresh temporary directory (prefix `gbrain-cr7-`) to act as a repo
 * root, and registers it in `cleanupDirs` so it can be removed after the run.
 *
 * @returns Absolute path of the new directory.
 */
function mkRepoFixture(): string {
  const prefix = join(tmpdir(), 'gbrain-cr7-');
  const fixtureRoot = mkdtempSync(prefix);
  cleanupDirs.push(fixtureRoot);
  return fixtureRoot;
}
|
||||
|
||||
/**
 * Writes a markdown file with minimal frontmatter (`title` = the relative
 * path, `type: note`) under `repoRoot`, creating parent directories as needed.
 *
 * @param repoRoot Directory that acts as the repo root.
 * @param relPath  Path of the file relative to `repoRoot`; also used as the title.
 * @param body     Text placed after the frontmatter (defaults to `'content'`).
 */
function writeMd(repoRoot: string, relPath: string, body = 'content'): void {
  const target = join(repoRoot, relPath);
  const parentDir = join(target, '..');
  const frontmatter = `---\ntitle: ${relPath}\ntype: note\n---\n`;

  mkdirSync(parentDir, { recursive: true });
  writeFileSync(target, `${frontmatter}${body}\n`);
}
|
||||
|
||||
// Suite setup: boot an in-process PGLite engine, apply the schema, then seed
// the per-project sources whose slug_prefix_rules the dispatch tests rely on.
beforeAll(async () => {
  // Mirror the v26 post-migration source taxonomy. v26 already ran via
  // initSchema (it's in MIGRATIONS), creating gstack-meta + default-ambiguous.
  // The per-project sources below give manifest priority 5 rules to match.
  const seedProjectSources = `INSERT INTO sources (id, name, config) VALUES
    ('memory-dashboard', 'memory-dashboard',
     '{"federated": true, "slug_prefix_rules": ["memory-dashboard/", "projects/triton6564685-memory-dashboard/"]}'::jsonb),
    ('stock-dashboard', 'stock-dashboard',
     '{"federated": true, "slug_prefix_rules": ["stock-dashboard/", "projects/triton6564685-stock-dashboard/"]}'::jsonb)
    ON CONFLICT (id) DO UPDATE SET config = EXCLUDED.config`;

  engine = new PGLiteEngine();
  await engine.connect({ type: 'pglite' } as never);
  await engine.initSchema();
  await engine.executeRaw(seedProjectSources);
});

// Suite teardown: close the engine, then remove every fixture directory.
afterAll(async () => {
  await engine.disconnect();
  cleanupDirs.forEach((dir) => {
    try {
      rmSync(dir, { recursive: true, force: true });
    } catch {
      /* best-effort */
    }
  });
});

// Reset both resolver caches before each test.
beforeEach(() => {
  __invalidateSlugPrefixCache();
  __invalidateAllowlistCache();
});
|
||||
|
||||
describe('CR-7 — manifestMode=true dispatches per-file via slug-prefix', () => {
|
||||
/**
 * Imports three files that live under three different slug prefixes with
 * manifestMode on, and checks each page landed in the expected source.
 */
test('three files in three different prefixes → three correct sources', async () => {
  const fixture = mkRepoFixture();
  const expectedSourceBySlug: Record<string, string> = {
    'retros/2026-05-07-cr7-a': 'gstack-meta',
    'projects/triton6564685-stock-dashboard/checkpoints/cr7-a': 'stock-dashboard',
    'projects/triton6564685-memory-dashboard/notes/cr7-a': 'memory-dashboard',
  };

  writeMd(fixture, 'retros/2026-05-07-cr7-a.md');
  writeMd(fixture, 'projects/triton6564685-stock-dashboard/checkpoints/cr7-a.md');
  writeMd(fixture, 'projects/triton6564685-memory-dashboard/notes/cr7-a.md');

  await runImport(engine, [fixture, '--no-embed'], { manifestMode: true });

  const pages = await engine.executeRaw<{ slug: string; source_id: string }>(
    `SELECT slug, source_id FROM pages WHERE slug LIKE '%cr7-a%' ORDER BY slug`,
  );
  const actualSourceBySlug = new Map(pages.map((page) => [page.slug, page.source_id]));

  // A slug missing from the result yields `undefined`, which fails `toBe`.
  for (const [slug, source] of Object.entries(expectedSourceBySlug)) {
    expect(actualSourceBySlug.get(slug)).toBe(source);
  }
});
|
||||
|
||||
/**
 * A file whose slug matches no configured prefix must be stored exactly once,
 * under the `default-ambiguous` tombstone source.
 */
test('slug-no-match → default-ambiguous (tombstone fallback)', async () => {
  const fixture = mkRepoFixture();
  writeMd(fixture, 'unknown-prefix/cr7-b.md');

  await runImport(engine, [fixture, '--no-embed'], { manifestMode: true });

  const pages = await engine.executeRaw<{ source_id: string }>(
    `SELECT source_id FROM pages WHERE slug = 'unknown-prefix/cr7-b'`,
  );
  // One row, and that row belongs to the tombstone source.
  expect(pages.map((page) => page.source_id)).toEqual(['default-ambiguous']);
});
|
||||
|
||||
/**
 * Imports a file under the memory-dashboard project prefix whose path also
 * contains a `retros/` segment, and checks it is attributed to
 * `memory-dashboard`.
 */
test('cross-prefix collision: longest-prefix wins (TEN-6)', async () => {
  const repo = mkRepoFixture();
  // `projects/triton6564685-memory-dashboard/` (40 chars including the
  // trailing slash) must win over `retros/`, which appears in this slug only
  // as an interior path segment, not at its start.
  writeMd(repo, 'projects/triton6564685-memory-dashboard/retros/cr7-c.md');

  await runImport(engine, [repo, '--no-embed'], { manifestMode: true });

  const rows = await engine.executeRaw<{ source_id: string }>(
    `SELECT source_id FROM pages WHERE slug = 'projects/triton6564685-memory-dashboard/retros/cr7-c'`,
  );
  // Assert the row exists first so a missing page fails with a readable
  // assertion instead of a TypeError on `rows[0]`.
  expect(rows.length).toBe(1);
  expect(rows[0].source_id).toBe('memory-dashboard');
});
|
||||
});
|
||||
|
||||
describe('CR-7 — manifestMode=false preserves explicit sourceId (no regression)', () => {
|
||||
/**
 * With manifestMode unset, an explicit `sourceId` applies to every imported
 * file, whatever prefix its slug has.
 */
test('all files land in opts.sourceId regardless of slug', async () => {
  const fixture = mkRepoFixture();
  writeMd(fixture, 'retros/2026-05-07-cr7-d.md');
  writeMd(fixture, 'projects/triton6564685-stock-dashboard/checkpoints/cr7-d.md');

  // No manifestMode flag → legacy attribution: explicit sourceId wins.
  await runImport(engine, [fixture, '--no-embed'], { sourceId: 'memory-dashboard' });

  const pages = await engine.executeRaw<{ slug: string; source_id: string }>(
    `SELECT slug, source_id FROM pages WHERE slug LIKE '%cr7-d%' ORDER BY slug`,
  );
  // Exactly two pages, both attributed to the explicit source.
  expect(pages.map((page) => page.source_id)).toEqual([
    'memory-dashboard',
    'memory-dashboard',
  ]);
});
|
||||
});
|
||||
|
||||
describe('CR-7 — allowlist (Patch #2) interaction', () => {
|
||||
/**
 * With a `.gbrain-allowlist` that only admits `retros/*.md`, the retro file is
 * imported (into `gstack-meta`) and the project file produces no page at all.
 */
test('non-allowlisted files dont land regardless of manifest dispatch', async () => {
  const fixture = mkRepoFixture();
  // Strict allowlist: only retros/* allowed.
  writeFileSync(join(fixture, '.gbrain-allowlist'), 'retros/*.md\n');
  writeMd(fixture, 'retros/2026-05-07-cr7-e.md');
  writeMd(fixture, 'projects/triton6564685-stock-dashboard/notes/cr7-e.md');

  await runImport(engine, [fixture, '--no-embed'], { manifestMode: true });

  const admitted = await engine.executeRaw<{ source_id: string }>(
    `SELECT source_id FROM pages WHERE slug = 'retros/2026-05-07-cr7-e'`,
  );
  expect(admitted.map((page) => page.source_id)).toEqual(['gstack-meta']);

  const rejected = await engine.executeRaw<{ slug: string }>(
    `SELECT slug FROM pages WHERE slug = 'projects/triton6564685-stock-dashboard/notes/cr7-e'`,
  );
  expect(rejected.length).toBe(0);
});
|
||||
});
|
||||
214
test/vanilla-rollback-safety.test.ts
Normal file
214
test/vanilla-rollback-safety.test.ts
Normal file
@@ -0,0 +1,214 @@
|
||||
/**
|
||||
* v0.18.2.fork.1 — Phase 0 backup/restore rollback safety drill (lite).
|
||||
*
|
||||
* This is the "Phase 0 outside-voice T3" assertion expressed at the SQL
|
||||
* semantics level. The throwaway-LXC + rclone-Drive-restore + age-decrypt
|
||||
* full ritual is deferred (Drive backup pipeline gets covered by the
|
||||
* separate quarterly Drill 3 per design doc); this file proves the
|
||||
* IMPORTANT SQL invariants directly:
|
||||
*
|
||||
* IF the fork has shipped + written rows with non-default source_id,
|
||||
* AND we then roll back to vanilla v0.18.2 (via Phase -1 vendored image),
|
||||
* THEN vanilla `gbrain sync` MUST NOT delete or overwrite those rows.
|
||||
*
|
||||
* Vanilla v0.18.2 code path:
|
||||
* sync.ts → importFile → importFromContent → tx.putPage(slug, {...no source_id})
|
||||
*
|
||||
* In our fork, omitting source_id falls back to schema DEFAULT 'default' —
|
||||
* the vanilla code path is byte-identical at the putPage SQL level. So we
|
||||
* simulate vanilla writes by calling engine.putPage with source_id omitted.
|
||||
*
|
||||
* Except in the final test (which demonstrates reason 1), we DO NOT use importFromContent here, for two reasons:
|
||||
* 1. Test setup: importFromContent runs inside a transaction that calls
|
||||
* tx.getTags(slug) which uses a slug-only subquery — that fails with
|
||||
* SQL 21000 "more than one row returned by a subquery" on multi-source
|
||||
* same-slug data. This is itself part of the safety property: vanilla
|
||||
* cannot successfully re-import a multi-source slug, which means it
|
||||
* can't accidentally write competing data either. The failure is
|
||||
* transaction-local, original rows are preserved by ROLLBACK.
|
||||
* 2. Test purity: we want to isolate the SQL-level invariant, not the
|
||||
* tag-reconciliation interaction.
|
||||
*
|
||||
* Key SQL property: composite UNIQUE (source_id, slug) means an INSERT at
|
||||
* ('default', slug) does not collide with an existing ('memory-dashboard',
|
||||
* slug) row. Vanilla writes land at default; non-default rows persist.
|
||||
*
|
||||
* Acceptable side-effect: parallel rows in 'default' and the original
|
||||
* source (cleanable post-rollback). The IRON property is no data loss
|
||||
* for the original non-default content.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'bun:test';
|
||||
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
|
||||
import { importFromContent } from '../src/core/import-file.ts';
|
||||
|
||||
let engine: PGLiteEngine;
|
||||
|
||||
// Suite setup: boot an in-process PGLite engine, apply the schema, and make
// sure the two non-default sources used by the drill exist.
beforeAll(async () => {
  const seedSources = `INSERT INTO sources (id, name, config) VALUES
    ('memory-dashboard', 'memory-dashboard', '{"federated": true}'::jsonb),
    ('stock-dashboard', 'stock-dashboard', '{"federated": true}'::jsonb)
    ON CONFLICT (id) DO NOTHING`;

  engine = new PGLiteEngine();
  await engine.connect({ type: 'pglite' } as never);
  await engine.initSchema();
  await engine.executeRaw(seedSources);
});

// Suite teardown: release the engine.
afterAll(async () => {
  await engine.disconnect();
});

// Each test starts with no pages under the `rollback-drill/` slug namespace.
beforeEach(async () => {
  const purgeDrillPages = `DELETE FROM pages WHERE slug LIKE 'rollback-drill/%'`;
  await engine.executeRaw(purgeDrillPages);
});
|
||||
|
||||
describe('Vanilla rollback safety — IRON: non-default source_id rows are preserved', () => {
|
||||
/**
 * Writes a page under `memory-dashboard`, then writes the same slug with no
 * `source_id` (the simulated vanilla write). Both rows must coexist and the
 * original row's content must be unchanged.
 */
test('Vanilla putPage at default does not delete existing non-default row', async () => {
  const slug = 'rollback-drill/architecture';

  // Post-fork-deploy state: the fork wrote this slug under memory-dashboard.
  await engine.putPage(slug, {
    type: 'note',
    title: 'Architecture (fork-written)',
    compiled_truth: 'Original fork content under memory-dashboard.',
    source_id: 'memory-dashboard',
  });

  // Simulated vanilla v0.18.2 write: source_id omitted, so schema DEFAULT
  // 'default' applies. The conflict target ('default', slug) does not match
  // the existing ('memory-dashboard', slug) row, so a NEW row is inserted
  // and the memory-dashboard row is left alone.
  await engine.putPage(slug, {
    type: 'note',
    title: 'Architecture (vanilla re-import)',
    compiled_truth: 'Vanilla sync re-imported content lands here.',
  });

  const rows = await engine.executeRaw<{ source_id: string; title: string; compiled_truth: string }>(
    `SELECT source_id, title, compiled_truth FROM pages
     WHERE slug = 'rollback-drill/architecture'
     ORDER BY source_id`,
  );
  const rowFor = (sourceId: string) => rows.find((row) => row.source_id === sourceId);

  expect(rows.length).toBe(2);

  const vanillaRow = rowFor('default');
  const forkRow = rowFor('memory-dashboard');
  expect(vanillaRow).toBeDefined();
  expect(forkRow).toBeDefined();

  // IRON RULE: the original non-default row keeps its content.
  expect(forkRow!.title).toBe('Architecture (fork-written)');
  expect(forkRow!.compiled_truth).toBe('Original fork content under memory-dashboard.');

  // Acceptable side-effect: the vanilla content sits in 'default'.
  expect(vanillaRow!.title).toBe('Architecture (vanilla re-import)');
});
|
||||
|
||||
/**
 * Writes the same slug under two non-default sources, then once more with no
 * `source_id`. All three rows must exist, each keeping its own title.
 */
test('Vanilla writes do not corrupt cross-source slug isolation across multiple sources', async () => {
  const slug = 'rollback-drill/notes';

  // Two fork-era rows, one per project source.
  await engine.putPage(slug, {
    type: 'note',
    title: 'Notes in MD',
    compiled_truth: 'memory-dashboard content',
    source_id: 'memory-dashboard',
  });
  await engine.putPage(slug, {
    type: 'note',
    title: 'Notes in SD',
    compiled_truth: 'stock-dashboard content',
    source_id: 'stock-dashboard',
  });

  // Simulated vanilla write: no source_id.
  await engine.putPage(slug, {
    type: 'note',
    title: 'Notes (vanilla)',
    compiled_truth: 'Vanilla content lands at default.',
  });

  const rows = await engine.executeRaw<{ source_id: string; title: string }>(
    `SELECT source_id, title FROM pages
     WHERE slug = 'rollback-drill/notes'
     ORDER BY source_id`,
  );
  const titleOf = (sourceId: string) => rows.find((row) => row.source_id === sourceId)!.title;

  expect(rows.length).toBe(3);
  expect(titleOf('memory-dashboard')).toBe('Notes in MD');
  expect(titleOf('stock-dashboard')).toBe('Notes in SD');
  expect(titleOf('default')).toBe('Notes (vanilla)');
});
|
||||
|
||||
/**
 * Reads the UNIQUE constraints on `pages` from the catalog and checks that
 * `pages_source_slug_key` covers both `source_id` and `slug`, and that no
 * slug-only UNIQUE constraint exists.
 */
test('IRON RULE: composite UNIQUE (source_id, slug) constraint installed (the schema backstop)', async () => {
  // Belt-and-suspenders: this constraint backs the safety property. If it
  // were loosened back to plain UNIQUE(slug), vanilla sync's UPSERT would
  // clobber non-default rows.
  const uniqueConstraints = await engine.executeRaw<{ conname: string; constraint_def: string }>(
    `SELECT conname, pg_get_constraintdef(oid) AS constraint_def
     FROM pg_constraint
     WHERE conrelid = 'pages'::regclass
     AND contype = 'u'`,
  );

  const composite = uniqueConstraints.find((c) => c.conname === 'pages_source_slug_key');
  expect(composite).toBeDefined();

  const definition = composite!.constraint_def;
  expect(definition).toContain('source_id');
  expect(definition).toContain('slug');

  // No competing global UNIQUE(slug) should remain post-v17.
  const slugOnlyCount = uniqueConstraints.reduce((count, c) => {
    const isSlugOnly =
      /\(\s*slug\s*\)/.test(c.constraint_def) && !c.constraint_def.includes('source_id');
    return isSlugOnly ? count + 1 : count;
  }, 0);
  expect(slugOnlyCount).toBe(0);
});
|
||||
|
||||
/**
 * Documents a SECONDARY safety property found during this drill: calling
 * importFromContent without a sourceId on a slug that already exists under
 * `memory-dashboard` is expected to throw an error mentioning "subquery",
 * leaving the original row untouched.
 */
test('Additional safety surface: vanilla full-flow re-import on multi-source slug bails out (transaction rollback)', async () => {
  // Vanilla's importFromContent → tx.getTags(slug) uses a slug-only subquery.
  // On a multi-source same-slug brain it returns several page_ids → SQL 21000
  // → transaction rollback. So vanilla cannot write through importFromContent
  // on these slugs; multi-source rows stay untouched even if the operator
  // syncs after a rollback.
  //
  // Good for safety, bad for ergonomics: a vanilla operator must either
  // (a) accept the slug is "frozen" until forking again, or (b) manually
  // remove the cross-source data first.
  const slug = 'rollback-drill/blocked';

  await engine.putPage(slug, {
    type: 'note',
    title: 'In MD',
    compiled_truth: 'fork-written',
    source_id: 'memory-dashboard',
  });
  // Add a tag so getTags has data to attempt to read.
  await engine.addTag(slug, 'docs', 'memory-dashboard');

  const vanillaMarkdown = [
    '---',
    'title: Vanilla attempt',
    'type: note',
    '---',
    'Should fail in tag reconciliation.',
    '',
  ].join('\n');

  let failureMessage: string | undefined;
  try {
    await importFromContent(
      engine,
      slug,
      vanillaMarkdown,
      { noEmbed: true /* no sourceId — vanilla code path */ },
    );
  } catch (err) {
    failureMessage = err instanceof Error ? err.message : String(err);
  }

  // The import must have thrown…
  expect(failureMessage !== undefined).toBe(true);
  // …and the PGLite wrapper or driver text must mention the subquery failure.
  expect((failureMessage ?? '').toLowerCase()).toContain('subquery');

  // Original row intact (transaction rolled back).
  const survivors = await engine.executeRaw<{ title: string; compiled_truth: string }>(
    `SELECT title, compiled_truth FROM pages
     WHERE source_id = 'memory-dashboard' AND slug = 'rollback-drill/blocked'`,
  );
  expect(survivors.length).toBe(1);
  expect(survivors[0].title).toBe('In MD');
  expect(survivors[0].compiled_truth).toBe('fork-written');
});
|
||||
});
|
||||
Reference in New Issue
Block a user