* fix: security hardening — search DoS, slug hijack, symlink traversal, content bombs, stdin guard 4 security vulnerabilities closed: - Search limit clamped to 100 (MAX_SEARCH_LIMIT) with statement_timeout 8s - Frontmatter slug authority enforced (path-derived, mismatch rejected) - Symlink traversal blocked (lstatSync in walker + importFromFile) - Content size guard on importFromContent (Buffer.byteLength, 5MB) - Stdin size guard in parseOpArgs (5MB cap) Search pagination added (--offset param on search + query operations). Clamp warning emitted when limit is capped. Co-Authored-By: garagon <garagon@users.noreply.github.com> * fix: PGLite concurrent access lock — prevent Aborted() crash File-based advisory lock using atomic mkdir with PID tracking and 5-minute stale detection. Clear error messages show which process holds the lock and how to recover. Co-Authored-By: danbr <danbr@users.noreply.github.com> * fix: 12 data integrity fixes + stale embedding prevention CTE searchKeyword rewrite (SQL-level LIMIT, not JS splice). Write validation on addLink/addTag/addTimelineEntry/putRawData/createVersion. Health metrics now measure real problems (stale_pages, orphan_pages, dead_links). Orphan chunk cleanup on empty pages. Embedding error logging. contentHash now covers all PageInput fields. Stale embedding NULL'd when chunk_text changes (prevents wrong vector on new text). hybridSearch stops double-embedding query. MCP param validation. type/exclude_slugs search filters now work. pgcrypto extension for Postgres <13. Co-Authored-By: win4r <win4r@users.noreply.github.com> * perf: 30x embedAll speedup + O(n²) fix + ask alias Sliding worker pool (concurrency 20, tunable via GBRAIN_EMBED_CONCURRENCY). O(n²) chunk lookup in embedPage replaced with Map. gbrain ask alias for query (CLI-only, not in MCP tools-json). .idea added to .gitignore. 
Co-Authored-By: stephenhungg <stephenhungg@users.noreply.github.com> Co-Authored-By: sharziki <sharziki@users.noreply.github.com> Co-Authored-By: hnshah <hnshah@users.noreply.github.com> Co-Authored-By: doguabaris <doguabaris@users.noreply.github.com> * chore: bump version and changelog (v0.9.1) Community fix wave: 10 PRs, 7 contributors. 4 security fixes, PGLite crash fix, 12 data integrity fixes, 30x embed speedup, search pagination, ask alias. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: garagon <garagon@users.noreply.github.com> Co-authored-by: danbr <danbr@users.noreply.github.com> Co-authored-by: win4r <win4r@users.noreply.github.com> Co-authored-by: stephenhungg <stephenhungg@users.noreply.github.com> Co-authored-by: sharziki <sharziki@users.noreply.github.com> Co-authored-by: hnshah <hnshah@users.noreply.github.com> Co-authored-by: doguabaris <doguabaris@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
90 lines
4.0 KiB
TypeScript
90 lines
4.0 KiB
TypeScript
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
|
import { mkdirSync, writeFileSync, symlinkSync, rmSync, mkdtempSync } from 'fs';
|
|
import { tmpdir } from 'os';
|
|
import { join } from 'path';
|
|
import { collectMarkdownFiles } from '../src/commands/import.ts';
|
|
|
|
// These tests exercise the filesystem walker that feeds `gbrain import`.
// They target L002 (report/findings.md): a malicious symlink inside a shared
// brain directory must not cause the walker to read files outside the brain
// root. See src/commands/import.ts:collectMarkdownFiles.
|
|
|
|
/**
 * Containment suite for the `collectMarkdownFiles` walker.
 *
 * Every test builds two fresh temp directories: `root` (the brain being
 * walked) and `secretDir` (content that lives outside the brain). Symlinks
 * are planted inside `root`, and the tests assert that the walker neither
 * returns the link itself nor anything reachable through it.
 *
 * The symlink tests finish with an exact-result assertion. A check such as
 * `not.toContain(path)` only rules out one spelling of a path, so a leaked
 * entry reported under a different spelling (for example a realpath-resolved
 * form of the target) would slip through. Pinning the whole result to the
 * single legitimate file closes that gap.
 */
describe('collectMarkdownFiles — symlink containment', () => {
  let root: string;
  let secretDir: string;

  beforeEach(() => {
    // Fresh directories per test so symlinks can't cross-contaminate runs.
    root = mkdtempSync(join(tmpdir(), 'gbrain-walker-root-'));
    secretDir = mkdtempSync(join(tmpdir(), 'gbrain-walker-secret-'));
  });

  afterEach(() => {
    // `force: true` keeps cleanup quiet if a path is already gone.
    rmSync(root, { recursive: true, force: true });
    rmSync(secretDir, { recursive: true, force: true });
  });

  test('includes real markdown files inside the root', () => {
    // Baseline: regular files at the top level and in a real subdirectory
    // must both be reported.
    writeFileSync(join(root, 'legit.md'), '# legit\n');
    mkdirSync(join(root, 'notes'));
    writeFileSync(join(root, 'notes', 'other.md'), '# other\n');

    const files = collectMarkdownFiles(root);

    expect(files).toContain(join(root, 'legit.md'));
    expect(files).toContain(join(root, 'notes', 'other.md'));
  });

  test('skips a symlink file pointing outside the brain root', () => {
    // Plant a real secret outside the brain root.
    const secretFile = join(secretDir, 'secret.md');
    writeFileSync(secretFile, '# secret — must not be ingested\n');

    // Inside the brain, create a symlink that points at the secret.
    // Before the fix, statSync followed the link and reported it as
    // a regular file, so it ended up in the walker's output and got
    // fed to importFile — chunked, embedded, and indexed in the brain.
    const legit = join(root, 'legit.md');
    writeFileSync(legit, '# legit\n');
    symlinkSync(secretFile, join(root, 'innocent.md'));

    const files = collectMarkdownFiles(root);

    expect(files).toContain(legit);
    // The symlink itself must not appear — this is the security guarantee.
    expect(files).not.toContain(join(root, 'innocent.md'));
    // And the canonical secret path must definitely not be in the results.
    expect(files).not.toContain(secretFile);
    // Nothing else may appear either, whatever path spelling it would use.
    expect(files).toEqual([legit]);
  });

  test('does not descend into a symlinked directory', () => {
    // Create a directory outside the root with a markdown file inside it.
    const outsideSub = join(secretDir, 'sub');
    mkdirSync(outsideSub);
    writeFileSync(join(outsideSub, 'external.md'), '# external\n');

    // Plant a symlink inside the brain pointing at that directory.
    // Before the fix, walk() would follow it and emit external.md.
    // With lstatSync, stat.isSymbolicLink() is true and we refuse
    // to descend — this also blocks circular-symlink DoS as a side effect.
    const legit = join(root, 'legit.md');
    writeFileSync(legit, '# legit\n');
    symlinkSync(outsideSub, join(root, 'linked-notes'));

    const files = collectMarkdownFiles(root);

    expect(files).toContain(legit);
    expect(files).not.toContain(join(root, 'linked-notes', 'external.md'));
    expect(files).not.toContain(join(outsideSub, 'external.md'));
    // Nothing reached through the linked directory may be reported at all.
    expect(files).toEqual([legit]);
  });

  test('skips broken symlinks without crashing', () => {
    // A dangling symlink — the target never existed. Pre-existing behavior
    // (PR #26 / PR #38) handled this via try/catch around statSync. The
    // L002 fix must not regress it: lstatSync succeeds on a dangling link
    // (it reports on the link itself, not the target), so we reach the
    // isSymbolicLink() branch and skip cleanly, no throw.
    const legit = join(root, 'legit.md');
    writeFileSync(legit, '# legit\n');
    symlinkSync('/nonexistent/path/to/nowhere', join(root, 'dangling.md'));

    const files = collectMarkdownFiles(root);

    expect(files).toContain(legit);
    expect(files).not.toContain(join(root, 'dangling.md'));
    // The dangling link contributes no entry under any name.
    expect(files).toEqual([legit]);
  });
});
|