Files
gbrain/scripts/build-llms.ts
Garry Tan 7f156c8873 feat: v0.15.0 llms.txt + llms-full.txt + AGENTS.md (#294)
* feat: llms.txt + llms-full.txt + AGENTS.md (v0.15.0)

Ship three new public artifacts at the repo root so agents that aren't
Claude Code can discover GBrain documentation cleanly:

- AGENTS.md — ~45-line install + operating protocol for non-Claude agents
  (Codex, Cursor, OpenClaw, Aider). Covers install, read order, trust
  boundary, config/debug/migration pointers, fork regeneration. Uses
  relative links so it survives fork/rename.
- llms.txt — llmstxt.org-spec index (H1 + blockquote + Core entry points /
  Configuration / Debugging / Migrations / Philosophy / Optional H2s).
- llms-full.txt — same index with core docs inlined for single-fetch
  ingestion. ~225KB, well under the 600KB FULL_SIZE_BUDGET.

Generator-driven via scripts/build-llms.ts + scripts/llms-config.ts.
LLMS_REPO_BASE env var makes it fork-friendly. bun run build:llms
regenerates both outputs deterministically.

test/build-llms.test.ts has 7 cases: paths resolve on disk, generator
idempotent, llms.txt spec shape, checked-in files match generator output
(drift guard), content contract (RESOLVER / AGENTS / INSTALL referenced),
AGENTS mirrors README + INSTALL_FOR_AGENTS install path, llms-full.txt
under size budget.

Leverage point per Codex review: README.md + INSTALL_FOR_AGENTS.md
install prompts now tell agents to fetch AGENTS.md first. Without this,
the new files were invisible.

Drive-by fix: INSTALL_FOR_AGENTS.md:136 had `git pull origin main` while
the repo's default branch is master (origin/HEAD -> master). Corrected.

Plan + reviews: /plan-eng-review CLEARED, /codex adversarial review
found 15 issues — 7 folded in directly, 3 user tension decisions, 5
stayed as NOT-in-scope with reasoning.

Version bumps to 0.15.0 (new public-artifact feature surface per Step 12
of /ship feature-signal heuristic).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* chore: normalize VERSION to 3-digit to match master

master uses 3-digit semver (0.14.2); my earlier /ship bumped VERSION to
the 4-digit gstack format (0.15.0.0). Revert to 0.15.0 to match
package.json (already 3-digit) and master's convention.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 11:51:32 -07:00

194 lines
5.5 KiB
TypeScript

#!/usr/bin/env bun
/**
* build-llms — generate llms.txt + llms-full.txt from scripts/llms-config.ts.
*
* Run: `bun run build:llms` (or `bun run scripts/build-llms.ts`).
*
* Outputs:
* - llms.txt — llmstxt.org-spec index (H1 / blockquote / H2 sections).
* - llms-full.txt — full content of every file entry in non-optional sections
*   (entries with `includeInFull: false` and directory entries are skipped).
*
* Deterministic: no timestamps; sections and entries are emitted in config order.
* Warns (does not fail) if llms-full.txt exceeds FULL_SIZE_BUDGET. CI catches
* drift via test/build-llms.test.ts.
*
* Fork override: set LLMS_REPO_BASE to regenerate with a different URL base.
*/
import { existsSync, readFileSync, statSync, writeFileSync } from "node:fs";
import { join, dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url";
import {
FULL_SIZE_BUDGET,
INLINE_TIPS,
PROJECT,
SECTIONS,
type DocEntry,
type DocSection,
} from "./llms-config";
// Absolute path of the repository root: the parent of the directory that
// contains this script.
const scriptDir = dirname(fileURLToPath(import.meta.url));
const repoRoot = resolve(scriptDir, "..");
/**
 * Builds the URL for a doc entry by appending the entry's repo-relative path
 * to `PROJECT.rawBaseUrl`, separated by a single "/".
 *
 * @param entry - Config entry whose `path` is appended to the base URL.
 * @returns The combined URL string.
 */
function urlFor(entry: DocEntry): string {
  const base = PROJECT.rawBaseUrl;
  return base + "/" + entry.path;
}
/**
 * Reports whether a config path denotes a directory, i.e. whether its last
 * character is "/". The empty string is not a directory path.
 *
 * @param path - Path string as written in the config.
 * @returns `true` when `path` ends with "/".
 */
function isDirectoryPath(path: string): boolean {
  const lastChar = path.slice(-1);
  return lastChar === "/";
}
/**
 * Renders the link-only `llms.txt` index.
 *
 * Layout: an H1 with the project name, a blockquote with the project summary,
 * a `Repo:` line, one H2 per configured section (every section is listed,
 * each entry as a `- [title](url): description` bullet), and a closing
 * "Operational tips" H2 with one bullet per inline tip.
 *
 * @returns The full file content; lines are joined with "\n" and the text
 *   ends with a trailing newline.
 */
function renderLlmsTxt(): string {
  const out: string[] = [];

  // Header: title, summary blockquote, repo pointer — each followed by a blank line.
  out.push(`# ${PROJECT.name}`, "");
  out.push(`> ${PROJECT.summary}`, "");
  out.push(`Repo: ${PROJECT.repoUrl}`, "");

  for (const section of SECTIONS) {
    out.push(`## ${section.heading}`, "");
    for (const entry of section.entries) {
      const link = `[${entry.title}](${urlFor(entry)})`;
      out.push(`- ${link}: ${entry.description}`);
    }
    out.push("");
  }

  out.push("## Operational tips", "");
  for (const tip of INLINE_TIPS) {
    out.push(`- ${tip}`);
  }
  // The final empty element makes the joined text end with a newline.
  out.push("");

  return out.join("\n");
}
/**
 * Renders `llms-full.txt`: a preamble followed by the inlined text of every
 * file entry in the non-optional sections.
 *
 * Sections flagged `optional` are skipped entirely. Within a section, entries
 * with `includeInFull === false` and directory entries (trailing "/") are
 * skipped. Each inlined file gets an H2 with its path, a `Source:` URL line,
 * the file body with trailing whitespace trimmed, and a `---` separator.
 *
 * @returns `content` — the rendered file text; `sizes` — the UTF-8 byte
 *   length of each inlined file as read from disk (before trimming), in
 *   emission order.
 * @throws Error when an entry selected for inlining does not exist on disk.
 */
function renderLlmsFullTxt(): { content: string; sizes: Array<{ path: string; bytes: number }> } {
  const sizes: Array<{ path: string; bytes: number }> = [];
  const out: string[] = [
    `# ${PROJECT.name} — Full Context`,
    "",
    `> ${PROJECT.summary}`,
    "",
    `This file concatenates core GBrain documentation for single-fetch ingestion.`,
    `For the link-only index, see \`llms.txt\`. Source of truth: ${PROJECT.repoUrl}.`,
    "",
  ];

  for (const section of SECTIONS) {
    if (section.optional) continue;
    out.push(`# ${section.heading}`, "");

    for (const entry of section.entries) {
      // Explicit opt-outs and directory entries are never inlined.
      if (entry.includeInFull === false || isDirectoryPath(entry.path)) continue;

      const filePath = join(repoRoot, entry.path);
      if (!existsSync(filePath)) {
        // Fail loudly rather than silently dropping a configured doc, so a
        // manual run surfaces the stale config entry immediately.
        throw new Error(
          `llms-config references missing file: ${entry.path}`,
        );
      }

      const text = readFileSync(filePath, "utf8");
      sizes.push({ path: entry.path, bytes: Buffer.byteLength(text, "utf8") });

      out.push(`## ${entry.path}`, "");
      out.push(`Source: ${urlFor(entry)}`, "");
      out.push(text.trimEnd(), "");
      out.push("---", "");
    }
  }

  return { content: out.join("\n"), sizes };
}
/**
 * Checks every configured entry (in all sections) against the filesystem.
 *
 * Each entry path must exist under the repo root, and its trailing-slash
 * convention must match what is on disk: paths ending in "/" must be
 * directories, all other paths must be regular files.
 *
 * @throws Error on the first entry that is missing or whose kind does not
 *   match its trailing-slash convention.
 */
function validateConfig(): void {
  for (const { entries } of SECTIONS) {
    for (const { path } of entries) {
      const target = join(repoRoot, path);
      if (!existsSync(target)) {
        throw new Error(
          `llms-config references missing path: ${path}`,
        );
      }

      const info = statSync(target);
      if (isDirectoryPath(path)) {
        if (!info.isDirectory()) {
          throw new Error(
            `llms-config path ends with '/' but is a file: ${path}`,
          );
        }
      } else if (!info.isFile()) {
        throw new Error(
          `llms-config path is a directory but missing trailing '/': ${path}`,
        );
      }
    }
  }
}
/**
 * Validates the config, then renders both outputs in memory without writing
 * anything to disk.
 *
 * @returns `llmsTxt` — content for `llms.txt`; `llmsFullTxt` — content for
 *   `llms-full.txt`; `sizes` — per-file byte counts of the docs inlined into
 *   the full output.
 * @throws Error when config validation or rendering finds a missing or
 *   mis-typed path.
 */
export function buildLlmsFiles(): {
  llmsTxt: string;
  llmsFullTxt: string;
  sizes: Array<{ path: string; bytes: number }>;
} {
  // Validate first so path problems are reported before any rendering work.
  validateConfig();

  const llmsTxt = renderLlmsTxt();
  const full = renderLlmsFullTxt();

  return { llmsTxt, llmsFullTxt: full.content, sizes: full.sizes };
}
/**
 * CLI entry point: builds both outputs, writes them to `llms.txt` and
 * `llms-full.txt` at the repo root, and logs the byte size of each.
 *
 * If `llms-full.txt` is larger than `FULL_SIZE_BUDGET`, prints a warning
 * listing up to five of the largest inlined files. Exceeding the budget only
 * warns; it does not throw.
 */
function main(): void {
  const built = buildLlmsFiles();

  const indexPath = join(repoRoot, "llms.txt");
  const fullPath = join(repoRoot, "llms-full.txt");
  writeFileSync(indexPath, built.llmsTxt);
  writeFileSync(fullPath, built.llmsFullTxt);

  const indexBytes = Buffer.byteLength(built.llmsTxt, "utf8");
  const fullBytes = Buffer.byteLength(built.llmsFullTxt, "utf8");
  console.log(`wrote ${indexPath} (${indexBytes} bytes)`);
  console.log(`wrote ${fullPath} (${fullBytes} bytes)`);

  // Within budget: nothing more to report.
  if (!(fullBytes > FULL_SIZE_BUDGET)) return;

  console.warn("");
  console.warn(
    `WARN: llms-full.txt (${fullBytes} bytes) exceeds FULL_SIZE_BUDGET (${FULL_SIZE_BUDGET} bytes).`,
  );
  console.warn(
    "Add `includeInFull: false` to the biggest entries in scripts/llms-config.ts:",
  );

  // Sort a copy, largest first, so the caller's array is left untouched.
  const largestFirst = built.sizes.slice().sort((x, y) => y.bytes - x.bytes);
  largestFirst.slice(0, 5).forEach((item) => {
    console.warn(` ${item.bytes} bytes ${item.path}`);
  });
}
// Run main() only when this file is the script handed to the runtime
// (process.argv[1]); importing the module must not write any files.
const isMainModule = process.argv[1] === fileURLToPath(import.meta.url);
if (isMainModule) {
  try {
    main();
  } catch (failure) {
    // Print just the message for Error instances; anything else is logged as-is.
    const detail = failure instanceof Error ? failure.message : failure;
    console.error(detail);
    process.exit(1);
  }
}