* fix(subagent): bind Anthropic SDK messages.create() correctly The makeSubagentHandler was casting `new Anthropic()` directly to MessagesClient, but MessagesClient.create() maps to sdk.messages.create(), not sdk.create(). Every subagent job immediately died with: client.create is not a function Fix: wrap the SDK instance so .create() delegates to .messages.create() with proper `this` binding via .bind(sdk.messages). Discovered on first production run of gbrain agent against Supabase. Co-Authored-By: Wintermute <wintermute@openclaw.ai> * chore(ci): add typescript typecheck to test pipeline + clean up baseline errors Root cause infra gap that let the v0.16.0 subagent bug ship: CI ran only `bun test`, which transpiles types without checking them. Type errors only surfaced at runtime, in production. Changes: - Add `typescript` devDep and a `typecheck` npm script (`tsc --noEmit`). - Chain `bun run typecheck` into `bun run test` so developers get the same pipeline locally that CI runs. - Flip `.github/workflows/test.yml` to invoke `bun run test` (the npm script, including typecheck) instead of `bun test` (runner only). - Clean up 100+ pre-existing type errors across 30+ files so the first run of `tsc --noEmit` is green. 
Root causes were: - `databaseUrl` → `database_url` rename drift in test fixtures (9 files) - `PageType` union missing `'meeting'` / `'note'` entries that are already used in both src and tests (link-extraction.ts comments acknowledged the gap) - `GBrainConfig.storage` field never declared despite being read in files.ts and operations.ts - `ErrorCode` union missing `'permission_denied'` - `OrchestratorOpts` shape changed; test callers not updated - Dead-code comparisons in migration orchestrators against narrowed status types - postgres.js `Row`-callback type drift on several `.map()` calls - Buffer-as-BodyInit assignment in supabase.ts (real but non-fatal runtime bug; Uint8Array slice works and is type-correct) - Various `as X` single-step casts that now need `as unknown as X` per TS's stricter structural-conversion rules - Bump `beforeAll` hook timeout to 30s on four PGLite-heavy tests that were flaky under parallel test execution: wait-for-completion, extract-fs, e2e/search-quality, e2e/graph-quality. All pass in isolation; timeouts only happened when dozens of PGLite instances init'd simultaneously. The new CI pipeline now fails on any type error across src/ or test/, giving us the compile-time regression guard the subagent fix depends on. * fix(subagent): bind Anthropic SDK messages.create() correctly Shipped bug: v0.16.0 cast `new Anthropic()` to `MessagesClient`, but `.create()` lives at `sdk.messages.create`, not on the top-level client. Every subagent job in production died on first LLM call with `client.create is not a function`. Discovered on the first `gbrain agent run` against Supabase. Fix: assign `sdk.messages` directly to the `MessagesClient` slot. `sdk.messages` IS the object with a callable `.create()`; the original bug was picking the wrong entry point on the SDK. No helper, no wrapper, no `.bind()` — JS method-call semantics preserve `this` at the call site because `subagent.ts:336` invokes `client.create(...)` with `client === sdk.messages`. 
The one-line assignment also typechecks cleanly against the existing `MessagesClient` interface (SDK's first `create` overload: `(MessageCreateParamsNonStreaming, Core.RequestOptions?) => APIPromise<Message>` is assignable structurally). This gives us compile-time regression protection: anyone reverting to `new Anthropic()` would fail tsc because `Anthropic` has no top-level `.create`. (The companion chore commit puts `tsc --noEmit` in CI so this guard is enforced.) Also adds a `makeAnthropic?: () => Anthropic` dep-injection seam so the factory default construction branch is testable without real API calls. Regression test drives one handler turn through a fake SDK, asserting `sdk.messages.create` is actually called. If someone later reverts to `new Anthropic()`, both guards fire: tsc fails AND the test fails. Co-Authored-By: Wintermute <wintermute@garrytan.com> * chore(tests): add bunfig.toml + 60s hook timeouts to stabilize PGLite-heavy suites After turning on tsc in CI (previous commit), running the full `bun run test` suite in one shot triggered flaky `beforeEach/afterEach hook timed out` failures on 8+ test files. Every failure traced to PGLite WASM init contention when many test files spin up fresh PGLite instances in parallel; each one alone passes in isolation. - `bunfig.toml` sets the global test hook timeout to 60s (default is 5s), covering every test file without per-file edits. - Individual `beforeAll(fn, 60_000)` / `beforeEach(fn, 15_000)` calls on the 8 tests that flaked most stay in place as explicit safety nets so a future bunfig config change doesn't silently re-introduce the flake. Result: 1997 pass, 0 fail on `bun run test` (117 tests added since the prior baseline by picking up typecheck-gated passes). No infrastructure flake tolerated in CI. 
* chore: bump version and changelog (v0.16.3) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Wintermute <wintermute@garrytan.com> Co-authored-by: Wintermute <wintermute@openclaw.ai> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
168 lines
6.6 KiB
TypeScript
168 lines
6.6 KiB
TypeScript
/**
 * Transcript renderer tests. Uses PGLite in-memory to round-trip messages +
 * tool executions through the actual schema so the loader path is exercised.
 */
|
|
|
|
import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'bun:test';
|
|
import { PGLiteEngine } from '../src/core/pglite-engine.ts';
|
|
import { MinionQueue } from '../src/core/minions/queue.ts';
|
|
import {
|
|
loadTranscriptRows,
|
|
renderTranscript,
|
|
} from '../src/core/minions/transcript.ts';
|
|
import type { ContentBlock } from '../src/core/minions/types.ts';
|
|
|
|
/** Engine shared by every test in this file; assigned once in `beforeAll`. */
let engine: PGLiteEngine;
/** Queue built on top of `engine`; used to create the job each test works against. */
let queue: MinionQueue;
/** Id of the job the current test inserts rows for; reassigned before every test. */
let jobId: number;

// One-time setup: construct the engine, connect it, create the schema, then
// build the queue on top of it.
beforeAll(async () => {
  engine = new PGLiteEngine();
  // Empty database_url — per the file header this runs PGLite in-memory.
  const connection = { database_url: '' };
  await engine.connect(connection);
  await engine.initSchema();
  queue = new MinionQueue(engine);
});
|
|
|
|
// One-time teardown: disconnect the shared engine once every test has finished.
afterAll(async function disconnectEngine() {
  await engine.disconnect();
});
|
|
|
|
// Per-test reset: empty the three tables the tests touch, then enqueue a fresh
// `subagent` job and remember its id for the insert helpers.
beforeEach(async () => {
  // Order is kept from the original: message and tool rows go before the jobs
  // they point at (presumably to satisfy foreign keys — confirm against schema).
  const resetStatements = [
    'DELETE FROM subagent_messages',
    'DELETE FROM subagent_tool_executions',
    'DELETE FROM minion_jobs',
  ];
  for (const statement of resetStatements) {
    await engine.executeRaw(statement);
  }

  const { id } = await queue.add('subagent', { prompt: 'hi' }, {}, { allowProtectedSubmit: true });
  jobId = id;
});
|
|
|
|
/**
 * Insert a single `subagent_messages` row for the current test job (`jobId`).
 *
 * @param idx - value written to `message_idx`.
 * @param role - value written to `role`.
 * @param blocks - content blocks; serialized with `JSON.stringify` and cast to `jsonb` by the query.
 * @param tokens - optional token counters; every counter that is absent is written as NULL.
 * @param model - value written to `model`.
 */
async function insertMessage(
  idx: number,
  role: 'user' | 'assistant',
  blocks: ContentBlock[],
  tokens: { in?: number; out?: number; cache_read?: number; cache_create?: number } = {},
  model = 'claude-sonnet-4-6',
) {
  const { in: tokensIn, out: tokensOut, cache_read: cacheRead, cache_create: cacheCreate } = tokens;

  const sql = `INSERT INTO subagent_messages (job_id, message_idx, role, content_blocks, tokens_in, tokens_out, tokens_cache_read, tokens_cache_create, model)
     VALUES ($1, $2, $3, $4::jsonb, $5, $6, $7, $8, $9)`;

  // Positional parameters, in the same order as the column list above.
  const params = [
    jobId,
    idx,
    role,
    JSON.stringify(blocks),
    tokensIn ?? null,
    tokensOut ?? null,
    cacheRead ?? null,
    cacheCreate ?? null,
    model,
  ];

  await engine.executeRaw(sql, params);
}
|
|
|
|
/**
 * Insert a single `subagent_tool_executions` row for the current test job (`jobId`).
 *
 * @param idx - value written to `message_idx`.
 * @param toolUseId - value written to `tool_use_id`.
 * @param toolName - value written to `tool_name`.
 * @param input - tool input; serialized with `JSON.stringify` and cast to `jsonb` by the query.
 * @param status - value written to `status`.
 * @param output - tool output; `null`/`undefined` is written as NULL, anything else as JSON.
 * @param error - value written to `error`; defaults to NULL.
 */
async function insertTool(
  idx: number,
  toolUseId: string,
  toolName: string,
  input: unknown,
  status: 'pending' | 'complete' | 'failed',
  output: unknown = null,
  error: string | null = null,
) {
  const inputJson = JSON.stringify(input);

  // Only serialize an output that is actually present; a missing one stays SQL NULL.
  let outputJson: string | null = null;
  if (output !== null && output !== undefined) {
    outputJson = JSON.stringify(output);
  }

  const sql = `INSERT INTO subagent_tool_executions (job_id, message_idx, tool_use_id, tool_name, input, status, output, error)
     VALUES ($1, $2, $3, $4, $5::jsonb, $6, $7::jsonb, $8)`;

  await engine.executeRaw(sql, [jobId, idx, toolUseId, toolName, inputJson, status, outputJson, error]);
}
|
|
|
|
// Loader tests: rows are written with the insert helpers and read back through
// `loadTranscriptRows` for the job created in `beforeEach`.
describe('loadTranscriptRows', () => {
  test('empty job returns empty arrays', async () => {
    const rows = await loadTranscriptRows(engine, jobId);
    expect(rows.messages).toEqual([]);
    expect(rows.tools).toEqual([]);
  });

  test('returns messages in message_idx order', async () => {
    // Inserted out of order on purpose so the loader has to do the ordering.
    await insertMessage(1, 'assistant', [{ type: 'text', text: 'second' }]);
    await insertMessage(0, 'user', [{ type: 'text', text: 'first' }]);

    const { messages } = await loadTranscriptRows(engine, jobId);
    const loadedOrder = messages.map(row => row.message_idx);
    expect(loadedOrder).toEqual([0, 1]);
  });

  test('parses content_blocks from JSONB', async () => {
    const toolUse: ContentBlock = { type: 'tool_use', id: 'tu_1', name: 'brain_search', input: { q: 'x' } };
    await insertMessage(0, 'assistant', [toolUse]);

    const { messages } = await loadTranscriptRows(engine, jobId);
    const firstBlock = messages[0]!.content_blocks[0]!;
    expect(firstBlock.type).toBe('tool_use');
  });
});
|
|
|
|
// Renderer tests: apart from the empty case, each test writes rows for the
// current job, loads them back, and checks the rendered markdown for fragments.
describe('renderTranscript', () => {
  /** Load the message + tool rows stored for the current test job. */
  const loadRows = () => loadTranscriptRows(engine, jobId);

  /** Assert that every fragment occurs in the markdown, in the order given. */
  const expectFragments = (md: string, fragments: string[]) => {
    for (const fragment of fragments) {
      expect(md).toContain(fragment);
    }
  };

  test('empty messages produce a "no messages" placeholder', () => {
    const md = renderTranscript([], []);
    expectFragments(md, ['# Subagent transcript', '_(no messages)_']);
  });

  test('renders text content under role headers', async () => {
    await insertMessage(0, 'user', [{ type: 'text', text: 'hello' }]);
    await insertMessage(1, 'assistant', [{ type: 'text', text: 'hi back' }], { in: 5, out: 3 });

    const { messages, tools } = await loadRows();
    const md = renderTranscript(messages, tools);

    expectFragments(md, [
      '## Message 0 — user',
      'hello',
      '## Message 1 — assistant',
      'hi back',
      'tokens:',
      'in=5',
    ]);
  });

  test('renders tool_use with matching execution row', async () => {
    await insertMessage(0, 'assistant', [
      { type: 'tool_use', id: 'tu_42', name: 'brain_get_page', input: { slug: 'foo' } },
    ]);
    await insertTool(0, 'tu_42', 'brain_get_page', { slug: 'foo' }, 'complete', { title: 'Foo' });

    const { messages, tools } = await loadRows();
    const md = renderTranscript(messages, tools);

    expectFragments(md, ['**tool_use** `brain_get_page`', 'status: **complete**', '"title": "Foo"']);
  });

  test('renders tool_use with failed execution row shows error', async () => {
    await insertMessage(0, 'assistant', [
      { type: 'tool_use', id: 'tu_43', name: 'brain_put_page', input: { slug: 'bad' } },
    ]);
    await insertTool(0, 'tu_43', 'brain_put_page', { slug: 'bad' }, 'failed', null, 'permission_denied');

    const { messages, tools } = await loadRows();
    const md = renderTranscript(messages, tools);

    expectFragments(md, ['status: **failed**', 'permission_denied']);
  });

  test('pending tool execution is shown as pending', async () => {
    await insertMessage(0, 'assistant', [
      { type: 'tool_use', id: 'tu_44', name: 'brain_search', input: { q: 'x' } },
    ]);
    await insertTool(0, 'tu_44', 'brain_search', { q: 'x' }, 'pending');

    const { messages, tools } = await loadRows();
    const md = renderTranscript(messages, tools);

    expect(md).toContain('pending (no resolution recorded yet)');
  });

  test('truncates huge tool outputs per maxOutputBytes', async () => {
    await insertMessage(0, 'assistant', [
      { type: 'tool_use', id: 'tu_big', name: 'brain_search', input: {} },
    ]);
    // 8000-character payload, well above the 1024-byte cap passed below.
    const huge = 'x'.repeat(8000);
    await insertTool(0, 'tu_big', 'brain_search', {}, 'complete', { body: huge });

    const { messages, tools } = await loadRows();
    const md = renderTranscript(messages, tools, { maxOutputBytes: 1024 });

    expect(md).toContain('[truncated at 1024 bytes]');
    // The whole document must come out shorter than the raw payload alone.
    expect(md.length).toBeLessThan(huge.length);
  });

  test('unknown block types fall through to a JSON dump', async () => {
    // Deliberately not a valid ContentBlock, hence the cast.
    await insertMessage(0, 'assistant', [{ type: 'some_future_block_type', extra: 42 } as any]);

    const { messages, tools } = await loadRows();
    const md = renderTranscript(messages, tools);

    expectFragments(md, ['**some_future_block_type**', '"extra": 42']);
  });
});
|