From 76556232ddf0a70fb6a095957b0b0e5472277926 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Fri, 19 Jun 2026 17:23:21 +0200 Subject: [PATCH] bung in first version of brunch subagents --- package.json | 2 +- src/.pi/extensions/README.md | 4 +- src/.pi/extensions/subagents/README.md | 251 +++++++++++ src/.pi/extensions/subagents/agents.ts | 140 ++++++ .../extensions/subagents/agents/proposer.md | 25 ++ .../extensions/subagents/agents/researcher.md | 33 ++ src/.pi/extensions/subagents/agents/scout.md | 35 ++ src/.pi/extensions/subagents/config.ts | 52 +++ src/.pi/extensions/subagents/index.ts | 170 ++++++++ src/.pi/extensions/subagents/session.ts | 253 +++++++++++ .../extensions/subagents/subagents.test.ts | 409 ++++++++++++++++++ src/app/pi-extensions.ts | 28 +- src/app/pi-subagents.ts | 40 ++ 13 files changed, 1436 insertions(+), 6 deletions(-) create mode 100644 src/.pi/extensions/subagents/README.md create mode 100644 src/.pi/extensions/subagents/agents.ts create mode 100644 src/.pi/extensions/subagents/agents/proposer.md create mode 100644 src/.pi/extensions/subagents/agents/researcher.md create mode 100644 src/.pi/extensions/subagents/agents/scout.md create mode 100644 src/.pi/extensions/subagents/config.ts create mode 100644 src/.pi/extensions/subagents/index.ts create mode 100644 src/.pi/extensions/subagents/session.ts create mode 100644 src/.pi/extensions/subagents/subagents.test.ts create mode 100644 src/app/pi-subagents.ts diff --git a/package.json b/package.json index 93493e4de..876928bd5 100644 --- a/package.json +++ b/package.json @@ -33,7 +33,7 @@ "build": "tsc -p tsconfig.build.json && npm run build:info && npm run build:pi-assets && npm run build:web", "build:info": "node scripts/write-build-info.mjs", "prepack": "RELEASE=true npm run build", - "build:pi-assets": "mkdir -p dist/.pi/components/workspace-dialog dist/.pi/agents dist/.pi/skills && cp -R src/.pi/components/workspace-dialog/assets dist/.pi/components/workspace-dialog/ && cp -R 
src/.pi/agents/elicitor dist/.pi/agents/ && find src/.pi/agents -maxdepth 1 -name '*.md' ! -name README.md -exec cp {} dist/.pi/agents/ \\; && cp -R src/.pi/skills/strategies src/.pi/skills/lenses src/.pi/skills/methods dist/.pi/skills/", + "build:pi-assets": "mkdir -p dist/.pi/components/workspace-dialog dist/.pi/agents dist/.pi/skills && cp -R src/.pi/components/workspace-dialog/assets dist/.pi/components/workspace-dialog/ && cp -R src/.pi/agents/elicitor dist/.pi/agents/ && find src/.pi/agents -maxdepth 1 -name '*.md' ! -name README.md -exec cp {} dist/.pi/agents/ \\; && cp -R src/.pi/skills/strategies src/.pi/skills/lenses src/.pi/skills/methods dist/.pi/skills/ && mkdir -p dist/.pi/extensions/subagents/agents && cp src/.pi/extensions/subagents/agents/*.md dist/.pi/extensions/subagents/agents/ && cp src/.pi/extensions/subagents/config.json dist/.pi/extensions/subagents/", "build:web": "vite build", "seed": "tsx src/graph/seed-fixtures.ts", "db:generate": "drizzle-kit generate", diff --git a/src/.pi/extensions/README.md b/src/.pi/extensions/README.md index d8751bbce..6f8ffaac7 100644 --- a/src/.pi/extensions/README.md +++ b/src/.pi/extensions/README.md @@ -1,6 +1,6 @@ # .pi/extensions/ — Pi adapter registrars -SPEC decisions: D34-L, D35-L, D37-L, D39-L, D40-L, D52-L, D69-L, D71-L +SPEC decisions: D34-L, D35-L, D37-L, D39-L, D40-L, D44-L, D52-L, D69-L, D71-L ## Owns @@ -38,7 +38,7 @@ extensions/ ├── system-prompts/ before_agent_start prompt append + gap-driven active-tool selection; prompt composition (compose.ts) + pushed seed contexts (seed/) ├── web/ web_fetch/web_search read tools for referenced-document acquisition ├── workspace/ spec/session picker command adapter -└── subagents/ future subagent config/tool surface +└── subagents/ D44-L `subagent` tool — sealed SDK child sessions (default-off, opt-in) ``` ## Boundary rules diff --git a/src/.pi/extensions/subagents/README.md b/src/.pi/extensions/subagents/README.md new file mode 100644 index 
000000000..2015fd6f2 --- /dev/null +++ b/src/.pi/extensions/subagents/README.md @@ -0,0 +1,251 @@ +# subagents extension — D44-L + +> **Status (handoff doc):** mechanism **built + verified**, but **not yet wired +> into startup** — the `subagent` tool is present-but-dead in every build until a +> launch path passes `subagents` to `createBrunchPiExtensions(...)`. This README +> is intentionally fatter than the sibling topology READMEs because the feature +> is mid-integration; once it is wired and the SPEC is reconciled, trim it back +> to the short orientation-surface convention (ownership + SPEC refs + layout). + +SPEC decisions: D44-L (subagent), D39-L (sealed profile), D40-L (registration ≠ +advertisement). Frontier: PLAN.md `subagent-adoption`. + +--- + +## TL;DR for the next agent + +1. **It works and is fully tested.** `runSubagent` runs a sealed in-process SDK + child session over a faux provider in the tests; the full gate is green + (typecheck / lint / format / 334 unit tests / build). +2. **The only thing left is the startup wiring decision.** + `loadBrunchSubagents()` in [`src/app/pi-subagents.ts`](../../../app/pi-subagents.ts) + is **never called** yet. Pick a gate (recommended: `BRUNCH_DEV`, mirroring + introspection) and pass its result as `{ subagents }` into + `createBrunchPiExtensions(...)`. See [How to wire it in](#how-to-wire-it-in). +3. **SPEC drift to reconcile.** SPEC `D44-L` / `I29-L` still describe the + **superseded subprocess** model (`pi --mode json -p --no-session …`, argv-shape + tests). The implemented path is the **SDK sealed child session**. Reconcile + the SPEC text through the normal `/ln-sync` flow. +4. **Don't reintroduce** ambient `~/.pi` discovery, the `globalThis.__pi_subagents` + bridge, or a `pi` subprocess — all three conflict with D39-L sealing and were + deliberately dropped. 
+ +--- + +## What this is + +The D44-L `subagent` tool: a main-agent-invoked, **blocking** Pi tool that +delegates an isolated reasoning task to a sealed Pi child session and returns the +child's last assistant message as tool-result content. Starter agents are +read-only (`scout`, `researcher`) or no-tools (`proposer`) — no write/worker +agent yet. + +It is the Brunch-native realization of the community "subagents" pattern +(`amosblomqvist/pi-subagents`, the canonical pi example, etc.), but using Pi's +**SDK** instead of spawning the `pi` binary. See +[Comparison to the original](#comparison-to-the-original-amosblomqvistpi-subagents). + +## Execution model — SDK child session, not a subprocess + +Each subagent runs as an in-process SDK `AgentSession` +(`createAgentSessionServices` → `createAgentSessionFromServices`), built from +**explicit sealed services** so it inherits nothing implicit (D39-L): + +```diagram +╭──────────────── foreground agent (elicitor) ────────────────╮ +│ subagent tool.execute({ agent, task } | { tasks:[…] }) │ +│ │ semaphore(maxConcurrency) + Promise.all + AbortSignal │ +│ ▼ │ +│ runSubagent ─ resolveSubagentModel ─ planSubagentTools ──╮ │ +╰───────────────────────────────────────────────────────────│─╯ + ▼ + ╭──────── sealed SDK child AgentSession ────────╮ + │ authStorage = AuthStorage.inMemory() │ no ambient auth.json + │ settings = inMemory(BRUNCH policy) │ injected per child + │ resourceLoader= sealed: noExtensions/noSkills/ │ no ambient discovery + │ noPromptTemplates/noThemes/ │ + │ noContextFiles │ + │ systemPrompt = agent .md body (REPLACES base) │ not Pi coding base + │ modelRegistry = parent's (resolved auth) │ no model bootstrap + │ sessionManager= SessionManager.inMemory(cwd) │ nothing persisted + │ tools = explicit allowlist only │ no bash/edit/write + ╰───────────────────────────────────────────────╯ + │ session.prompt(task) + ▼ getLastAssistantText() ──▶ tool-result content +``` + +The child has no conversation context (the 
`task` string must be +self-contained), no `CommandExecutor`, no graph access, and no Brunch RPC. Its +last assistant message is the only thing that crosses back to the parent. + +## File map + +| File | Responsibility | +| --- | --- | +| [`agents.ts`](./agents.ts) | Markdown agent loader: tiny frontmatter parser (no YAML dep), TypeBox-validated schema (`name`, `description`, `tools`, `model`, `thinking`), `loadSubagentDefinitions(dir)` → `Map<name, SubagentDefinition>`. Fails loud on malformed/duplicate agents. | +| [`config.ts`](./config.ts) | TypeBox loader for [`config.json`](./config.json) (`version`, `maxConcurrency`; tolerates `$comment`). | +| [`session.ts`](./session.ts) | The sealed child-session runner. `resolveSubagentModel`, `planSubagentTools`, `runSubagent`. Never throws — failures return as error results. **Injectable SDK builders** (`createServices`/`createSession`) for testing. | +| [`index.ts`](./index.ts) | `registerBrunchSubagents(pi, deps)` — registers the one `subagent` tool (single `{agent,task}` or parallel `{tasks:[…]}`), `createSemaphore` for bounded concurrency, result formatting. Re-exports the public surface. | +| [`agents/*.md`](./agents) | Declarative agent definitions (see below). | +| [`config.json`](./config.json) | Externalized concurrency cap (`maxConcurrency: 4`). | +| [`subagents.test.ts`](./subagents.test.ts) | 26 tests: parsing, config, model resolution, tool planning, semaphore, registrar, and **two end-to-end faux-provider child-session runs** asserting the sealing invariants. | +| [`../../../app/pi-subagents.ts`](../../../app/pi-subagents.ts) | **App composition root.** `loadBrunchSubagents({cwd, agentDir})` assembles `BrunchSubagentsDeps` using the sealed `pi-settings` helpers. Keeps `.pi/` free of `src/app` imports (deps are injected). | + +Boundary rule: `.pi/extensions/subagents/*` may import the SDK and `../web/` +(for `web_search`/`web_fetch`), but **never** `src/app/*`. The app layer injects +the sealed primitives. 
+ +## Agent definitions (`agents/*.md`) + +Frontmatter is the registry contract; the markdown body is the child's system +prompt (used verbatim, replacing Pi's coding base). + +```yaml +--- +name: scout # required, unique +description: … # required (shown in the tool description/catalog) +tools: read, grep, find, ls # comma-separated; omit/empty ⇒ no tools +model: default # "default" (inherit parent) or "provider/model-id" +thinking: low # low | medium | high +--- + +``` + +Starter agents (read-only / no-write): + +| agent | tools | role | +| --- | --- | --- | +| `scout` | `read, grep, find, ls` | read-only codebase recon | +| `researcher` | `web_search, web_fetch` | external web research | +| `proposer` | _(none)_ | one candidate-proposal variant per call; fan out for diversity | + +Tool resolution (`planSubagentTools`): read-only filesystem tools come from the +SDK (`createReadToolDefinition(cwd)` etc., cwd-bound, override built-ins of the +same name); web tools come from Brunch's own `../web/` factories. Write/shell +built-ins (`bash`/`edit`/`write`) are never in the pool; an unknown tool name in +frontmatter **throws** at plan time (authoring bug → fail loud). + +## How to wire it in + +The feature is registered behind an opt-in in `createBrunchPiExtensions` already: +when its options carry `subagents`, the tool is registered **and** unioned into +the opt-in active-tool channel (alongside the dev introspection tools); when +omitted it is absent (default-off). See +[`src/app/pi-extensions.ts`](../../../app/pi-extensions.ts) — search for +`options.subagents` and `BRUNCH_SUBAGENT_TOOL`. + +What is missing is a **launch path that supplies those deps**. 
Mirror the +introspection precedent in +[`src/app/brunch-tui.ts`](../../../app/brunch-tui.ts): + +**Step 1 — import the composition root** (top of `brunch-tui.ts`): + +```ts +import { loadBrunchSubagents } from './pi-subagents.js'; +``` + +**Step 2 — load the deps inside the runtime factory.** In +`createBrunchAgentSessionRuntimeFactory`, the returned +`async ({ cwd, agentDir: runtimeAgentDir, sessionManager }) => { … }` already has +`cwd` and `runtimeAgentDir` in scope. Before the `createBrunchPiSettings({...})` +call, gate the load (recommended gate: `context.dev`, i.e. `BRUNCH_DEV`, per +D71-L): + +```ts +const subagents = context.dev + ? await loadBrunchSubagents({ cwd, agentDir: runtimeAgentDir }) + : undefined; +``` + +**Step 3 — pass it into the extensions options**, right next to the existing +introspection opt-in (search for `context.dev ? { introspection: … }`): + +```ts +...(context.dev ? { introspection: context.dev.introspection } : {}), +...(subagents ? { subagents } : {}), // ← add this line +``` + +That's the whole wiring. `createBrunchPiExtensions` does the rest: it registers +the `subagent` tool and adds `BRUNCH_SUBAGENT_TOOL` to the opt-in allowlist so +the operational-mode policy advertises it. + +### Choosing the gate (the real decision) + +`createBrunchPiExtensions` adds `subagent` to the **active** tool set whenever +`subagents` is present, so **do not** load it unconditionally — that would make +subagents live in every production `elicit` session, contradicting D44-L +("optional enhancement, not load-bearing") and the default-off design. + +- **Recommended (proving stage):** gate on `context.dev` / `BRUNCH_DEV`, exactly + like introspection. Exercisable in dev, dead in prod, no new switch. +- **Production trigger** ("when may the elicitor delegate acquisition?") is + explicitly **deferred** (D82-L successor seam). When that lands, the gate + becomes a posture/capability check rather than the dev switch. 
+ +To also drive it from the dev/faux loop, wire `loadBrunchSubagents` into the +`src/dev/` front door / faux-harness the same way (D68-L). + +## Conceptual reference (preserved from the design discussion) + +**Isolation is total.** The child does **not** share the parent's thread. Own +in-memory session/auth/settings, own system prompt, returns only its last +assistant message. The faux-provider tests assert this: the child system prompt +is the agent body (not "coding agent"), only the declared tools are advertised to +the model, and the task is the sole conversational input. + +**Blocking: yes (D44-L).** The parent's turn awaits the child. Within one call, +multiple `tasks` fan out concurrently via `Promise.all` + `createSemaphore` +(capped by `config.json` `maxConcurrency`), and `AbortSignal` propagates parent +cancellation into `session.abort()`. For I/O-bound LLM calls this is the right +primitive set — `worker_threads` would add nothing (the work is network I/O, not +CPU), and the subprocess/RPC models trade that simplicity for ambient-discovery +coupling we explicitly rejected. + +**Nesting: deliberately not supported (yet).** The original grants `subagent` as +a *tool* to nest-capable agents (its `worker`), bounded by a `subagent_agents` → +`PI_SUBAGENT_ALLOWED` allowlist, no depth counter (bundled depth stops at 2). +Brunch children get an explicit allowlist that **excludes** `subagent`, so they +cannot recurse — a safety property, not an oversight. To enable nesting later: +add a recursion-bounded `subagent` tool to a child's pool in `planSubagentTools` +and carry a depth/allowlist bound; pairs naturally with the future write-capable +`worker` under an execute op-mode. 
+ +## Comparison to the original (`amosblomqvist/pi-subagents`) + +| Aspect | Original | Brunch (this) | +| --- | --- | --- | +| Agent discovery | Bundled `agents/*.md` beside `index.ts` **+** `globalThis.__pi_subagents` runtime bridge for other extensions | Bundled `agents/*.md` via explicit `loadSubagentDefinitions(dir)`; **no** bridge, **no** ambient `~/.pi` scan | +| Frontmatter | Loose: string split + silent defaults; extra `subagent_agents` allowlist; `model` default `anthropic/claude-sonnet-4-6` | Strict TypeBox schema, **fails loud**; no `subagent_agents` (no nesting); `model: default` inherits parent | +| Execution | `spawn()` a child `pi` process (`--mode json -p --no-session --no-skills --no-extensions`, re-adds `--extension` paths, `--append-system-prompt` temp file) | In-process SDK `AgentSession` with sealed services | +| Isolation basis | OS process boundary + flags; depends on a resolvable `pi` binary on PATH | Sealed in-memory services; no binary, no ambient leakage | +| Nesting | Supported via `subagent`-as-tool + `PI_SUBAGENT_ALLOWED` | Not supported (children lack the tool) | + +The file-based bundled layout of the original is preserved; the parts that fight +sealing (the `globalThis` bridge and the `pi` subprocess) are what changed. + +## Verify + +```bash +# from repo root +npx tsc -p tsconfig.build.json --noEmit # typecheck (project) +npx oxlint --type-aware src/.pi/extensions/subagents src/app/pi-subagents.ts src/app/pi-extensions.ts +npx oxfmt --check src/.pi/extensions/subagents/*.ts src/app/pi-subagents.ts src/app/pi-extensions.ts +npx vitest --run src/.pi/extensions/subagents # 26 tests +npx vitest --run src/.pi # 271 tests (blast radius of the opt-in channel change) +npm run build # compiles + copies agents/*.md + config.json into dist +``` + +(Project convention: `npm run fix` inner loop, `npm run verify` gate — but those +mutate the whole tree; prefer the file-scoped commands above when other work is +in flight.) 
+ +## Deferred / open + +- **Startup wiring + gate** — see [How to wire it in](#how-to-wire-it-in). The + mechanism is done; the gate is a product decision. +- **SPEC reconciliation** — D44-L / I29-L describe the subprocess model; update + to the SDK child-session reality via `/ln-sync`. +- **Nesting** and a **write-capable `worker`** — deferred until an execute + operational mode lands. +- **Progress UI** — NDJSON/`subagent.progress` streaming for TUI/web is deferred + (the SDK child runs in-process; surface its progress when bandwidth permits). diff --git a/src/.pi/extensions/subagents/agents.ts b/src/.pi/extensions/subagents/agents.ts new file mode 100644 index 000000000..850eb12ee --- /dev/null +++ b/src/.pi/extensions/subagents/agents.ts @@ -0,0 +1,140 @@ +/** + * Subagent agent definitions (D44-L). + * + * Agents are declarative markdown files with a small frontmatter block plus a + * system-prompt body. The frontmatter is the registry contract; the body is the + * subagent's standing instructions (it becomes the child session's system + * prompt). Frontmatter is validated through a TypeBox schema (D41-L) so a + * malformed agent fails loud at load time rather than producing a silently + * misconfigured child session. + * + * The format is intentionally tiny (scalar `key: value` lines plus a + * comma-separated `tools` list) so no YAML dependency is required; Brunch owns + * these files, so the parser only needs to handle the shapes Brunch authors. + */ + +import { readdir, readFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +import { Type, type Static } from 'typebox'; +import { Value } from 'typebox/value'; + +export const SUBAGENT_THINKING_LEVELS = ['low', 'medium', 'high'] as const; + +export const SubagentFrontmatterSchema = Type.Object({ + name: Type.String({ minLength: 1 }), + description: Type.String({ minLength: 1 }), + /** Allowlist of tool names the child session may use. 
/** An agent markdown file split into its raw frontmatter fields and its body. */
interface ParsedFrontmatter {
  /** Trimmed `key: value` pairs from the frontmatter block; values are not interpreted here. */
  readonly fields: Record<string, string>;
  /** Everything after the closing `---` delimiter, untrimmed. */
  readonly body: string;
}

/**
 * Matches a leading frontmatter section: an optional BOM, an opening `---`
 * line, the block (capture 1), a closing `---`, then the body (capture 2).
 * Both LF and CRLF line endings are accepted.
 */
const FRONTMATTER_PATTERN = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---\r?\n?([\s\S]*)$/;

/**
 * Split an agent markdown source into frontmatter fields and body.
 *
 * Only scalar `key: value` lines are understood. Blank lines and lines that
 * start with `#` are skipped. The first `:` on a line separates key from
 * value, so values may themselves contain colons.
 *
 * @param source Full text of the agent markdown file.
 * @returns The trimmed frontmatter fields and the untrimmed body.
 * @throws Error when the leading `---` section is missing, when a line has no
 *   `:` or has an empty key, or when a key appears more than once. A repeated
 *   key is rejected rather than letting the last one win, so a stray second
 *   `tools:` line cannot silently change the allowlist.
 */
function parseFrontmatterBlock(source: string): ParsedFrontmatter {
  const match = FRONTMATTER_PATTERN.exec(source);
  if (!match) {
    throw new Error('missing frontmatter block (expected a leading "---" delimited section)');
  }
  const block = match[1] ?? '';
  const body = match[2] ?? '';

  // A Map keeps duplicate detection independent of Object.prototype, so keys
  // such as "constructor" or "__proto__" behave like any other key.
  const collected = new Map<string, string>();
  for (const rawLine of block.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line.length === 0 || line.startsWith('#')) continue;

    const separator = line.indexOf(':');
    // -1: no separator at all; 0: the line starts with ":" and so has no key.
    if (separator <= 0) {
      throw new Error(`malformed frontmatter line (expected "key: value"): ${rawLine}`);
    }

    const key = line.slice(0, separator).trim();
    if (collected.has(key)) {
      throw new Error(`duplicate frontmatter key "${key}"`);
    }
    collected.set(key, line.slice(separator + 1).trim());
  }

  return { fields: Object.fromEntries(collected), body };
}

/**
 * Turn the comma-separated `tools` frontmatter value into a list of tool names.
 *
 * @param value Raw field value; `undefined` or an empty string means no tools.
 * @returns Trimmed, non-empty entries in their original order.
 */
function parseToolList(value: string | undefined): string[] {
  if (!value) return [];
  return value
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
}
` in ${options.sourcePath}` : ''; + let parsed: ParsedFrontmatter; + try { + parsed = parseFrontmatterBlock(source); + } catch (error) { + throw new Error(`Invalid subagent definition${where}: ${(error as Error).message}`); + } + + const candidate = { + name: parsed.fields.name ?? '', + description: parsed.fields.description ?? '', + tools: parseToolList(parsed.fields.tools), + model: parsed.fields.model ?? 'default', + thinking: parsed.fields.thinking ?? 'medium', + }; + + if (!Value.Check(SubagentFrontmatterSchema, candidate)) { + const detail = [...Value.Errors(SubagentFrontmatterSchema, candidate)] + .map((issue) => `${issue.instancePath || '/'} ${issue.message}`) + .join('; '); + throw new Error(`Invalid subagent frontmatter${where}: ${detail}`); + } + + const body = parsed.body.trim(); + if (body.length === 0) { + throw new Error(`Invalid subagent definition${where}: empty system-prompt body`); + } + + return { ...candidate, systemPrompt: body }; +} + +/** Filesystem location of the bundled agent markdown resources. */ +export function subagentAgentsDir(): string { + return fileURLToPath(new URL('./agents', import.meta.url)); +} + +/** + * Load every `*.md` agent definition from a directory, keyed by agent name. + * Throws on a malformed definition or a duplicate name so misconfiguration is + * caught at registration time. 
/**
 * Read every agent markdown file in `dir` and index the parsed definitions by
 * agent name.
 *
 * Only regular files ending in `.md` are considered, and `README.md` is
 * skipped. Files are processed one at a time in sorted file-name order, so the
 * first failure reported is the same on every run.
 *
 * @param dir Directory containing the agent `*.md` files.
 * @returns Map from agent name to its parsed definition.
 * @throws Error when two files declare the same agent name. Errors from
 *   reading the directory or files, and from `parseSubagentMarkdown`,
 *   propagate unchanged.
 */
export async function loadSubagentDefinitions(dir: string): Promise<Map<string, SubagentDefinition>> {
  const markdownFiles: string[] = [];
  for (const dirent of await readdir(dir, { withFileTypes: true })) {
    if (!dirent.isFile()) continue;
    if (!dirent.name.endsWith('.md') || dirent.name === 'README.md') continue;
    markdownFiles.push(dirent.name);
  }
  markdownFiles.sort();

  const byName = new Map<string, SubagentDefinition>();
  for (const fileName of markdownFiles) {
    const markdown = await readFile(join(dir, fileName), 'utf8');
    const parsed = parseSubagentMarkdown(markdown, { sourcePath: fileName });
    if (byName.has(parsed.name)) {
      throw new Error(`Duplicate subagent name "${parsed.name}" (from ${fileName})`);
    }
    byName.set(parsed.name, parsed);
  }
  return byName;
}
+ +Return the single proposal variant only. diff --git a/src/.pi/extensions/subagents/agents/researcher.md b/src/.pi/extensions/subagents/agents/researcher.md new file mode 100644 index 000000000..a3c3dbdc0 --- /dev/null +++ b/src/.pi/extensions/subagents/agents/researcher.md @@ -0,0 +1,33 @@ +--- +name: researcher +description: Web research — external docs, APIs, and references +tools: web_search, web_fetch +model: default +thinking: medium +--- + +You are a researcher: a web-research agent running in an isolated context with no +memory of any prior conversation. Everything you need is in the task description. + +Your tools: + +- `web_search` — search the web; returns extracted, LLM-ready page content and + source URLs +- `web_fetch` — fetch a specific URL and extract readable markdown (HTML, PDFs, + plain text) + +You have no local filesystem or shell access. + +Method: + +1. Start with `web_search` to find authoritative sources; prefer official docs. +2. Use `web_fetch` to read the most promising results closely. +3. Cross-check claims across at least two sources when they matter. + +Report back a concise research digest: + +- The answer to the task, with the key facts. +- Source URLs for every nontrivial claim. +- Clearly mark anything uncertain or not found. + +Return the digest only — you are summarizing for another agent. diff --git a/src/.pi/extensions/subagents/agents/scout.md b/src/.pi/extensions/subagents/agents/scout.md new file mode 100644 index 000000000..ab4446ec5 --- /dev/null +++ b/src/.pi/extensions/subagents/agents/scout.md @@ -0,0 +1,35 @@ +--- +name: scout +description: Read-only codebase recon — locates where things live +tools: read, grep, find, ls +model: default +thinking: low +--- + +You are a scout: a fast, read-only reconnaissance agent running in an isolated +context with no memory of any prior conversation. Everything you need is in the +task description. 
+ +Your tools (read-only): + +- `read` — read a file +- `grep` — search file contents by regex +- `find` — find files by name or glob +- `ls` — list a directory + +You cannot write, edit, or run shell commands. Do not attempt to. + +Method: + +1. Use `grep`/`find` to locate relevant files, then `read` the most relevant ones. +2. Go breadth-first (where things live), then depth (how they work) only as the + task requires. +3. Stop as soon as you can answer; do not over-explore. + +Report back a concise findings digest: + +- The specific files and symbols that answer the task, each as `path:line` when known. +- A short explanation of how they fit together. +- Anything the caller asked for that you could NOT find, stated plainly. + +Return findings only — you are summarizing for another agent, not editing. diff --git a/src/.pi/extensions/subagents/config.ts b/src/.pi/extensions/subagents/config.ts new file mode 100644 index 000000000..e5175c450 --- /dev/null +++ b/src/.pi/extensions/subagents/config.ts @@ -0,0 +1,52 @@ +/** + * Subagent extension config (D44-L). + * + * The concurrency cap lives in an externalized `config.json` so it can be + * reviewed and edited without SPEC churn. It is validated through a TypeBox + * schema (D41-L) when loaded. Unknown keys (e.g. a `$comment` documenting the + * file) are tolerated. + */ + +import { readFile } from 'node:fs/promises'; +import { fileURLToPath } from 'node:url'; + +import { Type, type Static } from 'typebox'; +import { Value } from 'typebox/value'; + +export const SubagentConfigSchema = Type.Object( + { + version: Type.Integer({ minimum: 1 }), + maxConcurrency: Type.Integer({ minimum: 1 }), + }, + { additionalProperties: true }, +); + +export type SubagentConfig = Static; + +export const DEFAULT_SUBAGENT_CONFIG: SubagentConfig = { version: 1, maxConcurrency: 4 }; + +/** Filesystem location of the bundled `config.json`. 
/**
 * Read and validate the subagent config file at `path`.
 *
 * Read failures and JSON syntax errors are re-thrown with the file path in the
 * message. Schema validation is delegated to `parseSubagentConfig`.
 *
 * @param path Filesystem path of the JSON config file.
 * @returns The validated config.
 * @throws Error when the file cannot be read, is not valid JSON, or is
 *   rejected by `parseSubagentConfig`.
 */
export async function loadSubagentConfig(path: string): Promise<SubagentConfig> {
  let raw: unknown;
  try {
    raw = JSON.parse(await readFile(path, 'utf8'));
  } catch (error: unknown) {
    // Narrow instead of asserting: a non-Error throwable would otherwise
    // produce a message ending in "undefined".
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Invalid subagent config in ${path}: ${reason}`);
  }
  return parseSubagentConfig(raw, { sourcePath: path });
}
+ */ + +import type { ExtensionAPI, ToolDefinition } from '@earendil-works/pi-coding-agent'; +import { Type, type Static } from 'typebox'; + +import type { SubagentDefinition } from './agents.js'; +import { + runSubagent as defaultRunSubagent, + type SubagentResult, + type SubagentSealedDeps, +} from './session.js'; + +export { + loadSubagentDefinitions, + parseSubagentMarkdown, + subagentAgentsDir, + type SubagentDefinition, +} from './agents.js'; +export { + DEFAULT_SUBAGENT_CONFIG, + loadSubagentConfig, + parseSubagentConfig, + subagentConfigPath, + type SubagentConfig, +} from './config.js'; +export { + planSubagentTools, + resolveSubagentModel, + runSubagent, + type SubagentResult, + type SubagentRunContext, + type SubagentSealedDeps, +} from './session.js'; + +export const BRUNCH_SUBAGENT_TOOL = 'subagent'; + +export interface BrunchSubagentsDeps extends SubagentSealedDeps { + readonly definitions: Map; + readonly maxConcurrency: number; + /** Injectable runner (defaults to the real sealed-session runner) for testing. */ + readonly runSubagent?: typeof defaultRunSubagent; +} + +/** Bounded-concurrency gate built from Promise primitives. 
/**
 * Create a bounded-concurrency gate built from Promise primitives.
 *
 * The returned function runs `task` as soon as a slot is free and resolves or
 * rejects with the task's outcome. At most `max` tasks run at once; callers
 * beyond the cap wait in FIFO order.
 *
 * A finishing task hands its slot directly to the oldest waiter, and the
 * active count is left untouched during the handoff. Decrementing first and
 * letting the waiter re-increment later would open a microtask-sized gap in
 * which a new caller could take the slot as well, exceeding the cap.
 *
 * @param max Maximum number of concurrently running tasks. Fractions are
 *   floored, values below 1 are treated as 1, and `NaN` is treated as 1.
 * @returns A function that schedules a task under the cap.
 */
export function createSemaphore(max: number): <T>(task: () => Promise<T>) => Promise<T> {
  // NaN must be handled explicitly: `active >= NaN` is always false, which
  // would disable the cap entirely.
  const limit = Number.isNaN(max) ? 1 : Math.max(1, Math.floor(max));
  let active = 0;
  const waiters: Array<() => void> = [];

  const release = (): void => {
    const next = waiters.shift();
    if (next) {
      // Transfer the slot: `active` still counts it, now on behalf of `next`.
      next();
      return;
    }
    active -= 1;
  };

  return async <T>(task: () => Promise<T>): Promise<T> => {
    if (active >= limit) {
      // Resumes only once release() has handed this caller a slot.
      await new Promise<void>((resolve) => waiters.push(resolve));
    } else {
      active += 1;
    }
    try {
      return await task();
    } finally {
      release();
    }
  };
}
})), + tasks: Type.Optional( + Type.Array(TaskSchema, { + minItems: 1, + description: 'Parallel mode: run several subagent tasks concurrently.', + }), + ), + }); + type Params = Static; + + const tool: ToolDefinition = { + name: BRUNCH_SUBAGENT_TOOL, + label: 'subagent', + description: + `Delegate an isolated, read-only reasoning task to a sealed child agent. ` + + `Each subagent runs in its own context with no memory of this conversation — put everything it needs in "task". ` + + `Use a single { agent, task } or fan out with { tasks: [{ agent, task }, ...] }. ` + + `Available agents: ${agentCatalog(deps.definitions)}.`, + parameters: ParamsSchema, + async execute(_toolCallId, params: Params, signal, _onUpdate, ctx) { + const requested = + params.tasks ?? (params.agent && params.task ? [{ agent: params.agent, task: params.task }] : []); + if (requested.length === 0) { + return { + content: [ + { + type: 'text' as const, + text: 'subagent requires either { agent, task } or { tasks: [{ agent, task }, ...] }.', + }, + ], + details: { results: [] }, + }; + } + + const runContext = { cwd: ctx.cwd, modelRegistry: ctx.modelRegistry, model: ctx.model, signal }; + const results = await Promise.all( + requested.map((entry) => + limit(async (): Promise => { + const definition = deps.definitions.get(entry.agent); + if (!definition) { + return { + agent: entry.agent, + status: 'error', + text: `Unknown subagent "${entry.agent}". 
Available: ${agentNames.join(', ')}.`, + }; + } + return run({ definition, task: entry.task, ctx: runContext, deps }); + }), + ), + ); + + return { + content: [{ type: 'text' as const, text: formatResults(results) }], + details: { results }, + }; + }, + }; + + pi.registerTool(tool as never); +} diff --git a/src/.pi/extensions/subagents/session.ts b/src/.pi/extensions/subagents/session.ts new file mode 100644 index 000000000..687eef8a7 --- /dev/null +++ b/src/.pi/extensions/subagents/session.ts @@ -0,0 +1,253 @@ +/** + * Sealed SDK child sessions for subagents (D44-L / I29-L). + * + * Each subagent runs as an in-process SDK `AgentSession` — NOT a `pi` + * subprocess and NOT ambient `~/.pi` discovery. The child is constructed from + * explicit, sealed services so it inherits nothing implicit: + * + * - sealed in-memory `SettingsManager` (injected from the app layer) + * - sealed `DefaultResourceLoader` options (no extensions/skills/prompts/ + * themes/context files) with the agent body as the system prompt + * - `AuthStorage.inMemory()` so ambient `auth.json` never leaks + * - the parent's `ModelRegistry` (carries resolved auth + registered + * providers) so the child needs no ambient model bootstrap + * - an in-memory `SessionManager` so nothing is persisted to disk + * - an explicit tool allowlist built from Brunch-owned tool definitions + * + * The child has no conversation context (the task string must be + * self-contained), no `CommandExecutor`, no graph access, and no Brunch RPC. + * Its last assistant message is returned to the caller as tool-result content. 
/**
 * Module body for the sealed child-session runner: SDK imports, the run
 * context and dependency contracts, model/tool planning helpers, and the
 * `runSubagent` entry point.
 */

import {
  AuthStorage,
  createAgentSessionFromServices,
  createAgentSessionServices,
  createFindToolDefinition,
  createGrepToolDefinition,
  createLsToolDefinition,
  createReadToolDefinition,
  SessionManager,
  type CreateAgentSessionFromServicesOptions,
  type CreateAgentSessionServicesOptions,
  type ExtensionContext,
  type SettingsManager,
  type ToolDefinition,
} from '@earendil-works/pi-coding-agent';

import { createWebFetchTool } from '../web/web-fetch.js';
import { createWebSearchTool } from '../web/web-search.js';
import type { SubagentDefinition } from './agents.js';

// NOTE(review): the type argument was lost in extraction; reconstructed from the
// otherwise-unused `CreateAgentSessionFromServicesOptions` import — confirm.
type ChildModel = NonNullable<CreateAgentSessionFromServicesOptions['model']>;
type ChildModelRegistry = ExtensionContext['modelRegistry'];

/** The subset of the tool execution context a subagent run needs. */
export interface SubagentRunContext {
  readonly cwd: string;
  readonly modelRegistry: ChildModelRegistry;
  readonly model: ExtensionContext['model'];
  readonly signal?: AbortSignal | undefined;
}

/**
 * Sealed runtime primitives injected from the app composition root so this
 * `.pi` module never imports `src/app`.
 */
export interface SubagentSealedDeps {
  readonly agentDir: string;
  /** Builds a fresh sealed in-memory settings manager per child session. */
  readonly createSettingsManager: () => SettingsManager;
  /** Sealed resource-loader options (no ambient discovery), sans system prompt. */
  readonly resourceLoaderOptions: CreateAgentSessionServicesOptions['resourceLoaderOptions'];
}

/** Everything one `runSubagent` call needs: the agent, its task, and the runtime to build on. */
export interface RunSubagentInput {
  readonly definition: SubagentDefinition;
  readonly task: string;
  readonly ctx: SubagentRunContext;
  readonly deps: SubagentSealedDeps;
  /** Injectable SDK builders (defaults to the real ones) for testing. */
  readonly createServices?: typeof createAgentSessionServices;
  readonly createSession?: typeof createAgentSessionFromServices;
}

/** Outcome of one subagent run; `text` is either the child's answer or an error description. */
export interface SubagentResult {
  readonly agent: string;
  readonly status: 'ok' | 'error';
  readonly text: string;
}

export type ModelResolution =
  | { readonly status: 'resolved'; readonly model: ChildModel }
  | { readonly status: 'unresolved'; readonly reason: string };

/**
 * Resolve a child model from the agent's `model` field. `default` inherits the
 * parent's current model (falling back to the first available registered
 * model); `provider/model-id` is looked up in the parent's registry.
 *
 * @returns `resolved` with the model, or `unresolved` with a human-readable
 *   reason (no model available, malformed string, or unknown model).
 */
export function resolveSubagentModel(
  definition: SubagentDefinition,
  ctx: Pick<SubagentRunContext, 'model' | 'modelRegistry'>,
): ModelResolution {
  if (definition.model === 'default') {
    const model = ctx.model ?? ctx.modelRegistry.getAvailable()[0];
    if (!model) return { status: 'unresolved', reason: 'no model is available for "default"' };
    return { status: 'resolved', model };
  }

  // Split on the first slash only, so model ids may themselves contain slashes.
  const separator = definition.model.indexOf('/');
  if (separator <= 0 || separator === definition.model.length - 1) {
    return {
      status: 'unresolved',
      reason: `model "${definition.model}" must be "default" or "provider/model-id"`,
    };
  }
  const provider = definition.model.slice(0, separator);
  const modelId = definition.model.slice(separator + 1);
  const model = ctx.modelRegistry.find(provider, modelId);
  if (!model) {
    return { status: 'unresolved', reason: `model "${definition.model}" is not registered or available` };
  }
  return { status: 'resolved', model };
}

/** SDK session options derived from an agent's tool allowlist. */
export interface SubagentToolPlan {
  readonly tools?: string[];
  readonly customTools?: ToolDefinition[];
  readonly noTools?: 'all';
}

/**
 * Brunch-owned tool definitions a subagent may be granted. Read-only filesystem
 * tools come from the SDK (cwd-bound; they override the built-ins of the same
 * name); web tools come from Brunch's own factories. Write/shell built-ins
 * (`bash`/`edit`/`write`) are never offered.
 */
function subagentToolPool(cwd: string): Map<string, ToolDefinition> {
  const pool = new Map<string, ToolDefinition>();
  for (const definition of [
    createReadToolDefinition(cwd),
    createGrepToolDefinition(cwd),
    createFindToolDefinition(cwd),
    createLsToolDefinition(cwd),
  ]) {
    pool.set(definition.name, definition as ToolDefinition);
  }
  for (const tool of [createWebSearchTool(), createWebFetchTool()]) {
    pool.set(tool.name, tool as unknown as ToolDefinition);
  }
  return pool;
}

/**
 * Translate an agent's declared tool allowlist into SDK session options.
 * Throws on an unknown tool name (a Brunch authoring bug — fail loud).
 *
 * @returns `{ noTools: 'all' }` for a tool-less agent, otherwise the declared
 *   names plus the matching cwd-bound tool definitions.
 * @throws Error listing every unknown tool name and the available pool.
 */
export function planSubagentTools(
  definition: SubagentDefinition,
  ctx: Pick<SubagentRunContext, 'cwd'>,
): SubagentToolPlan {
  if (definition.tools.length === 0) return { noTools: 'all' };

  const pool = subagentToolPool(ctx.cwd);
  const customTools: ToolDefinition[] = [];
  const unknown: string[] = [];
  for (const name of definition.tools) {
    const tool = pool.get(name);
    if (tool) customTools.push(tool);
    else unknown.push(name);
  }
  if (unknown.length > 0) {
    throw new Error(
      `subagent "${definition.name}" requests unknown tool(s): ${unknown.join(', ')}. ` +
        `Available: ${[...pool.keys()].join(', ')}.`,
    );
  }
  return { tools: [...definition.tools], customTools };
}

/** Best-effort message extraction for values caught from a `catch` clause. */
function errorText(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Run one subagent to completion in a sealed child session and return its last
 * assistant message. Never throws: failures are returned as an error result so
 * the foreground tool call always gets usable content.
 *
 * Cancellation is honoured at three points: before any work starts, after the
 * child session has been built (an abort that fired during construction would
 * otherwise go unnoticed, because the `abort` event is not replayed to
 * listeners added later), and while the prompt is running (via
 * `session.abort()`).
 */
export async function runSubagent(input: RunSubagentInput): Promise<SubagentResult> {
  const { definition, task, ctx, deps } = input;
  const createServices = input.createServices ?? createAgentSessionServices;
  const createSession = input.createSession ?? createAgentSessionFromServices;

  if (ctx.signal?.aborted) {
    return { agent: definition.name, status: 'error', text: `Subagent "${definition.name}" was aborted.` };
  }

  const resolution = resolveSubagentModel(definition, ctx);
  if (resolution.status === 'unresolved') {
    return {
      agent: definition.name,
      status: 'error',
      text: `Subagent "${definition.name}" could not start: ${resolution.reason}`,
    };
  }

  let toolPlan: SubagentToolPlan;
  try {
    toolPlan = planSubagentTools(definition, ctx);
  } catch (error) {
    return { agent: definition.name, status: 'error', text: errorText(error) };
  }

  let dispose: (() => void) | undefined;
  let onAbort: (() => void) | undefined;
  try {
    const services = await createServices({
      cwd: ctx.cwd,
      agentDir: deps.agentDir,
      authStorage: AuthStorage.inMemory(),
      modelRegistry: ctx.modelRegistry,
      settingsManager: deps.createSettingsManager(),
      resourceLoaderOptions: { ...deps.resourceLoaderOptions, systemPrompt: definition.systemPrompt },
    });

    const { session } = await createSession({
      services,
      sessionManager: SessionManager.inMemory(ctx.cwd),
      model: resolution.model,
      thinkingLevel: definition.thinking,
      ...(toolPlan.noTools ? { noTools: toolPlan.noTools } : {}),
      ...(toolPlan.tools ? { tools: toolPlan.tools } : {}),
      ...(toolPlan.customTools ? { customTools: toolPlan.customTools } : {}),
    });
    dispose = () => session.dispose();

    if (ctx.signal) {
      onAbort = () => void session.abort();
      ctx.signal.addEventListener('abort', onAbort, { once: true });
      // The signal may have fired while the services/session were being built;
      // a listener registered after the fact never runs, so re-check explicitly.
      if (ctx.signal.aborted) {
        return {
          agent: definition.name,
          status: 'error',
          text: `Subagent "${definition.name}" was aborted.`,
        };
      }
    }

    await session.prompt(task, { expandPromptTemplates: false, source: 'rpc' });
    const text = session.getLastAssistantText()?.trim() ?? '';
    if (text.length === 0) {
      return {
        agent: definition.name,
        status: 'error',
        text: `Subagent "${definition.name}" returned no output.`,
      };
    }
    return { agent: definition.name, status: 'ok', text };
  } catch (error) {
    return {
      agent: definition.name,
      status: 'error',
      text: `Subagent "${definition.name}" failed: ${errorText(error)}`,
    };
  } finally {
    // Runs on every exit path above, including the early abort return.
    if (ctx.signal && onAbort) ctx.signal.removeEventListener('abort', onAbort);
    dispose?.();
  }
}

/*
 * Patch residue carried by the last collapsed source line of this span (the
 * start of the next file in the series), kept verbatim:
 *
 * diff --git a/src/.pi/extensions/subagents/subagents.test.ts b/src/.pi/extensions/subagents/subagents.test.ts new file mode 100644 index 000000000..eaafd3bf6 --- /dev/null +++ b/src/.pi/extensions/subagents/subagents.test.ts @@ -0,0 +1,409 @@ +import { mkdtemp } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { fauxAssistantMessage, registerFauxProvider, type Context } from '@earendil-works/pi-ai'; +import { + AuthStorage, + ModelRegistry, + SettingsManager, + type CreateAgentSessionServicesOptions, + type ExtensionAPI, + type ToolDefinition, +} from '@earendil-works/pi-coding-agent'; +import { describe, expect, it } from 'vitest'; + +import { + BRUNCH_FAUX_HARNESS_API_KEY, + brunchFauxProviderConfig, + defaultBrunchFauxModel, +} from '../../../probes/faux-provider.js'; +import { + loadSubagentDefinitions, + parseSubagentMarkdown, + subagentAgentsDir, + type SubagentDefinition, +} from './agents.js'; +import { loadSubagentConfig, parseSubagentConfig, subagentConfigPath } from './config.js'; +import { + BRUNCH_SUBAGENT_TOOL, + createSemaphore, + registerBrunchSubagents, + type BrunchSubagentsDeps, +} from './index.js'; +import { + planSubagentTools, + resolveSubagentModel, + runSubagent, + type SubagentResult, + type SubagentRunContext, + type SubagentSealedDeps, +} from './session.js'; + +const SCOUT_MD = `--- +name: scout +description: Read-only recon +tools: read, grep, find, ls +model: default +thinking: low +--- + +You are a scout.
 */
+`; + +function sealedResourceLoaderOptions(): CreateAgentSessionServicesOptions['resourceLoaderOptions'] { + return { + noContextFiles: true, + noExtensions: true, + noPromptTemplates: true, + noSkills: true, + noThemes: true, + extensionFactories: [], + }; +} + +describe('parseSubagentMarkdown', () => { + it('parses frontmatter, a comma-separated tool list, and the body', () => { + const def = parseSubagentMarkdown(SCOUT_MD); + expect(def.name).toBe('scout'); + expect(def.description).toBe('Read-only recon'); + expect(def.tools).toEqual(['read', 'grep', 'find', 'ls']); + expect(def.model).toBe('default'); + expect(def.thinking).toBe('low'); + expect(def.systemPrompt).toBe('You are a scout.'); + }); + + it('defaults tools to empty, model to default, and thinking to medium', () => { + const def = parseSubagentMarkdown('---\nname: proposer\ndescription: One variant\n---\nBody.'); + expect(def.tools).toEqual([]); + expect(def.model).toBe('default'); + expect(def.thinking).toBe('medium'); + }); + + it('throws on a missing frontmatter block', () => { + expect(() => parseSubagentMarkdown('no frontmatter here')).toThrow(/frontmatter/); + }); + + it('throws on an invalid thinking level', () => { + expect(() => parseSubagentMarkdown('---\nname: x\ndescription: y\nthinking: turbo\n---\nBody.')).toThrow( + /Invalid subagent frontmatter/, + ); + }); + + it('throws on an empty body', () => { + expect(() => parseSubagentMarkdown('---\nname: x\ndescription: y\n---\n')).toThrow( + /empty system-prompt body/, + ); + }); +}); + +describe('loadSubagentDefinitions (bundled agents)', () => { + it('loads the scout, researcher, and proposer starter agents', async () => { + const definitions = await loadSubagentDefinitions(subagentAgentsDir()); + expect([...definitions.keys()].sort()).toEqual(['proposer', 'researcher', 'scout']); + expect(definitions.get('scout')?.tools).toEqual(['read', 'grep', 'find', 'ls']); + expect(definitions.get('researcher')?.tools).toEqual(['web_search', 
'web_fetch']); + expect(definitions.get('proposer')?.tools).toEqual([]); + }); +}); + +describe('subagent config', () => { + it('validates version and maxConcurrency and tolerates a $comment', () => { + const config = parseSubagentConfig({ $comment: 'docs', version: 1, maxConcurrency: 4 }); + expect(config).toEqual({ version: 1, maxConcurrency: 4 }); + }); + + it('rejects a non-positive maxConcurrency', () => { + expect(() => parseSubagentConfig({ version: 1, maxConcurrency: 0 })).toThrow(/Invalid subagent config/); + }); + + it('loads the bundled config.json', async () => { + const config = await loadSubagentConfig(subagentConfigPath()); + expect(config.version).toBeGreaterThanOrEqual(1); + expect(config.maxConcurrency).toBeGreaterThanOrEqual(1); + }); +}); + +describe('resolveSubagentModel', () => { + const fakeModel = { provider: 'p', id: 'm' } as unknown as NonNullable; + + it('inherits the parent current model for "default"', () => { + const registry = { + getAvailable: () => [], + find: () => undefined, + } as unknown as SubagentRunContext['modelRegistry']; + const def = { name: 'x', model: 'default' } as SubagentDefinition; + expect(resolveSubagentModel(def, { model: fakeModel, modelRegistry: registry })).toEqual({ + status: 'resolved', + model: fakeModel, + }); + }); + + it('falls back to the first available model when there is no current model', () => { + const registry = { + getAvailable: () => [fakeModel], + find: () => undefined, + } as unknown as SubagentRunContext['modelRegistry']; + const def = { name: 'x', model: 'default' } as SubagentDefinition; + expect(resolveSubagentModel(def, { model: undefined, modelRegistry: registry })).toEqual({ + status: 'resolved', + model: fakeModel, + }); + }); + + it('reports unresolved when no model is available for "default"', () => { + const registry = { + getAvailable: () => [], + find: () => undefined, + } as unknown as SubagentRunContext['modelRegistry']; + const def = { name: 'x', model: 'default' } as 
SubagentDefinition; + expect(resolveSubagentModel(def, { model: undefined, modelRegistry: registry }).status).toBe( + 'unresolved', + ); + }); + + it('looks up an explicit provider/model-id', () => { + const registry = { + getAvailable: () => [], + find: (provider: string, id: string) => + provider === 'anthropic' && id === 'opus' ? fakeModel : undefined, + } as unknown as SubagentRunContext['modelRegistry']; + const def = { name: 'x', model: 'anthropic/opus' } as SubagentDefinition; + expect(resolveSubagentModel(def, { model: undefined, modelRegistry: registry })).toEqual({ + status: 'resolved', + model: fakeModel, + }); + }); + + it('reports unresolved for a malformed model string', () => { + const registry = { + getAvailable: () => [], + find: () => undefined, + } as unknown as SubagentRunContext['modelRegistry']; + const def = { name: 'x', model: 'bogus' } as SubagentDefinition; + expect(resolveSubagentModel(def, { model: undefined, modelRegistry: registry }).status).toBe( + 'unresolved', + ); + }); +}); + +describe('planSubagentTools', () => { + it('maps read-only filesystem tools to a cwd-bound custom-tool allowlist', () => { + const def = { name: 'scout', tools: ['read', 'grep', 'find', 'ls'] } as SubagentDefinition; + const plan = planSubagentTools(def, { cwd: '/tmp' }); + expect(plan.tools).toEqual(['read', 'grep', 'find', 'ls']); + expect((plan.customTools ?? []).map((tool: ToolDefinition) => tool.name).sort()).toEqual([ + 'find', + 'grep', + 'ls', + 'read', + ]); + expect(plan.noTools).toBeUndefined(); + }); + + it('maps web tools for the researcher', () => { + const def = { name: 'researcher', tools: ['web_search', 'web_fetch'] } as SubagentDefinition; + const plan = planSubagentTools(def, { cwd: '/tmp' }); + expect((plan.customTools ?? 
[]).map((tool: ToolDefinition) => tool.name).sort()).toEqual([ + 'web_fetch', + 'web_search', + ]); + }); + + it('uses noTools for a tool-less agent', () => { + const def = { name: 'proposer', tools: [] } as unknown as SubagentDefinition; + expect(planSubagentTools(def, { cwd: '/tmp' })).toEqual({ noTools: 'all' }); + }); + + it('throws on an unknown tool name', () => { + const def = { name: 'rogue', tools: ['bash'] } as SubagentDefinition; + expect(() => planSubagentTools(def, { cwd: '/tmp' })).toThrow(/unknown tool/); + }); +}); + +describe('createSemaphore', () => { + it('bounds concurrency to the configured limit', async () => { + const limit = createSemaphore(2); + let active = 0; + let peak = 0; + const task = () => + limit(async () => { + active += 1; + peak = Math.max(peak, active); + await new Promise((resolve) => setTimeout(resolve, 5)); + active -= 1; + }); + await Promise.all([task(), task(), task(), task(), task()]); + expect(peak).toBe(2); + }); +}); + +describe('registerBrunchSubagents', () => { + function harness(): { + pi: ExtensionAPI; + getTool: () => ToolDefinition; + calls: Array<{ agent: string; task: string }>; + } { + const registered: ToolDefinition[] = []; + const pi = { registerTool: (tool: ToolDefinition) => registered.push(tool) } as unknown as ExtensionAPI; + const calls: Array<{ agent: string; task: string }> = []; + const deps: BrunchSubagentsDeps = { + definitions: new Map([ + ['scout', parseSubagentMarkdown(SCOUT_MD)], + ['proposer', parseSubagentMarkdown('---\nname: proposer\ndescription: One variant\n---\nBody.')], + ]), + maxConcurrency: 2, + agentDir: '/agent', + createSettingsManager: () => SettingsManager.inMemory({ quietStartup: true }), + resourceLoaderOptions: sealedResourceLoaderOptions(), + runSubagent: async ({ definition, task }): Promise => { + calls.push({ agent: definition.name, task }); + return { agent: definition.name, status: 'ok', text: `ran ${definition.name}: ${task}` }; + }, + }; + 
registerBrunchSubagents(pi, deps); + return { pi, getTool: () => registered[0]!, calls }; + } + + const ctx = { cwd: '/w', modelRegistry: {}, model: undefined } as never; + + it('registers a single "subagent" tool', () => { + const { getTool } = harness(); + expect(getTool().name).toBe(BRUNCH_SUBAGENT_TOOL); + }); + + it('runs a single { agent, task } call', async () => { + const { getTool, calls } = harness(); + const result = await getTool().execute( + 'id', + { agent: 'scout', task: 'find X' }, + undefined, + undefined, + ctx, + ); + expect(calls).toEqual([{ agent: 'scout', task: 'find X' }]); + expect(result.content[0]).toEqual({ type: 'text', text: 'ran scout: find X' }); + }); + + it('fans out a { tasks: [...] } call', async () => { + const { getTool } = harness(); + const result = await getTool().execute( + 'id', + { + tasks: [ + { agent: 'scout', task: 'a' }, + { agent: 'proposer', task: 'b' }, + ], + }, + undefined, + undefined, + ctx, + ); + const text = (result.content[0] as { text: string }).text; + expect(text).toContain('## scout'); + expect(text).toContain('## proposer'); + }); + + it('returns an error result for an unknown agent', async () => { + const { getTool } = harness(); + const result = await getTool().execute('id', { agent: 'ghost', task: 'x' }, undefined, undefined, ctx); + expect((result.content[0] as { text: string }).text).toContain('Unknown subagent "ghost"'); + }); + + it('explains usage when neither agent nor tasks is provided', async () => { + const { getTool } = harness(); + const result = await getTool().execute('id', {}, undefined, undefined, ctx); + expect((result.content[0] as { text: string }).text).toContain('subagent requires'); + }); +}); + +describe('runSubagent (sealed SDK child session over a faux provider)', () => { + interface FauxRig { + readonly ctx: SubagentRunContext; + readonly deps: SubagentSealedDeps; + readonly captured: { systemPrompt?: string; toolNames: string[]; messages: string }; + dispose(): void; + } + + 
async function fauxRig(reply: string): Promise { + const model = defaultBrunchFauxModel(); + const provider = registerFauxProvider({ + provider: model.provider, + api: `${model.api}-faux-source`, + models: [{ id: model.modelId, name: model.modelName, input: ['text'] }], + }); + const captured: FauxRig['captured'] = { toolNames: [], messages: '' }; + provider.setResponses([ + (context: Context) => { + captured.systemPrompt = context.systemPrompt; + captured.toolNames = (context.tools ?? []).map((tool) => tool.name); + captured.messages = JSON.stringify(context.messages); + return fauxAssistantMessage(reply); + }, + ]); + const authStorage = AuthStorage.inMemory({ + [model.provider]: { type: 'api_key', key: BRUNCH_FAUX_HARNESS_API_KEY }, + }); + const modelRegistry = ModelRegistry.inMemory(authStorage); + modelRegistry.registerProvider( + model.provider, + brunchFauxProviderConfig(model, provider, BRUNCH_FAUX_HARNESS_API_KEY), + ); + const registeredModel = modelRegistry.find(model.provider, model.modelId); + if (!registeredModel) throw new Error('faux model not registered'); + const cwd = await mkdtemp(join(tmpdir(), 'brunch-subagent-cwd-')); + const agentDir = await mkdtemp(join(tmpdir(), 'brunch-subagent-agent-')); + + return { + ctx: { cwd, modelRegistry, model: registeredModel, signal: undefined }, + deps: { + agentDir, + createSettingsManager: () => SettingsManager.inMemory({ quietStartup: true }), + resourceLoaderOptions: sealedResourceLoaderOptions(), + }, + captured, + dispose: () => provider.unregister(), + }; + } + + it('runs a tool-less proposer, owning the system prompt and returning its output', async () => { + const rig = await fauxRig('PROPOSED VARIANT'); + try { + const definition = parseSubagentMarkdown( + '---\nname: proposer\ndescription: One variant\nthinking: medium\n---\nYou are a proposer. 
Emit one variant.', + ); + const result = await runSubagent({ + definition, + task: 'Propose a name for the widget.', + ctx: rig.ctx, + deps: rig.deps, + }); + expect(result).toEqual({ agent: 'proposer', status: 'ok', text: 'PROPOSED VARIANT' }); + // Sealing: the child system prompt IS the agent body (not pi's coding base). + expect(rig.captured.systemPrompt).toContain('You are a proposer. Emit one variant.'); + expect(rig.captured.systemPrompt).not.toContain('coding agent'); + // No tools for a proposer. + expect(rig.captured.toolNames).toEqual([]); + // The task is delivered as the (only) conversational input. + expect(rig.captured.messages).toContain('Propose a name for the widget.'); + } finally { + rig.dispose(); + } + }); + + it('advertises exactly the scout tool allowlist to the model', async () => { + const rig = await fauxRig('done'); + try { + const result = await runSubagent({ + definition: parseSubagentMarkdown(SCOUT_MD), + task: 'Where is the auth code?', + ctx: rig.ctx, + deps: rig.deps, + }); + expect(result.status).toBe('ok'); + expect([...rig.captured.toolNames].sort()).toEqual(['find', 'grep', 'ls', 'read']); + } finally { + rig.dispose(); + } + }); +}); diff --git a/src/app/pi-extensions.ts b/src/app/pi-extensions.ts index ce6244031..299ead2d8 100644 --- a/src/app/pi-extensions.ts +++ b/src/app/pi-extensions.ts @@ -39,6 +39,11 @@ import { type BrunchSessionBoundaryHandler, type BrunchSessionBoundaryPipelineStep, } from '../.pi/extensions/session/lifecycle.js'; +import { + BRUNCH_SUBAGENT_TOOL, + registerBrunchSubagents, + type BrunchSubagentsDeps, +} from '../.pi/extensions/subagents/index.js'; import { registerBrunchPrompting, type BrunchPromptContextProvider, @@ -102,6 +107,11 @@ export { registerBrunchWebTools } from '../.pi/extensions/web/index.js'; export { registerBrunchGraph } from '../.pi/extensions/graph/index.js'; export { registerBrunchReconciliation } from '../.pi/extensions/reconciliation/index.js'; +export { + BRUNCH_SUBAGENT_TOOL, 
+ registerBrunchSubagents, + type BrunchSubagentsDeps, +} from '../.pi/extensions/subagents/index.js'; export { BRUNCH_INTROSPECTION_COMMAND, createInMemoryBrunchIntrospectionStore, @@ -134,6 +144,12 @@ export interface BrunchPiExtensionsOptions extends Omit readonly ContinuityDrain[]; + /** + * Optional subagent registry (D44-L). When provided, the `subagent` tool is + * registered and opted into the active-tool set; when omitted it is absent + * (default-off), so production foreground sessions are unchanged. + */ + subagents?: BrunchSubagentsDeps; } export interface BrunchPiIntrospectionOptions extends BrunchIntrospectionOptions { @@ -164,9 +180,14 @@ export function createBrunchPiExtensions( const graphMentionSource = options.graphMentionSource ?? graphMentionSourceFromDeps(options.graph); const promptContext = options.promptContext; const introspectionOptions = options.introspection; - const devAllowedToolNames = introspectionOptions?.enabled - ? [BRUNCH_SESSION_QUERY_TOOL, BRUNCH_INTROSPECT_QUERY_TOOL] - : undefined; + // Opt-in tool channel: tools registered but kept out of the base `elicit` + // allowlist (D40-L) are made active only when explicitly opted in here — + // dev introspection query tools (D69-L) and the `subagent` tool (D44-L). + const optInAllowedToolNames = [ + ...(introspectionOptions?.enabled ? [BRUNCH_SESSION_QUERY_TOOL, BRUNCH_INTROSPECT_QUERY_TOOL] : []), + ...(options.subagents ? [BRUNCH_SUBAGENT_TOOL] : []), + ]; + const devAllowedToolNames = optInAllowedToolNames.length > 0 ? optInAllowedToolNames : undefined; const entryDebugCache = introspectionOptions?.enabled ? introspectionOptions.debugCache : undefined; const continuitySteps = options.graph ? [ @@ -198,6 +219,7 @@ export function createBrunchPiExtensions( (api) => registerBrunchOperationalModePolicy(api, { devAllowedToolNames }), registerBrunchContext, registerBrunchWebTools, + ...(options.subagents ? 
[(api: ExtensionAPI) => registerBrunchSubagents(api, options.subagents!)] : []), // Prompting registers immediately after operational-mode policy and // before mention autocomplete when prompt context is provided; its // position in this list is the registration order, not a splice index. diff --git a/src/app/pi-subagents.ts b/src/app/pi-subagents.ts new file mode 100644 index 000000000..f10f4958a --- /dev/null +++ b/src/app/pi-subagents.ts @@ -0,0 +1,40 @@ +/** + * App composition root for Brunch subagents (D44-L). + * + * Loads the bundled agent definitions + config and assembles the sealed runtime + * dependencies a subagent child session needs, using Brunch's sealed Pi profile + * (D39-L). Keeping this wiring in the app layer lets `.pi/extensions/subagents` + * stay free of `src/app` imports — it receives the sealed primitives by + * injection. + */ + +import { loadSubagentDefinitions, subagentAgentsDir } from '../.pi/extensions/subagents/agents.js'; +import { loadSubagentConfig, subagentConfigPath } from '../.pi/extensions/subagents/config.js'; +import type { BrunchSubagentsDeps } from '../.pi/extensions/subagents/index.js'; +import { brunchResourceLoaderOptions, createBrunchSettingsManager } from './pi-settings.js'; + +export interface LoadBrunchSubagentsOptions { + readonly cwd: string; + readonly agentDir: string; +} + +/** + * Load the bundled subagent registry and assemble its sealed dependencies. + * The result is passed to `createBrunchPiExtensions({ subagents })`, which + * registers the `subagent` tool default-off (it is only advertised when the + * operational-mode opt-in includes it). 
+ */ +export async function loadBrunchSubagents(options: LoadBrunchSubagentsOptions): Promise { + const [definitions, config] = await Promise.all([ + loadSubagentDefinitions(subagentAgentsDir()), + loadSubagentConfig(subagentConfigPath()), + ]); + + return { + definitions, + maxConcurrency: config.maxConcurrency, + agentDir: options.agentDir, + createSettingsManager: () => createBrunchSettingsManager(options.cwd, options.agentDir), + resourceLoaderOptions: brunchResourceLoaderOptions([]), + }; +}