Testing Guide
Abbyfile has three levels of testing: unit tests for individual packages, integration tests against built binaries, and MCP bridge tests for the protocol layer.
Unit Testing Tools
CLI Tools
CLI tool tests use real system commands. The Executor runs them as subprocesses:
// TestExecutor_Run_CLI runs the system echo command through the Executor and
// compares the returned output with the expected text.
func TestExecutor_Run_CLI(t *testing.T) {
	const want = "hello"

	runner := tools.NewExecutor(5*time.Second, nil)
	echoDef := tools.CLI("echo", "echo", "Echo text")
	echoDef.Args = []string{want}

	got, err := runner.Run(context.Background(), echoDef, nil)
	if err != nil {
		t.Fatalf("Run() error: %v", err)
	}
	if got != want {
		t.Errorf("Run() = %q, want %q", got, want)
	}
}
Test argument passing via the args input field:
// TestExecutor_Run_WithArgs supplies the command-line arguments through the
// "args" input field rather than through def.Args.
func TestExecutor_Run_WithArgs(t *testing.T) {
	exec := tools.NewExecutor(5*time.Second, nil)
	def := tools.CLI("echo", "echo", "Echo text")

	result, err := exec.Run(context.Background(), def, map[string]any{
		"args": "hello world",
	})
	if err != nil {
		t.Fatalf("Run() error: %v", err)
	}
	if want := "hello world"; result != want {
		t.Errorf("Run() = %q, want %q", result, want)
	}
}
Builtin Tools
Builtin tool handlers are plain Go functions. Run them through the Executor the same way as CLI tools:
// TestBuiltinTool wraps a plain Go function as a builtin tool and runs it
// through the Executor.
func TestBuiltinTool(t *testing.T) {
	exec := tools.NewExecutor(5*time.Second, nil)
	def := tools.BuiltinTool("test", "A test tool", nil,
		func(input map[string]any) (string, error) {
			return "builtin result", nil
		},
	)

	result, err := exec.Run(context.Background(), def, nil)
	if err != nil {
		t.Fatalf("Run() error: %v", err)
	}
	if want := "builtin result"; result != want {
		t.Errorf("Run() = %q, want %q", result, want)
	}
}
Timeout Testing
// TestExecutor_Run_Timeout runs `sleep 10` with a 100ms executor timeout and
// expects Run to return an error instead of waiting for the command.
func TestExecutor_Run_Timeout(t *testing.T) {
	exec := tools.NewExecutor(100*time.Millisecond, nil)
	def := tools.CLI("sleep", "sleep", "Sleep")
	def.Args = []string{"10"}

	_, err := exec.Run(context.Background(), def, nil)
	if err == nil {
		t.Fatal("Run() succeeded, want a timeout error")
	}
	// err contains "timed out"
}
Input Validation
Test schema validation without running the tool:
// TestValidateInput calls ValidateInput on a hand-built Definition; the tool
// itself is never executed.
func TestValidateInput(t *testing.T) {
	def := &tools.Definition{
		Name: "test_tool",
		InputSchema: map[string]any{
			"type":     "object",
			"required": []any{"key"},
			"properties": map[string]any{
				"key": map[string]any{"type": "string"},
			},
		},
	}

	// Missing required field
	// err: missing required field "key"
	if err := def.ValidateInput(map[string]any{}); err == nil {
		t.Error("ValidateInput({}) = nil, want an error for the missing required field")
	}

	// Wrong type
	// err: field "key": expected string, got number
	if err := def.ValidateInput(map[string]any{"key": 123.0}); err == nil {
		t.Error("ValidateInput(key=123.0) = nil, want a type error")
	}
}
See pkg/tools/validate_test.go for the full table-driven test suite.
Unit Testing Memory
Use t.TempDir() for isolated file stores:
// TestMemory writes one value to a file store rooted in a per-test temporary
// directory and reads it back.
func TestMemory(t *testing.T) {
	dir := filepath.Join(t.TempDir(), "memory")
	store, err := memory.NewFileStoreAt(dir, memory.Limits{})
	if err != nil {
		t.Fatal(err)
	}

	if err := store.Write("greeting", "Hello, world!"); err != nil {
		t.Fatalf("Write() error: %v", err)
	}
	got, err := store.Read("greeting")
	if err != nil {
		t.Fatalf("Read() error: %v", err)
	}
	if want := "Hello, world!"; got != want {
		t.Errorf("Read() = %q, want %q", got, want)
	}
}
Test limits enforcement:
// TestMemory_MaxKeys fills a store limited to two keys and expects the write
// of a third key to be rejected.
func TestMemory_MaxKeys(t *testing.T) {
	dir := filepath.Join(t.TempDir(), "memory")
	store, err := memory.NewFileStoreAt(dir, memory.Limits{MaxKeys: 2})
	if err != nil {
		t.Fatal(err)
	}

	// The first two keys fit within the limit.
	if err := store.Write("a", "1"); err != nil {
		t.Fatalf("Write(a) error: %v", err)
	}
	if err := store.Write("b", "2"); err != nil {
		t.Fatalf("Write(b) error: %v", err)
	}

	// err: key count 3 would exceed limit of 2 keys
	if err := store.Write("c", "3"); err == nil {
		t.Error("Write(c) = nil, want a key-limit error")
	}
}
Test the Manager’s built-in tool handlers:
// TestManager_HandleWrite_Read invokes the Manager's memory_write and
// memory_read tool handlers directly, without going through an Executor.
func TestManager_HandleWrite_Read(t *testing.T) {
	store, err := memory.NewFileStoreAt(t.TempDir(), memory.Limits{})
	if err != nil {
		t.Fatal(err)
	}
	mgr := memory.NewManager(store)

	// Index the Manager's tools by name so each handler can be looked up.
	toolMap := make(map[string]*tools.Definition)
	for _, tool := range mgr.Tools() {
		toolMap[tool.Name] = tool
	}

	// Call the write handler directly
	result, err := toolMap["memory_write"].Handler(map[string]any{
		"key":   "test",
		"value": "hello world",
	})
	if err != nil {
		t.Fatalf("memory_write error: %v", err)
	}
	if want := `Stored 11 bytes under key "test"`; result != want {
		t.Errorf("memory_write = %q, want %q", result, want)
	}

	// Call the read handler directly
	result, err = toolMap["memory_read"].Handler(map[string]any{"key": "test"})
	if err != nil {
		t.Fatalf("memory_read error: %v", err)
	}
	if want := "hello world"; result != want {
		t.Errorf("memory_read = %q, want %q", result, want)
	}
}
Unit Testing Prompts
Use t.Setenv("HOME", ...) to isolate override behavior:
// TestLoader_Override points HOME at a temporary directory, writes an
// override.md under .abbyfile/test-agent, and expects the loader to return
// the override content.
func TestLoader_Override(t *testing.T) {
	tmpHome := t.TempDir()
	t.Setenv("HOME", tmpHome)

	overrideDir := filepath.Join(tmpHome, ".abbyfile", "test-agent")
	if err := os.MkdirAll(overrideDir, 0o755); err != nil {
		t.Fatal(err)
	}
	err := os.WriteFile(
		filepath.Join(overrideDir, "override.md"),
		[]byte("Override prompt content"),
		0o644,
	)
	if err != nil {
		t.Fatal(err)
	}

	loader := prompt.NewLoader("test-agent", testFS, "testdata/system.md")
	got, err := loader.Load()
	if err != nil {
		t.Fatalf("Load() error: %v", err)
	}
	if want := "Override prompt content"; got != want {
		t.Errorf("Load() = %q, want %q", got, want)
	}
}
MCP Bridge Testing
Use gomcp.NewInMemoryTransports() for in-process client/server testing:
// TestBridge serves a bridge with a single "echo" tool over an in-memory
// transport pair and drives it with an MCP client in the same process.
func TestBridge(t *testing.T) {
	registry := tools.NewRegistry()
	registry.Register(tools.BuiltinTool("echo", "Echo input",
		map[string]any{
			"type": "object",
			"properties": map[string]any{
				"message": map[string]any{"type": "string"},
			},
			"required": []string{"message"},
		},
		func(input map[string]any) (string, error) {
			msg, _ := input["message"].(string) // a missing or non-string message echoes as ""
			return "echo: " + msg, nil
		},
	))

	// NOTE(review): loader is not defined in this snippet; it is assumed to be
	// a prompt loader created by the surrounding test setup.
	bridge := mcp.NewBridge(mcp.BridgeConfig{
		Name:     "test-agent",
		Version:  "v0.1.0",
		Registry: registry,
		Executor: tools.NewExecutor(30*time.Second, nil),
		Loader:   loader,
	})

	serverTransport, clientTransport := gomcp.NewInMemoryTransports()
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Serve in the background; the deferred cancel above is presumably what
	// stops the server once the test returns.
	go func() { bridge.ServeTransport(ctx, serverTransport) }()

	client := gomcp.NewClient(&gomcp.Implementation{
		Name: "test-client", Version: "v0.1.0",
	}, nil)
	// Fail fast on a connect error: session would be nil and the deferred
	// Close below would panic.
	session, err := client.Connect(ctx, clientTransport, nil)
	if err != nil {
		t.Fatalf("Connect() error: %v", err)
	}
	defer session.Close()

	// List tools
	listResult, err := session.ListTools(ctx, nil)
	if err != nil {
		t.Fatalf("ListTools() error: %v", err)
	}
	foundEcho := false
	for _, tool := range listResult.Tools {
		if tool.Name == "echo" {
			foundEcho = true
		}
	}
	if !foundEcho {
		t.Error(`ListTools() did not include the "echo" tool`)
	}
	// Verify tool count, names, annotations

	// Call a tool
	callResult, err := session.CallTool(ctx, &gomcp.CallToolParams{
		Name:      "echo",
		Arguments: map[string]any{"message": "hello"},
	})
	if err != nil {
		t.Fatalf("CallTool() error: %v", err)
	}
	if callResult.IsError {
		t.Fatalf("CallTool(echo) reported a tool error: %+v", callResult.Content)
	}
	// Verify result content
}
Test memory resources, prompts, and error handling in the same pattern. See pkg/mcp/bridge_test.go for the full test suite.
Integration Testing
Integration tests build the binary and exercise all subcommands. They live in internal/integration/ and use the //go:build integration tag.
# Run integration tests
make integration
# Equivalent to:
go test -tags integration -race -count=1 -timeout 60s ./internal/integration/
Integration tests are not included in the normal go test ./... run.
Test Setup
The TestMain function builds the binary once for all tests:
//go:build integration

// TestMain builds the agent binary once into a temporary directory, runs the
// whole test suite against it, and removes the directory before exiting.
// A failure to create the directory or to build the binary aborts the run
// with exit code 1.
func TestMain(m *testing.M) {
	tmp, err := os.MkdirTemp("", "abbyfile-integration-*")
	if err != nil {
		os.Stderr.WriteString("creating temp dir: " + err.Error() + "\n")
		os.Exit(1)
	}

	binaryPath = filepath.Join(tmp, "my-agent")
	cmd := exec.Command("go", "build", "-o", binaryPath, "./cmd/my-agent")
	cmd.Dir = findProjectRoot()
	cmd.Stderr = os.Stderr // surface compiler errors instead of discarding them
	if err := cmd.Run(); err != nil {
		os.Stderr.WriteString("building test binary: " + err.Error() + "\n")
		os.RemoveAll(tmp)
		os.Exit(1)
	}

	// os.Exit does not run deferred calls, so clean up explicitly first.
	code := m.Run()
	os.RemoveAll(tmp)
	os.Exit(code)
}
Important: Use cmd.Output(), not CombinedOutput()
Agent binaries log to stderr via slog. When parsing stdout output (like JSON from --describe), use cmd.Output() to get only stdout. CombinedOutput() mixes in log lines and corrupts the output.
// runAgentStdout runs the agent binary with args and captures its standard
// output. The rest of the helper is elided in this snippet.
func runAgentStdout(t *testing.T, args ...string) string {
	// NOTE(review): ctx is not defined in this snippet; presumably it comes
	// from the surrounding test file.
	cmd := exec.CommandContext(ctx, binaryPath, args...)
	out, err := cmd.Output() // stdout only
	// ...
}
MCP Integration with CommandTransport
func TestServeMCP(t *testing.T) {
cmd := exec.CommandContext(ctx, binaryPath, "serve-mcp")
client := gomcp.NewClient(&gomcp.Implementation{
Name: "integration-test", Version: "v0.1.0",
}, nil)
session, _ := client.Connect(ctx, &gomcp.CommandTransport{Command: cmd}, nil)
defer session.Close()
// List tools, call tools, verify responses
}
The make all Pipeline
make all
Runs four stages in order:
- fmtcheck – verify all files are gofmt-formatted
- vet – go vet ./... for static analysis
- test – go test -race ./... (all unit tests)
- build – build the abby CLI
Individual stages:
make fmt # auto-format all files
make fmtcheck # check formatting (CI-friendly, fails on unformatted)
make vet # static analysis
make test # unit tests with race detector
make build # build all binaries
make integration # integration tests (builds binary first)
make clean # remove built binaries
CI Setup Patterns
A typical CI workflow:
steps:
  - uses: actions/setup-go@v5
    with:
      go-version: '1.24'
  - name: Lint and test
    run: make all
  - name: Integration tests
    run: make integration
Key considerations:
- Always use the -race flag (already set in the Makefile)
- Integration tests need a working Go toolchain (they go build the binary)
- Memory tests are isolated via t.TempDir() – no filesystem cleanup needed
Plugin Testing
Plugin generation is tested at two levels:
Unit Tests (pkg/plugin/plugin_test.go)
Test the plugin.Generate() function with a fake binary and in-memory definitions:
// TestGenerate_DirectoryStructure calls plugin.Generate with a fake binary
// (a small shell script) and one in-memory skill, writing into a temporary
// output directory.
func TestGenerate_DirectoryStructure(t *testing.T) {
	tmp := t.TempDir()

	// A stand-in for the real agent binary.
	binaryPath := filepath.Join(tmp, "my-agent")
	if err := os.WriteFile(binaryPath, []byte("#!/bin/sh\necho hello"), 0o755); err != nil {
		t.Fatal(err)
	}

	def := &definition.AgentDef{
		Name:    "my-agent",
		Version: "1.0.0",
	}
	skills := []plugin.SkillFile{
		{Name: "review-pr", Description: "Review a PR", Content: "Review content."},
	}

	outputDir := filepath.Join(tmp, "build")
	if err := os.MkdirAll(outputDir, 0o755); err != nil {
		t.Fatal(err)
	}

	err := plugin.Generate(def, skills, plugin.GenerateConfig{
		OutputDir:  outputDir,
		BinaryPath: binaryPath,
	})
	if err != nil {
		t.Fatalf("Generate() error: %v", err)
	}
	// Verify: .claude-plugin/plugin.json, .mcp.json, binary, skills/review-pr/SKILL.md
}
Tests cover directory structure, plugin.json content, .mcp.json content, SKILL.md frontmatter + body, no-skills case, and binary permissions.
Integration Tests (internal/integration/plugin_test.go)
End-to-end test that builds a real agent with --plugin and verifies the full output:
// TestBuildPlugin is an outline of the end-to-end plugin build test; the
// steps are listed as comments rather than implemented in this snippet.
func TestBuildPlugin(t *testing.T) {
	// Create Abbyfile, agent .md with skills, skill .md files
	// Run: abby build --plugin
	// Verify: plugin directory structure, plugin.json, .mcp.json, SKILL.md content, binary executable
}
Run with make integration.
Distribution Testing
The distribution layer (install, uninstall, list, publish) has integration tests in internal/integration/distribution_test.go:
// TestList is an outline of the list test against an empty registry; the
// body is described in comments only.
func TestList(t *testing.T) {
	// Verifies list shows "No agents installed." with empty registry.
	// Uses isolated HOME via t.TempDir().
}
// TestInstallLocalWithRegistry is an outline of the local install test; the
// checks are described in comments only.
func TestInstallLocalWithRegistry(t *testing.T) {
	// Installs a binary from ./build/, verifies:
	// - Binary copied to .abbyfile/bin/
	// - Registry entry created with source="local" and correct version
	// - abby list shows the agent
}
// TestUninstall is an outline of the install-then-uninstall test; the checks
// are described in comments only.
func TestUninstall(t *testing.T) {
	// Installs, then uninstalls. Verifies:
	// - Binary removed
	// - Registry cleaned up
	// - List shows empty again
}
// TestPublishDryRun is an outline of the publish --dry-run test; the checks
// are described in comments only.
func TestPublishDryRun(t *testing.T) {
	// Runs publish --dry-run. Verifies:
	// - Cross-compiled binaries created for all 4 targets
	// - No GitHub Release created
}
Distribution tests use HOME override for registry isolation and reuse the test agent binary built by TestMain.
Unit Testing the Registry
// TestSaveAndLoad stores one entry, saves the registry, and loads it again
// from the same path to confirm the entry is still there.
func TestSaveAndLoad(t *testing.T) {
	path := filepath.Join(t.TempDir(), "registry.json")

	r, err := registry.Load(path)
	if err != nil {
		t.Fatalf("Load() error: %v", err)
	}
	r.Set(registry.Entry{
		Name: "test", Source: "local", Version: "1.0.0",
		Path: "/bin/test", Scope: "local",
	})
	// NOTE(review): if Save returns an error, check it here.
	r.Save()

	r2, err := registry.Load(path)
	if err != nil {
		t.Fatalf("Load() after Save() error: %v", err)
	}
	e, ok := r2.Get("test")
	if !ok {
		t.Fatal(`Get("test") found no entry after reload`)
	}
	if want := "1.0.0"; e.Version != want {
		t.Errorf("Version = %q, want %q", e.Version, want)
	}
}
Unit Testing the GitHub Client
Tests use httptest to mock the GitHub API:
// TestGetRelease points the GitHub client at an httptest server that returns
// a canned release and checks the tag name the client decodes.
func TestGetRelease(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		rel := github.Release{
			TagName: "myagent/v1.0.0",
			Assets:  []github.Asset{}, // populate when the test needs release assets
		}
		if err := json.NewEncoder(w).Encode(rel); err != nil {
			t.Errorf("encoding release: %v", err)
		}
	}))
	defer srv.Close()

	c := &github.Client{HTTPClient: srv.Client(), BaseURL: srv.URL}
	ref := github.ReleaseRef{Owner: "owner", Repo: "repo", Agent: "myagent", Version: "1.0.0"}

	release, err := c.GetRelease(context.Background(), ref)
	if err != nil {
		t.Fatalf("GetRelease() error: %v", err)
	}
	if want := "myagent/v1.0.0"; release.TagName != want {
		t.Errorf("TagName = %q, want %q", release.TagName, want)
	}
}
Test Count
The project currently has 110+ passing tests across all packages:
pkg/agent -- agent creation, options, defaults
pkg/tools -- registry, executor, validation
pkg/memory -- file store, limits, concurrency, manager tools
pkg/prompt -- loader, override, paths
pkg/builtins -- builtin tool implementations
pkg/definition -- Abbyfile + agent .md parsing (including skills)
pkg/builder -- code generation templates
pkg/mcp -- bridge, tools, annotations, resources, prompts
pkg/plugin -- plugin directory generation
pkg/registry -- installed agents tracking, atomic save/load
pkg/github -- GitHub Releases client, version comparison, ref parsing
internal/cli -- root command, validate, flags
Plus integration tests that exercise the full binary end-to-end, including distribution commands (list, install, uninstall, publish --dry-run).