Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions cmd/exit_code_error.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package cmd

import "fmt"

// ExitCodeError carries a specific process exit code through the Cobra
// RunE return path. Execute() unwraps it and calls os.Exit(Code) so
// wrapped subprocesses (e.g. the sandbox container) can propagate their
// real status to the shell without losing deferred cleanups — returning
// the error lets defers inside the command body run before the process
// exits.
type ExitCodeError struct {
Code int
}

func (e *ExitCodeError) Error() string {
return fmt.Sprintf("exited with status %d", e.Code)
}
26 changes: 26 additions & 0 deletions cmd/exit_code_error_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package cmd

import (
"errors"
"fmt"
"testing"
)

func TestExitCodeError_Error(t *testing.T) {
e := &ExitCodeError{Code: 42}
if got := e.Error(); got != "exited with status 42" {
t.Errorf("Error() = %q, want %q", got, "exited with status 42")
}
}

func TestExitCodeError_ErrorsAs(t *testing.T) {
inner := &ExitCodeError{Code: 7}
wrapped := fmt.Errorf("runContainer: %w", inner)
var got *ExitCodeError
if !errors.As(wrapped, &got) {
t.Fatal("errors.As failed to unwrap ExitCodeError through fmt.Errorf wrap")
}
if got.Code != 7 {
t.Errorf("unwrapped Code = %d, want 7", got.Code)
}
}
16 changes: 13 additions & 3 deletions cmd/root.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cmd

import (
"errors"
"fmt"
"os"

Expand Down Expand Up @@ -28,8 +29,17 @@ func init() {
}

func Execute() {
if err := rootCmd.Execute(); err != nil {
fmt.Fprintln(os.Stderr, errorText(err.Error()))
os.Exit(1)
err := rootCmd.Execute()
if err == nil {
return
}
// An ExitCodeError carries the wrapped subprocess's exit status —
// propagate it verbatim. The subprocess already wrote to stderr;
// we stay quiet and exit with its code.
var ece *ExitCodeError
if errors.As(err, &ece) {
os.Exit(ece.Code)
}
fmt.Fprintln(os.Stderr, errorText(err.Error()))
os.Exit(1)
}
21 changes: 16 additions & 5 deletions cmd/run_container.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@
var hostAgentDir string
var hostUID, hostGID int
if shareAgentDir {
// Running as root on Linux would remap the in-container claude
// user to uid 0, combining with --cap-add NET_ADMIN/NET_RAW/
// SETUID/SETGID/KILL to give the agent a much larger blast
// radius than a normal user. --security-opt=no-new-privileges
// doesn't undo ambient caps on uid 0. Reject.
if runtime.GOOS == "linux" && os.Getuid() == 0 {
return errors.New("--share-agent-dir: refusing to map the in-container user to root; re-run agent-vault as a non-root user")
}
userHome, herr := os.UserHomeDir()
if herr != nil {
return fmt.Errorf("--share-agent-dir: resolve home dir: %w", herr)
Expand Down Expand Up @@ -264,15 +272,18 @@
child.Stdout = os.Stdout
child.Stderr = os.Stderr
err = child.Run()
// Exit-code propagation via fmt.Errorf would collapse everything to
// Cobra's generic exit 1. Return the ExitError unchanged so a caller
// wrapping us can inspect it; for now we also lose the exact code to
// keep defers (network teardown, signal.Stop) intact.
if err != nil {
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
return fmt.Errorf("sandbox container exited with status %d", exitErr.ExitCode())
// Return an ExitCodeError so defers (network teardown,
// signal.Stop, forwarder close) run before Execute() exits
// with the container's actual status. Silence cobra's own
// printing for this path — the container already wrote
// whatever it had to say to stderr, and our sentinel's
// empty Msg keeps Execute() quiet too.
cmd.SilenceErrors = true
return &ExitCodeError{Code: exitErr.ExitCode()}
}

Check failure on line 286 in cmd/run_container.go

View check run for this annotation

Claude / Claude Code Review

Cobra dumps usage block on every non-zero container exit (missing SilenceUsage)

On the ExitCodeError path at `cmd/run_container.go:284`, the code sets `cmd.SilenceErrors = true` but not `cmd.SilenceUsage`. Cobra v1.10.2 gates the `UsageString()` print independently of `SilenceErrors`, so every non-zero container exit (e.g. `vault run -- pytest` returning 1/2/5, or the PR's own `exit 42` repro) will dump the full `Usage: agent-vault vault run [flags] -- <command> [args...]` help block to stderr before exiting with the right code — directly defeating the PR's stated CI motiva
Comment thread
dangtony98 marked this conversation as resolved.
return fmt.Errorf("docker run: %w", err)
}
return nil
Expand Down
2 changes: 1 addition & 1 deletion internal/sandbox/assets/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ FROM node:22-bookworm-slim

RUN apt-get update \
&& apt-get install -y --no-install-recommends \
iptables iproute2 ca-certificates curl git python3 gosu \
iptables iproute2 ca-certificates curl git python3 gosu iputils-ping \
&& rm -rf /var/lib/apt/lists/*

RUN npm install -g @anthropic-ai/claude-code
Expand Down
9 changes: 8 additions & 1 deletion internal/sandbox/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,19 @@

// reservedContainerDsts are bind-mount destinations agent-vault owns.
// A user --mount landing on one of these would silently replace our
// own mount and undo the sandbox guarantees.
// own mount and undo the sandbox guarantees. The entrypoint + firewall
// scripts are the image's trust path — overwriting either pre-entrypoint
// would be a direct break-out.
var reservedContainerDsts = []string{
"/",
"/etc",
"/workspace",
"/usr/local/sbin/init-firewall.sh",
"/usr/local/sbin/entrypoint.sh",
ContainerCAPath,
ContainerClaudeHome,
ContainerClaudeConfig,
}

Check failure on line 51 in internal/sandbox/docker.go

View check run for this annotation

Claude / Claude Code Review

Reserved-path mount check is bypassable via parent-directory mount

validateContainerDst at internal/sandbox/docker.go:276-283 only rejects mounts on a reserved path or its descendants — not on its ancestors. A user can run `agent-vault run --sandbox=container --mount /tmp/evil:/usr/local/sbin -- claude` to shadow the image's baked-in `/usr/local/sbin/entrypoint.sh` (and `init-firewall.sh`) with attacker-controlled scripts that ENTRYPOINT then runs as PID 1 (root) with NET_ADMIN/NET_RAW/SETUID/SETGID/KILL caps **before** init-firewall.sh installs the iptables eg
Comment thread
dangtony98 marked this conversation as resolved.

// BuildRunArgs produces the argv for `docker run …`. Pure apart from
// os.UserHomeDir + filepath.EvalSymlinks on user --mount sources.
Expand Down
22 changes: 21 additions & 1 deletion internal/sandbox/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,27 @@ func TestBuildRunArgs_RejectsCWDInsideAgentVaultDir(t *testing.T) {
func TestParseAndValidateMount_RejectReservedContainerDst(t *testing.T) {
tmp := t.TempDir()
home := t.TempDir()
for _, dst := range []string{"/workspace", "/workspace/sub", ContainerCAPath, "/home/claude/.claude", "/home/claude/.claude/x"} {
for _, dst := range []string{
"/",
"/workspace",
"/workspace/sub",
ContainerCAPath,
"/home/claude/.claude",
"/home/claude/.claude/x",
// ContainerClaudeConfig: --share-agent-dir bind-mounts ~/.claude.json
// here; a user --mount overriding it would replace real config
// with attacker-controlled content.
ContainerClaudeConfig,
// Entrypoint + firewall scripts: overwriting either pre-entrypoint
// replaces the trusted setup with attacker code run as root.
"/usr/local/sbin/init-firewall.sh",
"/usr/local/sbin/entrypoint.sh",
// /etc subtree is reserved wholesale; ContainerCAPath sits under it
// and we also don't want a user overwriting /etc/passwd, /etc/shadow,
// etc. pre-privilege-drop.
"/etc",
"/etc/passwd",
} {
t.Run(dst, func(t *testing.T) {
_, err := parseAndValidateMount(tmp+":"+dst, home)
if err == nil || !strings.Contains(err.Error(), "reserved") {
Expand Down
2 changes: 1 addition & 1 deletion internal/sandbox/image_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func TestAssetsHash_Format(t *testing.T) {
// Treat a diff on this constant as intentional. Update when changing
// Dockerfile / init-firewall.sh / entrypoint.sh.
func TestAssetsHash_Stable(t *testing.T) {
const want = "38b7b314cdf5"
const want = "94a43a01c1e0"
got, err := assetsHash()
if err != nil {
t.Fatalf("assetsHash: %v", err)
Expand Down
102 changes: 79 additions & 23 deletions internal/sandbox/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,22 +140,11 @@ func TestIntegration_ImageBuildCaches(t *testing.T) {
}
}

// TestIntegration_EgressBlockedEndToEnd is the big-hammer test: builds
// the image, runs a container on a per-invocation network with the
// forwarder up, and asserts curl to an arbitrary external IP fails
// while curl to the forwarder succeeds.
//
// This is slow (~60s cold, ~5s warm) and the most expensive integration
// in the suite. Skip in short mode.
func TestIntegration_EgressBlockedEndToEnd(t *testing.T) {
if testing.Short() {
t.Skip("skip in -short mode; builds the sandbox image")
}
requireDocker(t)

ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
defer cancel()

// runInFirewalledContainer runs a one-off bash command inside the sandbox
// image on a per-invocation network with init-firewall.sh already applied.
// The network is cleaned up on test exit.
func runInFirewalledContainer(t *testing.T, ctx context.Context, shellCmd string) ([]byte, error) {
t.Helper()
imageRef, err := EnsureImage(ctx, "", io.Discard)
if err != nil {
t.Fatalf("EnsureImage: %v", err)
Expand All @@ -165,12 +154,8 @@ func TestIntegration_EgressBlockedEndToEnd(t *testing.T) {
if err != nil {
t.Fatalf("CreatePerInvocationNetwork: %v", err)
}
defer cleanupNetwork(t, n.Name)

// Try to curl a routable external IP. iptables DROP should cause
// the TCP SYN to be discarded; curl exits non-zero after the short
// --max-time.
out, err := exec.CommandContext(ctx, "docker", "run", "--rm",
t.Cleanup(func() { cleanupNetwork(t, n.Name) })
return exec.CommandContext(ctx, "docker", "run", "--rm",
"--network", n.Name,
"--cap-drop=ALL", "--cap-add=NET_ADMIN", "--cap-add=NET_RAW",
"--security-opt=no-new-privileges",
Expand All @@ -180,8 +165,24 @@ func TestIntegration_EgressBlockedEndToEnd(t *testing.T) {
"--entrypoint", "/bin/bash",
imageRef,
"-c",
"/usr/local/sbin/init-firewall.sh && curl --max-time 3 -fsS https://1.1.1.1 && echo SHOULD_NOT_REACH || echo BLOCKED",
"/usr/local/sbin/init-firewall.sh >/dev/null 2>&1 && "+shellCmd,
).CombinedOutput()
}

// TestIntegration_EgressBlockedEndToEnd is the big-hammer test: it also
// proves init-firewall.sh is actually executed (as opposed to
// runInFirewalledContainer's asserted-on-exit behavior) — curl to a
// routable IPv4 literal must fail because iptables DROPs the SYN.
func TestIntegration_EgressBlockedEndToEnd(t *testing.T) {
if testing.Short() {
t.Skip("skip in -short mode; builds the sandbox image")
}
requireDocker(t)
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()

out, err := runInFirewalledContainer(t, ctx,
"curl --max-time 3 -fsS https://1.1.1.1 && echo SHOULD_NOT_REACH || echo BLOCKED")
if err != nil {
t.Fatalf("docker run: %v\n%s", err, string(out))
}
Expand All @@ -193,6 +194,61 @@ func TestIntegration_EgressBlockedEndToEnd(t *testing.T) {
}
}

// TestIntegration_EgressBlocked_Bypasses is the "bypasses the threat
// model actually cares about" suite. A non-cooperative sandbox has to
// block malicious escape attempts, not just well-behaved clients — each
// case probes a different channel a compromised agent might try.
//
// Each probe prints either REACHED (bypass worked; test fails) or
// BLOCKED (firewall held; test passes).
func TestIntegration_EgressBlocked_Bypasses(t *testing.T) {
if testing.Short() {
t.Skip("skip in -short mode; builds the sandbox image")
}
requireDocker(t)

// Python-based UDP probe: sendto may succeed locally (iptables drops
// silently), but recvfrom must time out. A functioning bypass would
// receive the DNS reply and print REACHED.
const udpProbe = `python3 -c 'import socket; s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM); ` +
`s.settimeout(2); ` +
`s.sendto(b"\x00\x00\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x07example\x03com\x00\x00\x01\x00\x01",("8.8.8.8",53)); ` +
`print("REACHED" if s.recvfrom(4096) else "BLOCKED")' 2>&1 || echo BLOCKED`

cases := []struct {
name, cmd, reason string
}{
{"IPv6Literal",
"curl --max-time 3 -fsS 'https://[2606:4700:4700::1111]' >/dev/null && echo REACHED || echo BLOCKED",
"IPv6 egress dropped by ip6tables"},
{"UDP",
udpProbe,
"UDP dropped by iptables OUTPUT policy"},
{"ICMP",
"ping -c1 -W1 1.1.1.1 >/dev/null 2>&1 && echo REACHED || echo BLOCKED",
"ICMP has no OUTPUT ACCEPT rule"},
{"ExplicitNoProxy",
"curl --max-time 3 -fsS --noproxy '*' https://example.com >/dev/null && echo REACHED || echo BLOCKED",
"--noproxy bypass must still hit kernel-level block"},
{"ProxyEnvStripped",
"env -u HTTPS_PROXY -u https_proxy curl --max-time 3 -fsS https://example.com >/dev/null && echo REACHED || echo BLOCKED",
"env-stripped bypass must still hit kernel-level block"},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
out, err := runInFirewalledContainer(t, ctx, tc.cmd)
if err != nil {
t.Fatalf("docker run: %v\n%s", err, string(out))
}
if !strings.Contains(string(out), "BLOCKED") || strings.Contains(string(out), "REACHED") {
t.Errorf("expected BLOCKED (%s), got:\n%s", tc.reason, string(out))
}
})
}
}

// TestIntegration_EntrypointDropsToClaudeUser proves that gosu actually
// drops privileges under --security-opt=no-new-privileges. If this
// breaks, every "container runs as claude, not root" claim in the
Expand Down