-
Notifications
You must be signed in to change notification settings - Fork 1
feat: add --sandbox=container mode to vault run #99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
eab09d8
cb51669
bad1316
b170d46
0b377ce
c5b5467
23caa16
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,171 @@ | ||
| package cmd | ||
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
| "fmt" | ||
| "net/url" | ||
| "os" | ||
| "os/exec" | ||
| "runtime" | ||
| "strconv" | ||
| "syscall" | ||
| "time" | ||
|
|
||
| "github.com/spf13/cobra" | ||
| "golang.org/x/term" | ||
|
|
||
| "github.com/Infisical/agent-vault/internal/sandbox" | ||
| ) | ||
|
|
||
| // containerOnlyFlags are no-ops in process mode; we reject them explicitly | ||
| // rather than silently ignoring them, which would be a foot-gun. | ||
| var containerOnlyFlags = []string{"image", "mount", "keep", "no-firewall", "home-volume-shared"} | ||
|
|
||
| func validateSandboxFlagConflicts(cmd *cobra.Command, mode SandboxMode) error { | ||
| if mode == SandboxContainer { | ||
| return nil | ||
| } | ||
| for _, name := range containerOnlyFlags { | ||
| f := cmd.Flags().Lookup(name) | ||
| if f == nil { | ||
| continue | ||
| } | ||
| if f.Changed { | ||
| return fmt.Errorf("--%s requires --sandbox=container", name) | ||
| } | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| // runContainer launches the target agent inside a Docker container with | ||
| // egress locked to the agent-vault proxy via iptables. | ||
| func runContainer(cmd *cobra.Command, args []string, scopedToken, addr, vault string) error { | ||
| if runtime.GOOS != "linux" && runtime.GOOS != "darwin" { | ||
| return fmt.Errorf("--sandbox=container: only linux and darwin are supported in v1 (got %s)", runtime.GOOS) | ||
| } | ||
| if _, err := exec.LookPath("docker"); err != nil { | ||
| return errors.New("--sandbox=container: `docker` not found in PATH") | ||
| } | ||
|
|
||
| ctx := cmd.Context() | ||
| if ctx == nil { | ||
| ctx = context.Background() | ||
| } | ||
|
|
||
| // Housekeeping: trim old CA tempfiles and networks from crashed runs | ||
| // before we create new ones. Both are best-effort. | ||
| sandbox.PruneHostCAFiles() | ||
| _ = sandbox.PruneStaleNetworks(ctx, sandbox.DefaultPruneGrace) | ||
|
|
||
| // Pull the MITM CA from the server. Container mode always routes | ||
| // through MITM — --no-mitm is a process-mode-only escape hatch. | ||
| pem, mitmPort, mitmEnabled, mitmTLS, err := fetchMITMCA(addr) | ||
| if err != nil { | ||
| return fmt.Errorf("fetch MITM CA: %w", err) | ||
| } | ||
| if !mitmEnabled { | ||
| return errors.New("--sandbox=container requires the MITM proxy; server has it disabled") | ||
| } | ||
| if mitmPort == 0 { | ||
| mitmPort = DefaultMITMPort | ||
| } | ||
|
|
||
| // Upstream agent-vault HTTP port for the forwarder. Parsed from | ||
| // --address / session address, with DefaultPort as a fallback. | ||
| upstreamHTTPPort := DefaultPort | ||
| if u, perr := url.Parse(addr); perr == nil { | ||
| if p, cerr := strconv.Atoi(u.Port()); cerr == nil && p > 0 { | ||
| upstreamHTTPPort = p | ||
| } | ||
| } | ||
|
|
||
| sessionID, err := sandbox.NewSessionID() | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| hostCAPath, err := sandbox.WriteHostCAFile(pem, sessionID) | ||
| if err != nil { | ||
| return fmt.Errorf("write CA: %w", err) | ||
| } | ||
|
|
||
| network, err := sandbox.CreatePerInvocationNetwork(ctx, sessionID) | ||
| if err != nil { | ||
| return fmt.Errorf("create docker network: %w", err) | ||
| } | ||
| defer func() { | ||
| // Only runs on error arms — syscall.Exec below replaces the | ||
| // process, bypassing defers. Use a detached context so a | ||
| // parent ctx cancel doesn't skip the cleanup exec itself. | ||
| cleanup, cancel := context.WithTimeout(context.Background(), 5*time.Second) | ||
| defer cancel() | ||
| _ = sandbox.RemoveNetwork(cleanup, network.Name) | ||
| }() | ||
|
|
||
| bindIP := sandbox.HostBindIP(network) | ||
| if bindIP == nil { | ||
| return errors.New("could not determine host bind IP for forwarder") | ||
| } | ||
|
|
||
| fwd, err := sandbox.StartForwarder(ctx, bindIP, upstreamHTTPPort, mitmPort) | ||
| if err != nil { | ||
| return fmt.Errorf("start forwarder: %w", err) | ||
| } | ||
| defer func() { _ = fwd.Close() }() | ||
|
|
||
| image, _ := cmd.Flags().GetString("image") | ||
| imageRef, err := sandbox.EnsureImage(ctx, image, os.Stderr) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| workDir, err := os.Getwd() | ||
| if err != nil { | ||
| return fmt.Errorf("getwd: %w", err) | ||
| } | ||
|
|
||
|
Comment on lines
+146
to
+150
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔴 The current working directory is bind-mounted read-write at /workspace without the same ~/.agent-vault protection applied to user-supplied --mount flags. If a user runs vault run --sandbox=container while CWD is inside ~/.agent-vault (or a symlink resolving there), the vault directory — containing the encrypted credential database, the MITM CA private key, and session tokens — is exposed read-write to the container. The fix is to call validateHostSrc(workDir, home) on the os.Getwd() result before passing it to BuildRunArgs, mirroring the protection already applied to --mount entries. Extended reasoning...The bug In cmd/run_container.go (lines 123–138), workDir is obtained via os.Getwd() and passed directly as Config.WorkDir to sandbox.BuildRunArgs. Inside BuildRunArgs (docker.go:103), it is added unconditionally as -v cfg.WorkDir:/workspace with no path validation whatsoever. Existing protection is asymmetric User-supplied --mount values flow through parseAndValidateMount → validateHostSrc (docker.go:152–175), which calls filepath.EvalSymlinks to resolve symlinks and then checks whether the resolved path equals or is nested under ~/.agent-vault. The workDir path skips this check entirely. The protection intent is explicit and deliberate in the --mount path; its absence on the workspace mount is an oversight. Step-by-step proof
The same scenario triggers if CWD is any subdirectory of ~/.agent-vault, or if a symlink in a normal-looking path resolves to somewhere under ~/.agent-vault. Impact In passwordless mode (DEK stored in plaintext — the documented default for local/PaaS use), the CA private key stored under ~/.agent-vault/ca/ is directly readable and the database is decryptable. In password-protected mode, write access allows an attacker to replace the CA key so future MITM intercepts use an attacker-controlled key. Both scenarios violate the core sandbox guarantee. The iptables egress lock is not relevant here — the attacker reads/writes the host filesystem via the bind mount, not the network. Fix Before passing workDir to BuildRunArgs, call the already-existing validateHostSrc (or an exported wrapper) with the os.UserHomeDir() result. This brings the workspace mount in line with the protection already applied to user-supplied mounts. Alternatively, BuildRunArgs itself could apply the check when cfg.WorkDir is set, since it already calls os.UserHomeDir() for the --mount path. |
||
| env := sandbox.BuildContainerEnv(scopedToken, vault, fwd.HTTPPort, fwd.MITMPort, mitmTLS) | ||
|
|
||
| mounts, _ := cmd.Flags().GetStringArray("mount") | ||
| keep, _ := cmd.Flags().GetBool("keep") | ||
| noFirewall, _ := cmd.Flags().GetBool("no-firewall") | ||
| homeShared, _ := cmd.Flags().GetBool("home-volume-shared") | ||
|
|
||
| dockerArgs, err := sandbox.BuildRunArgs(sandbox.Config{ | ||
| ImageRef: imageRef, | ||
| SessionID: sessionID, | ||
| WorkDir: workDir, | ||
| HostCAPath: hostCAPath, | ||
| NetworkName: network.Name, | ||
| AttachTTY: term.IsTerminal(int(os.Stdin.Fd())), | ||
| Keep: keep, | ||
| NoFirewall: noFirewall, | ||
| HomeVolumeShared: homeShared, | ||
| Mounts: mounts, | ||
| Env: env, | ||
| CommandArgs: args, | ||
| }) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| dockerBin, err := exec.LookPath("docker") | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| if noFirewall { | ||
| fmt.Fprintln(os.Stderr, "agent-vault: WARNING --no-firewall active, container egress is unrestricted") | ||
| } | ||
| fmt.Fprintf(os.Stderr, "%s routing container HTTPS through MITM on %s:%d (container view: host.docker.internal:%d)\n", | ||
| successText("agent-vault:"), bindIP, fwd.MITMPort, fwd.MITMPort) | ||
| fmt.Fprintf(os.Stderr, "%s starting %s in sandbox (%s)...\n\n", | ||
| successText("agent-vault:"), boldText(args[0]), network.Name) | ||
|
|
||
| // Exec docker directly so the controlling TTY, SIGINT, SIGWINCH | ||
| // propagate naturally. Listeners are FD_CLOEXEC so they close at | ||
| // exec; per-conn forwarder goroutines die with the replaced process | ||
| // image. On success this never returns. | ||
| return syscall.Exec(dockerBin, append([]string{"docker"}, dockerArgs...), os.Environ()) | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡 The
--no-mitmflag is silently accepted and ignored when--sandbox=containeris used, even though container mode always routes through MITM and cannot bypass it. This is inconsistent with the explicit design principle stated in the adjacent code: container-only flags are rejected rather than silently ignored in process mode "rather than silently ignoring them, which would be a foot-gun" — the same principle should apply symmetrically for process-only flags in container mode.Extended reasoning...
What the bug is and how it manifests
validateSandboxFlagConflicts(cmd/run_container.go:25-38) returnsnilimmediately whenmode == SandboxContainer, skipping any validation of process-only flags. As a result,agent-vault vault run --no-mitm --sandbox=container -- claudeaccepts the flag without error or warning, even though--no-mitmhas absolutely zero effect in container mode — the container path always callsfetchMITMCAand always routes all traffic through the MITM proxy.The specific code path that triggers it
When the user passes
--no-mitm --sandbox=container,validateSandboxFlagConflictsis called at cmd/run.go:75, but returns immediately at theif mode == SandboxContainer { return nil }branch on run_container.go:27.runContainernever callscmd.Flags().GetBool("no-mitm")at any point — the flag is simply never consulted. The code even documents this on run_container.go:62: "Container mode always routes through MITM — --no-mitm is a process-mode-only escape hatch."Why existing code doesn't prevent it
The
containerOnlyFlagslist only enumerates flags that are container-only (image,mount,keep,no-firewall,home-volume-shared). There is no corresponding list of process-only flags (like--no-mitm) that should be rejected in container mode. The validation function is asymmetric by construction.Why the design principle demands symmetry
The comment on lines 21-23 explicitly states the governing rule: "containerOnlyFlags are no-ops in process mode; we reject them explicitly rather than silently ignoring them, which would be a foot-gun." This exact principle applies in reverse:
--no-mitmis a no-op in container mode. A user who passes it may reasonably believe the MITM proxy is bypassed — particularly because--no-mitmis a meaningful, effective escape hatch in process mode (it disables all HTTPS_PROXY injection entirely).Impact
No security regression: container mode enforces MITM at the iptables level regardless of what flags are passed, so the MITM is never actually bypassed. The impact is purely UX/correctness — a user who passes
--no-mitm --sandbox=containergets no feedback that their flag is a no-op, which contradicts the stated design principle and could mislead the operator about the sandbox's actual network behavior.Step-by-step proof
agent-vault vault run --no-mitm --sandbox=container -- claudeRunEresolvesmode = SandboxContainerand callsvalidateSandboxFlagConflicts(cmd, SandboxContainer)validateSandboxFlagConflictshits line 27:if mode == SandboxContainer { return nil }— returns immediately with no errorRunEproceeds torunContainer(cmd, args, ...)(cmd/run.go:101-103)runContainercallsfetchMITMCAunconditionally (line ~60) and routes all HTTPS through MITM —--no-mitmis never readHow to fix
Add a
processOnlyFlagslist (e.g.["no-mitm"]) and check it symmetrically insidevalidateSandboxFlagConflictswhenmode == SandboxContainer, returning an error such as"--no-mitm is not supported in container mode (MITM is always active)". Alternatively, emit afmt.Fprintf(os.Stderr, "warning: --no-mitm has no effect in container mode")instead of a hard error, matching the pattern used by--no-firewall.