Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@
# Observability (optional)
# AGENT_VAULT_LOG_LEVEL=info # info (default) | debug — debug emits one line per proxied request (no secret values)

# Request-log retention (optional) — controls the per-vault audit log of proxied requests.
# Bodies and query strings are never stored; only secret-free metadata (method, host, path, status, latency, matched service, credential key names).
# AGENT_VAULT_LOGS_MAX_AGE_HOURS=168 # Drop rows older than this (default 168 = 7 days).
# AGENT_VAULT_LOGS_MAX_ROWS_PER_VAULT=10000 # Keep at most this many rows per vault (default 10000; 0 disables the cap).
# AGENT_VAULT_LOGS_RETENTION_LOCK=false # When true, ignore any owner UI overrides (operator pin).

# Rate limiting (optional) — tiered limits with sensible defaults.
# Profile: default | strict (≈0.5×) | loose (≈2×) | off (disable all limits).
# AGENT_VAULT_RATELIMIT_PROFILE=default
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Agent Vault takes a different approach: **Agent Vault never reveals vault-stored
- **Self-service access** - Agents discover available services at runtime and [propose access](https://docs.agent-vault.dev/learn/proposals) for anything missing. You review and approve in your browser with one click. Any service can be toggled on/off without losing its configuration — disabled services return `403 service_disabled` until re-enabled.
- **Encrypted at rest** - Credentials are encrypted with AES-256-GCM using a random data encryption key (DEK). An optional master password wraps the DEK via Argon2id — change the password without re-encrypting credentials. Passwordless mode available for PaaS deploys. [Learn more](https://docs.agent-vault.dev/learn/credentials)
- **Multi-user, multi-vault** - Role-based access control with instance and vault-level [permissions](https://docs.agent-vault.dev/learn/permissions). Invite teammates, scope agents to specific [vaults](https://docs.agent-vault.dev/learn/vaults), and audit everything.
- **Request logs** - Every proxied request is persisted per vault with method, host, path, status, latency, and the credential key names involved — never bodies, headers, or query strings. View them in the **Logs** tab. Retention defaults to 7 days / 10k rows per vault and is owner-tunable.

<p align="center">
<img src="docs/images/architecture.png" alt="Agent Vault architecture diagram" />
Expand Down
27 changes: 27 additions & 0 deletions cmd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/Infisical/agent-vault/internal/notify"
"github.com/Infisical/agent-vault/internal/oauth"
"github.com/Infisical/agent-vault/internal/pidfile"
"github.com/Infisical/agent-vault/internal/requestlog"
"github.com/Infisical/agent-vault/internal/server"
"github.com/Infisical/agent-vault/internal/store"
"github.com/spf13/cobra"
Expand Down Expand Up @@ -145,6 +146,8 @@ var serverCmd = &cobra.Command{
oauthProviders := loadOAuthProviders(baseURL)
srv := server.New(addr, db, masterKey.Key(), notifier, initialized, baseURL, oauthProviders, logger)
srv.SetSkills(skillCLI, skillHTTP)
shutdownLogs := attachLogSink(srv, db, logger)
defer shutdownLogs()
if err := attachMITMIfEnabled(srv, host, mitmPort, masterKey.Key()); err != nil {
return err
}
Expand Down Expand Up @@ -178,11 +181,33 @@ func attachMITMIfEnabled(srv *server.Server, host string, mitmPort int, masterKe
BaseURL: srv.BaseURL(),
Logger: srv.Logger(),
RateLimit: srv.RateLimit(),
LogSink: srv.LogSink(),
},
))
return nil
}

// attachLogSink wires the request-log pipeline onto srv: a SQLite-backed
// sink is attached for persistent storage of request-log records, and a
// background retention goroutine is started against the same db.
//
// It returns a shutdown function that the caller runs (both call sites
// defer it) once the server has stopped. The shutdown function:
//
//  1. cancels the retention goroutine's context,
//  2. closes the sink so pending records are flushed, and
//  3. waits for the retention goroutine to actually exit,
//
// with steps 2 and 3 sharing a single 5-second deadline. Waiting in step 3
// prevents the retention goroutine from still using db while the caller
// continues its own teardown. Failures are reported as warnings on stderr
// and never abort shutdown.
func attachLogSink(srv *server.Server, db *store.SQLiteStore, logger *slog.Logger) func() {
	sink := requestlog.NewSQLiteSink(db, logger, requestlog.SQLiteSinkConfig{})
	srv.AttachLogSink(sink)

	retentionCtx, cancelRetention := context.WithCancel(context.Background())
	// retentionDone is closed when RunRetention returns, giving shutdown a
	// way to wait for the goroutine instead of only signalling it.
	// NOTE(review): assumes RunRetention returns promptly once its context
	// is cancelled; the wait below is deadline-bounded in case it does not.
	retentionDone := make(chan struct{})
	go func() {
		defer close(retentionDone)
		requestlog.RunRetention(retentionCtx, db, logger)
	}()

	return func() {
		cancelRetention()

		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()

		if err := sink.Close(shutdownCtx); err != nil {
			fmt.Fprintf(os.Stderr, "warning: request_log sink flush: %v\n", err)
		}

		select {
		case <-retentionDone:
		case <-shutdownCtx.Done():
			fmt.Fprintln(os.Stderr, "warning: request_log retention did not stop before the shutdown deadline")
		}
	}
}

// promptOwnerSetup interactively creates the owner account.
// masterPassword is optional — if provided, the admin password is checked against it.
func promptOwnerSetup(cmd *cobra.Command, db *store.SQLiteStore, masterPassword []byte) error {
Expand Down Expand Up @@ -438,6 +463,8 @@ func runDetachedChild(host, addr string, mitmPort int, logger *slog.Logger) erro
oauthProviders := loadOAuthProviders(baseURL)
srv := server.New(addr, db, key, notifier, initialized, baseURL, oauthProviders, logger)
srv.SetSkills(skillCLI, skillHTTP)
shutdownLogs := attachLogSink(srv, db, logger)
defer shutdownLogs()
if err := attachMITMIfEnabled(srv, host, mitmPort, key); err != nil {
return err
}
Expand Down
4 changes: 4 additions & 0 deletions cmd/skill_cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ Key fields (JSON mode):

**Check status:** `GET {AGENT_VAULT_ADDR}/v1/proposals/{id}` with `Authorization: Bearer {AGENT_VAULT_SESSION_TOKEN}` -- returns `pending`, `applied`, `rejected`, or `expired`

## Request Logs

Agent Vault keeps a per-vault audit log of proxied requests (method, host, path, status, latency -- never bodies or query strings). The CLI does not wrap this yet; fetch via the HTTP API: `GET {AGENT_VAULT_ADDR}/v1/vaults/{vault}/logs` with `Authorization: Bearer {AGENT_VAULT_SESSION_TOKEN}`. Requires vault `member` or `admin` role. See `skill_http.md` for query params.

## Building Code That Needs Credentials

When you are writing or modifying application code that requires secrets or API keys (e.g. `process.env.STRIPE_KEY`, `os.Getenv("DB_PASSWORD")`), use Agent Vault to ensure those credentials are tracked and available.
Expand Down
11 changes: 11 additions & 0 deletions cmd/skill_http.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,17 @@ Key fields:

**List proposals:** `GET {AGENT_VAULT_ADDR}/v1/proposals?status=pending`

## Request Logs

Agent Vault persists a per-request audit log for each vault (method, host, path, status, latency, matched service, credential key names -- **never** bodies or query strings). Useful for debugging "did the request go through?" and inspecting traffic patterns. Requires vault `member` or `admin` role.

```
GET {AGENT_VAULT_ADDR}/v1/vaults/{vault}/logs
Authorization: Bearer {AGENT_VAULT_SESSION_TOKEN}
```

Query params: `ingress` (`explicit`|`mitm`), `status_bucket` (`2xx`|`3xx`|`4xx`|`5xx`|`err`), `service`, `limit` (default 50, max 200), `before=<id>` (page back), `after=<id>` (tail forward for new rows). Response: `{ "logs": [...], "next_cursor": <id|null>, "latest_id": <id> }`.

## Building Code That Needs Credentials

When you are writing or modifying application code that requires secrets or API keys (e.g. `process.env.STRIPE_KEY`, `os.Getenv("DB_PASSWORD")`), use Agent Vault to ensure those credentials are tracked and available.
Expand Down
3 changes: 3 additions & 0 deletions docs/reference/cli.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ description: "Complete reference for all Agent Vault CLI commands."
| `AGENT_VAULT_RATELIMIT_PROFILE` | Rate-limit profile: `default`, `strict`, `loose`, or `off`. Affects anonymous auth, token-redeem, proxy, authenticated CRUD, and the global in-flight / RPS ceilings. |
| `AGENT_VAULT_RATELIMIT_LOCK` | When `true`, the rate-limit section in the Manage Instance UI is read-only and UI overrides are ignored. Use when you want limits pinned to env vars on PaaS. |
| `AGENT_VAULT_RATELIMIT_<TIER>_<KNOB>` | Fine-grained per-tier overrides. `TIER` ∈ `AUTH`, `PROXY`, `AUTHED`, `GLOBAL`. `KNOB` ∈ `RATE`, `BURST`, `WINDOW`, `MAX`, `CONCURRENCY`. Env-set knobs always beat UI overrides. |
| `AGENT_VAULT_LOGS_MAX_AGE_HOURS` | Retention ceiling for the per-vault request log. Default `168` (7 days). Rows older than this are trimmed by a background job every 15 minutes. Only non-secret metadata is stored. |
| `AGENT_VAULT_LOGS_MAX_ROWS_PER_VAULT` | Per-vault row cap for the request log. Default `10000`. Whichever limit (age or rows) is reached first wins. Set `0` to disable the cap. |
| `AGENT_VAULT_LOGS_RETENTION_LOCK` | When `true`, owner-UI overrides for log retention are ignored and env values (or defaults) are pinned. |
</Accordion>

<Accordion title="agent-vault server stop">
Expand Down
3 changes: 3 additions & 0 deletions docs/self-hosting/environment-variables.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ description: "All environment variables used to configure Agent Vault"
| `AGENT_VAULT_RATELIMIT_PROFILE` | `default` | Rate-limit profile: `default`, `strict` (≈0.5× the defaults), `loose` (≈2×), or `off` (disable all limits). Affects every tier — anonymous auth, token-redeem, proxy, authenticated CRUD, global in-flight. Owners can override per-tier in **Manage Instance → Settings → Rate Limiting** unless `AGENT_VAULT_RATELIMIT_LOCK=true`. |
| `AGENT_VAULT_RATELIMIT_LOCK` | `false` | When `true`, the rate-limit UI in **Manage Instance** is read-only and UI overrides are ignored. Use on PaaS deployments (Fly.io, Cloud Run) when the operator wants limits pinned to env vars. |
| `AGENT_VAULT_RATELIMIT_<TIER>_<KNOB>` | — | Fine-grained per-tier overrides. `TIER` is one of `AUTH` (unauthenticated endpoints), `PROXY` (proxy + MITM), `AUTHED` (everything behind requireAuth), `GLOBAL` (server-wide backstop). `KNOB` is one of `RATE` (tokens/sec), `BURST` (bucket depth), `WINDOW` (duration like `5m`), `MAX` (sliding-window event cap), `CONCURRENCY` (semaphore slots). Env-set knobs always take precedence over UI overrides. |
| `AGENT_VAULT_LOGS_MAX_AGE_HOURS` | `168` | Retention for the per-vault request log (surfaced in **Vault → Logs**). Rows older than this many hours are trimmed by a background job every 15 minutes. Only secret-free metadata is stored (method, host, path, status, latency, matched service, credential key names) — never bodies or query strings. |
| `AGENT_VAULT_LOGS_MAX_ROWS_PER_VAULT` | `10000` | Per-vault row cap for the request log. Whichever limit (age or rows) hits first wins, so heavy-traffic vaults retain a shorter window than the time-based TTL alone would suggest. Set to `0` to disable the row cap. |
| `AGENT_VAULT_LOGS_RETENTION_LOCK` | `false` | When `true`, any owner-UI overrides for log retention are ignored and env values (or defaults) are pinned. Use when you want retention limits controlled only by the operator. |

Master password resolution order:

Expand Down
17 changes: 16 additions & 1 deletion internal/brokercore/logging.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,27 @@ import (
"time"
)

// Ingress labels identify which entrypoint handled a proxied request.
// Persisted into request logs and filterable by the Logs UI, so a typo
// at any call site would silently desync filters from the real data.
// Call sites should reference these constants rather than repeating the
// string literals.
const (
// IngressExplicit labels requests handled by the explicit /proxy/ handler.
IngressExplicit = "explicit"
// IngressMITM labels requests handled by the transparent MITM forward handler.
IngressMITM = "mitm"
)

// Actor types identify the principal behind a proxied request. Same
// reason for constants as the ingress labels above: the values end up in
// request log rows, so every call site must spell them identically.
const (
// ActorTypeUser labels a request whose principal is a user.
ActorTypeUser = "user"
// ActorTypeAgent labels a request whose principal is an agent.
ActorTypeAgent = "agent"
)

// ProxyEvent is the shape of a single structured per-request log line
// emitted by both the explicit /proxy/ handler and the transparent MITM
// forward handler. It is intentionally shallow and contains only
// non-secret metadata — no header values, no bodies, no query strings.
type ProxyEvent struct {
Ingress string // "explicit" | "mitm"
Ingress string // one of IngressExplicit, IngressMITM
Method string // HTTP method from the agent request
Host string // target host (with port if present)
Path string // r.URL.Path only — no query, no fragment
Expand Down
21 changes: 19 additions & 2 deletions internal/mitm/connect.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,31 @@ func mitmConnectIPKey(r *http.Request) string {
return "mitm:" + host
}

// isLoopbackPeer reports whether r arrived from a loopback address
// (127.0.0.0/8 or ::1), judged solely by r.RemoteAddr.
//
// The CONNECT flood gate uses it to exempt local `vault run` clients: one
// agent legitimately opens many CONNECTs at startup (one per distinct
// upstream host), and a local process that wanted to DoS the proxy has
// easier ways to do so, so limiting loopback would only hurt legitimate
// agents.
//
// RemoteAddr is normally "host:port". When it cannot be split, or the host
// part comes back empty, the raw RemoteAddr string is parsed as the IP
// instead. Anything that does not parse as an IP is treated as
// non-loopback.
func isLoopbackPeer(r *http.Request) bool {
	peer := r.RemoteAddr
	if h, _, splitErr := net.SplitHostPort(peer); splitErr == nil && h != "" {
		peer = h
	}

	if parsed := net.ParseIP(peer); parsed != nil {
		return parsed.IsLoopback()
	}
	return false
}

// handleConnect terminates a CONNECT tunnel and serves HTTP/1.1 off the
// resulting TLS connection. The upstream target is taken from the
// CONNECT request line (r.Host) and captured in a closure so subsequent
// Host-header rewrites by the client cannot redirect the tunnel.
func (p *Proxy) handleConnect(w http.ResponseWriter, r *http.Request) {
// Gate before ParseProxyAuth + session lookup so a bad-auth flood
// can't burn CPU. Per-IP on the raw TCP peer.
if p.rateLimit != nil {
// can't burn CPU. Per-IP on the raw TCP peer, shared with the
// TierAuth budget. Loopback is exempt — see isLoopbackPeer.
if p.rateLimit != nil && !isLoopbackPeer(r) {
if d := p.rateLimit.Allow(ratelimit.TierAuth, mitmConnectIPKey(r)); !d.Allow {
ratelimit.WriteDenial(w, d, "Too many CONNECT attempts")
return
Expand Down
29 changes: 29 additions & 0 deletions internal/mitm/connect_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package mitm

import (
"net/http"
"testing"
)

// TestIsLoopbackPeer drives isLoopbackPeer with a table of RemoteAddr
// values: IPv4 and IPv6 loopback peers must be accepted, while private,
// public, empty, and unparseable addresses must be rejected.
func TestIsLoopbackPeer(t *testing.T) {
	type peerCase struct {
		remote string
		want   bool
	}
	table := []peerCase{
		// Loopback peers.
		{remote: "127.0.0.1:54321", want: true},
		{remote: "127.1.2.3:54321", want: true},
		{remote: "[::1]:54321", want: true},
		// Non-loopback peers.
		{remote: "10.0.0.5:54321", want: false},
		{remote: "172.17.0.1:54321", want: false},
		{remote: "203.0.113.9:54321", want: false},
		{remote: "[2001:db8::1]:54321", want: false},
		// Degenerate inputs.
		{remote: "", want: false},
		{remote: "not-an-address", want: false},
	}
	for i := range table {
		req := &http.Request{RemoteAddr: table[i].remote}
		got := isLoopbackPeer(req)
		if got == table[i].want {
			continue
		}
		t.Errorf("isLoopbackPeer(%q) = %v, want %v", table[i].remote, got, table[i].want)
	}
}
22 changes: 21 additions & 1 deletion internal/mitm/forward.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,24 @@ import (

"github.com/Infisical/agent-vault/internal/brokercore"
"github.com/Infisical/agent-vault/internal/ratelimit"
"github.com/Infisical/agent-vault/internal/requestlog"
)

// actorFromScope derives the (actor type, actor id) pair recorded in
// request log rows from scope. A non-empty UserID wins over AgentID. When
// scope is nil, or neither ID is set, both results are empty strings.
func actorFromScope(scope *brokercore.ProxyScope) (string, string) {
	switch {
	case scope == nil:
		return "", ""
	case scope.UserID != "":
		return brokercore.ActorTypeUser, scope.UserID
	case scope.AgentID != "":
		return brokercore.ActorTypeAgent, scope.AgentID
	default:
		return "", ""
	}
}

// forwardHandler returns an http.Handler that forwards each request to
// target (the host:port captured from the original CONNECT line). Using
// a closed-over target rather than r.Host defeats post-tunnel host
Expand All @@ -20,13 +36,17 @@ func (p *Proxy) forwardHandler(target, host string, scope *brokercore.ProxyScope
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
start := time.Now()
event := brokercore.ProxyEvent{
Ingress: "mitm",
Ingress: brokercore.IngressMITM,
Method: r.Method,
Host: target,
Path: r.URL.Path,
}
actorType, actorID := actorFromScope(scope)
emit := func(status int, errCode string) {
event.Emit(p.logger, start, status, errCode)
if p.logSink != nil {
p.logSink.Record(r.Context(), requestlog.FromEvent(event, scope.VaultID, actorType, actorID))
}
}

// Shares one budget with /proxy so switching ingress can't bypass.
Expand Down
8 changes: 8 additions & 0 deletions internal/mitm/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/Infisical/agent-vault/internal/ca"
"github.com/Infisical/agent-vault/internal/netguard"
"github.com/Infisical/agent-vault/internal/ratelimit"
"github.com/Infisical/agent-vault/internal/requestlog"
)

// Proxy is a transparent MITM proxy. It is safe to start at most once;
Expand All @@ -43,6 +44,7 @@ type Proxy struct {
baseURL string // externally-reachable control-plane URL for help links
logger *slog.Logger
rateLimit *ratelimit.Registry // shared with the HTTP server; nil = no-op
logSink requestlog.Sink // never nil (Nop default); shared with HTTP ingress
}

// Options carries the dependencies a Proxy needs. BaseURL is the
Expand All @@ -58,6 +60,7 @@ type Options struct {
BaseURL string
Logger *slog.Logger
RateLimit *ratelimit.Registry
LogSink requestlog.Sink // nil → Nop
}

// New builds a Proxy bound to addr. The returned Proxy does not begin
Expand All @@ -73,6 +76,10 @@ func New(addr string, opts Options) *Proxy {
ResponseHeaderTimeout: 30 * time.Second,
}

sink := opts.LogSink
if sink == nil {
sink = requestlog.Nop{}
}
p := &Proxy{
ca: opts.CA,
sessions: opts.Sessions,
Expand All @@ -81,6 +88,7 @@ func New(addr string, opts Options) *Proxy {
baseURL: opts.BaseURL,
logger: opts.Logger,
rateLimit: opts.RateLimit,
logSink: sink,
}

p.tlsConfig = &tls.Config{
Expand Down
Loading