From cacf24f4ada1ab08f7ed3efae24d43c2c810e93f Mon Sep 17 00:00:00 2001 From: Sanskarzz Date: Mon, 27 Apr 2026 23:52:02 +0530 Subject: [PATCH 1/4] Configure rate limits on VirtualMCPServer Signed-off-by: Sanskarzz --- .../api/v1beta1/mcpserver_types.go | 54 ++- .../api/v1beta1/mcpserver_types_test.go | 45 +++ .../api/v1beta1/virtualmcpserver_types.go | 9 + .../api/v1beta1/zz_generated.deepcopy.go | 17 +- .../virtualmcpserver_vmcpconfig_test.go | 29 ++ cmd/thv-operator/pkg/vmcpconfig/converter.go | 1 + .../pkg/vmcpconfig/converter_test.go | 36 ++ ...irtualmcpserver_sessionstorage_cel_test.go | 55 +++ .../toolhive.stacklok.dev_mcpservers.yaml | 128 +++++- ...olhive.stacklok.dev_virtualmcpservers.yaml | 364 ++++++++++++++++++ .../toolhive.stacklok.dev_mcpservers.yaml | 128 +++++- ...olhive.stacklok.dev_virtualmcpservers.yaml | 364 ++++++++++++++++++ docs/operator/crd-api.md | 48 ++- docs/server/docs.go | 6 + docs/server/swagger.json | 6 + docs/server/swagger.yaml | 4 + pkg/ratelimit/config/config.go | 147 +++++++ pkg/ratelimit/config/doc.go | 8 + pkg/ratelimit/limiter.go | 42 +- pkg/ratelimit/limiter_test.go | 172 ++++++++- pkg/ratelimit/middleware.go | 34 +- pkg/ratelimit/middleware_test.go | 51 ++- pkg/runner/middleware.go | 2 +- pkg/vmcp/cli/serve.go | 2 + pkg/vmcp/config/config.go | 9 + pkg/vmcp/config/zz_generated.deepcopy.go | 4 + pkg/vmcp/server/ratelimit.go | 72 ++++ pkg/vmcp/server/ratelimit_test.go | 76 ++++ pkg/vmcp/server/server.go | 184 +++++---- .../virtualmcp/virtualmcp_ratelimit_test.go | 248 ++++++++++++ 30 files changed, 2185 insertions(+), 160 deletions(-) create mode 100644 pkg/ratelimit/config/config.go create mode 100644 pkg/ratelimit/config/doc.go create mode 100644 pkg/vmcp/server/ratelimit.go create mode 100644 pkg/vmcp/server/ratelimit_test.go create mode 100644 test/e2e/thv-operator/virtualmcp/virtualmcp_ratelimit_test.go diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types.go b/cmd/thv-operator/api/v1beta1/mcpserver_types.go index 3a21eac58e..6738e6e134 100644 --- a/cmd/thv-operator/api/v1beta1/mcpserver_types.go +++ b/cmd/thv-operator/api/v1beta1/mcpserver_types.go @@ -7,6 +7,8 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + + rlconfig "github.com/stacklok/toolhive/pkg/ratelimit/config" ) // Condition types for MCPServer @@ -505,11 +507,15 @@ type SessionStorageConfig struct { // RateLimitConfig defines rate limiting configuration for an MCP server. // At least one of shared, perUser, or tools must be configured. // -// +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser) || (has(self.tools) && size(self.tools) > 0)",message="at least one of shared, perUser, or tools must be configured" +// +kubebuilder:validation:XValidation:rule="has(self.global) || has(self.shared) || has(self.perUser) || (has(self.tools) && size(self.tools) > 0)",message="at least one of global, shared, perUser, or tools must be configured" // //nolint:lll // CEL validation rules exceed line length limit type RateLimitConfig struct { - // Shared is a token bucket shared across all users for the entire server. + // Global is a token bucket shared across all users for the entire server. + // +optional + Global *RateLimitBucket `json:"global,omitempty"` + + // Shared is a deprecated alias for Global. Use global instead. // +optional Shared *RateLimitBucket `json:"shared,omitempty"` @@ -547,9 +553,9 @@ type RateLimitBucket struct { } // ToolRateLimitConfig defines rate limits for a specific tool. -// At least one of shared or perUser must be configured. +// At least one of global, shared, or perUser must be configured. // -// +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser)",message="at least one of shared or perUser must be configured" +// +kubebuilder:validation:XValidation:rule="has(self.global) || has(self.shared) || has(self.perUser)",message="at least one of global, shared, or perUser must be configured" // //nolint:lll // kubebuilder marker exceeds line length type ToolRateLimitConfig struct { @@ -558,7 +564,11 @@ type ToolRateLimitConfig struct { // +kubebuilder:validation:MinLength=1 Name string `json:"name"` - // Shared token bucket for this specific tool. + // Global token bucket for this specific tool. + // +optional + Global *RateLimitBucket `json:"global,omitempty"` + + // Shared is a deprecated alias for Global. Use global instead. // +optional Shared *RateLimitBucket `json:"shared,omitempty"` @@ -567,6 +577,40 @@ type ToolRateLimitConfig struct { PerUser *RateLimitBucket `json:"perUser,omitempty"` } +// ToInternal converts the Kubernetes API rate limit config to the runtime-neutral representation. +func (in *RateLimitConfig) ToInternal() *rlconfig.Config { + if in == nil { + return nil + } + out := &rlconfig.Config{ + Global: rateLimitBucketToInternal(in.Global), + Shared: rateLimitBucketToInternal(in.Shared), + PerUser: rateLimitBucketToInternal(in.PerUser), + } + if len(in.Tools) > 0 { + out.Tools = make([]rlconfig.ToolConfig, 0, len(in.Tools)) + for _, tool := range in.Tools { + out.Tools = append(out.Tools, rlconfig.ToolConfig{ + Name: tool.Name, + Global: rateLimitBucketToInternal(tool.Global), + Shared: rateLimitBucketToInternal(tool.Shared), + PerUser: rateLimitBucketToInternal(tool.PerUser), + }) + } + } + return out +} + +func rateLimitBucketToInternal(in *RateLimitBucket) *rlconfig.Bucket { + if in == nil { + return nil + } + return &rlconfig.Bucket{ + MaxTokens: in.MaxTokens, + RefillPeriod: in.RefillPeriod, + } +} + // Permission profile types const ( // PermissionProfileTypeBuiltin is the type for built-in permission profiles diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go b/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go index 0e69f33836..25506fb4dd 100644 --- a/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go +++ b/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go @@ -75,6 +75,13 @@ func TestRateLimitConfigJSONRoundtrip(t *testing.T) { input RateLimitConfig wantJSON string }{ + { + name: "global only", + input: RateLimitConfig{ + Global: &RateLimitBucket{MaxTokens: 100, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + }, + wantJSON: `{"global":{"maxTokens":100,"refillPeriod":"1m0s"}}`, + }, { name: "shared only", input: RateLimitConfig{ @@ -116,6 +123,44 @@ func TestRateLimitConfigJSONRoundtrip(t *testing.T) { } } +func TestVirtualMCPServerSpecRateLimitingJSONRoundtrip(t *testing.T) { + t.Parallel() + + spec := VirtualMCPServerSpec{ + IncomingAuth: &IncomingAuthConfig{Type: "oidc"}, + GroupRef: &MCPGroupRef{Name: "group-a"}, + SessionStorage: &SessionStorageConfig{ + Provider: "redis", + Address: "redis.default.svc.cluster.local:6379", + }, + RateLimiting: &RateLimitConfig{ + Global: &RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + PerUser: &RateLimitBucket{ + MaxTokens: 2, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + Tools: []ToolRateLimitConfig{ + { + Name: "backend_a_echo", + Global: &RateLimitBucket{ + MaxTokens: 5, + RefillPeriod: metav1.Duration{Duration: 30 * time.Second}, + }, + }, + }, + }, + } + + b, err := json.Marshal(spec) + require.NoError(t, err) + out := string(b) + assert.Contains(t, out, `"rateLimiting"`) + assert.Contains(t, out, `"global"`) + assert.Contains(t, out, `"perUser"`) + assert.Contains(t, out, `"backend_a_echo"`) + assert.NotContains(t, out, `"config":{"rateLimiting"`) +} + func TestMCPServerSpecScalingFieldsJSONRoundtrip(t *testing.T) { t.Parallel() diff --git a/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go b/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go index c63139b133..a23f2feaaa 100644 --- a/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go +++ b/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go @@ -16,6 +16,10 @@ import ( // VirtualMCPServerSpec defines the desired state of VirtualMCPServer // +// +kubebuilder:validation:XValidation:rule="!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider == 'redis')",message="rateLimiting requires sessionStorage with provider 'redis'" +// +kubebuilder:validation:XValidation:rule="!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type == 'oidc')",message="rateLimiting.perUser requires incomingAuth.type oidc" +// +kubebuilder:validation:XValidation:rule="!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type == 'oidc')",message="per-tool perUser rate limiting requires incomingAuth.type oidc" +// //nolint:lll // CEL validation rules exceed line length limit type VirtualMCPServerSpec struct { // IncomingAuth configures authentication for clients connecting to the Virtual MCP server. @@ -143,6 +147,11 @@ type VirtualMCPServerSpec struct { // +listType=atomic // +optional ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` + + // RateLimiting defines rate limiting configuration for the Virtual MCP server. + // Requires Redis session storage to be configured for distributed rate limiting. + // +optional + RateLimiting *RateLimitConfig `json:"rateLimiting,omitempty"` } // EmbeddingServerRef references an existing EmbeddingServer resource by name. diff --git a/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go b/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go index 04da44756d..fd3706419d 100644 --- a/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go +++ b/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go @@ -23,7 +23,7 @@ package v1beta1 import ( corev1 "k8s.io/api/core/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -2210,6 +2210,11 @@ func (in *RateLimitBucket) DeepCopy() *RateLimitBucket { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RateLimitConfig) DeepCopyInto(out *RateLimitConfig) { *out = *in + if in.Global != nil { + in, out := &in.Global, &out.Global + *out = new(RateLimitBucket) + **out = **in + } if in.Shared != nil { in, out := &in.Shared, &out.Shared *out = new(RateLimitBucket) @@ -2664,6 +2669,11 @@ func (in *ToolOverride) DeepCopy() *ToolOverride { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ToolRateLimitConfig) DeepCopyInto(out *ToolRateLimitConfig) { *out = *in + if in.Global != nil { + in, out := &in.Global, &out.Global + *out = new(RateLimitBucket) + **out = **in + } if in.Shared != nil { in, out := &in.Shared, &out.Shared *out = new(RateLimitBucket) @@ -3008,6 +3018,11 @@ func (in *VirtualMCPServerSpec) DeepCopyInto(out *VirtualMCPServerSpec) { *out = make([]corev1.LocalObjectReference, len(*in)) copy(*out, *in) } + if in.RateLimiting != nil { + in, out := &in.RateLimiting, &out.RateLimiting + *out = new(RateLimitConfig) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMCPServerSpec. diff --git a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go index 5d0fe5efde..76da31960f 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go @@ -459,6 +459,25 @@ func TestEnsureVmcpConfigConfigMap(t *testing.T) { }, Spec: mcpv1beta1.VirtualMCPServerSpec{ GroupRef: &mcpv1beta1.MCPGroupRef{Name: "test-group"}, + SessionStorage: &mcpv1beta1.SessionStorageConfig{ + Provider: mcpv1beta1.SessionStorageProviderRedis, + Address: "redis.default.svc.cluster.local:6379", + }, + RateLimiting: &mcpv1beta1.RateLimitConfig{ + PerUser: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 2, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + Tools: []mcpv1beta1.ToolRateLimitConfig{ + { + Name: "backend_a_echo", + Global: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 5, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + }, + }, + }, }, } @@ -507,6 +526,16 @@ func TestEnsureVmcpConfigConfigMap(t *testing.T) { assert.Equal(t, "test-vmcp-vmcp-config", cm.Name) assert.Contains(t, cm.Data, "config.yaml") assert.NotEmpty(t, cm.Annotations["toolhive.stacklok.dev/content-checksum"]) + + var cfg vmcpconfig.Config + require.NoError(t, yaml.Unmarshal([]byte(cm.Data["config.yaml"]), &cfg)) + require.NotNil(t, cfg.RateLimiting, "runtime config must include spec.rateLimiting") + require.NotNil(t, cfg.RateLimiting.PerUser) + assert.Equal(t, int32(2), cfg.RateLimiting.PerUser.MaxTokens) + require.Len(t, cfg.RateLimiting.Tools, 1) + assert.Equal(t, "backend_a_echo", cfg.RateLimiting.Tools[0].Name) + require.NotNil(t, cfg.RateLimiting.Tools[0].Global) + assert.Equal(t, int32(5), cfg.RateLimiting.Tools[0].Global.MaxTokens) } // TestSetAuthConfigConditions tests that auth config conditions reflect the current state diff --git a/cmd/thv-operator/pkg/vmcpconfig/converter.go b/cmd/thv-operator/pkg/vmcpconfig/converter.go index cd31af6d69..6c9a4975e9 100644 --- a/cmd/thv-operator/pkg/vmcpconfig/converter.go +++ b/cmd/thv-operator/pkg/vmcpconfig/converter.go @@ -140,6 +140,7 @@ func (c *Converter) Convert( } config.SessionStorage = convertSessionStorage(vmcp) + config.RateLimiting = vmcp.Spec.RateLimiting.ToInternal() // Apply operational defaults (fills missing values) config.EnsureOperationalDefaults() diff --git a/cmd/thv-operator/pkg/vmcpconfig/converter_test.go b/cmd/thv-operator/pkg/vmcpconfig/converter_test.go index cee72256af..0fdb67443b 100644 --- a/cmd/thv-operator/pkg/vmcpconfig/converter_test.go +++ b/cmd/thv-operator/pkg/vmcpconfig/converter_test.go @@ -97,6 +97,42 @@ func newTestConverterWithObjects(t *testing.T, resolver oidc.Resolver, objects . return converter } +func TestConverter_RateLimitingFromTopLevelSpec(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1beta1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "test-vmcp", Namespace: "default"}, + Spec: mcpv1beta1.VirtualMCPServerSpec{ + GroupRef: &mcpv1beta1.MCPGroupRef{Name: "test-group"}, + IncomingAuth: &mcpv1beta1.IncomingAuthConfig{Type: "anonymous"}, + RateLimiting: &mcpv1beta1.RateLimitConfig{ + Global: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 10, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + Tools: []mcpv1beta1.ToolRateLimitConfig{ + { + Name: "backend_a_echo", + Global: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 1, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + }, + }, + }, + }, + } + + converter := newTestConverter(t, newNoOpMockResolver(t)) + cfg, _, err := converter.Convert(context.Background(), vmcp, nil) + require.NoError(t, err) + require.NotNil(t, cfg.RateLimiting) + require.NotNil(t, cfg.RateLimiting.Global) + assert.Equal(t, int32(10), cfg.RateLimiting.Global.MaxTokens) + require.Len(t, cfg.RateLimiting.Tools, 1) + assert.Equal(t, "backend_a_echo", cfg.RateLimiting.Tools[0].Name) +} + func TestConverter_OIDCResolution(t *testing.T) { t.Parallel() diff --git a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go index 45b6043196..df614f32a6 100644 --- a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go +++ b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go @@ -5,6 +5,8 @@ package controllers import ( + "time" + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -106,4 +108,57 @@ var _ = Describe("CEL Validation for SessionStorageConfig on VirtualMCPServer", Expect(err).To(HaveOccurred()) }) }) + + Context("rateLimiting", func() { + It("should reject rate limiting without redis session storage", func() { + vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-no-redis", nil) + vmcp.Spec.RateLimiting = &mcpv1beta1.RateLimitConfig{ + Global: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 1, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + } + + err := k8sClient.Create(ctx, vmcp) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("rateLimiting requires sessionStorage with provider 'redis'")) + }) + + It("should reject perUser rate limiting with anonymous auth", func() { + vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-peruser-anon", &mcpv1beta1.SessionStorageConfig{ + Provider: "redis", + Address: "redis:6379", + }) + vmcp.Spec.RateLimiting = &mcpv1beta1.RateLimitConfig{ + PerUser: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 1, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + } + + err := k8sClient.Create(ctx, vmcp) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("rateLimiting.perUser requires incomingAuth.type oidc")) + }) + + It("should accept perUser rate limiting with oidc auth and redis session storage", func() { + vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-peruser-oidc", &mcpv1beta1.SessionStorageConfig{ + Provider: "redis", + Address: "redis:6379", + }) + vmcp.Spec.IncomingAuth = &mcpv1beta1.IncomingAuthConfig{ + Type: "oidc", + OIDCConfigRef: &mcpv1beta1.MCPOIDCConfigReference{Name: "oidc"}, + } + vmcp.Spec.RateLimiting = &mcpv1beta1.RateLimitConfig{ + PerUser: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 1, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + } + + err := k8sClient.Create(ctx, vmcp) + Expect(err).NotTo(HaveOccurred()) + }) + }) }) diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml index 44eb6f7a02..33aa0febec 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml @@ -320,6 +320,28 @@ spec: RateLimiting defines rate limiting configuration for the MCP server. Requires Redis session storage to be configured for distributed rate limiting. properties: + global: + description: Global is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object perUser: description: |- PerUser is a token bucket applied independently to each authenticated user @@ -346,8 +368,8 @@ spec: - refillPeriod type: object shared: - description: Shared is a token bucket shared across all users - for the entire server. + description: Shared is a deprecated alias for Global. Use global + instead. properties: maxTokens: description: |- @@ -375,8 +397,29 @@ spec: items: description: |- ToolRateLimitConfig defines rate limits for a specific tool. - At least one of shared or perUser must be configured. + At least one of global, shared, or perUser must be configured. properties: + global: + description: Global token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object name: description: Name is the MCP tool name this limit applies to. @@ -405,7 +448,8 @@ spec: - refillPeriod type: object shared: - description: Shared token bucket for this specific tool. + description: Shared is a deprecated alias for Global. Use + global instead. properties: maxTokens: description: |- @@ -429,17 +473,19 @@ spec: - name type: object x-kubernetes-validations: - - message: at least one of shared or perUser must be configured - rule: has(self.shared) || has(self.perUser) + - message: at least one of global, shared, or perUser must be + configured + rule: has(self.global) || has(self.shared) || has(self.perUser) type: array x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map type: object x-kubernetes-validations: - - message: at least one of shared, perUser, or tools must be configured - rule: has(self.shared) || has(self.perUser) || (has(self.tools) - && size(self.tools) > 0) + - message: at least one of global, shared, perUser, or tools must + be configured + rule: has(self.global) || has(self.shared) || has(self.perUser) + || (has(self.tools) && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of proxy runner (thv run) pod replicas. @@ -1169,6 +1215,28 @@ spec: RateLimiting defines rate limiting configuration for the MCP server. Requires Redis session storage to be configured for distributed rate limiting. properties: + global: + description: Global is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object perUser: description: |- PerUser is a token bucket applied independently to each authenticated user @@ -1195,8 +1263,8 @@ spec: - refillPeriod type: object shared: - description: Shared is a token bucket shared across all users - for the entire server. + description: Shared is a deprecated alias for Global. Use global + instead. properties: maxTokens: description: |- @@ -1224,8 +1292,29 @@ spec: items: description: |- ToolRateLimitConfig defines rate limits for a specific tool. - At least one of shared or perUser must be configured. + At least one of global, shared, or perUser must be configured. properties: + global: + description: Global token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object name: description: Name is the MCP tool name this limit applies to. @@ -1254,7 +1343,8 @@ spec: - refillPeriod type: object shared: - description: Shared token bucket for this specific tool. + description: Shared is a deprecated alias for Global. Use + global instead. properties: maxTokens: description: |- @@ -1278,17 +1368,19 @@ spec: - name type: object x-kubernetes-validations: - - message: at least one of shared or perUser must be configured - rule: has(self.shared) || has(self.perUser) + - message: at least one of global, shared, or perUser must be + configured + rule: has(self.global) || has(self.shared) || has(self.perUser) type: array x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map type: object x-kubernetes-validations: - - message: at least one of shared, perUser, or tools must be configured - rule: has(self.shared) || has(self.perUser) || (has(self.tools) - && size(self.tools) > 0) + - message: at least one of global, shared, perUser, or tools must + be configured + rule: has(self.global) || has(self.shared) || has(self.perUser) + || (has(self.tools) && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of proxy runner (thv run) pod replicas. diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml index a51fe4b5bd..bbc8df3509 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml @@ -2242,6 +2242,177 @@ spec: This field accepts a PodTemplateSpec object as JSON/YAML. type: object x-kubernetes-preserve-unknown-fields: true + rateLimiting: + description: |- + RateLimiting defines rate limiting configuration for the Virtual MCP server. + Requires Redis session storage to be configured for distributed rate limiting. + properties: + global: + description: Global is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + perUser: + description: |- + PerUser is a token bucket applied independently to each authenticated user + at the server level. Requires authentication to be enabled. + Each unique userID creates Redis keys that expire after 2x refillPeriod. + Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a deprecated alias for Global. Use global + instead. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + tools: + description: |- + Tools defines per-tool rate limit overrides. + Each entry applies additional rate limits to calls targeting a specific tool name. + A request must pass both the server-level limit and the per-tool limit. + items: + description: |- + ToolRateLimitConfig defines rate limits for a specific tool. + At least one of global, shared, or perUser must be configured. + properties: + global: + description: Global token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + name: + description: Name is the MCP tool name this limit applies + to. + minLength: 1 + type: string + perUser: + description: PerUser token bucket configuration for this + tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a deprecated alias for Global. Use + global instead. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: at least one of global, shared, or perUser must be + configured + rule: has(self.global) || has(self.shared) || has(self.perUser) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + x-kubernetes-validations: + - message: at least one of global, shared, perUser, or tools must + be configured + rule: has(self.global) || has(self.shared) || has(self.perUser) + || (has(self.tools) && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of vMCP pod replicas. @@ -2347,6 +2518,17 @@ spec: - groupRef - incomingAuth type: object + x-kubernetes-validations: + - message: rateLimiting requires sessionStorage with provider 'redis' + rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider + == ''redis'')' + - message: rateLimiting.perUser requires incomingAuth.type oidc + rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) || + (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')' + - message: per-tool perUser rate limiting requires incomingAuth.type oidc + rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t, + !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type + == ''oidc'')' status: description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer properties: @@ -4738,6 +4920,177 @@ spec: This field accepts a PodTemplateSpec object as JSON/YAML. type: object x-kubernetes-preserve-unknown-fields: true + rateLimiting: + description: |- + RateLimiting defines rate limiting configuration for the Virtual MCP server. + Requires Redis session storage to be configured for distributed rate limiting. + properties: + global: + description: Global is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + perUser: + description: |- + PerUser is a token bucket applied independently to each authenticated user + at the server level. Requires authentication to be enabled. + Each unique userID creates Redis keys that expire after 2x refillPeriod. + Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a deprecated alias for Global. Use global + instead. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + tools: + description: |- + Tools defines per-tool rate limit overrides. + Each entry applies additional rate limits to calls targeting a specific tool name. + A request must pass both the server-level limit and the per-tool limit. + items: + description: |- + ToolRateLimitConfig defines rate limits for a specific tool. + At least one of global, shared, or perUser must be configured. + properties: + global: + description: Global token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + name: + description: Name is the MCP tool name this limit applies + to. + minLength: 1 + type: string + perUser: + description: PerUser token bucket configuration for this + tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a deprecated alias for Global. Use + global instead. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: at least one of global, shared, or perUser must be + configured + rule: has(self.global) || has(self.shared) || has(self.perUser) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + x-kubernetes-validations: + - message: at least one of global, shared, perUser, or tools must + be configured + rule: has(self.global) || has(self.shared) || has(self.perUser) + || (has(self.tools) && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of vMCP pod replicas. @@ -4843,6 +5196,17 @@ spec: - groupRef - incomingAuth type: object + x-kubernetes-validations: + - message: rateLimiting requires sessionStorage with provider 'redis' + rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider + == ''redis'')' + - message: rateLimiting.perUser requires incomingAuth.type oidc + rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) || + (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')' + - message: per-tool perUser rate limiting requires incomingAuth.type oidc + rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t, + !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type + == ''oidc'')' status: description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer properties: diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml index a248508d41..8f07967a66 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml @@ -323,6 +323,28 @@ spec: RateLimiting defines rate limiting configuration for the MCP server. Requires Redis session storage to be configured for distributed rate limiting. properties: + global: + description: Global is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object perUser: description: |- PerUser is a token bucket applied independently to each authenticated user @@ -349,8 +371,8 @@ spec: - refillPeriod type: object shared: - description: Shared is a token bucket shared across all users - for the entire server. + description: Shared is a deprecated alias for Global. Use global + instead. properties: maxTokens: description: |- @@ -378,8 +400,29 @@ spec: items: description: |- ToolRateLimitConfig defines rate limits for a specific tool. - At least one of shared or perUser must be configured. + At least one of global, shared, or perUser must be configured. properties: + global: + description: Global token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object name: description: Name is the MCP tool name this limit applies to. @@ -408,7 +451,8 @@ spec: - refillPeriod type: object shared: - description: Shared token bucket for this specific tool. + description: Shared is a deprecated alias for Global. Use + global instead. properties: maxTokens: description: |- @@ -432,17 +476,19 @@ spec: - name type: object x-kubernetes-validations: - - message: at least one of shared or perUser must be configured - rule: has(self.shared) || has(self.perUser) + - message: at least one of global, shared, or perUser must be + configured + rule: has(self.global) || has(self.shared) || has(self.perUser) type: array x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map type: object x-kubernetes-validations: - - message: at least one of shared, perUser, or tools must be configured - rule: has(self.shared) || has(self.perUser) || (has(self.tools) - && size(self.tools) > 0) + - message: at least one of global, shared, perUser, or tools must + be configured + rule: has(self.global) || has(self.shared) || has(self.perUser) + || (has(self.tools) && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of proxy runner (thv run) pod replicas. @@ -1172,6 +1218,28 @@ spec: RateLimiting defines rate limiting configuration for the MCP server. Requires Redis session storage to be configured for distributed rate limiting. properties: + global: + description: Global is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object perUser: description: |- PerUser is a token bucket applied independently to each authenticated user @@ -1198,8 +1266,8 @@ spec: - refillPeriod type: object shared: - description: Shared is a token bucket shared across all users - for the entire server. + description: Shared is a deprecated alias for Global. Use global + instead. properties: maxTokens: description: |- @@ -1227,8 +1295,29 @@ spec: items: description: |- ToolRateLimitConfig defines rate limits for a specific tool. - At least one of shared or perUser must be configured. + At least one of global, shared, or perUser must be configured. properties: + global: + description: Global token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object name: description: Name is the MCP tool name this limit applies to. @@ -1257,7 +1346,8 @@ spec: - refillPeriod type: object shared: - description: Shared token bucket for this specific tool. + description: Shared is a deprecated alias for Global. Use + global instead. properties: maxTokens: description: |- @@ -1281,17 +1371,19 @@ spec: - name type: object x-kubernetes-validations: - - message: at least one of shared or perUser must be configured - rule: has(self.shared) || has(self.perUser) + - message: at least one of global, shared, or perUser must be + configured + rule: has(self.global) || has(self.shared) || has(self.perUser) type: array x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map type: object x-kubernetes-validations: - - message: at least one of shared, perUser, or tools must be configured - rule: has(self.shared) || has(self.perUser) || (has(self.tools) - && size(self.tools) > 0) + - message: at least one of global, shared, perUser, or tools must + be configured + rule: has(self.global) || has(self.shared) || has(self.perUser) + || (has(self.tools) && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of proxy runner (thv run) pod replicas. diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml index 6078670479..cc804a054c 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml @@ -2245,6 +2245,177 @@ spec: This field accepts a PodTemplateSpec object as JSON/YAML. type: object x-kubernetes-preserve-unknown-fields: true + rateLimiting: + description: |- + RateLimiting defines rate limiting configuration for the Virtual MCP server. + Requires Redis session storage to be configured for distributed rate limiting. + properties: + global: + description: Global is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + perUser: + description: |- + PerUser is a token bucket applied independently to each authenticated user + at the server level. Requires authentication to be enabled. + Each unique userID creates Redis keys that expire after 2x refillPeriod. + Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a deprecated alias for Global. Use global + instead. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + tools: + description: |- + Tools defines per-tool rate limit overrides. + Each entry applies additional rate limits to calls targeting a specific tool name. + A request must pass both the server-level limit and the per-tool limit. + items: + description: |- + ToolRateLimitConfig defines rate limits for a specific tool. + At least one of global, shared, or perUser must be configured. + properties: + global: + description: Global token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + name: + description: Name is the MCP tool name this limit applies + to. + minLength: 1 + type: string + perUser: + description: PerUser token bucket configuration for this + tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a deprecated alias for Global. Use + global instead. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: at least one of global, shared, or perUser must be + configured + rule: has(self.global) || has(self.shared) || has(self.perUser) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + x-kubernetes-validations: + - message: at least one of global, shared, perUser, or tools must + be configured + rule: has(self.global) || has(self.shared) || has(self.perUser) + || (has(self.tools) && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of vMCP pod replicas. @@ -2350,6 +2521,17 @@ spec: - groupRef - incomingAuth type: object + x-kubernetes-validations: + - message: rateLimiting requires sessionStorage with provider 'redis' + rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider + == ''redis'')' + - message: rateLimiting.perUser requires incomingAuth.type oidc + rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) || + (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')' + - message: per-tool perUser rate limiting requires incomingAuth.type oidc + rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t, + !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type + == ''oidc'')' status: description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer properties: @@ -4741,6 +4923,177 @@ spec: This field accepts a PodTemplateSpec object as JSON/YAML. type: object x-kubernetes-preserve-unknown-fields: true + rateLimiting: + description: |- + RateLimiting defines rate limiting configuration for the Virtual MCP server. + Requires Redis session storage to be configured for distributed rate limiting. + properties: + global: + description: Global is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + perUser: + description: |- + PerUser is a token bucket applied independently to each authenticated user + at the server level. Requires authentication to be enabled. + Each unique userID creates Redis keys that expire after 2x refillPeriod. + Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a deprecated alias for Global. Use global + instead. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + tools: + description: |- + Tools defines per-tool rate limit overrides. + Each entry applies additional rate limits to calls targeting a specific tool name. + A request must pass both the server-level limit and the per-tool limit. + items: + description: |- + ToolRateLimitConfig defines rate limits for a specific tool. + At least one of global, shared, or perUser must be configured. + properties: + global: + description: Global token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + name: + description: Name is the MCP tool name this limit applies + to. + minLength: 1 + type: string + perUser: + description: PerUser token bucket configuration for this + tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a deprecated alias for Global. Use + global instead. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: at least one of global, shared, or perUser must be + configured + rule: has(self.global) || has(self.shared) || has(self.perUser) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + x-kubernetes-validations: + - message: at least one of global, shared, perUser, or tools must + be configured + rule: has(self.global) || has(self.shared) || has(self.perUser) + || (has(self.tools) && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of vMCP pod replicas. @@ -4846,6 +5199,17 @@ spec: - groupRef - incomingAuth type: object + x-kubernetes-validations: + - message: rateLimiting requires sessionStorage with provider 'redis' + rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider + == ''redis'')' + - message: rateLimiting.perUser requires incomingAuth.type oidc + rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) || + (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')' + - message: per-tool perUser rate limiting requires incomingAuth.type oidc + rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t, + !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type + == ''oidc'')' status: description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer properties: diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md index b6e0c50cda..d7313e9135 100644 --- a/docs/operator/crd-api.md +++ b/docs/operator/crd-api.md @@ -233,6 +233,25 @@ _Appears in:_ | `primaryUpstreamProvider` _string_ | PrimaryUpstreamProvider names the upstream IDP provider whose access
token should be used as the source of JWT claims for Cedar evaluation.
When empty, claims from the ToolHive-issued token are used.
Must match an upstream provider name configured in the embedded auth server
(e.g. "default", "github"). Only relevant when the embedded auth server is active. | | Optional: \{\}
| +#### ratelimit.config.Bucket + + + +Bucket defines a token bucket configuration with a maximum capacity and a +refill period. + + + +_Appears in:_ +- [ratelimit.config.Config](#ratelimitconfigconfig) +- [ratelimit.config.ToolConfig](#ratelimitconfigtoolconfig) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `maxTokens` _integer_ | MaxTokens is the maximum number of tokens. | | | +| `refillPeriod` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#duration-v1-meta)_ | RefillPeriod is the duration to fully refill the bucket. | | | + + #### vmcp.config.CircuitBreakerConfig @@ -640,6 +659,25 @@ _Appears in:_ +#### ratelimit.config.ToolConfig + + + +ToolConfig defines rate limits for a specific tool. + + + +_Appears in:_ +- [ratelimit.config.Config](#ratelimitconfigconfig) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `name` _string_ | Name is the MCP tool name this limit applies to. | | | +| `global` _[ratelimit.config.Bucket](#ratelimitconfigbucket)_ | Global is a token bucket shared across all users for this tool. | | | +| `shared` _[ratelimit.config.Bucket](#ratelimitconfigbucket)_ | Shared is a deprecated alias for Global. | | | +| `perUser` _[ratelimit.config.Bucket](#ratelimitconfigbucket)_ | PerUser is a token bucket applied independently to each authenticated user
for this tool. | | | + + #### vmcp.config.ToolConfigRef @@ -2712,10 +2750,12 @@ At least one of shared, perUser, or tools must be configured. _Appears in:_ - [api.v1beta1.MCPServerSpec](#apiv1beta1mcpserverspec) +- [api.v1beta1.VirtualMCPServerSpec](#apiv1beta1virtualmcpserverspec) | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `shared` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | Shared is a token bucket shared across all users for the entire server. | | Optional: \{\}
| +| `global` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | Global is a token bucket shared across all users for the entire server. | | Optional: \{\}
| +| `shared` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | Shared is a deprecated alias for Global. Use global instead. | | Optional: \{\}
| | `perUser` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | PerUser is a token bucket applied independently to each authenticated user
at the server level. Requires authentication to be enabled.
Each unique userID creates Redis keys that expire after 2x refillPeriod.
Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. | | Optional: \{\}
| | `tools` _[api.v1beta1.ToolRateLimitConfig](#apiv1beta1toolratelimitconfig) array_ | Tools defines per-tool rate limit overrides.
Each entry applies additional rate limits to calls targeting a specific tool name.
A request must pass both the server-level limit and the per-tool limit. | | Optional: \{\}
| @@ -3130,7 +3170,7 @@ _Appears in:_ ToolRateLimitConfig defines rate limits for a specific tool. -At least one of shared or perUser must be configured. +At least one of global, shared, or perUser must be configured. @@ -3140,7 +3180,8 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | | `name` _string_ | Name is the MCP tool name this limit applies to. | | MinLength: 1
Required: \{\}
| -| `shared` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | Shared token bucket for this specific tool. | | Optional: \{\}
| +| `global` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | Global token bucket for this specific tool. | | Optional: \{\}
| +| `shared` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | Shared is a deprecated alias for Global. Use global instead. | | Optional: \{\}
| | `perUser` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | PerUser token bucket configuration for this tool. | | Optional: \{\}
| @@ -3439,6 +3480,7 @@ _Appears in:_ | `replicas` _integer_ | Replicas is the desired number of vMCP pod replicas.
VirtualMCPServer creates a single Deployment for the vMCP aggregator process,
so there is only one replicas field (unlike MCPServer which has separate
Replicas and BackendReplicas for its two Deployments).
When nil, the operator does not set Deployment.Spec.Replicas, leaving replica
management to an HPA or other external controller. | | Minimum: 0
Optional: \{\}
| | `sessionStorage` _[api.v1beta1.SessionStorageConfig](#apiv1beta1sessionstorageconfig)_ | SessionStorage configures session storage for stateful horizontal scaling.
When nil, no session storage is configured. | | Optional: \{\}
| | `imagePullSecrets` _[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#localobjectreference-v1-core) array_ | ImagePullSecrets allows specifying image pull secrets for the vMCP workload.
These are applied to both the vMCP Deployment's PodSpec.ImagePullSecrets
and to the operator-managed ServiceAccount the vMCP server runs as, so private
images are pullable through either path.
Merge semantics with PodTemplateSpec:
The deployed PodSpec.ImagePullSecrets is the Kubernetes-native strategic-merge
union of this field and spec.podTemplateSpec.spec.imagePullSecrets, merged by
the patchStrategy:"merge" / patchMergeKey:"name" tags on corev1.PodSpec.
- This field is rendered first as the controller-generated default.
- spec.podTemplateSpec.spec.imagePullSecrets is then strategic-merge-patched
on top, keyed by Name. Distinct names from the two sources are unioned in
the resulting list; entries with the same Name are deduplicated and the
PodTemplateSpec entry wins on overlap (user override).
- Order in the resulting list is not guaranteed and should not be relied on:
strategic merge by name is order-insensitive.
- The operator-managed ServiceAccount's imagePullSecrets list is populated
ONLY from this field. spec.podTemplateSpec.spec.imagePullSecrets does not
reach the ServiceAccount because PodTemplateSpec has no notion of a
ServiceAccount. To make a secret usable via the ServiceAccount path
(e.g. for sidecars or init containers that pull images independently),
list it here rather than under spec.podTemplateSpec.
Note on cross-CRD consistency:
MCPRegistry currently uses an atomic-replace strategy for its imagePullSecrets
(the user-provided value replaces the controller-generated list rather than
being merged on top). VirtualMCPServer follows the Kubernetes-native
strategic-merge-by-name behavior described above. Aligning the two is tracked
as a separate follow-up; until then, manifests that set imagePullSecrets on
both CRDs will see different override behavior between them. | | Optional: \{\}
| +| `rateLimiting` _[api.v1beta1.RateLimitConfig](#apiv1beta1ratelimitconfig)_ | RateLimiting defines rate limiting configuration for the Virtual MCP server.
Requires Redis session storage to be configured for distributed rate limiting. | | Optional: \{\}
| #### api.v1beta1.VirtualMCPServerStatus diff --git a/docs/server/docs.go b/docs/server/docs.go index a108f6087f..bad149a9d7 100644 --- a/docs/server/docs.go +++ b/docs/server/docs.go @@ -60,6 +60,9 @@ const docTemplate = `{ "github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitConfig": { "description": "RateLimitConfig contains the CRD rate limiting configuration.\nWhen set, rate limiting middleware is added to the proxy middleware chain.", "properties": { + "global": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket" + }, "perUser": { "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket" }, @@ -79,6 +82,9 @@ const docTemplate = `{ }, "github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.ToolRateLimitConfig": { "properties": { + "global": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket" + }, "name": { "description": "Name is the MCP tool name this limit applies to.\n+kubebuilder:validation:Required\n+kubebuilder:validation:MinLength=1", "type": "string" diff --git a/docs/server/swagger.json b/docs/server/swagger.json index 8a5d8df9b5..6269efbf3d 100644 --- a/docs/server/swagger.json +++ b/docs/server/swagger.json @@ -53,6 +53,9 @@ "github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitConfig": { "description": "RateLimitConfig contains the CRD rate limiting configuration.\nWhen set, rate limiting middleware is added to the proxy middleware chain.", "properties": { + "global": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket" + }, "perUser": { "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket" }, @@ -72,6 +75,9 @@ }, "github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.ToolRateLimitConfig": { "properties": { + "global": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket" + }, "name": { "description": "Name is the MCP tool name this limit applies to.\n+kubebuilder:validation:Required\n+kubebuilder:validation:MinLength=1", "type": "string" diff --git a/docs/server/swagger.yaml b/docs/server/swagger.yaml index bdc1cac8a0..2799158f5c 100644 --- a/docs/server/swagger.yaml +++ b/docs/server/swagger.yaml @@ -52,6 +52,8 @@ components: RateLimitConfig contains the CRD rate limiting configuration. When set, rate limiting middleware is added to the proxy middleware chain. properties: + global: + $ref: '#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket' perUser: $ref: '#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket' shared: @@ -71,6 +73,8 @@ components: type: object github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.ToolRateLimitConfig: properties: + global: + $ref: '#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket' name: description: |- Name is the MCP tool name this limit applies to. diff --git a/pkg/ratelimit/config/config.go b/pkg/ratelimit/config/config.go new file mode 100644 index 0000000000..38aaa01780 --- /dev/null +++ b/pkg/ratelimit/config/config.go @@ -0,0 +1,147 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +// Package config defines runtime-neutral rate limiting configuration. +package config + +import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + +// Config defines rate limiting configuration for an MCP server. +// +gendoc +type Config struct { + // Global is a token bucket shared across all users for the entire server. + Global *Bucket `json:"global,omitempty" yaml:"global,omitempty"` + + // Shared is a deprecated alias for Global. It is kept for compatibility with + // older CRDs and config files that used "shared" before the RFC settled on + // "global". + Shared *Bucket `json:"shared,omitempty" yaml:"shared,omitempty"` + + // PerUser is a token bucket applied independently to each authenticated user + // at the server level. + PerUser *Bucket `json:"perUser,omitempty" yaml:"perUser,omitempty"` + + // Tools defines per-tool rate limits. + Tools []ToolConfig `json:"tools,omitempty" yaml:"tools,omitempty"` +} + +// EffectiveGlobal returns the configured server-level global bucket, including +// the deprecated shared alias. +func (c *Config) EffectiveGlobal() *Bucket { + if c == nil { + return nil + } + if c.Global != nil { + return c.Global + } + return c.Shared +} + +// Bucket defines a token bucket configuration with a maximum capacity and a +// refill period. +// +gendoc +type Bucket struct { + // MaxTokens is the maximum number of tokens. + MaxTokens int32 `json:"maxTokens" yaml:"maxTokens"` + + // RefillPeriod is the duration to fully refill the bucket. + RefillPeriod metav1.Duration `json:"refillPeriod" yaml:"refillPeriod"` +} + +// ToolConfig defines rate limits for a specific tool. +// +gendoc +type ToolConfig struct { + // Name is the MCP tool name this limit applies to. + Name string `json:"name" yaml:"name"` + + // Global is a token bucket shared across all users for this tool. + Global *Bucket `json:"global,omitempty" yaml:"global,omitempty"` + + // Shared is a deprecated alias for Global. + Shared *Bucket `json:"shared,omitempty" yaml:"shared,omitempty"` + + // PerUser is a token bucket applied independently to each authenticated user + // for this tool. + PerUser *Bucket `json:"perUser,omitempty" yaml:"perUser,omitempty"` +} + +// EffectiveGlobal returns the configured tool-level global bucket, including +// the deprecated shared alias. +func (c *ToolConfig) EffectiveGlobal() *Bucket { + if c == nil { + return nil + } + if c.Global != nil { + return c.Global + } + return c.Shared +} + +// DeepCopyInto copies the receiver into out. +func (in *Config) DeepCopyInto(out *Config) { + *out = *in + if in.Global != nil { + out.Global = in.Global.DeepCopy() + } + if in.Shared != nil { + out.Shared = in.Shared.DeepCopy() + } + if in.PerUser != nil { + out.PerUser = in.PerUser.DeepCopy() + } + if in.Tools != nil { + out.Tools = make([]ToolConfig, len(in.Tools)) + for i := range in.Tools { + in.Tools[i].DeepCopyInto(&out.Tools[i]) + } + } +} + +// DeepCopy returns a copy of the receiver. +func (in *Config) DeepCopy() *Config { + if in == nil { + return nil + } + out := new(Config) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto copies the receiver into out. +func (in *Bucket) DeepCopyInto(out *Bucket) { + *out = *in +} + +// DeepCopy returns a copy of the receiver. +func (in *Bucket) DeepCopy() *Bucket { + if in == nil { + return nil + } + out := new(Bucket) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto copies the receiver into out. +func (in *ToolConfig) DeepCopyInto(out *ToolConfig) { + *out = *in + if in.Global != nil { + out.Global = in.Global.DeepCopy() + } + if in.Shared != nil { + out.Shared = in.Shared.DeepCopy() + } + if in.PerUser != nil { + out.PerUser = in.PerUser.DeepCopy() + } +} + +// DeepCopy returns a copy of the receiver. +func (in *ToolConfig) DeepCopy() *ToolConfig { + if in == nil { + return nil + } + out := new(ToolConfig) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/ratelimit/config/doc.go b/pkg/ratelimit/config/doc.go new file mode 100644 index 0000000000..ef361de965 --- /dev/null +++ b/pkg/ratelimit/config/doc.go @@ -0,0 +1,8 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +// Package config defines runtime-neutral rate limiting configuration. +// +// +groupName=toolhive.stacklok.dev +// +versionName=config +package config diff --git a/pkg/ratelimit/limiter.go b/pkg/ratelimit/limiter.go index 3490c1cbaa..4a5e654912 100644 --- a/pkg/ratelimit/limiter.go +++ b/pkg/ratelimit/limiter.go @@ -14,7 +14,7 @@ import ( "github.com/redis/go-redis/v9" - v1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" + rlconfig "github.com/stacklok/toolhive/pkg/ratelimit/config" "github.com/stacklok/toolhive/pkg/ratelimit/internal/bucket" ) @@ -39,17 +39,17 @@ type Decision struct { // NewLimiter constructs a Limiter from CRD configuration. // Returns a no-op limiter (always allows) when crd is nil. // namespace and name identify the MCP server for Redis key derivation. -func NewLimiter(client redis.Cmdable, namespace, name string, crd *v1beta1.RateLimitConfig) (Limiter, error) { +func NewLimiter(client redis.Cmdable, namespace, name string, crd *rlconfig.Config) (Limiter, error) { if crd == nil { return noopLimiter{}, nil } l := &limiter{client: client} - if crd.Shared != nil { - b, err := newBucket(namespace, name, "shared", crd.Shared) + if global, suffix := serverGlobalBucket(crd); global != nil { + b, err := newBucket(namespace, name, suffix, global) if err != nil { - return nil, fmt.Errorf("shared bucket: %w", err) + return nil, fmt.Errorf("%s bucket: %w", suffix, err) } l.serverBucket = b } @@ -63,10 +63,10 @@ func NewLimiter(client redis.Cmdable, namespace, name string, crd *v1beta1.RateL } for _, t := range crd.Tools { - if t.Shared != nil { - b, err := newBucket(namespace, name, "shared:tool:"+t.Name, t.Shared) + if global, suffix := toolGlobalBucket(&t); global != nil { + b, err := newBucket(namespace, name, suffix+":tool:"+t.Name, global) if err != nil { - return nil, fmt.Errorf("tool %q shared bucket: %w", t.Name, err) + return nil, fmt.Errorf("tool %q %s bucket: %w", t.Name, suffix, err) } if l.toolBuckets == nil { l.toolBuckets = make(map[string]*bucket.TokenBucket) @@ -88,6 +88,26 @@ func NewLimiter(client redis.Cmdable, namespace, name string, crd *v1beta1.RateL return l, nil } +func serverGlobalBucket(crd *rlconfig.Config) (*rlconfig.Bucket, string) { + if crd.Global != nil { + return crd.Global, "global" + } + if crd.Shared != nil { + return crd.Shared, "shared" + } + return nil, "" +} + +func toolGlobalBucket(tool *rlconfig.ToolConfig) (*rlconfig.Bucket, string) { + if tool.Global != nil { + return tool.Global, "global" + } + if tool.Shared != nil { + return tool.Shared, "shared" + } + return nil, "" +} + // bucketSpec holds deferred bucket parameters for per-user buckets that are // created on the fly in Allow() because the userID is not known at construction time. type bucketSpec struct { @@ -177,7 +197,7 @@ func (noopLimiter) Allow(context.Context, string, string) (*Decision, error) { } // validateBucketCRD checks that a CRD bucket spec has valid parameters. -func validateBucketCRD(b *v1beta1.RateLimitBucket) (int32, time.Duration, error) { +func validateBucketCRD(b *rlconfig.Bucket) (int32, time.Duration, error) { if b.MaxTokens < 1 { return 0, 0, fmt.Errorf("maxTokens must be >= 1, got %d", b.MaxTokens) } @@ -189,7 +209,7 @@ func validateBucketCRD(b *v1beta1.RateLimitBucket) (int32, time.Duration, error) } // newBucket validates a CRD bucket spec and creates a TokenBucket. -func newBucket(namespace, serverName, suffix string, b *v1beta1.RateLimitBucket) (*bucket.TokenBucket, error) { +func newBucket(namespace, serverName, suffix string, b *rlconfig.Bucket) (*bucket.TokenBucket, error) { maxTokens, refillPeriod, err := validateBucketCRD(b) if err != nil { return nil, err @@ -199,7 +219,7 @@ func newBucket(namespace, serverName, suffix string, b *v1beta1.RateLimitBucket) // newBucketSpec validates a CRD bucket spec and creates a deferred bucketSpec // for per-user buckets that are materialized at Allow() time. -func newBucketSpec(namespace, serverName string, b *v1beta1.RateLimitBucket) (bucketSpec, error) { +func newBucketSpec(namespace, serverName string, b *rlconfig.Bucket) (bucketSpec, error) { maxTokens, refillPeriod, err := validateBucketCRD(b) if err != nil { return bucketSpec{}, err diff --git a/pkg/ratelimit/limiter_test.go b/pkg/ratelimit/limiter_test.go index a007191d3c..e5e921aea8 100644 --- a/pkg/ratelimit/limiter_test.go +++ b/pkg/ratelimit/limiter_test.go @@ -49,7 +49,7 @@ func TestNewLimiter_ZeroMaxTokens(t *testing.T) { }, } - _, err := NewLimiter(client, "ns", "srv", crd) + _, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) assert.Error(t, err) assert.Contains(t, err.Error(), "maxTokens must be >= 1") } @@ -65,7 +65,7 @@ func TestNewLimiter_ZeroDuration(t *testing.T) { }, } - _, err := NewLimiter(client, "ns", "srv", crd) + _, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) assert.Error(t, err) assert.Contains(t, err.Error(), "refillPeriod must be positive") } @@ -78,7 +78,7 @@ func TestLimiter_ServerGlobalExhausted(t *testing.T) { crd := &v1beta1.RateLimitConfig{ Shared: &v1beta1.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}}, } - l, err := NewLimiter(client, "ns", "srv", crd) + l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) require.NoError(t, err) for range 2 { @@ -93,6 +93,70 @@ func TestLimiter_ServerGlobalExhausted(t *testing.T) { assert.Greater(t, d.RetryAfter, time.Duration(0)) } +func TestLimiter_SharedAliasUsesLegacyRedisKeys(t *testing.T) { + t.Parallel() + client, _ := newTestClient(t) + ctx := t.Context() + + crd := &v1beta1.RateLimitConfig{ + Shared: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + Tools: []v1beta1.ToolRateLimitConfig{ + { + Name: "search", + Shared: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + }, + }, + } + l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + require.NoError(t, err) + + d, err := l.Allow(ctx, "search", "") + require.NoError(t, err) + require.True(t, d.Allowed) + + legacyServerKey := "thv:rl:{ns:srv}:shared" + legacyToolKey := "thv:rl:{ns:srv}:shared:tool:search" + newServerKey := "thv:rl:{ns:srv}:global" + newToolKey := "thv:rl:{ns:srv}:global:tool:search" + + exists, err := client.Exists(ctx, legacyServerKey, legacyToolKey).Result() + require.NoError(t, err) + assert.Equal(t, int64(2), exists) + + exists, err = client.Exists(ctx, newServerKey, newToolKey).Result() + require.NoError(t, err) + assert.Equal(t, int64(0), exists) +} + +func TestLimiter_GlobalUsesGlobalRedisKeys(t *testing.T) { + t.Parallel() + client, _ := newTestClient(t) + ctx := t.Context() + + crd := &v1beta1.RateLimitConfig{ + Global: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + Tools: []v1beta1.ToolRateLimitConfig{ + { + Name: "search", + Global: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + }, + }, + } + l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + require.NoError(t, err) + + d, err := l.Allow(ctx, "search", "") + require.NoError(t, err) + require.True(t, d.Allowed) + + exists, err := client.Exists(ctx, + "thv:rl:{ns:srv}:global", + "thv:rl:{ns:srv}:global:tool:search", + ).Result() + require.NoError(t, err) + assert.Equal(t, int64(2), exists) +} + func TestLimiter_PerToolIsolation(t *testing.T) { t.Parallel() client, _ := newTestClient(t) @@ -106,7 +170,7 @@ func TestLimiter_PerToolIsolation(t *testing.T) { }, }, } - l, err := NewLimiter(client, "ns", "srv", crd) + l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) require.NoError(t, err) d, err := l.Allow(ctx, "search", "") @@ -137,7 +201,7 @@ func TestLimiter_ServerAndPerToolBothRequired(t *testing.T) { }, }, } - l, err := NewLimiter(client, "ns", "srv", crd) + l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) require.NoError(t, err) for range 2 { @@ -164,7 +228,7 @@ func TestLimiter_RedisUnavailableReturnsError(t *testing.T) { crd := &v1beta1.RateLimitConfig{ Shared: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, } - l, err := NewLimiter(client, "ns", "srv", crd) + l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) require.NoError(t, err) mr.Close() @@ -181,7 +245,7 @@ func TestLimiter_PerUserServerLevel(t *testing.T) { crd := &v1beta1.RateLimitConfig{ PerUser: &v1beta1.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}}, } - l, err := NewLimiter(client, "ns", "srv", crd) + l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) require.NoError(t, err) // User A exhausts their 2 tokens. @@ -201,6 +265,28 @@ func TestLimiter_PerUserServerLevel(t *testing.T) { assert.True(t, d.Allowed) } +func TestLimiter_PerUserServerLevelSharedAcrossBackendTools(t *testing.T) { + t.Parallel() + client, _ := newTestClient(t) + ctx := t.Context() + + crd := &v1beta1.RateLimitConfig{ + PerUser: &v1beta1.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + } + l, err := NewLimiter(client, "ns", "vmcp", crd.ToInternal()) + require.NoError(t, err) + + for _, toolName := range []string{"backend_a_echo", "backend_b_search"} { + d, allowErr := l.Allow(ctx, toolName, "user-a") + require.NoError(t, allowErr) + require.True(t, d.Allowed) + } + + d, err := l.Allow(ctx, "backend_c_status", "user-a") + require.NoError(t, err) + assert.False(t, d.Allowed, "server-level per-user limit must be shared across all backend tools") +} + func TestLimiter_PerToolPerUserIsolation(t *testing.T) { t.Parallel() client, _ := newTestClient(t) @@ -214,7 +300,7 @@ func TestLimiter_PerToolPerUserIsolation(t *testing.T) { }, }, } - l, err := NewLimiter(client, "ns", "srv", crd) + l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) require.NoError(t, err) // User A uses their 1 token for "search". @@ -252,7 +338,7 @@ func TestLimiter_ServerAndToolPerUserBothRequired(t *testing.T) { }, }, } - l, err := NewLimiter(client, "ns", "srv", crd) + l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) require.NoError(t, err) // User A makes 2 "search" calls — both pass. @@ -273,6 +359,66 @@ func TestLimiter_ServerAndToolPerUserBothRequired(t *testing.T) { assert.True(t, d.Allowed) } +func TestLimiter_PerToolUsesPostAggregationName(t *testing.T) { + t.Parallel() + client, _ := newTestClient(t) + ctx := t.Context() + + crd := &v1beta1.RateLimitConfig{ + Tools: []v1beta1.ToolRateLimitConfig{ + { + Name: "backend_a_expensive_tool", + Global: &v1beta1.RateLimitBucket{MaxTokens: 1, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + }, + }, + } + l, err := NewLimiter(client, "ns", "vmcp", crd.ToInternal()) + require.NoError(t, err) + + d, err := l.Allow(ctx, "backend_a_expensive_tool", "user-a") + require.NoError(t, err) + require.True(t, d.Allowed) + + d, err = l.Allow(ctx, "backend_a_expensive_tool", "user-b") + require.NoError(t, err) + assert.False(t, d.Allowed, "global per-tool limit should apply to the post-aggregation tool name") + + d, err = l.Allow(ctx, "expensive_tool", "user-a") + require.NoError(t, err) + assert.True(t, d.Allowed, "unprefixed backend name should not match the vMCP post-aggregation limit") +} + +func TestLimiter_GlobalAndPerUserBothEnforced(t *testing.T) { + t.Parallel() + client, _ := newTestClient(t) + ctx := t.Context() + + crd := &v1beta1.RateLimitConfig{ + Global: &v1beta1.RateLimitBucket{MaxTokens: 3, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + PerUser: &v1beta1.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + } + l, err := NewLimiter(client, "ns", "vmcp", crd.ToInternal()) + require.NoError(t, err) + + for range 2 { + d, allowErr := l.Allow(ctx, "backend_a_echo", "user-a") + require.NoError(t, allowErr) + require.True(t, d.Allowed) + } + + d, err := l.Allow(ctx, "backend_a_echo", "user-a") + require.NoError(t, err) + assert.False(t, d.Allowed, "per-user limit should reject user-a's third call") + + d, err = l.Allow(ctx, "backend_b_echo", "user-b") + require.NoError(t, err) + require.True(t, d.Allowed, "global bucket should still have one token because rejected calls do not drain") + + d, err = l.Allow(ctx, "backend_b_echo", "user-c") + require.NoError(t, err) + assert.False(t, d.Allowed, "global limit should reject after total allowed calls are exhausted") +} + func TestLimiter_PerUserRejectionDoesNotDrainShared(t *testing.T) { t.Parallel() client, _ := newTestClient(t) @@ -284,7 +430,7 @@ func TestLimiter_PerUserRejectionDoesNotDrainShared(t *testing.T) { Shared: &v1beta1.RateLimitBucket{MaxTokens: 3, RefillPeriod: metav1.Duration{Duration: time.Minute}}, PerUser: &v1beta1.RateLimitBucket{MaxTokens: 1, RefillPeriod: metav1.Duration{Duration: time.Minute}}, } - l, err := NewLimiter(client, "ns", "srv", crd) + l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) require.NoError(t, err) // User A: first call passes (shared=2, user-a=0). @@ -319,7 +465,7 @@ func TestLimiter_RedisUnavailablePerUser(t *testing.T) { crd := &v1beta1.RateLimitConfig{ PerUser: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, } - l, err := NewLimiter(client, "ns", "srv", crd) + l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) require.NoError(t, err) mr.Close() @@ -335,7 +481,7 @@ func TestNewLimiter_PerUserZeroMaxTokens(t *testing.T) { crd := &v1beta1.RateLimitConfig{ PerUser: &v1beta1.RateLimitBucket{MaxTokens: 0, RefillPeriod: metav1.Duration{Duration: time.Minute}}, } - _, err := NewLimiter(client, "ns", "srv", crd) + _, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) assert.Error(t, err) assert.Contains(t, err.Error(), "perUser bucket: maxTokens must be >= 1") } @@ -352,7 +498,7 @@ func TestNewLimiter_ToolPerUserZeroDuration(t *testing.T) { }, }, } - _, err := NewLimiter(client, "ns", "srv", crd) + _, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) assert.Error(t, err) assert.Contains(t, err.Error(), `tool "search" perUser bucket: refillPeriod must be positive`) } diff --git a/pkg/ratelimit/middleware.go b/pkg/ratelimit/middleware.go index ee108fdf87..a472923b59 100644 --- a/pkg/ratelimit/middleware.go +++ b/pkg/ratelimit/middleware.go @@ -15,10 +15,11 @@ import ( "github.com/redis/go-redis/v9" - v1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" "github.com/stacklok/toolhive/pkg/auth" "github.com/stacklok/toolhive/pkg/mcp" + rlconfig "github.com/stacklok/toolhive/pkg/ratelimit/config" "github.com/stacklok/toolhive/pkg/transport/types" + "github.com/stacklok/toolhive/pkg/vmcp/session/optimizerdec" ) const ( @@ -39,11 +40,11 @@ const ( // MiddlewareParams holds the parameters for the rate limit middleware factory. type MiddlewareParams struct { - Namespace string `json:"namespace"` - ServerName string `json:"server_name"` - Config *v1beta1.RateLimitConfig `json:"config"` - RedisAddr string `json:"redis_addr,omitempty"` - RedisDB int32 `json:"redis_db,omitempty"` + Namespace string `json:"namespace"` + ServerName string `json:"server_name"` + Config *rlconfig.Config `json:"config"` + RedisAddr string `json:"redis_addr,omitempty"` + RedisDB int32 `json:"redis_db,omitempty"` } // rateLimitMiddleware wraps rate limiting functionality for the factory pattern. @@ -99,16 +100,16 @@ func CreateMiddleware(config *types.MiddlewareConfig, runner types.MiddlewareRun } mw := &rateLimitMiddleware{ - handler: rateLimitHandler(limiter), + handler: HTTPMiddleware(limiter), client: client, } runner.AddMiddleware(MiddlewareType, mw) return nil } -// rateLimitHandler returns a middleware function that enforces rate limits +// HTTPMiddleware returns a middleware function that enforces rate limits // on tools/call requests. -func rateLimitHandler(limiter Limiter) types.MiddlewareFunction { +func HTTPMiddleware(limiter Limiter) types.MiddlewareFunction { return func(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // Rate limits only apply to parsed tools/call requests. @@ -127,7 +128,8 @@ func rateLimitHandler(limiter Limiter) types.MiddlewareFunction { if identity, ok := auth.IdentityFromContext(r.Context()); ok { userID = identity.Subject } - decision, err := limiter.Allow(r.Context(), parsed.ResourceID, userID) + toolName := rateLimitToolName(parsed) + decision, err := limiter.Allow(r.Context(), toolName, userID) if err != nil { slog.Warn("rate limit check failed, allowing request", "error", err) next.ServeHTTP(w, r) @@ -142,6 +144,18 @@ func rateLimitHandler(limiter Limiter) types.MiddlewareFunction { } } +func rateLimitToolName(parsed *mcp.ParsedMCPRequest) string { + if parsed == nil { + return "" + } + if parsed.ResourceID == optimizerdec.CallToolName && parsed.Arguments != nil { + if toolName, ok := parsed.Arguments[optimizerdec.CallToolArgToolName].(string); ok && toolName != "" { + return toolName + } + } + return parsed.ResourceID +} + // writeRateLimited writes an HTTP 429 response with a JSON-RPC error body. func writeRateLimited(w http.ResponseWriter, requestID any, retryAfter time.Duration) { retrySeconds := int(math.Ceil(retryAfter.Seconds())) diff --git a/pkg/ratelimit/middleware_test.go b/pkg/ratelimit/middleware_test.go index ed76e72e0c..c526665783 100644 --- a/pkg/ratelimit/middleware_test.go +++ b/pkg/ratelimit/middleware_test.go @@ -61,11 +61,23 @@ func withParsedMCPRequest(r *http.Request, method, resourceID string, id any) *h return r.WithContext(ctx) } +func withParsedToolCall(r *http.Request, resourceID string, arguments map[string]interface{}, id any) *http.Request { + parsed := &mcp.ParsedMCPRequest{ + Method: "tools/call", + ResourceID: resourceID, + Arguments: arguments, + ID: id, + IsRequest: true, + } + ctx := context.WithValue(r.Context(), mcp.MCPRequestContextKey, parsed) + return r.WithContext(ctx) +} + func TestRateLimitHandler_ToolCallAllowed(t *testing.T) { t.Parallel() limiter := &dummyLimiter{decision: &Decision{Allowed: true}} - handler := rateLimitHandler(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := HTTPMiddleware(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) @@ -82,7 +94,7 @@ func TestRateLimitHandler_ToolCallRejected(t *testing.T) { t.Parallel() limiter := &dummyLimiter{decision: &Decision{Allowed: false, RetryAfter: 5 * time.Second}} - handler := rateLimitHandler(limiter)(http.HandlerFunc(func(http.ResponseWriter, *http.Request) { + handler := HTTPMiddleware(limiter)(http.HandlerFunc(func(http.ResponseWriter, *http.Request) { t.Fatal("next handler should not be called when rate limited") })) @@ -114,7 +126,7 @@ func TestRateLimitHandler_RedisErrorFailOpen(t *testing.T) { limiter := &dummyLimiter{err: errors.New("redis connection refused")} nextCalled := false - handler := rateLimitHandler(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := HTTPMiddleware(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { nextCalled = true w.WriteHeader(http.StatusOK) })) @@ -134,7 +146,7 @@ func TestRateLimitHandler_NoParsedMCPRequest_PassesThrough(t *testing.T) { limiter := &dummyLimiter{decision: &Decision{Allowed: false}} nextCalled := false - handler := rateLimitHandler(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := HTTPMiddleware(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { nextCalled = true w.WriteHeader(http.StatusOK) })) @@ -154,7 +166,7 @@ func TestRateLimitHandler_NonToolCallPassesThrough(t *testing.T) { limiter := &dummyLimiter{decision: &Decision{Allowed: false, RetryAfter: time.Second}} nextCalled := false - handler := rateLimitHandler(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := HTTPMiddleware(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { nextCalled = true w.WriteHeader(http.StatusOK) })) @@ -173,7 +185,7 @@ func TestRateLimitHandler_PassesUserID(t *testing.T) { t.Parallel() recorder := &recordingLimiter{} - handler := rateLimitHandler(recorder)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := HTTPMiddleware(recorder)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) @@ -189,11 +201,36 @@ func TestRateLimitHandler_PassesUserID(t *testing.T) { assert.Equal(t, "alice@example.com", recorder.userID) } +func TestRateLimitHandler_OptimizerCallToolUsesInnerToolName(t *testing.T) { + t.Parallel() + + recorder := &recordingLimiter{} + handler := HTTPMiddleware(recorder)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + })) + + req := httptest.NewRequest(http.MethodPost, "/mcp", nil) + req = withParsedToolCall(req, "call_tool", map[string]interface{}{ + "tool_name": "backend_a_expensive_tool", + "parameters": map[string]interface{}{ + "query": "test", + }, + }, 1) + req = withIdentity(req, "alice@example.com") + w := httptest.NewRecorder() + + handler.ServeHTTP(w, req) + + assert.Equal(t, http.StatusOK, w.Code) + assert.Equal(t, "backend_a_expensive_tool", recorder.toolName) + assert.Equal(t, "alice@example.com", recorder.userID) +} + func TestRateLimitHandler_NoIdentityPassesEmptyUserID(t *testing.T) { t.Parallel() recorder := &recordingLimiter{} - handler := rateLimitHandler(recorder)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := HTTPMiddleware(recorder)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) diff --git a/pkg/runner/middleware.go b/pkg/runner/middleware.go index be9dd33506..0d711e4439 100644 --- a/pkg/runner/middleware.go +++ b/pkg/runner/middleware.go @@ -434,7 +434,7 @@ func addRateLimitMiddleware(middlewares []types.MiddlewareConfig, config *RunCon params := ratelimit.MiddlewareParams{ Namespace: config.RateLimitNamespace, ServerName: config.Name, - Config: config.RateLimitConfig, + Config: config.RateLimitConfig.ToInternal(), RedisAddr: redisAddr, RedisDB: redisDB, } diff --git a/pkg/vmcp/cli/serve.go b/pkg/vmcp/cli/serve.go index ffb501115c..72b72148e6 100644 --- a/pkg/vmcp/cli/serve.go +++ b/pkg/vmcp/cli/serve.go @@ -386,6 +386,8 @@ func Serve(ctx context.Context, cfg ServeConfig) error { OptimizerConfig: optCfg, SessionFactory: sessionFactory, SessionStorage: vmcpCfg.SessionStorage, + RateLimiting: vmcpCfg.RateLimiting, + RateLimitNamespace: os.Getenv("VMCP_NAMESPACE"), } // Assign Watcher only when backendWatcher is non-nil. A typed nil diff --git a/pkg/vmcp/config/config.go b/pkg/vmcp/config/config.go index 7a2c699290..9deabf6cd0 100644 --- a/pkg/vmcp/config/config.go +++ b/pkg/vmcp/config/config.go @@ -15,6 +15,7 @@ import ( "github.com/stacklok/toolhive/pkg/audit" thvjson "github.com/stacklok/toolhive/pkg/json" + rlconfig "github.com/stacklok/toolhive/pkg/ratelimit/config" "github.com/stacklok/toolhive/pkg/telemetry" "github.com/stacklok/toolhive/pkg/vmcp" authtypes "github.com/stacklok/toolhive/pkg/vmcp/auth/types" @@ -173,6 +174,14 @@ type Config struct { // the THV_SESSION_REDIS_PASSWORD environment variable. // +optional SessionStorage *SessionStorageConfig `json:"sessionStorage,omitempty" yaml:"sessionStorage,omitempty"` + + // RateLimiting configures request rate limits for tools/call. + // Kubernetes users should set VirtualMCPServer.spec.rateLimiting; the operator + // copies that top-level field here for the vMCP runtime. + // This remains YAML-only so it can be written to the runtime config ConfigMap + // without exposing spec.config.rateLimiting in the Kubernetes API. + // +optional + RateLimiting *rlconfig.Config `json:"-" yaml:"rateLimiting,omitempty"` } // IncomingAuthConfig configures client authentication to the virtual MCP server. diff --git a/pkg/vmcp/config/zz_generated.deepcopy.go b/pkg/vmcp/config/zz_generated.deepcopy.go index 80861bff11..018d6cabdd 100644 --- a/pkg/vmcp/config/zz_generated.deepcopy.go +++ b/pkg/vmcp/config/zz_generated.deepcopy.go @@ -204,6 +204,10 @@ func (in *Config) DeepCopyInto(out *Config) { *out = new(SessionStorageConfig) **out = **in } + if in.RateLimiting != nil { + in, out := &in.RateLimiting, &out.RateLimiting + *out = (*in).DeepCopy() + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Config. diff --git a/pkg/vmcp/server/ratelimit.go b/pkg/vmcp/server/ratelimit.go new file mode 100644 index 0000000000..95d0c2f16e --- /dev/null +++ b/pkg/vmcp/server/ratelimit.go @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package server + +import ( + "context" + "fmt" + "log/slog" + "os" + "strings" + "time" + + "github.com/redis/go-redis/v9" + + "github.com/stacklok/toolhive/pkg/ratelimit" + transporttypes "github.com/stacklok/toolhive/pkg/transport/types" + vmcpconfig "github.com/stacklok/toolhive/pkg/vmcp/config" +) + +type httpMiddleware = transporttypes.MiddlewareFunction + +type shutdownFunc func(context.Context) error + +func (s *Server) buildRateLimitMiddleware(ctx context.Context) (httpMiddleware, shutdownFunc, error) { + if s.config.RateLimiting == nil { + return nil, nil, nil + } + if s.config.SessionStorage == nil || + !strings.EqualFold(s.config.SessionStorage.Provider, "redis") || + s.config.SessionStorage.Address == "" { + return nil, nil, fmt.Errorf("rate limiting requires sessionStorage with provider redis") + } + + client := redis.NewClient(&redis.Options{ + Addr: s.config.SessionStorage.Address, + DB: int(s.config.SessionStorage.DB), + Password: os.Getenv(vmcpconfig.RedisPasswordEnvVar), + }) + + pingCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + if err := client.Ping(pingCtx).Err(); err != nil { + _ = client.Close() + return nil, nil, fmt.Errorf("rate limit middleware: failed to connect to Redis at %s: %w", + s.config.SessionStorage.Address, err) + } + + namespace := s.config.RateLimitNamespace + if namespace == "" { + namespace = os.Getenv("VMCP_NAMESPACE") + } + if namespace == "" { + namespace = "default" + } + + limiter, err := ratelimit.NewLimiter(client, namespace, s.config.Name, s.config.RateLimiting) + if err != nil { + _ = client.Close() + return nil, nil, fmt.Errorf("failed to create rate limiter: %w", err) + } + + slog.Info("rate limit middleware enabled for vMCP", + "namespace", namespace, + "name", s.config.Name, + "redis_addr", s.config.SessionStorage.Address, + "redis_db", s.config.SessionStorage.DB) + + return ratelimit.HTTPMiddleware(limiter), func(context.Context) error { + return client.Close() + }, nil +} diff --git a/pkg/vmcp/server/ratelimit_test.go b/pkg/vmcp/server/ratelimit_test.go new file mode 100644 index 0000000000..a804a1ace9 --- /dev/null +++ b/pkg/vmcp/server/ratelimit_test.go @@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package server + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/alicebob/miniredis/v2" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/stacklok/toolhive/pkg/auth" + mcpparser "github.com/stacklok/toolhive/pkg/mcp" + rlconfig "github.com/stacklok/toolhive/pkg/ratelimit/config" + vmcpconfig "github.com/stacklok/toolhive/pkg/vmcp/config" +) + +func TestBuildRateLimitMiddleware_EnforcesToolsCall(t *testing.T) { + t.Parallel() + + redisServer := miniredis.RunT(t) + srv := &Server{ + config: &Config{ + Name: "vmcp-a", + RateLimitNamespace: "default", + SessionStorage: &vmcpconfig.SessionStorageConfig{ + Provider: "redis", + Address: redisServer.Addr(), + }, + RateLimiting: &rlconfig.Config{ + PerUser: &rlconfig.Bucket{ + MaxTokens: 1, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + }, + }, + } + + middleware, closeFn, err := srv.buildRateLimitMiddleware(context.Background()) + require.NoError(t, err) + require.NotNil(t, middleware) + t.Cleanup(func() { + if closeFn != nil { + require.NoError(t, closeFn(context.Background())) + } + }) + + handler := middleware(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + })) + + req := httptest.NewRequest(http.MethodPost, "/mcp", nil) + req = req.WithContext(auth.WithIdentity(req.Context(), &auth.Identity{ + PrincipalInfo: auth.PrincipalInfo{Subject: "alice"}, + })) + req = req.WithContext(context.WithValue(req.Context(), mcpparser.MCPRequestContextKey, &mcpparser.ParsedMCPRequest{ + Method: "tools/call", + ResourceID: "backend_a_echo", + ID: 1, + IsRequest: true, + })) + + first := httptest.NewRecorder() + handler.ServeHTTP(first, req) + assert.Equal(t, http.StatusOK, first.Code) + + second := httptest.NewRecorder() + handler.ServeHTTP(second, req) + assert.Equal(t, http.StatusTooManyRequests, second.Code) +} diff --git a/pkg/vmcp/server/server.go b/pkg/vmcp/server/server.go index 23c415a551..694e43b25d 100644 --- a/pkg/vmcp/server/server.go +++ b/pkg/vmcp/server/server.go @@ -28,6 +28,7 @@ import ( "github.com/stacklok/toolhive/pkg/auth" asrunner "github.com/stacklok/toolhive/pkg/authserver/runner" mcpparser "github.com/stacklok/toolhive/pkg/mcp" + rlconfig "github.com/stacklok/toolhive/pkg/ratelimit/config" "github.com/stacklok/toolhive/pkg/recovery" "github.com/stacklok/toolhive/pkg/telemetry" transportmiddleware "github.com/stacklok/toolhive/pkg/transport/middleware" @@ -181,6 +182,12 @@ type Config struct { // session persistence; the Redis password is read from the // THV_SESSION_REDIS_PASSWORD environment variable. SessionStorage *vmcpconfig.SessionStorageConfig + + // RateLimiting configures rate limiting for incoming tools/call requests. + RateLimiting *rlconfig.Config + + // RateLimitNamespace is used with Name to derive Redis keys. + RateLimitNamespace string } // Server is the Virtual MCP Server that aggregates multiple backends. @@ -525,7 +532,7 @@ func New( // Each call builds a fresh handler. The method is safe to call multiple times. // All returned handlers share the same underlying MCPServer and SessionManager, // so callers should not serve concurrent traffic through multiple handlers. -func (s *Server) Handler(_ context.Context) (http.Handler, error) { +func (s *Server) Handler(ctx context.Context) (http.Handler, error) { // Create Streamable HTTP server with ToolHive session management streamableServer := server.NewStreamableHTTPServer( s.mcpServer, @@ -537,6 +544,25 @@ func (s *Server) Handler(_ context.Context) (http.Handler, error) { // Create HTTP mux with separated authenticated and unauthenticated routes mux := http.NewServeMux() + s.registerHTTPRoutes(mux) + + // MCP endpoint - apply middleware chain (wrapping order, execution happens in reverse): + // Code wraps: auth → MCP-parsing → rate-limit → audit → discovery → + // annotation-enrichment → authz → backend-enrichment → telemetry + // Execution order: recovery → header-val → auth → MCP-parsing → + // rate-limit → audit → discovery → annotation-enrichment → authz → + // backend-enrichment → telemetry → handler + mcpHandler, err := s.buildMCPHandler(ctx, streamableServer) + if err != nil { + return nil, err + } + + mux.Handle("/", mcpHandler) + + return mux, nil +} + +func (s *Server) registerHTTPRoutes(mux *http.ServeMux) { // Unauthenticated health endpoints mux.HandleFunc("/health", s.handleHealth) mux.HandleFunc("/ping", s.handleHealth) @@ -570,32 +596,56 @@ func (s *Server) Handler(_ context.Context) (http.Handler, error) { s.config.AuthServer.RegisterHandlers(mux) slog.Debug("embedded authorization server routes registered") } +} - // MCP endpoint - apply middleware chain (wrapping order, execution happens in reverse): - // Code wraps: auth+parser → audit → discovery → annotation-enrichment → - // authz → backend-enrichment → MCP-parsing → telemetry - // Execution order: recovery → header-val → auth+parser → audit → - // discovery → annotation-enrichment → authz → backend-enrichment → - // MCP-parsing → telemetry → handler +func (s *Server) buildMCPHandler(ctx context.Context, base http.Handler) (http.Handler, error) { + mcpHandler := s.wrapMCPHandler(base) + + if err := s.applyAuditMiddleware(&mcpHandler); err != nil { + return nil, err + } + if err := s.applyRateLimitMiddleware(ctx, &mcpHandler); err != nil { + return nil, err + } + + // Apply MCP parsing middleware to extract JSON-RPC method from request body. + // This must run before rate limiting so global limits also work when auth is + // disabled, and before telemetry so metrics can use the real MCP method. + mcpHandler = mcpparser.ParsingMiddleware(mcpHandler) + + // Apply authentication middleware if configured (runs first in chain) + if s.config.AuthMiddleware != nil { + mcpHandler = s.config.AuthMiddleware(mcpHandler) + slog.Info("authentication middleware enabled for MCP endpoints") + } + + // Apply Accept header validation (rejects GET requests without Accept: text/event-stream) + mcpHandler = headerValidatingMiddleware(mcpHandler) - var mcpHandler http.Handler = streamableServer + // Clear the write deadline for qualifying SSE connections (GET + + // Accept: text/event-stream + MCP endpoint path) so the server-level + // WriteTimeout does not kill long-lived SSE streams (see golang/go#16100). + // Non-qualifying requests are left untouched; http.Server.WriteTimeout + // (defaultWriteTimeout) remains in effect for them. + mcpHandler = transportmiddleware.WriteTimeout(s.config.EndpointPath)(mcpHandler) + + // Apply recovery middleware as outermost (catches panics from all inner middleware) + mcpHandler = recovery.Middleware(mcpHandler) + slog.Info("recovery middleware enabled for MCP endpoints") + + return mcpHandler, nil +} + +func (s *Server) wrapMCPHandler(base http.Handler) http.Handler { + mcpHandler := base if s.config.TelemetryProvider != nil { mcpHandler = s.config.TelemetryProvider.Middleware(s.config.Name, "streamable-http")(mcpHandler) slog.Info("telemetry middleware enabled for MCP endpoints") } - // Apply MCP parsing middleware to extract JSON-RPC method from request body. - // This runs before telemetry so that recordMetrics can label metrics with the - // actual mcp_method (e.g. "tools/call", "initialize") instead of "unknown". - // Note: ParsingMiddleware is also composed inside the auth middleware (for audit/authz). - // The second application here is a no-op because the context already holds a - // ParsedMCPRequest; it exists only so the telemetry layer works correctly even - // when auth middleware is nil. - mcpHandler = mcpparser.ParsingMiddleware(mcpHandler) - - // Apply backend enrichment middleware if audit is configured - // This runs after discovery populates the routing table, so it can extract backend names + // Apply backend enrichment middleware if audit is configured. + // This runs after discovery populates the routing table, so it can extract backend names. if s.config.AuditConfig != nil { mcpHandler = s.backendEnrichmentMiddleware(mcpHandler) slog.Info("backend enrichment middleware enabled for audit events") @@ -606,75 +656,63 @@ func (s *Server) Handler(_ context.Context) (http.Handler, error) { if s.config.AuthzMiddleware != nil { mcpHandler = s.config.AuthzMiddleware(mcpHandler) slog.Info("authorization middleware enabled for MCP endpoints (post-discovery)") - } - - // Apply annotation enrichment middleware (runs after discovery, before authz in execution). - // Reads tool annotations from discovered capabilities and injects them into the - // request context so the authz middleware can make annotation-aware decisions. - if s.config.AuthzMiddleware != nil { mcpHandler = AnnotationEnrichmentMiddleware(mcpHandler) slog.Info("annotation enrichment middleware enabled for MCP endpoints") } - // Apply discovery middleware (runs after audit/auth middleware) - // Discovery middleware performs per-request capability aggregation with user context. - // vmcpSessionMgr (MultiSessionGetter) is used to retrieve the fully-formed MultiSession - // for subsequent requests so the routing table can be injected into context. - // The backend registry provides a dynamic backend list (supports DynamicRegistry for K8s). - // The health monitor enables filtering based on current health status (respects circuit breaker). - s.healthMonitorMu.RLock() - healthMon := s.healthMonitor - s.healthMonitorMu.RUnlock() - - var healthStatusProvider health.StatusProvider - if healthMon != nil { - healthStatusProvider = healthMon - } + healthStatusProvider := s.getHealthStatusProvider() mcpHandler = discovery.Middleware( s.discoveryMgr, s.backendRegistry, s.vmcpSessionMgr, healthStatusProvider, discovery.WithSessionScopedRouting(), )(mcpHandler) slog.Info("discovery middleware enabled for lazy per-user capability discovery") - // Apply audit middleware if configured (runs after auth, before discovery) - if s.config.AuditConfig != nil { - if err := s.config.AuditConfig.Validate(); err != nil { - return nil, fmt.Errorf("invalid audit configuration: %w", err) - } - auditor, err := audit.NewAuditorWithTransport( - s.config.AuditConfig, - "streamable-http", // vMCP uses streamable HTTP transport - ) - if err != nil { - return nil, fmt.Errorf("failed to create auditor: %w", err) - } - mcpHandler = auditor.Middleware(mcpHandler) - slog.Info("audit middleware enabled for MCP endpoints") - } - - // Apply authentication middleware if configured (runs first in chain) - if s.config.AuthMiddleware != nil { - mcpHandler = s.config.AuthMiddleware(mcpHandler) - slog.Info("authentication middleware enabled for MCP endpoints") - } + return mcpHandler +} - // Apply Accept header validation (rejects GET requests without Accept: text/event-stream) - mcpHandler = headerValidatingMiddleware(mcpHandler) +func (s *Server) getHealthStatusProvider() health.StatusProvider { + s.healthMonitorMu.RLock() + defer s.healthMonitorMu.RUnlock() - // Clear the write deadline for qualifying SSE connections (GET + - // Accept: text/event-stream + MCP endpoint path) so the server-level - // WriteTimeout does not kill long-lived SSE streams (see golang/go#16100). - // Non-qualifying requests are left untouched; http.Server.WriteTimeout - // (defaultWriteTimeout) remains in effect for them. - mcpHandler = transportmiddleware.WriteTimeout(s.config.EndpointPath)(mcpHandler) + if s.healthMonitor == nil { + return nil + } + return s.healthMonitor +} - // Apply recovery middleware as outermost (catches panics from all inner middleware) - mcpHandler = recovery.Middleware(mcpHandler) - slog.Info("recovery middleware enabled for MCP endpoints") +func (s *Server) applyAuditMiddleware(mcpHandler *http.Handler) error { + if s.config.AuditConfig == nil { + return nil + } + if err := s.config.AuditConfig.Validate(); err != nil { + return fmt.Errorf("invalid audit configuration: %w", err) + } + auditor, err := audit.NewAuditorWithTransport( + s.config.AuditConfig, + "streamable-http", // vMCP uses streamable HTTP transport + ) + if err != nil { + return fmt.Errorf("failed to create auditor: %w", err) + } + *mcpHandler = auditor.Middleware(*mcpHandler) + slog.Info("audit middleware enabled for MCP endpoints") + return nil +} - mux.Handle("/", mcpHandler) +func (s *Server) applyRateLimitMiddleware(ctx context.Context, mcpHandler *http.Handler) error { + rateLimitMiddleware, closeRateLimit, err := s.buildRateLimitMiddleware(ctx) + if err != nil { + return err + } + if rateLimitMiddleware == nil { + return nil + } - return mux, nil + *mcpHandler = rateLimitMiddleware(*mcpHandler) + if closeRateLimit != nil { + s.shutdownFuncs = append(s.shutdownFuncs, closeRateLimit) + } + return nil } // Start starts the Virtual MCP Server and begins serving requests. diff --git a/test/e2e/thv-operator/virtualmcp/virtualmcp_ratelimit_test.go b/test/e2e/thv-operator/virtualmcp/virtualmcp_ratelimit_test.go new file mode 100644 index 0000000000..de9900ba6c --- /dev/null +++ b/test/e2e/thv-operator/virtualmcp/virtualmcp_ratelimit_test.go @@ -0,0 +1,248 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package virtualmcp + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" + "github.com/stacklok/toolhive/test/e2e/images" +) + +var _ = Describe("VirtualMCPServer Rate Limiting", Ordered, func() { + const ( + timeout = 3 * time.Minute + pollingInterval = 1 * time.Second + ) + + var ( + httpClient *http.Client + mcpGroupName string + backendName string + vmcpName string + oidcName string + oidcConfig string + redisName string + oidcNodePort int32 + vmcpNodePort int32 + cleanupOIDC func() + ) + + BeforeAll(func() { + httpClient = &http.Client{Timeout: 30 * time.Second} + + timestamp := time.Now().UnixNano() + mcpGroupName = fmt.Sprintf("e2e-vmcp-rl-group-%d", timestamp) + backendName = fmt.Sprintf("e2e-vmcp-rl-backend-%d", timestamp) + vmcpName = fmt.Sprintf("e2e-vmcp-rl-%d", timestamp) + oidcName = fmt.Sprintf("e2e-vmcp-rl-oidc-%d", timestamp) + oidcConfig = fmt.Sprintf("e2e-vmcp-rl-oidc-config-%d", timestamp) + redisName = fmt.Sprintf("e2e-vmcp-rl-redis-%d", timestamp) + + By("Deploying Redis for vMCP session storage and rate limiting") + deployRedis(redisName) + + By("Deploying parameterized OIDC server for per-user identity") + var issuerURL string + issuerURL, oidcNodePort, cleanupOIDC = DeployParameterizedOIDCServer( + ctx, k8sClient, oidcName, defaultNamespace, timeout, pollingInterval) + + By("Creating MCPOIDCConfig for vMCP incoming auth") + Expect(k8sClient.Create(ctx, &mcpv1beta1.MCPOIDCConfig{ + ObjectMeta: metav1.ObjectMeta{Name: oidcConfig, Namespace: defaultNamespace}, + Spec: mcpv1beta1.MCPOIDCConfigSpec{ + Type: mcpv1beta1.MCPOIDCConfigTypeInline, + Inline: &mcpv1beta1.InlineOIDCSharedConfig{ + Issuer: issuerURL, + InsecureAllowHTTP: true, + JWKSAllowPrivateIP: true, + ProtectedResourceAllowPrivateIP: true, + }, + }, + })).To(Succeed()) + + By("Creating MCPGroup and yardstick backend") + CreateMCPGroupAndWait(ctx, k8sClient, mcpGroupName, defaultNamespace, + "VirtualMCPServer rate limit e2e group", timeout, pollingInterval) + CreateMCPServerAndWait(ctx, k8sClient, backendName, defaultNamespace, mcpGroupName, + images.YardstickServerImage, timeout, pollingInterval) + + By("Creating VirtualMCPServer with per-user rate limit") + redisAddr := fmt.Sprintf("%s.%s.svc.cluster.local:6379", redisName, defaultNamespace) + Expect(k8sClient.Create(ctx, &mcpv1beta1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: vmcpName, Namespace: defaultNamespace}, + Spec: mcpv1beta1.VirtualMCPServerSpec{ + GroupRef: &mcpv1beta1.MCPGroupRef{Name: mcpGroupName}, + IncomingAuth: &mcpv1beta1.IncomingAuthConfig{ + Type: "oidc", + OIDCConfigRef: &mcpv1beta1.MCPOIDCConfigReference{ + Name: oidcConfig, + Audience: "vmcp-audience", + }, + }, + ServiceType: "NodePort", + SessionStorage: &mcpv1beta1.SessionStorageConfig{ + Provider: mcpv1beta1.SessionStorageProviderRedis, + Address: redisAddr, + }, + RateLimiting: &mcpv1beta1.RateLimitConfig{ + PerUser: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 2, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + }, + }, + })).To(Succeed()) + + By("Waiting for VirtualMCPServer to be ready") + WaitForVirtualMCPServerReady(ctx, k8sClient, vmcpName, defaultNamespace, timeout, pollingInterval) + WaitForCondition(ctx, k8sClient, vmcpName, defaultNamespace, "BackendsDiscovered", "True", timeout, pollingInterval) + vmcpNodePort = GetVMCPNodePort(ctx, k8sClient, vmcpName, defaultNamespace, timeout, pollingInterval) + }) + + AfterAll(func() { + _ = k8sClient.Delete(ctx, &mcpv1beta1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: vmcpName, Namespace: defaultNamespace}, + }) + _ = k8sClient.Delete(ctx, &mcpv1beta1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: backendName, Namespace: defaultNamespace}, + }) + _ = k8sClient.Delete(ctx, &mcpv1beta1.MCPGroup{ + ObjectMeta: metav1.ObjectMeta{Name: mcpGroupName, Namespace: defaultNamespace}, + }) + _ = k8sClient.Delete(ctx, &mcpv1beta1.MCPOIDCConfig{ + ObjectMeta: metav1.ObjectMeta{Name: oidcConfig, Namespace: defaultNamespace}, + }) + cleanupRedis(redisName) + if cleanupOIDC != nil { + cleanupOIDC() + } + }) + + It("rejects tools/call after the per-user limit is exceeded", func() { + token := getVMCPRateLimitOIDCToken(ctx, httpClient, oidcNodePort, "vmcp-rate-limit-user") + sessionID := sendVMCPRateLimitInitialize(ctx, httpClient, vmcpNodePort, token) + + By("Sending requests within the per-user rate limit") + for i := range 2 { + status, body, _ := sendVMCPRateLimitToolCall(ctx, httpClient, vmcpNodePort, "echo", i+1, token, sessionID) + Expect(status).To(Equal(http.StatusOK), + "request %d should succeed, got status %d: %s", i+1, status, string(body)) + } + + By("Sending request that exceeds the per-user rate limit") + status, body, retryAfter := sendVMCPRateLimitToolCall(ctx, httpClient, vmcpNodePort, "echo", 3, token, sessionID) + Expect(status).To(Equal(http.StatusTooManyRequests), + "third request should be rate limited, body: %s", string(body)) + Expect(retryAfter).ToNot(BeEmpty(), "Retry-After header should be set") + + var rpcResp map[string]any + Expect(json.Unmarshal(body, &rpcResp)).To(Succeed()) + errObj, ok := rpcResp["error"].(map[string]any) + Expect(ok).To(BeTrue(), "response should include JSON-RPC error") + Expect(errObj["code"]).To(BeEquivalentTo(-32029)) + Expect(errObj["message"]).To(Equal("Rate limit exceeded")) + }) +}) + +func getVMCPRateLimitOIDCToken(ctx context.Context, httpClient *http.Client, oidcNodePort int32, subject string) string { + url := fmt.Sprintf("http://localhost:%d/token?subject=%s", oidcNodePort, subject) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, nil) + Expect(err).ToNot(HaveOccurred()) + + resp, err := httpClient.Do(req) + Expect(err).ToNot(HaveOccurred()) + defer func() { _ = resp.Body.Close() }() + + var tokenResp struct { + AccessToken string `json:"access_token"` + } + Expect(json.NewDecoder(resp.Body).Decode(&tokenResp)).To(Succeed()) + Expect(tokenResp.AccessToken).ToNot(BeEmpty(), "OIDC server should return a token") + return tokenResp.AccessToken +} + +func sendVMCPRateLimitInitialize( + ctx context.Context, httpClient *http.Client, port int32, bearerToken string, +) string { + reqBody := map[string]any{ + "jsonrpc": "2.0", + "id": 0, + "method": "initialize", + "params": map[string]any{ + "protocolVersion": "2025-03-26", + "capabilities": map[string]any{}, + "clientInfo": map[string]any{ + "name": "vmcp-rate-limit-e2e", + "version": "1.0.0", + }, + }, + } + bodyBytes, err := json.Marshal(reqBody) + Expect(err).ToNot(HaveOccurred()) + + url := fmt.Sprintf("http://localhost:%d/mcp", port) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes)) + Expect(err).ToNot(HaveOccurred()) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json, text/event-stream") + req.Header.Set("Authorization", "Bearer "+bearerToken) + + resp, err := httpClient.Do(req) + Expect(err).ToNot(HaveOccurred()) + defer func() { _ = resp.Body.Close() }() + Expect(resp.StatusCode).To(Equal(http.StatusOK), "initialize should succeed") + + sessionID := resp.Header.Get("Mcp-Session-Id") + Expect(sessionID).ToNot(BeEmpty(), "initialize response should include Mcp-Session-Id header") + return sessionID +} + +func sendVMCPRateLimitToolCall( + ctx context.Context, + httpClient *http.Client, + port int32, + toolName string, + requestID int, + bearerToken string, + sessionID string, +) (int, []byte, string) { + reqBody := map[string]any{ + "jsonrpc": "2.0", + "id": requestID, + "method": "tools/call", + "params": map[string]any{ + "name": toolName, + "arguments": map[string]any{"input": "test"}, + }, + } + bodyBytes, err := json.Marshal(reqBody) + Expect(err).ToNot(HaveOccurred()) + + url := fmt.Sprintf("http://localhost:%d/mcp", port) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes)) + Expect(err).ToNot(HaveOccurred()) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json, text/event-stream") + req.Header.Set("Authorization", "Bearer "+bearerToken) + req.Header.Set("Mcp-Session-Id", sessionID) + + resp, err := httpClient.Do(req) + Expect(err).ToNot(HaveOccurred()) + defer func() { _ = resp.Body.Close() }() + + respBody, err := io.ReadAll(resp.Body) + Expect(err).ToNot(HaveOccurred()) + return resp.StatusCode, respBody, resp.Header.Get("Retry-After") +} From 933b10640924e871e1f71967f8a3fa176b87ff43 Mon Sep 17 00:00:00 2001 From: Sanskarzz Date: Tue, 28 Apr 2026 23:56:58 +0530 Subject: [PATCH 2/4] Only schema changes, no runtime behavior Signed-off-by: Sanskarzz --- .../api/v1beta1/mcpserver_types.go | 56 +--- .../virtualmcpserver_vmcpconfig_test.go | 20 +- cmd/thv-operator/pkg/vmcpconfig/converter.go | 26 +- .../pkg/vmcpconfig/converter_test.go | 63 ++++- .../embeddingserver_update_test.go | 1 + ...irtualmcpserver_sessionstorage_cel_test.go | 7 +- docs/operator/crd-api.md | 38 --- pkg/ratelimit/config/config.go | 147 ----------- pkg/ratelimit/config/doc.go | 8 - pkg/ratelimit/limiter.go | 14 +- pkg/ratelimit/limiter_test.go | 112 ++------ pkg/ratelimit/middleware.go | 34 +-- pkg/ratelimit/middleware_test.go | 51 +--- pkg/runner/middleware.go | 2 +- pkg/vmcp/cli/serve.go | 2 - pkg/vmcp/config/config.go | 3 +- pkg/vmcp/config/config_test.go | 27 ++ pkg/vmcp/server/ratelimit.go | 72 ----- pkg/vmcp/server/ratelimit_test.go | 76 ------ pkg/vmcp/server/server.go | 184 ++++++------- .../virtualmcp_circuit_breaker_test.go | 24 +- .../virtualmcp/virtualmcp_ratelimit_test.go | 248 ------------------ 22 files changed, 278 insertions(+), 937 deletions(-) delete mode 100644 pkg/ratelimit/config/config.go delete mode 100644 pkg/ratelimit/config/doc.go delete mode 100644 pkg/vmcp/server/ratelimit.go delete mode 100644 pkg/vmcp/server/ratelimit_test.go delete mode 100644 test/e2e/thv-operator/virtualmcp/virtualmcp_ratelimit_test.go diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types.go b/cmd/thv-operator/api/v1beta1/mcpserver_types.go index 6738e6e134..ad9dc96b0f 100644 --- a/cmd/thv-operator/api/v1beta1/mcpserver_types.go +++ b/cmd/thv-operator/api/v1beta1/mcpserver_types.go @@ -7,8 +7,6 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - - rlconfig "github.com/stacklok/toolhive/pkg/ratelimit/config" ) // Condition types for MCPServer @@ -513,18 +511,18 @@ type SessionStorageConfig struct { type RateLimitConfig struct { // Global is a token bucket shared across all users for the entire server. // +optional - Global *RateLimitBucket `json:"global,omitempty"` + Global *RateLimitBucket `json:"global,omitempty" yaml:"global,omitempty"` // Shared is a deprecated alias for Global. Use global instead. // +optional - Shared *RateLimitBucket `json:"shared,omitempty"` + Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"` // PerUser is a token bucket applied independently to each authenticated user // at the server level. Requires authentication to be enabled. // Each unique userID creates Redis keys that expire after 2x refillPeriod. // Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. // +optional - PerUser *RateLimitBucket `json:"perUser,omitempty"` + PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"` // Tools defines per-tool rate limit overrides. // Each entry applies additional rate limits to calls targeting a specific tool name. @@ -532,7 +530,7 @@ type RateLimitConfig struct { // +listType=map // +listMapKey=name // +optional - Tools []ToolRateLimitConfig `json:"tools,omitempty"` + Tools []ToolRateLimitConfig `json:"tools,omitempty" yaml:"tools,omitempty"` } // RateLimitBucket defines a token bucket configuration with a maximum capacity @@ -543,13 +541,13 @@ type RateLimitBucket struct { // instantaneously before the bucket is depleted. // +kubebuilder:validation:Required // +kubebuilder:validation:Minimum=1 - MaxTokens int32 `json:"maxTokens"` + MaxTokens int32 `json:"maxTokens" yaml:"maxTokens"` // RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. // The effective refill rate is maxTokens / refillPeriod tokens per second. // Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). // +kubebuilder:validation:Required - RefillPeriod metav1.Duration `json:"refillPeriod"` + RefillPeriod metav1.Duration `json:"refillPeriod" yaml:"refillPeriod"` } // ToolRateLimitConfig defines rate limits for a specific tool. @@ -562,53 +560,19 @@ type ToolRateLimitConfig struct { // Name is the MCP tool name this limit applies to. // +kubebuilder:validation:Required // +kubebuilder:validation:MinLength=1 - Name string `json:"name"` + Name string `json:"name" yaml:"name"` // Global token bucket for this specific tool. // +optional - Global *RateLimitBucket `json:"global,omitempty"` + Global *RateLimitBucket `json:"global,omitempty" yaml:"global,omitempty"` // Shared is a deprecated alias for Global. Use global instead. // +optional - Shared *RateLimitBucket `json:"shared,omitempty"` + Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"` // PerUser token bucket configuration for this tool. // +optional - PerUser *RateLimitBucket `json:"perUser,omitempty"` -} - -// ToInternal converts the Kubernetes API rate limit config to the runtime-neutral representation. -func (in *RateLimitConfig) ToInternal() *rlconfig.Config { - if in == nil { - return nil - } - out := &rlconfig.Config{ - Global: rateLimitBucketToInternal(in.Global), - Shared: rateLimitBucketToInternal(in.Shared), - PerUser: rateLimitBucketToInternal(in.PerUser), - } - if len(in.Tools) > 0 { - out.Tools = make([]rlconfig.ToolConfig, 0, len(in.Tools)) - for _, tool := range in.Tools { - out.Tools = append(out.Tools, rlconfig.ToolConfig{ - Name: tool.Name, - Global: rateLimitBucketToInternal(tool.Global), - Shared: rateLimitBucketToInternal(tool.Shared), - PerUser: rateLimitBucketToInternal(tool.PerUser), - }) - } - } - return out -} - -func rateLimitBucketToInternal(in *RateLimitBucket) *rlconfig.Bucket { - if in == nil { - return nil - } - return &rlconfig.Bucket{ - MaxTokens: in.MaxTokens, - RefillPeriod: in.RefillPeriod, - } + PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"` } // Permission profile types diff --git a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go index 76da31960f..e671e9083b 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go @@ -530,12 +530,20 @@ func TestEnsureVmcpConfigConfigMap(t *testing.T) { var cfg vmcpconfig.Config require.NoError(t, yaml.Unmarshal([]byte(cm.Data["config.yaml"]), &cfg)) require.NotNil(t, cfg.RateLimiting, "runtime config must include spec.rateLimiting") - require.NotNil(t, cfg.RateLimiting.PerUser) - assert.Equal(t, int32(2), cfg.RateLimiting.PerUser.MaxTokens) - require.Len(t, cfg.RateLimiting.Tools, 1) - assert.Equal(t, "backend_a_echo", cfg.RateLimiting.Tools[0].Name) - require.NotNil(t, cfg.RateLimiting.Tools[0].Global) - assert.Equal(t, int32(5), cfg.RateLimiting.Tools[0].Global.MaxTokens) + + rateLimiting := cfg.RateLimiting.Get() + perUser, ok := rateLimiting["perUser"].(map[string]any) + require.True(t, ok) + assert.EqualValues(t, 2, perUser["maxTokens"]) + tools, ok := rateLimiting["tools"].([]any) + require.True(t, ok) + require.Len(t, tools, 1) + tool, ok := tools[0].(map[string]any) + require.True(t, ok) + assert.Equal(t, "backend_a_echo", tool["name"]) + global, ok := tool["global"].(map[string]any) + require.True(t, ok) + assert.EqualValues(t, 5, global["maxTokens"]) } // TestSetAuthConfigConditions tests that auth config conditions reflect the current state diff --git a/cmd/thv-operator/pkg/vmcpconfig/converter.go b/cmd/thv-operator/pkg/vmcpconfig/converter.go index 6c9a4975e9..1b0a5ae140 100644 --- a/cmd/thv-operator/pkg/vmcpconfig/converter.go +++ b/cmd/thv-operator/pkg/vmcpconfig/converter.go @@ -6,6 +6,7 @@ package vmcpconfig import ( "context" + "encoding/json" "fmt" "github.com/go-logr/logr" @@ -19,6 +20,7 @@ import ( "github.com/stacklok/toolhive/cmd/thv-operator/pkg/oidc" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/spectoconfig" "github.com/stacklok/toolhive/pkg/authserver" + thvjson "github.com/stacklok/toolhive/pkg/json" "github.com/stacklok/toolhive/pkg/telemetry" "github.com/stacklok/toolhive/pkg/vmcp/auth/converters" authtypes "github.com/stacklok/toolhive/pkg/vmcp/auth/types" @@ -140,7 +142,13 @@ func (c *Converter) Convert( } config.SessionStorage = convertSessionStorage(vmcp) - config.RateLimiting = vmcp.Spec.RateLimiting.ToInternal() + if vmcp.Spec.RateLimiting != nil { + rateLimiting, err := convertRateLimiting(vmcp.Spec.RateLimiting) + if err != nil { + return nil, nil, fmt.Errorf("failed to convert rate limiting: %w", err) + } + config.RateLimiting = rateLimiting + } // Apply operational defaults (fills missing values) config.EnsureOperationalDefaults() @@ -158,6 +166,22 @@ func (c *Converter) Convert( return config, authServerRC, nil } +func convertRateLimiting(rateLimiting *mcpv1beta1.RateLimitConfig) (*thvjson.Map, error) { + if rateLimiting == nil { + return nil, nil + } + raw, err := json.Marshal(rateLimiting) + if err != nil { + return nil, err + } + var value map[string]any + if err := json.Unmarshal(raw, &value); err != nil { + return nil, err + } + converted := thvjson.NewMap(value) + return &converted, nil +} + // convertIncomingAuth converts IncomingAuthConfig from CRD to vmcp config. func (c *Converter) convertIncomingAuth( ctx context.Context, diff --git a/cmd/thv-operator/pkg/vmcpconfig/converter_test.go b/cmd/thv-operator/pkg/vmcpconfig/converter_test.go index 0fdb67443b..6e195e9b9e 100644 --- a/cmd/thv-operator/pkg/vmcpconfig/converter_test.go +++ b/cmd/thv-operator/pkg/vmcpconfig/converter_test.go @@ -127,10 +127,65 @@ func TestConverter_RateLimitingFromTopLevelSpec(t *testing.T) { cfg, _, err := converter.Convert(context.Background(), vmcp, nil) require.NoError(t, err) require.NotNil(t, cfg.RateLimiting) - require.NotNil(t, cfg.RateLimiting.Global) - assert.Equal(t, int32(10), cfg.RateLimiting.Global.MaxTokens) - require.Len(t, cfg.RateLimiting.Tools, 1) - assert.Equal(t, "backend_a_echo", cfg.RateLimiting.Tools[0].Name) + + rateLimiting := cfg.RateLimiting.Get() + global, ok := rateLimiting["global"].(map[string]any) + require.True(t, ok) + assert.EqualValues(t, 10, global["maxTokens"]) + tools, ok := rateLimiting["tools"].([]any) + require.True(t, ok) + require.Len(t, tools, 1) + tool, ok := tools[0].(map[string]any) + require.True(t, ok) + assert.Equal(t, "backend_a_echo", tool["name"]) +} + +func TestConverter_RateLimitingPreservesSharedAliasAndCopiesInput(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1beta1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "test-vmcp", Namespace: "default"}, + Spec: mcpv1beta1.VirtualMCPServerSpec{ + GroupRef: &mcpv1beta1.MCPGroupRef{Name: "test-group"}, + IncomingAuth: &mcpv1beta1.IncomingAuthConfig{Type: "anonymous"}, + RateLimiting: &mcpv1beta1.RateLimitConfig{ + Shared: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 10, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + Tools: []mcpv1beta1.ToolRateLimitConfig{ + { + Name: "backend_a_echo", + Shared: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 1, + RefillPeriod: metav1.Duration{Duration: time.Second}, + }, + }, + }, + }, + }, + } + + converter := newTestConverter(t, newNoOpMockResolver(t)) + cfg, _, err := converter.Convert(context.Background(), vmcp, nil) + require.NoError(t, err) + require.NotNil(t, cfg.RateLimiting) + + vmcp.Spec.RateLimiting.Shared.MaxTokens = 99 + vmcp.Spec.RateLimiting.Tools[0].Shared.MaxTokens = 88 + + rateLimiting := cfg.RateLimiting.Get() + shared, ok := rateLimiting["shared"].(map[string]any) + require.True(t, ok) + assert.EqualValues(t, 10, shared["maxTokens"]) + tools, ok := rateLimiting["tools"].([]any) + require.True(t, ok) + require.Len(t, tools, 1) + tool, ok := tools[0].(map[string]any) + require.True(t, ok) + toolShared, ok := tool["shared"].(map[string]any) + require.True(t, ok) + assert.EqualValues(t, 1, toolShared["maxTokens"]) } func TestConverter_OIDCResolution(t *testing.T) { diff --git a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go index b91f9d021d..c4a3484bd2 100644 --- a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go +++ b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go @@ -466,6 +466,7 @@ var _ = Describe("EmbeddingServer Controller Update Tests", func() { Expect(k8sClient.Create(ctx, embeddingServer)).To(Succeed()) Eventually(func(g Gomega) { g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), &appsv1.StatefulSet{})).To(Succeed()) + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), &corev1.Service{})).To(Succeed()) }, timeout, interval).Should(Succeed()) }) diff --git a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go index df614f32a6..dc00607240 100644 --- a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go +++ b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go @@ -147,8 +147,11 @@ var _ = Describe("CEL Validation for SessionStorageConfig on VirtualMCPServer", Address: "redis:6379", }) vmcp.Spec.IncomingAuth = &mcpv1beta1.IncomingAuthConfig{ - Type: "oidc", - OIDCConfigRef: &mcpv1beta1.MCPOIDCConfigReference{Name: "oidc"}, + Type: "oidc", + OIDCConfigRef: &mcpv1beta1.MCPOIDCConfigReference{ + Name: "oidc", + Audience: "test-audience", + }, } vmcp.Spec.RateLimiting = &mcpv1beta1.RateLimitConfig{ PerUser: &mcpv1beta1.RateLimitBucket{ diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md index d7313e9135..2b66ac030d 100644 --- a/docs/operator/crd-api.md +++ b/docs/operator/crd-api.md @@ -233,25 +233,6 @@ _Appears in:_ | `primaryUpstreamProvider` _string_ | PrimaryUpstreamProvider names the upstream IDP provider whose access
token should be used as the source of JWT claims for Cedar evaluation.
When empty, claims from the ToolHive-issued token are used.
Must match an upstream provider name configured in the embedded auth server
(e.g. "default", "github"). Only relevant when the embedded auth server is active. | | Optional: \{\}
| -#### ratelimit.config.Bucket - - - -Bucket defines a token bucket configuration with a maximum capacity and a -refill period. - - - -_Appears in:_ -- [ratelimit.config.Config](#ratelimitconfigconfig) -- [ratelimit.config.ToolConfig](#ratelimitconfigtoolconfig) - -| Field | Description | Default | Validation | -| --- | --- | --- | --- | -| `maxTokens` _integer_ | MaxTokens is the maximum number of tokens. | | | -| `refillPeriod` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#duration-v1-meta)_ | RefillPeriod is the duration to fully refill the bucket. | | | - - #### vmcp.config.CircuitBreakerConfig @@ -659,25 +640,6 @@ _Appears in:_ -#### ratelimit.config.ToolConfig - - - -ToolConfig defines rate limits for a specific tool. - - - -_Appears in:_ -- [ratelimit.config.Config](#ratelimitconfigconfig) - -| Field | Description | Default | Validation | -| --- | --- | --- | --- | -| `name` _string_ | Name is the MCP tool name this limit applies to. | | | -| `global` _[ratelimit.config.Bucket](#ratelimitconfigbucket)_ | Global is a token bucket shared across all users for this tool. | | | -| `shared` _[ratelimit.config.Bucket](#ratelimitconfigbucket)_ | Shared is a deprecated alias for Global. | | | -| `perUser` _[ratelimit.config.Bucket](#ratelimitconfigbucket)_ | PerUser is a token bucket applied independently to each authenticated user
for this tool. | | | - - #### vmcp.config.ToolConfigRef diff --git a/pkg/ratelimit/config/config.go b/pkg/ratelimit/config/config.go deleted file mode 100644 index 38aaa01780..0000000000 --- a/pkg/ratelimit/config/config.go +++ /dev/null @@ -1,147 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. -// SPDX-License-Identifier: Apache-2.0 - -// Package config defines runtime-neutral rate limiting configuration. -package config - -import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - -// Config defines rate limiting configuration for an MCP server. -// +gendoc -type Config struct { - // Global is a token bucket shared across all users for the entire server. - Global *Bucket `json:"global,omitempty" yaml:"global,omitempty"` - - // Shared is a deprecated alias for Global. It is kept for compatibility with - // older CRDs and config files that used "shared" before the RFC settled on - // "global". - Shared *Bucket `json:"shared,omitempty" yaml:"shared,omitempty"` - - // PerUser is a token bucket applied independently to each authenticated user - // at the server level. - PerUser *Bucket `json:"perUser,omitempty" yaml:"perUser,omitempty"` - - // Tools defines per-tool rate limits. - Tools []ToolConfig `json:"tools,omitempty" yaml:"tools,omitempty"` -} - -// EffectiveGlobal returns the configured server-level global bucket, including -// the deprecated shared alias. -func (c *Config) EffectiveGlobal() *Bucket { - if c == nil { - return nil - } - if c.Global != nil { - return c.Global - } - return c.Shared -} - -// Bucket defines a token bucket configuration with a maximum capacity and a -// refill period. -// +gendoc -type Bucket struct { - // MaxTokens is the maximum number of tokens. - MaxTokens int32 `json:"maxTokens" yaml:"maxTokens"` - - // RefillPeriod is the duration to fully refill the bucket. - RefillPeriod metav1.Duration `json:"refillPeriod" yaml:"refillPeriod"` -} - -// ToolConfig defines rate limits for a specific tool. -// +gendoc -type ToolConfig struct { - // Name is the MCP tool name this limit applies to. - Name string `json:"name" yaml:"name"` - - // Global is a token bucket shared across all users for this tool. - Global *Bucket `json:"global,omitempty" yaml:"global,omitempty"` - - // Shared is a deprecated alias for Global. - Shared *Bucket `json:"shared,omitempty" yaml:"shared,omitempty"` - - // PerUser is a token bucket applied independently to each authenticated user - // for this tool. - PerUser *Bucket `json:"perUser,omitempty" yaml:"perUser,omitempty"` -} - -// EffectiveGlobal returns the configured tool-level global bucket, including -// the deprecated shared alias. -func (c *ToolConfig) EffectiveGlobal() *Bucket { - if c == nil { - return nil - } - if c.Global != nil { - return c.Global - } - return c.Shared -} - -// DeepCopyInto copies the receiver into out. -func (in *Config) DeepCopyInto(out *Config) { - *out = *in - if in.Global != nil { - out.Global = in.Global.DeepCopy() - } - if in.Shared != nil { - out.Shared = in.Shared.DeepCopy() - } - if in.PerUser != nil { - out.PerUser = in.PerUser.DeepCopy() - } - if in.Tools != nil { - out.Tools = make([]ToolConfig, len(in.Tools)) - for i := range in.Tools { - in.Tools[i].DeepCopyInto(&out.Tools[i]) - } - } -} - -// DeepCopy returns a copy of the receiver. -func (in *Config) DeepCopy() *Config { - if in == nil { - return nil - } - out := new(Config) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto copies the receiver into out. -func (in *Bucket) DeepCopyInto(out *Bucket) { - *out = *in -} - -// DeepCopy returns a copy of the receiver. -func (in *Bucket) DeepCopy() *Bucket { - if in == nil { - return nil - } - out := new(Bucket) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto copies the receiver into out. -func (in *ToolConfig) DeepCopyInto(out *ToolConfig) { - *out = *in - if in.Global != nil { - out.Global = in.Global.DeepCopy() - } - if in.Shared != nil { - out.Shared = in.Shared.DeepCopy() - } - if in.PerUser != nil { - out.PerUser = in.PerUser.DeepCopy() - } -} - -// DeepCopy returns a copy of the receiver. -func (in *ToolConfig) DeepCopy() *ToolConfig { - if in == nil { - return nil - } - out := new(ToolConfig) - in.DeepCopyInto(out) - return out -} diff --git a/pkg/ratelimit/config/doc.go b/pkg/ratelimit/config/doc.go deleted file mode 100644 index ef361de965..0000000000 --- a/pkg/ratelimit/config/doc.go +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. -// SPDX-License-Identifier: Apache-2.0 - -// Package config defines runtime-neutral rate limiting configuration. -// -// +groupName=toolhive.stacklok.dev -// +versionName=config -package config diff --git a/pkg/ratelimit/limiter.go b/pkg/ratelimit/limiter.go index 4a5e654912..6679d6db4a 100644 --- a/pkg/ratelimit/limiter.go +++ b/pkg/ratelimit/limiter.go @@ -14,7 +14,7 @@ import ( "github.com/redis/go-redis/v9" - rlconfig "github.com/stacklok/toolhive/pkg/ratelimit/config" + v1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" "github.com/stacklok/toolhive/pkg/ratelimit/internal/bucket" ) @@ -39,7 +39,7 @@ type Decision struct { // NewLimiter constructs a Limiter from CRD configuration. // Returns a no-op limiter (always allows) when crd is nil. // namespace and name identify the MCP server for Redis key derivation. -func NewLimiter(client redis.Cmdable, namespace, name string, crd *rlconfig.Config) (Limiter, error) { +func NewLimiter(client redis.Cmdable, namespace, name string, crd *v1beta1.RateLimitConfig) (Limiter, error) { if crd == nil { return noopLimiter{}, nil } @@ -88,7 +88,7 @@ func NewLimiter(client redis.Cmdable, namespace, name string, crd *rlconfig.Conf return l, nil } -func serverGlobalBucket(crd *rlconfig.Config) (*rlconfig.Bucket, string) { +func serverGlobalBucket(crd *v1beta1.RateLimitConfig) (*v1beta1.RateLimitBucket, string) { if crd.Global != nil { return crd.Global, "global" } @@ -98,7 +98,7 @@ func serverGlobalBucket(crd *rlconfig.Config) (*rlconfig.Bucket, string) { return nil, "" } -func toolGlobalBucket(tool *rlconfig.ToolConfig) (*rlconfig.Bucket, string) { +func toolGlobalBucket(tool *v1beta1.ToolRateLimitConfig) (*v1beta1.RateLimitBucket, string) { if tool.Global != nil { return tool.Global, "global" } @@ -197,7 +197,7 @@ func (noopLimiter) Allow(context.Context, string, string) (*Decision, error) { } // validateBucketCRD checks that a CRD bucket spec has valid parameters. -func validateBucketCRD(b *rlconfig.Bucket) (int32, time.Duration, error) { +func validateBucketCRD(b *v1beta1.RateLimitBucket) (int32, time.Duration, error) { if b.MaxTokens < 1 { return 0, 0, fmt.Errorf("maxTokens must be >= 1, got %d", b.MaxTokens) } @@ -209,7 +209,7 @@ func validateBucketCRD(b *rlconfig.Bucket) (int32, time.Duration, error) { } // newBucket validates a CRD bucket spec and creates a TokenBucket. -func newBucket(namespace, serverName, suffix string, b *rlconfig.Bucket) (*bucket.TokenBucket, error) { +func newBucket(namespace, serverName, suffix string, b *v1beta1.RateLimitBucket) (*bucket.TokenBucket, error) { maxTokens, refillPeriod, err := validateBucketCRD(b) if err != nil { return nil, err @@ -219,7 +219,7 @@ func newBucket(namespace, serverName, suffix string, b *rlconfig.Bucket) (*bucke // newBucketSpec validates a CRD bucket spec and creates a deferred bucketSpec // for per-user buckets that are materialized at Allow() time. -func newBucketSpec(namespace, serverName string, b *rlconfig.Bucket) (bucketSpec, error) { +func newBucketSpec(namespace, serverName string, b *v1beta1.RateLimitBucket) (bucketSpec, error) { maxTokens, refillPeriod, err := validateBucketCRD(b) if err != nil { return bucketSpec{}, err diff --git a/pkg/ratelimit/limiter_test.go b/pkg/ratelimit/limiter_test.go index e5e921aea8..aea2b1636a 100644 --- a/pkg/ratelimit/limiter_test.go +++ b/pkg/ratelimit/limiter_test.go @@ -49,7 +49,7 @@ func TestNewLimiter_ZeroMaxTokens(t *testing.T) { }, } - _, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + _, err := NewLimiter(client, "ns", "srv", crd) assert.Error(t, err) assert.Contains(t, err.Error(), "maxTokens must be >= 1") } @@ -65,7 +65,7 @@ func TestNewLimiter_ZeroDuration(t *testing.T) { }, } - _, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + _, err := NewLimiter(client, "ns", "srv", crd) assert.Error(t, err) assert.Contains(t, err.Error(), "refillPeriod must be positive") } @@ -78,7 +78,7 @@ func TestLimiter_ServerGlobalExhausted(t *testing.T) { crd := &v1beta1.RateLimitConfig{ Shared: &v1beta1.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}}, } - l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + l, err := NewLimiter(client, "ns", "srv", crd) require.NoError(t, err) for range 2 { @@ -107,7 +107,7 @@ func TestLimiter_SharedAliasUsesLegacyRedisKeys(t *testing.T) { }, }, } - l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + l, err := NewLimiter(client, "ns", "srv", crd) require.NoError(t, err) d, err := l.Allow(ctx, "search", "") @@ -142,7 +142,7 @@ func TestLimiter_GlobalUsesGlobalRedisKeys(t *testing.T) { }, }, } - l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + l, err := NewLimiter(client, "ns", "srv", crd) require.NoError(t, err) d, err := l.Allow(ctx, "search", "") @@ -170,7 +170,7 @@ func TestLimiter_PerToolIsolation(t *testing.T) { }, }, } - l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + l, err := NewLimiter(client, "ns", "srv", crd) require.NoError(t, err) d, err := l.Allow(ctx, "search", "") @@ -201,7 +201,7 @@ func TestLimiter_ServerAndPerToolBothRequired(t *testing.T) { }, }, } - l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + l, err := NewLimiter(client, "ns", "srv", crd) require.NoError(t, err) for range 2 { @@ -228,7 +228,7 @@ func TestLimiter_RedisUnavailableReturnsError(t *testing.T) { crd := &v1beta1.RateLimitConfig{ Shared: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, } - l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + l, err := NewLimiter(client, "ns", "srv", crd) require.NoError(t, err) mr.Close() @@ -245,7 +245,7 @@ func TestLimiter_PerUserServerLevel(t *testing.T) { crd := &v1beta1.RateLimitConfig{ PerUser: &v1beta1.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}}, } - l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + l, err := NewLimiter(client, "ns", "srv", crd) require.NoError(t, err) // User A exhausts their 2 tokens. @@ -265,28 +265,6 @@ func TestLimiter_PerUserServerLevel(t *testing.T) { assert.True(t, d.Allowed) } -func TestLimiter_PerUserServerLevelSharedAcrossBackendTools(t *testing.T) { - t.Parallel() - client, _ := newTestClient(t) - ctx := t.Context() - - crd := &v1beta1.RateLimitConfig{ - PerUser: &v1beta1.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}}, - } - l, err := NewLimiter(client, "ns", "vmcp", crd.ToInternal()) - require.NoError(t, err) - - for _, toolName := range []string{"backend_a_echo", "backend_b_search"} { - d, allowErr := l.Allow(ctx, toolName, "user-a") - require.NoError(t, allowErr) - require.True(t, d.Allowed) - } - - d, err := l.Allow(ctx, "backend_c_status", "user-a") - require.NoError(t, err) - assert.False(t, d.Allowed, "server-level per-user limit must be shared across all backend tools") -} - func TestLimiter_PerToolPerUserIsolation(t *testing.T) { t.Parallel() client, _ := newTestClient(t) @@ -300,7 +278,7 @@ func TestLimiter_PerToolPerUserIsolation(t *testing.T) { }, }, } - l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + l, err := NewLimiter(client, "ns", "srv", crd) require.NoError(t, err) // User A uses their 1 token for "search". @@ -338,7 +316,7 @@ func TestLimiter_ServerAndToolPerUserBothRequired(t *testing.T) { }, }, } - l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + l, err := NewLimiter(client, "ns", "srv", crd) require.NoError(t, err) // User A makes 2 "search" calls — both pass. @@ -359,66 +337,6 @@ func TestLimiter_ServerAndToolPerUserBothRequired(t *testing.T) { assert.True(t, d.Allowed) } -func TestLimiter_PerToolUsesPostAggregationName(t *testing.T) { - t.Parallel() - client, _ := newTestClient(t) - ctx := t.Context() - - crd := &v1beta1.RateLimitConfig{ - Tools: []v1beta1.ToolRateLimitConfig{ - { - Name: "backend_a_expensive_tool", - Global: &v1beta1.RateLimitBucket{MaxTokens: 1, RefillPeriod: metav1.Duration{Duration: time.Minute}}, - }, - }, - } - l, err := NewLimiter(client, "ns", "vmcp", crd.ToInternal()) - require.NoError(t, err) - - d, err := l.Allow(ctx, "backend_a_expensive_tool", "user-a") - require.NoError(t, err) - require.True(t, d.Allowed) - - d, err = l.Allow(ctx, "backend_a_expensive_tool", "user-b") - require.NoError(t, err) - assert.False(t, d.Allowed, "global per-tool limit should apply to the post-aggregation tool name") - - d, err = l.Allow(ctx, "expensive_tool", "user-a") - require.NoError(t, err) - assert.True(t, d.Allowed, "unprefixed backend name should not match the vMCP post-aggregation limit") -} - -func TestLimiter_GlobalAndPerUserBothEnforced(t *testing.T) { - t.Parallel() - client, _ := newTestClient(t) - ctx := t.Context() - - crd := &v1beta1.RateLimitConfig{ - Global: &v1beta1.RateLimitBucket{MaxTokens: 3, RefillPeriod: metav1.Duration{Duration: time.Minute}}, - PerUser: &v1beta1.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}}, - } - l, err := NewLimiter(client, "ns", "vmcp", crd.ToInternal()) - require.NoError(t, err) - - for range 2 { - d, allowErr := l.Allow(ctx, "backend_a_echo", "user-a") - require.NoError(t, allowErr) - require.True(t, d.Allowed) - } - - d, err := l.Allow(ctx, "backend_a_echo", "user-a") - require.NoError(t, err) - assert.False(t, d.Allowed, "per-user limit should reject user-a's third call") - - d, err = l.Allow(ctx, "backend_b_echo", "user-b") - require.NoError(t, err) - require.True(t, d.Allowed, "global bucket should still have one token because rejected calls do not drain") - - d, err = l.Allow(ctx, "backend_b_echo", "user-c") - require.NoError(t, err) - assert.False(t, d.Allowed, "global limit should reject after total allowed calls are exhausted") -} - func TestLimiter_PerUserRejectionDoesNotDrainShared(t *testing.T) { t.Parallel() client, _ := newTestClient(t) @@ -430,7 +348,7 @@ func TestLimiter_PerUserRejectionDoesNotDrainShared(t *testing.T) { Shared: &v1beta1.RateLimitBucket{MaxTokens: 3, RefillPeriod: metav1.Duration{Duration: time.Minute}}, PerUser: &v1beta1.RateLimitBucket{MaxTokens: 1, RefillPeriod: metav1.Duration{Duration: time.Minute}}, } - l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + l, err := NewLimiter(client, "ns", "srv", crd) require.NoError(t, err) // User A: first call passes (shared=2, user-a=0). @@ -465,7 +383,7 @@ func TestLimiter_RedisUnavailablePerUser(t *testing.T) { crd := &v1beta1.RateLimitConfig{ PerUser: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, } - l, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + l, err := NewLimiter(client, "ns", "srv", crd) require.NoError(t, err) mr.Close() @@ -481,7 +399,7 @@ func TestNewLimiter_PerUserZeroMaxTokens(t *testing.T) { crd := &v1beta1.RateLimitConfig{ PerUser: &v1beta1.RateLimitBucket{MaxTokens: 0, RefillPeriod: metav1.Duration{Duration: time.Minute}}, } - _, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + _, err := NewLimiter(client, "ns", "srv", crd) assert.Error(t, err) assert.Contains(t, err.Error(), "perUser bucket: maxTokens must be >= 1") } @@ -498,7 +416,7 @@ func TestNewLimiter_ToolPerUserZeroDuration(t *testing.T) { }, }, } - _, err := NewLimiter(client, "ns", "srv", crd.ToInternal()) + _, err := NewLimiter(client, "ns", "srv", crd) assert.Error(t, err) assert.Contains(t, err.Error(), `tool "search" perUser bucket: refillPeriod must be positive`) } diff --git a/pkg/ratelimit/middleware.go b/pkg/ratelimit/middleware.go index a472923b59..ee108fdf87 100644 --- a/pkg/ratelimit/middleware.go +++ b/pkg/ratelimit/middleware.go @@ -15,11 +15,10 @@ import ( "github.com/redis/go-redis/v9" + v1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" "github.com/stacklok/toolhive/pkg/auth" "github.com/stacklok/toolhive/pkg/mcp" - rlconfig "github.com/stacklok/toolhive/pkg/ratelimit/config" "github.com/stacklok/toolhive/pkg/transport/types" - "github.com/stacklok/toolhive/pkg/vmcp/session/optimizerdec" ) const ( @@ -40,11 +39,11 @@ const ( // MiddlewareParams holds the parameters for the rate limit middleware factory. type MiddlewareParams struct { - Namespace string `json:"namespace"` - ServerName string `json:"server_name"` - Config *rlconfig.Config `json:"config"` - RedisAddr string `json:"redis_addr,omitempty"` - RedisDB int32 `json:"redis_db,omitempty"` + Namespace string `json:"namespace"` + ServerName string `json:"server_name"` + Config *v1beta1.RateLimitConfig `json:"config"` + RedisAddr string `json:"redis_addr,omitempty"` + RedisDB int32 `json:"redis_db,omitempty"` } // rateLimitMiddleware wraps rate limiting functionality for the factory pattern. @@ -100,16 +99,16 @@ func CreateMiddleware(config *types.MiddlewareConfig, runner types.MiddlewareRun } mw := &rateLimitMiddleware{ - handler: HTTPMiddleware(limiter), + handler: rateLimitHandler(limiter), client: client, } runner.AddMiddleware(MiddlewareType, mw) return nil } -// HTTPMiddleware returns a middleware function that enforces rate limits +// rateLimitHandler returns a middleware function that enforces rate limits // on tools/call requests. -func HTTPMiddleware(limiter Limiter) types.MiddlewareFunction { +func rateLimitHandler(limiter Limiter) types.MiddlewareFunction { return func(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // Rate limits only apply to parsed tools/call requests. @@ -128,8 +127,7 @@ func HTTPMiddleware(limiter Limiter) types.MiddlewareFunction { if identity, ok := auth.IdentityFromContext(r.Context()); ok { userID = identity.Subject } - toolName := rateLimitToolName(parsed) - decision, err := limiter.Allow(r.Context(), toolName, userID) + decision, err := limiter.Allow(r.Context(), parsed.ResourceID, userID) if err != nil { slog.Warn("rate limit check failed, allowing request", "error", err) next.ServeHTTP(w, r) @@ -144,18 +142,6 @@ func HTTPMiddleware(limiter Limiter) types.MiddlewareFunction { } } -func rateLimitToolName(parsed *mcp.ParsedMCPRequest) string { - if parsed == nil { - return "" - } - if parsed.ResourceID == optimizerdec.CallToolName && parsed.Arguments != nil { - if toolName, ok := parsed.Arguments[optimizerdec.CallToolArgToolName].(string); ok && toolName != "" { - return toolName - } - } - return parsed.ResourceID -} - // writeRateLimited writes an HTTP 429 response with a JSON-RPC error body. func writeRateLimited(w http.ResponseWriter, requestID any, retryAfter time.Duration) { retrySeconds := int(math.Ceil(retryAfter.Seconds())) diff --git a/pkg/ratelimit/middleware_test.go b/pkg/ratelimit/middleware_test.go index c526665783..ed76e72e0c 100644 --- a/pkg/ratelimit/middleware_test.go +++ b/pkg/ratelimit/middleware_test.go @@ -61,23 +61,11 @@ func withParsedMCPRequest(r *http.Request, method, resourceID string, id any) *h return r.WithContext(ctx) } -func withParsedToolCall(r *http.Request, resourceID string, arguments map[string]interface{}, id any) *http.Request { - parsed := &mcp.ParsedMCPRequest{ - Method: "tools/call", - ResourceID: resourceID, - Arguments: arguments, - ID: id, - IsRequest: true, - } - ctx := context.WithValue(r.Context(), mcp.MCPRequestContextKey, parsed) - return r.WithContext(ctx) -} - func TestRateLimitHandler_ToolCallAllowed(t *testing.T) { t.Parallel() limiter := &dummyLimiter{decision: &Decision{Allowed: true}} - handler := HTTPMiddleware(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := rateLimitHandler(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) @@ -94,7 +82,7 @@ func TestRateLimitHandler_ToolCallRejected(t *testing.T) { t.Parallel() limiter := &dummyLimiter{decision: &Decision{Allowed: false, RetryAfter: 5 * time.Second}} - handler := HTTPMiddleware(limiter)(http.HandlerFunc(func(http.ResponseWriter, *http.Request) { + handler := rateLimitHandler(limiter)(http.HandlerFunc(func(http.ResponseWriter, *http.Request) { t.Fatal("next handler should not be called when rate limited") })) @@ -126,7 +114,7 @@ func TestRateLimitHandler_RedisErrorFailOpen(t *testing.T) { limiter := &dummyLimiter{err: errors.New("redis connection refused")} nextCalled := false - handler := HTTPMiddleware(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := rateLimitHandler(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { nextCalled = true w.WriteHeader(http.StatusOK) })) @@ -146,7 +134,7 @@ func TestRateLimitHandler_NoParsedMCPRequest_PassesThrough(t *testing.T) { limiter := &dummyLimiter{decision: &Decision{Allowed: false}} nextCalled := false - handler := HTTPMiddleware(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := rateLimitHandler(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { nextCalled = true w.WriteHeader(http.StatusOK) })) @@ -166,7 +154,7 @@ func TestRateLimitHandler_NonToolCallPassesThrough(t *testing.T) { limiter := &dummyLimiter{decision: &Decision{Allowed: false, RetryAfter: time.Second}} nextCalled := false - handler := HTTPMiddleware(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := rateLimitHandler(limiter)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { nextCalled = true w.WriteHeader(http.StatusOK) })) @@ -185,7 +173,7 @@ func TestRateLimitHandler_PassesUserID(t *testing.T) { t.Parallel() recorder := &recordingLimiter{} - handler := HTTPMiddleware(recorder)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := rateLimitHandler(recorder)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) @@ -201,36 +189,11 @@ func TestRateLimitHandler_PassesUserID(t *testing.T) { assert.Equal(t, "alice@example.com", recorder.userID) } -func TestRateLimitHandler_OptimizerCallToolUsesInnerToolName(t *testing.T) { - t.Parallel() - - recorder := &recordingLimiter{} - handler := HTTPMiddleware(recorder)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusOK) - })) - - req := httptest.NewRequest(http.MethodPost, "/mcp", nil) - req = withParsedToolCall(req, "call_tool", map[string]interface{}{ - "tool_name": "backend_a_expensive_tool", - "parameters": map[string]interface{}{ - "query": "test", - }, - }, 1) - req = withIdentity(req, "alice@example.com") - w := httptest.NewRecorder() - - handler.ServeHTTP(w, req) - - assert.Equal(t, http.StatusOK, w.Code) - assert.Equal(t, "backend_a_expensive_tool", recorder.toolName) - assert.Equal(t, "alice@example.com", recorder.userID) -} - func TestRateLimitHandler_NoIdentityPassesEmptyUserID(t *testing.T) { t.Parallel() recorder := &recordingLimiter{} - handler := HTTPMiddleware(recorder)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := rateLimitHandler(recorder)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) diff --git a/pkg/runner/middleware.go b/pkg/runner/middleware.go index 0d711e4439..be9dd33506 100644 --- a/pkg/runner/middleware.go +++ b/pkg/runner/middleware.go @@ -434,7 +434,7 @@ func addRateLimitMiddleware(middlewares []types.MiddlewareConfig, config *RunCon params := ratelimit.MiddlewareParams{ Namespace: config.RateLimitNamespace, ServerName: config.Name, - Config: config.RateLimitConfig.ToInternal(), + Config: config.RateLimitConfig, RedisAddr: redisAddr, RedisDB: redisDB, } diff --git a/pkg/vmcp/cli/serve.go b/pkg/vmcp/cli/serve.go index 72b72148e6..ffb501115c 100644 --- a/pkg/vmcp/cli/serve.go +++ b/pkg/vmcp/cli/serve.go @@ -386,8 +386,6 @@ func Serve(ctx context.Context, cfg ServeConfig) error { OptimizerConfig: optCfg, SessionFactory: sessionFactory, SessionStorage: vmcpCfg.SessionStorage, - RateLimiting: vmcpCfg.RateLimiting, - RateLimitNamespace: os.Getenv("VMCP_NAMESPACE"), } // Assign Watcher only when backendWatcher is non-nil. A typed nil diff --git a/pkg/vmcp/config/config.go b/pkg/vmcp/config/config.go index 9deabf6cd0..4d46cf9032 100644 --- a/pkg/vmcp/config/config.go +++ b/pkg/vmcp/config/config.go @@ -15,7 +15,6 @@ import ( "github.com/stacklok/toolhive/pkg/audit" thvjson "github.com/stacklok/toolhive/pkg/json" - rlconfig "github.com/stacklok/toolhive/pkg/ratelimit/config" "github.com/stacklok/toolhive/pkg/telemetry" "github.com/stacklok/toolhive/pkg/vmcp" authtypes "github.com/stacklok/toolhive/pkg/vmcp/auth/types" @@ -181,7 +180,7 @@ type Config struct { // This remains YAML-only so it can be written to the runtime config ConfigMap // without exposing spec.config.rateLimiting in the Kubernetes API. // +optional - RateLimiting *rlconfig.Config `json:"-" yaml:"rateLimiting,omitempty"` + RateLimiting *thvjson.Map `json:"-" yaml:"rateLimiting,omitempty"` } // IncomingAuthConfig configures client authentication to the virtual MCP server. diff --git a/pkg/vmcp/config/config_test.go b/pkg/vmcp/config/config_test.go index 5f652ebce0..da9e88aeda 100644 --- a/pkg/vmcp/config/config_test.go +++ b/pkg/vmcp/config/config_test.go @@ -4,6 +4,7 @@ package config import ( + "encoding/json" "fmt" "os" "path/filepath" @@ -15,10 +16,36 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "gopkg.in/yaml.v3" + thvjson "github.com/stacklok/toolhive/pkg/json" authtypes "github.com/stacklok/toolhive/pkg/vmcp/auth/types" ) +func TestConfigRateLimitingIsYAMLOnly(t *testing.T) { + t.Parallel() + + rateLimiting := thvjson.NewMap(map[string]any{ + "global": map[string]any{ + "maxTokens": 10, + "refillPeriod": "1m0s", + }, + }) + cfg := Config{ + Name: "test-vmcp", + RateLimiting: &rateLimiting, + } + + jsonBytes, err := json.Marshal(cfg) + require.NoError(t, err) + assert.NotContains(t, string(jsonBytes), "rateLimiting") + + yamlBytes, err := yaml.Marshal(cfg) + require.NoError(t, err) + assert.Contains(t, string(yamlBytes), "rateLimiting:") + assert.Contains(t, string(yamlBytes), "maxTokens: 10") +} + func TestOutgoingAuthConfig_ResolveForBackend(t *testing.T) { t.Parallel() diff --git a/pkg/vmcp/server/ratelimit.go b/pkg/vmcp/server/ratelimit.go deleted file mode 100644 index 95d0c2f16e..0000000000 --- a/pkg/vmcp/server/ratelimit.go +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. -// SPDX-License-Identifier: Apache-2.0 - -package server - -import ( - "context" - "fmt" - "log/slog" - "os" - "strings" - "time" - - "github.com/redis/go-redis/v9" - - "github.com/stacklok/toolhive/pkg/ratelimit" - transporttypes "github.com/stacklok/toolhive/pkg/transport/types" - vmcpconfig "github.com/stacklok/toolhive/pkg/vmcp/config" -) - -type httpMiddleware = transporttypes.MiddlewareFunction - -type shutdownFunc func(context.Context) error - -func (s *Server) buildRateLimitMiddleware(ctx context.Context) (httpMiddleware, shutdownFunc, error) { - if s.config.RateLimiting == nil { - return nil, nil, nil - } - if s.config.SessionStorage == nil || - !strings.EqualFold(s.config.SessionStorage.Provider, "redis") || - s.config.SessionStorage.Address == "" { - return nil, nil, fmt.Errorf("rate limiting requires sessionStorage with provider redis") - } - - client := redis.NewClient(&redis.Options{ - Addr: s.config.SessionStorage.Address, - DB: int(s.config.SessionStorage.DB), - Password: os.Getenv(vmcpconfig.RedisPasswordEnvVar), - }) - - pingCtx, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() - if err := client.Ping(pingCtx).Err(); err != nil { - _ = client.Close() - return nil, nil, fmt.Errorf("rate limit middleware: failed to connect to Redis at %s: %w", - s.config.SessionStorage.Address, err) - } - - namespace := s.config.RateLimitNamespace - if namespace == "" { - namespace = os.Getenv("VMCP_NAMESPACE") - } - if namespace == "" { - namespace = "default" - } - - limiter, err := ratelimit.NewLimiter(client, namespace, s.config.Name, s.config.RateLimiting) - if err != nil { - _ = client.Close() - return nil, nil, fmt.Errorf("failed to create rate limiter: %w", err) - } - - slog.Info("rate limit middleware enabled for vMCP", - "namespace", namespace, - "name", s.config.Name, - "redis_addr", s.config.SessionStorage.Address, - "redis_db", s.config.SessionStorage.DB) - - return ratelimit.HTTPMiddleware(limiter), func(context.Context) error { - return client.Close() - }, nil -} diff --git a/pkg/vmcp/server/ratelimit_test.go b/pkg/vmcp/server/ratelimit_test.go deleted file mode 100644 index a804a1ace9..0000000000 --- a/pkg/vmcp/server/ratelimit_test.go +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. -// SPDX-License-Identifier: Apache-2.0 - -package server - -import ( - "context" - "net/http" - "net/http/httptest" - "testing" - "time" - - "github.com/alicebob/miniredis/v2" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - "github.com/stacklok/toolhive/pkg/auth" - mcpparser "github.com/stacklok/toolhive/pkg/mcp" - rlconfig "github.com/stacklok/toolhive/pkg/ratelimit/config" - vmcpconfig "github.com/stacklok/toolhive/pkg/vmcp/config" -) - -func TestBuildRateLimitMiddleware_EnforcesToolsCall(t *testing.T) { - t.Parallel() - - redisServer := miniredis.RunT(t) - srv := &Server{ - config: &Config{ - Name: "vmcp-a", - RateLimitNamespace: "default", - SessionStorage: &vmcpconfig.SessionStorageConfig{ - Provider: "redis", - Address: redisServer.Addr(), - }, - RateLimiting: &rlconfig.Config{ - PerUser: &rlconfig.Bucket{ - MaxTokens: 1, - RefillPeriod: metav1.Duration{Duration: time.Minute}, - }, - }, - }, - } - - middleware, closeFn, err := srv.buildRateLimitMiddleware(context.Background()) - require.NoError(t, err) - require.NotNil(t, middleware) - t.Cleanup(func() { - if closeFn != nil { - require.NoError(t, closeFn(context.Background())) - } - }) - - handler := middleware(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusOK) - })) - - req := httptest.NewRequest(http.MethodPost, "/mcp", nil) - req = req.WithContext(auth.WithIdentity(req.Context(), &auth.Identity{ - PrincipalInfo: auth.PrincipalInfo{Subject: "alice"}, - })) - req = req.WithContext(context.WithValue(req.Context(), mcpparser.MCPRequestContextKey, &mcpparser.ParsedMCPRequest{ - Method: "tools/call", - ResourceID: "backend_a_echo", - ID: 1, - IsRequest: true, - })) - - first := httptest.NewRecorder() - handler.ServeHTTP(first, req) - assert.Equal(t, http.StatusOK, first.Code) - - second := httptest.NewRecorder() - handler.ServeHTTP(second, req) - assert.Equal(t, http.StatusTooManyRequests, second.Code) -} diff --git a/pkg/vmcp/server/server.go b/pkg/vmcp/server/server.go index 694e43b25d..23c415a551 100644 --- a/pkg/vmcp/server/server.go +++ b/pkg/vmcp/server/server.go @@ -28,7 +28,6 @@ import ( "github.com/stacklok/toolhive/pkg/auth" asrunner "github.com/stacklok/toolhive/pkg/authserver/runner" mcpparser "github.com/stacklok/toolhive/pkg/mcp" - rlconfig "github.com/stacklok/toolhive/pkg/ratelimit/config" "github.com/stacklok/toolhive/pkg/recovery" "github.com/stacklok/toolhive/pkg/telemetry" transportmiddleware "github.com/stacklok/toolhive/pkg/transport/middleware" @@ -182,12 +181,6 @@ type Config struct { // session persistence; the Redis password is read from the // THV_SESSION_REDIS_PASSWORD environment variable. SessionStorage *vmcpconfig.SessionStorageConfig - - // RateLimiting configures rate limiting for incoming tools/call requests. - RateLimiting *rlconfig.Config - - // RateLimitNamespace is used with Name to derive Redis keys. - RateLimitNamespace string } // Server is the Virtual MCP Server that aggregates multiple backends. @@ -532,7 +525,7 @@ func New( // Each call builds a fresh handler. The method is safe to call multiple times. // All returned handlers share the same underlying MCPServer and SessionManager, // so callers should not serve concurrent traffic through multiple handlers. -func (s *Server) Handler(ctx context.Context) (http.Handler, error) { +func (s *Server) Handler(_ context.Context) (http.Handler, error) { // Create Streamable HTTP server with ToolHive session management streamableServer := server.NewStreamableHTTPServer( s.mcpServer, @@ -544,25 +537,6 @@ func (s *Server) Handler(ctx context.Context) (http.Handler, error) { // Create HTTP mux with separated authenticated and unauthenticated routes mux := http.NewServeMux() - s.registerHTTPRoutes(mux) - - // MCP endpoint - apply middleware chain (wrapping order, execution happens in reverse): - // Code wraps: auth → MCP-parsing → rate-limit → audit → discovery → - // annotation-enrichment → authz → backend-enrichment → telemetry - // Execution order: recovery → header-val → auth → MCP-parsing → - // rate-limit → audit → discovery → annotation-enrichment → authz → - // backend-enrichment → telemetry → handler - mcpHandler, err := s.buildMCPHandler(ctx, streamableServer) - if err != nil { - return nil, err - } - - mux.Handle("/", mcpHandler) - - return mux, nil -} - -func (s *Server) registerHTTPRoutes(mux *http.ServeMux) { // Unauthenticated health endpoints mux.HandleFunc("/health", s.handleHealth) mux.HandleFunc("/ping", s.handleHealth) @@ -596,56 +570,32 @@ func (s *Server) registerHTTPRoutes(mux *http.ServeMux) { s.config.AuthServer.RegisterHandlers(mux) slog.Debug("embedded authorization server routes registered") } -} - -func (s *Server) buildMCPHandler(ctx context.Context, base http.Handler) (http.Handler, error) { - mcpHandler := s.wrapMCPHandler(base) - - if err := s.applyAuditMiddleware(&mcpHandler); err != nil { - return nil, err - } - if err := s.applyRateLimitMiddleware(ctx, &mcpHandler); err != nil { - return nil, err - } - - // Apply MCP parsing middleware to extract JSON-RPC method from request body. - // This must run before rate limiting so global limits also work when auth is - // disabled, and before telemetry so metrics can use the real MCP method. - mcpHandler = mcpparser.ParsingMiddleware(mcpHandler) - - // Apply authentication middleware if configured (runs first in chain) - if s.config.AuthMiddleware != nil { - mcpHandler = s.config.AuthMiddleware(mcpHandler) - slog.Info("authentication middleware enabled for MCP endpoints") - } - - // Apply Accept header validation (rejects GET requests without Accept: text/event-stream) - mcpHandler = headerValidatingMiddleware(mcpHandler) - // Clear the write deadline for qualifying SSE connections (GET + - // Accept: text/event-stream + MCP endpoint path) so the server-level - // WriteTimeout does not kill long-lived SSE streams (see golang/go#16100). - // Non-qualifying requests are left untouched; http.Server.WriteTimeout - // (defaultWriteTimeout) remains in effect for them. - mcpHandler = transportmiddleware.WriteTimeout(s.config.EndpointPath)(mcpHandler) - - // Apply recovery middleware as outermost (catches panics from all inner middleware) - mcpHandler = recovery.Middleware(mcpHandler) - slog.Info("recovery middleware enabled for MCP endpoints") - - return mcpHandler, nil -} + // MCP endpoint - apply middleware chain (wrapping order, execution happens in reverse): + // Code wraps: auth+parser → audit → discovery → annotation-enrichment → + // authz → backend-enrichment → MCP-parsing → telemetry + // Execution order: recovery → header-val → auth+parser → audit → + // discovery → annotation-enrichment → authz → backend-enrichment → + // MCP-parsing → telemetry → handler -func (s *Server) wrapMCPHandler(base http.Handler) http.Handler { - mcpHandler := base + var mcpHandler http.Handler = streamableServer if s.config.TelemetryProvider != nil { mcpHandler = s.config.TelemetryProvider.Middleware(s.config.Name, "streamable-http")(mcpHandler) slog.Info("telemetry middleware enabled for MCP endpoints") } - // Apply backend enrichment middleware if audit is configured. - // This runs after discovery populates the routing table, so it can extract backend names. + // Apply MCP parsing middleware to extract JSON-RPC method from request body. + // This runs before telemetry so that recordMetrics can label metrics with the + // actual mcp_method (e.g. "tools/call", "initialize") instead of "unknown". + // Note: ParsingMiddleware is also composed inside the auth middleware (for audit/authz). + // The second application here is a no-op because the context already holds a + // ParsedMCPRequest; it exists only so the telemetry layer works correctly even + // when auth middleware is nil. + mcpHandler = mcpparser.ParsingMiddleware(mcpHandler) + + // Apply backend enrichment middleware if audit is configured + // This runs after discovery populates the routing table, so it can extract backend names if s.config.AuditConfig != nil { mcpHandler = s.backendEnrichmentMiddleware(mcpHandler) slog.Info("backend enrichment middleware enabled for audit events") @@ -656,63 +606,75 @@ func (s *Server) wrapMCPHandler(base http.Handler) http.Handler { if s.config.AuthzMiddleware != nil { mcpHandler = s.config.AuthzMiddleware(mcpHandler) slog.Info("authorization middleware enabled for MCP endpoints (post-discovery)") + } + + // Apply annotation enrichment middleware (runs after discovery, before authz in execution). + // Reads tool annotations from discovered capabilities and injects them into the + // request context so the authz middleware can make annotation-aware decisions. + if s.config.AuthzMiddleware != nil { mcpHandler = AnnotationEnrichmentMiddleware(mcpHandler) slog.Info("annotation enrichment middleware enabled for MCP endpoints") } - healthStatusProvider := s.getHealthStatusProvider() + // Apply discovery middleware (runs after audit/auth middleware) + // Discovery middleware performs per-request capability aggregation with user context. + // vmcpSessionMgr (MultiSessionGetter) is used to retrieve the fully-formed MultiSession + // for subsequent requests so the routing table can be injected into context. + // The backend registry provides a dynamic backend list (supports DynamicRegistry for K8s). + // The health monitor enables filtering based on current health status (respects circuit breaker). + s.healthMonitorMu.RLock() + healthMon := s.healthMonitor + s.healthMonitorMu.RUnlock() + + var healthStatusProvider health.StatusProvider + if healthMon != nil { + healthStatusProvider = healthMon + } mcpHandler = discovery.Middleware( s.discoveryMgr, s.backendRegistry, s.vmcpSessionMgr, healthStatusProvider, discovery.WithSessionScopedRouting(), )(mcpHandler) slog.Info("discovery middleware enabled for lazy per-user capability discovery") - return mcpHandler -} - -func (s *Server) getHealthStatusProvider() health.StatusProvider { - s.healthMonitorMu.RLock() - defer s.healthMonitorMu.RUnlock() - - if s.healthMonitor == nil { - return nil + // Apply audit middleware if configured (runs after auth, before discovery) + if s.config.AuditConfig != nil { + if err := s.config.AuditConfig.Validate(); err != nil { + return nil, fmt.Errorf("invalid audit configuration: %w", err) + } + auditor, err := audit.NewAuditorWithTransport( + s.config.AuditConfig, + "streamable-http", // vMCP uses streamable HTTP transport + ) + if err != nil { + return nil, fmt.Errorf("failed to create auditor: %w", err) + } + mcpHandler = auditor.Middleware(mcpHandler) + slog.Info("audit middleware enabled for MCP endpoints") } - return s.healthMonitor -} -func (s *Server) applyAuditMiddleware(mcpHandler *http.Handler) error { - if s.config.AuditConfig == nil { - return nil - } - if err := s.config.AuditConfig.Validate(); err != nil { - return fmt.Errorf("invalid audit configuration: %w", err) - } - auditor, err := audit.NewAuditorWithTransport( - s.config.AuditConfig, - "streamable-http", // vMCP uses streamable HTTP transport - ) - if err != nil { - return fmt.Errorf("failed to create auditor: %w", err) + // Apply authentication middleware if configured (runs first in chain) + if s.config.AuthMiddleware != nil { + mcpHandler = s.config.AuthMiddleware(mcpHandler) + slog.Info("authentication middleware enabled for MCP endpoints") } - *mcpHandler = auditor.Middleware(*mcpHandler) - slog.Info("audit middleware enabled for MCP endpoints") - return nil -} -func (s *Server) applyRateLimitMiddleware(ctx context.Context, mcpHandler *http.Handler) error { - rateLimitMiddleware, closeRateLimit, err := s.buildRateLimitMiddleware(ctx) - if err != nil { - return err - } - if rateLimitMiddleware == nil { - return nil - } + // Apply Accept header validation (rejects GET requests without Accept: text/event-stream) + mcpHandler = headerValidatingMiddleware(mcpHandler) - *mcpHandler = rateLimitMiddleware(*mcpHandler) - if closeRateLimit != nil { - s.shutdownFuncs = append(s.shutdownFuncs, closeRateLimit) - } - return nil + // Clear the write deadline for qualifying SSE connections (GET + + // Accept: text/event-stream + MCP endpoint path) so the server-level + // WriteTimeout does not kill long-lived SSE streams (see golang/go#16100). + // Non-qualifying requests are left untouched; http.Server.WriteTimeout + // (defaultWriteTimeout) remains in effect for them. + mcpHandler = transportmiddleware.WriteTimeout(s.config.EndpointPath)(mcpHandler) + + // Apply recovery middleware as outermost (catches panics from all inner middleware) + mcpHandler = recovery.Middleware(mcpHandler) + slog.Info("recovery middleware enabled for MCP endpoints") + + mux.Handle("/", mcpHandler) + + return mux, nil } // Start starts the Virtual MCP Server and begins serving requests. diff --git a/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go b/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go index 12cebf47d2..61a24c86be 100644 --- a/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go +++ b/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go @@ -10,6 +10,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -460,9 +461,30 @@ var _ = Describe("VirtualMCPServer Circuit Breaker Lifecycle", Ordered, func() { backend.Spec.Image = images.YardstickServerImage Expect(k8sClient.Update(ctx, backend)).To(Succeed()) + By("Waiting for backend StatefulSet template to use the fixed image") + Eventually(func() error { + sts := &appsv1.StatefulSet{} + if err := k8sClient.Get(ctx, types.NamespacedName{ + Name: backend2Name, + Namespace: testNamespace, + }, sts); err != nil { + return err + } + for _, container := range sts.Spec.Template.Spec.Containers { + if container.Name == "mcp" { + if container.Image != images.YardstickServerImage { + return fmt.Errorf("statefulset still has image %q", container.Image) + } + return nil + } + } + return fmt.Errorf("mcp container not found in statefulset template") + }, timeout, pollingInterval).Should(Succeed()) + By("Deleting stuck pods to force recreation with fixed image") // Pods in ImagePullBackOff don't automatically recreate when image is fixed - // Delete them to force the statefulset to create new pods with the correct image + // Delete them after the statefulset template is updated, otherwise the old template + // can immediately recreate the pod with the broken image again. podList := &corev1.PodList{} Expect(k8sClient.List(ctx, podList, client.InNamespace(testNamespace), diff --git a/test/e2e/thv-operator/virtualmcp/virtualmcp_ratelimit_test.go b/test/e2e/thv-operator/virtualmcp/virtualmcp_ratelimit_test.go deleted file mode 100644 index de9900ba6c..0000000000 --- a/test/e2e/thv-operator/virtualmcp/virtualmcp_ratelimit_test.go +++ /dev/null @@ -1,248 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. -// SPDX-License-Identifier: Apache-2.0 - -package virtualmcp - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" - "github.com/stacklok/toolhive/test/e2e/images" -) - -var _ = Describe("VirtualMCPServer Rate Limiting", Ordered, func() { - const ( - timeout = 3 * time.Minute - pollingInterval = 1 * time.Second - ) - - var ( - httpClient *http.Client - mcpGroupName string - backendName string - vmcpName string - oidcName string - oidcConfig string - redisName string - oidcNodePort int32 - vmcpNodePort int32 - cleanupOIDC func() - ) - - BeforeAll(func() { - httpClient = &http.Client{Timeout: 30 * time.Second} - - timestamp := time.Now().UnixNano() - mcpGroupName = fmt.Sprintf("e2e-vmcp-rl-group-%d", timestamp) - backendName = fmt.Sprintf("e2e-vmcp-rl-backend-%d", timestamp) - vmcpName = fmt.Sprintf("e2e-vmcp-rl-%d", timestamp) - oidcName = fmt.Sprintf("e2e-vmcp-rl-oidc-%d", timestamp) - oidcConfig = fmt.Sprintf("e2e-vmcp-rl-oidc-config-%d", timestamp) - redisName = fmt.Sprintf("e2e-vmcp-rl-redis-%d", timestamp) - - By("Deploying Redis for vMCP session storage and rate limiting") - deployRedis(redisName) - - By("Deploying parameterized OIDC server for per-user identity") - var issuerURL string - issuerURL, oidcNodePort, cleanupOIDC = DeployParameterizedOIDCServer( - ctx, k8sClient, oidcName, defaultNamespace, timeout, pollingInterval) - - By("Creating MCPOIDCConfig for vMCP incoming auth") - Expect(k8sClient.Create(ctx, &mcpv1beta1.MCPOIDCConfig{ - ObjectMeta: metav1.ObjectMeta{Name: oidcConfig, Namespace: defaultNamespace}, - Spec: mcpv1beta1.MCPOIDCConfigSpec{ - Type: mcpv1beta1.MCPOIDCConfigTypeInline, - Inline: &mcpv1beta1.InlineOIDCSharedConfig{ - Issuer: issuerURL, - InsecureAllowHTTP: true, - JWKSAllowPrivateIP: true, - ProtectedResourceAllowPrivateIP: true, - }, - }, - })).To(Succeed()) - - By("Creating MCPGroup and yardstick backend") - CreateMCPGroupAndWait(ctx, k8sClient, mcpGroupName, defaultNamespace, - "VirtualMCPServer rate limit e2e group", timeout, pollingInterval) - CreateMCPServerAndWait(ctx, k8sClient, backendName, defaultNamespace, mcpGroupName, - images.YardstickServerImage, timeout, pollingInterval) - - By("Creating VirtualMCPServer with per-user rate limit") - redisAddr := fmt.Sprintf("%s.%s.svc.cluster.local:6379", redisName, defaultNamespace) - Expect(k8sClient.Create(ctx, &mcpv1beta1.VirtualMCPServer{ - ObjectMeta: metav1.ObjectMeta{Name: vmcpName, Namespace: defaultNamespace}, - Spec: mcpv1beta1.VirtualMCPServerSpec{ - GroupRef: &mcpv1beta1.MCPGroupRef{Name: mcpGroupName}, - IncomingAuth: &mcpv1beta1.IncomingAuthConfig{ - Type: "oidc", - OIDCConfigRef: &mcpv1beta1.MCPOIDCConfigReference{ - Name: oidcConfig, - Audience: "vmcp-audience", - }, - }, - ServiceType: "NodePort", - SessionStorage: &mcpv1beta1.SessionStorageConfig{ - Provider: mcpv1beta1.SessionStorageProviderRedis, - Address: redisAddr, - }, - RateLimiting: &mcpv1beta1.RateLimitConfig{ - PerUser: &mcpv1beta1.RateLimitBucket{ - MaxTokens: 2, - RefillPeriod: metav1.Duration{Duration: time.Minute}, - }, - }, - }, - })).To(Succeed()) - - By("Waiting for VirtualMCPServer to be ready") - WaitForVirtualMCPServerReady(ctx, k8sClient, vmcpName, defaultNamespace, timeout, pollingInterval) - WaitForCondition(ctx, k8sClient, vmcpName, defaultNamespace, "BackendsDiscovered", "True", timeout, pollingInterval) - vmcpNodePort = GetVMCPNodePort(ctx, k8sClient, vmcpName, defaultNamespace, timeout, pollingInterval) - }) - - AfterAll(func() { - _ = k8sClient.Delete(ctx, &mcpv1beta1.VirtualMCPServer{ - ObjectMeta: metav1.ObjectMeta{Name: vmcpName, Namespace: defaultNamespace}, - }) - _ = k8sClient.Delete(ctx, &mcpv1beta1.MCPServer{ - ObjectMeta: metav1.ObjectMeta{Name: backendName, Namespace: defaultNamespace}, - }) - _ = k8sClient.Delete(ctx, &mcpv1beta1.MCPGroup{ - ObjectMeta: metav1.ObjectMeta{Name: mcpGroupName, Namespace: defaultNamespace}, - }) - _ = k8sClient.Delete(ctx, &mcpv1beta1.MCPOIDCConfig{ - ObjectMeta: metav1.ObjectMeta{Name: oidcConfig, Namespace: defaultNamespace}, - }) - cleanupRedis(redisName) - if cleanupOIDC != nil { - cleanupOIDC() - } - }) - - It("rejects tools/call after the per-user limit is exceeded", func() { - token := getVMCPRateLimitOIDCToken(ctx, httpClient, oidcNodePort, "vmcp-rate-limit-user") - sessionID := sendVMCPRateLimitInitialize(ctx, httpClient, vmcpNodePort, token) - - By("Sending requests within the per-user rate limit") - for i := range 2 { - status, body, _ := sendVMCPRateLimitToolCall(ctx, httpClient, vmcpNodePort, "echo", i+1, token, sessionID) - Expect(status).To(Equal(http.StatusOK), - "request %d should succeed, got status %d: %s", i+1, status, string(body)) - } - - By("Sending request that exceeds the per-user rate limit") - status, body, retryAfter := sendVMCPRateLimitToolCall(ctx, httpClient, vmcpNodePort, "echo", 3, token, sessionID) - Expect(status).To(Equal(http.StatusTooManyRequests), - "third request should be rate limited, body: %s", string(body)) - Expect(retryAfter).ToNot(BeEmpty(), "Retry-After header should be set") - - var rpcResp map[string]any - Expect(json.Unmarshal(body, &rpcResp)).To(Succeed()) - errObj, ok := rpcResp["error"].(map[string]any) - Expect(ok).To(BeTrue(), "response should include JSON-RPC error") - Expect(errObj["code"]).To(BeEquivalentTo(-32029)) - Expect(errObj["message"]).To(Equal("Rate limit exceeded")) - }) -}) - -func getVMCPRateLimitOIDCToken(ctx context.Context, httpClient *http.Client, oidcNodePort int32, subject string) string { - url := fmt.Sprintf("http://localhost:%d/token?subject=%s", oidcNodePort, subject) - req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, nil) - Expect(err).ToNot(HaveOccurred()) - - resp, err := httpClient.Do(req) - Expect(err).ToNot(HaveOccurred()) - defer func() { _ = resp.Body.Close() }() - - var tokenResp struct { - AccessToken string `json:"access_token"` - } - Expect(json.NewDecoder(resp.Body).Decode(&tokenResp)).To(Succeed()) - Expect(tokenResp.AccessToken).ToNot(BeEmpty(), "OIDC server should return a token") - return tokenResp.AccessToken -} - -func sendVMCPRateLimitInitialize( - ctx context.Context, httpClient *http.Client, port int32, bearerToken string, -) string { - reqBody := map[string]any{ - "jsonrpc": "2.0", - "id": 0, - "method": "initialize", - "params": map[string]any{ - "protocolVersion": "2025-03-26", - "capabilities": map[string]any{}, - "clientInfo": map[string]any{ - "name": "vmcp-rate-limit-e2e", - "version": "1.0.0", - }, - }, - } - bodyBytes, err := json.Marshal(reqBody) - Expect(err).ToNot(HaveOccurred()) - - url := fmt.Sprintf("http://localhost:%d/mcp", port) - req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes)) - Expect(err).ToNot(HaveOccurred()) - req.Header.Set("Content-Type", "application/json") - req.Header.Set("Accept", "application/json, text/event-stream") - req.Header.Set("Authorization", "Bearer "+bearerToken) - - resp, err := httpClient.Do(req) - Expect(err).ToNot(HaveOccurred()) - defer func() { _ = resp.Body.Close() }() - Expect(resp.StatusCode).To(Equal(http.StatusOK), "initialize should succeed") - - sessionID := resp.Header.Get("Mcp-Session-Id") - Expect(sessionID).ToNot(BeEmpty(), "initialize response should include Mcp-Session-Id header") - return sessionID -} - -func sendVMCPRateLimitToolCall( - ctx context.Context, - httpClient *http.Client, - port int32, - toolName string, - requestID int, - bearerToken string, - sessionID string, -) (int, []byte, string) { - reqBody := map[string]any{ - "jsonrpc": "2.0", - "id": requestID, - "method": "tools/call", - "params": map[string]any{ - "name": toolName, - "arguments": map[string]any{"input": "test"}, - }, - } - bodyBytes, err := json.Marshal(reqBody) - Expect(err).ToNot(HaveOccurred()) - - url := fmt.Sprintf("http://localhost:%d/mcp", port) - req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyBytes)) - Expect(err).ToNot(HaveOccurred()) - req.Header.Set("Content-Type", "application/json") - req.Header.Set("Accept", "application/json, text/event-stream") - req.Header.Set("Authorization", "Bearer "+bearerToken) - req.Header.Set("Mcp-Session-Id", sessionID) - - resp, err := httpClient.Do(req) - Expect(err).ToNot(HaveOccurred()) - defer func() { _ = resp.Body.Close() }() - - respBody, err := io.ReadAll(resp.Body) - Expect(err).ToNot(HaveOccurred()) - return resp.StatusCode, respBody, resp.Header.Get("Retry-After") -} From f96d0af3518ba34d8130a2e1d7e37c0d0faa340e Mon Sep 17 00:00:00 2001 From: Sanskarzz Date: Thu, 30 Apr 2026 03:41:59 +0530 Subject: [PATCH 3/4] Address vMCP rate limit review feedback Signed-off-by: Sanskarzz --- .../api/v1beta1/mcpserver_types.go | 22 +-- .../api/v1beta1/mcpserver_types_test.go | 13 +- .../api/v1beta1/zz_generated.deepcopy.go | 12 +- .../virtualmcpserver_vmcpconfig.go | 13 +- .../virtualmcpserver_vmcpconfig_test.go | 23 ++-- cmd/thv-operator/pkg/vmcpconfig/converter.go | 25 ---- .../pkg/vmcpconfig/converter_test.go | 91 ------------- ...irtualmcpserver_sessionstorage_cel_test.go | 2 +- .../toolhive.stacklok.dev_mcpservers.yaml | 128 +++--------------- ...olhive.stacklok.dev_virtualmcpservers.yaml | 128 +++--------------- .../toolhive.stacklok.dev_mcpservers.yaml | 128 +++--------------- ...olhive.stacklok.dev_virtualmcpservers.yaml | 128 +++--------------- docs/operator/crd-api.md | 10 +- docs/server/docs.go | 6 - docs/server/swagger.json | 6 - docs/server/swagger.yaml | 4 - pkg/ratelimit/internal/bucket/bucket.go | 2 +- pkg/ratelimit/limiter.go | 32 +---- pkg/ratelimit/limiter_test.go | 45 +----- pkg/vmcp/config/config.go | 8 -- pkg/vmcp/config/config_test.go | 27 ---- pkg/vmcp/config/zz_generated.deepcopy.go | 4 - 22 files changed, 120 insertions(+), 737 deletions(-) diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types.go b/cmd/thv-operator/api/v1beta1/mcpserver_types.go index ad9dc96b0f..35fb76fe5b 100644 --- a/cmd/thv-operator/api/v1beta1/mcpserver_types.go +++ b/cmd/thv-operator/api/v1beta1/mcpserver_types.go @@ -505,15 +505,11 @@ type SessionStorageConfig struct { // RateLimitConfig defines rate limiting configuration for an MCP server. // At least one of shared, perUser, or tools must be configured. // -// +kubebuilder:validation:XValidation:rule="has(self.global) || has(self.shared) || has(self.perUser) || (has(self.tools) && size(self.tools) > 0)",message="at least one of global, shared, perUser, or tools must be configured" +// +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser) || (has(self.tools) && size(self.tools) > 0)",message="at least one of shared, perUser, or tools must be configured" // -//nolint:lll // CEL validation rules exceed line length limit +//nolint:lll // kubebuilder marker exceeds line length type RateLimitConfig struct { - // Global is a token bucket shared across all users for the entire server. - // +optional - Global *RateLimitBucket `json:"global,omitempty" yaml:"global,omitempty"` - - // Shared is a deprecated alias for Global. Use global instead. + // Shared is a token bucket shared across all users for the entire server. // +optional Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"` @@ -534,7 +530,7 @@ type RateLimitConfig struct { } // RateLimitBucket defines a token bucket configuration with a maximum capacity -// and a refill period. Used by both shared (global) and per-user rate limits. +// and a refill period. Used by both shared and per-user rate limits. type RateLimitBucket struct { // MaxTokens is the maximum number of tokens (bucket capacity). // This is also the burst size: the maximum number of requests that can be served @@ -551,9 +547,9 @@ type RateLimitBucket struct { } // ToolRateLimitConfig defines rate limits for a specific tool. -// At least one of global, shared, or perUser must be configured. +// At least one of shared or perUser must be configured. // -// +kubebuilder:validation:XValidation:rule="has(self.global) || has(self.shared) || has(self.perUser)",message="at least one of global, shared, or perUser must be configured" +// +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser)",message="at least one of shared or perUser must be configured" // //nolint:lll // kubebuilder marker exceeds line length type ToolRateLimitConfig struct { @@ -562,11 +558,7 @@ type ToolRateLimitConfig struct { // +kubebuilder:validation:MinLength=1 Name string `json:"name" yaml:"name"` - // Global token bucket for this specific tool. - // +optional - Global *RateLimitBucket `json:"global,omitempty" yaml:"global,omitempty"` - - // Shared is a deprecated alias for Global. Use global instead. + // Shared token bucket for this specific tool. // +optional Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"` diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go b/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go index 25506fb4dd..4a886bb74f 100644 --- a/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go +++ b/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go @@ -75,13 +75,6 @@ func TestRateLimitConfigJSONRoundtrip(t *testing.T) { input RateLimitConfig wantJSON string }{ - { - name: "global only", - input: RateLimitConfig{ - Global: &RateLimitBucket{MaxTokens: 100, RefillPeriod: metav1.Duration{Duration: time.Minute}}, - }, - wantJSON: `{"global":{"maxTokens":100,"refillPeriod":"1m0s"}}`, - }, { name: "shared only", input: RateLimitConfig{ @@ -134,7 +127,7 @@ func TestVirtualMCPServerSpecRateLimitingJSONRoundtrip(t *testing.T) { Address: "redis.default.svc.cluster.local:6379", }, RateLimiting: &RateLimitConfig{ - Global: &RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + Shared: &RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, PerUser: &RateLimitBucket{ MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}, @@ -142,7 +135,7 @@ func TestVirtualMCPServerSpecRateLimitingJSONRoundtrip(t *testing.T) { Tools: []ToolRateLimitConfig{ { Name: "backend_a_echo", - Global: &RateLimitBucket{ + Shared: &RateLimitBucket{ MaxTokens: 5, RefillPeriod: metav1.Duration{Duration: 30 * time.Second}, }, @@ -155,7 +148,7 @@ func TestVirtualMCPServerSpecRateLimitingJSONRoundtrip(t *testing.T) { require.NoError(t, err) out := string(b) assert.Contains(t, out, `"rateLimiting"`) - assert.Contains(t, out, `"global"`) + assert.Contains(t, out, `"shared"`) assert.Contains(t, out, `"perUser"`) assert.Contains(t, out, `"backend_a_echo"`) assert.NotContains(t, out, `"config":{"rateLimiting"`) diff --git a/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go b/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go index fd3706419d..6a58686950 100644 --- a/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go +++ b/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go @@ -23,7 +23,7 @@ package v1beta1 import ( corev1 "k8s.io/api/core/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -2210,11 +2210,6 @@ func (in *RateLimitBucket) DeepCopy() *RateLimitBucket { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RateLimitConfig) DeepCopyInto(out *RateLimitConfig) { *out = *in - if in.Global != nil { - in, out := &in.Global, &out.Global - *out = new(RateLimitBucket) - **out = **in - } if in.Shared != nil { in, out := &in.Shared, &out.Shared *out = new(RateLimitBucket) @@ -2669,11 +2664,6 @@ func (in *ToolOverride) DeepCopy() *ToolOverride { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ToolRateLimitConfig) DeepCopyInto(out *ToolRateLimitConfig) { *out = *in - if in.Global != nil { - in, out := &in.Global, &out.Global - *out = new(RateLimitBucket) - **out = **in - } if in.Shared != nil { in, out := &in.Shared, &out.Shared *out = new(RateLimitBucket) diff --git a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig.go b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig.go index b4c4956371..7a37984b56 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig.go @@ -82,9 +82,14 @@ func (r *VirtualMCPServerReconciler) ensureVmcpConfigConfigMap( } // Marshal the serializable Config to YAML for storage in ConfigMap. + // RateLimiting is a top-level VirtualMCPServer spec field, not part of + // spec.config, so add it only when writing the runtime ConfigMap. // Note: gopkg.in/yaml.v3 produces deterministic output by sorting map keys alphabetically. // This ensures stable checksums for triggering pod rollouts only when content actually changes. - vmcpConfigYAML, err := yaml.Marshal(config) + vmcpConfigYAML, err := yaml.Marshal(vmcpConfigMapConfig{ + Config: *config, + RateLimiting: vmcp.Spec.RateLimiting, + }) if err != nil { return fmt.Errorf("failed to marshal vmcp config: %w", err) } @@ -130,6 +135,12 @@ func (r *VirtualMCPServerReconciler) ensureVmcpConfigConfigMap( return nil } +type vmcpConfigMapConfig struct { + vmcpconfig.Config `yaml:",inline"` + + RateLimiting *mcpv1beta1.RateLimitConfig `yaml:"rateLimiting,omitempty"` +} + // populateOptimizerEmbeddingService wires the EmbeddingServer URL into the optimizer // config and emits warnings for non-recommended configurations. // diff --git a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go index e671e9083b..5e7c095cc5 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go @@ -471,7 +471,7 @@ func TestEnsureVmcpConfigConfigMap(t *testing.T) { Tools: []mcpv1beta1.ToolRateLimitConfig{ { Name: "backend_a_echo", - Global: &mcpv1beta1.RateLimitBucket{ + Shared: &mcpv1beta1.RateLimitBucket{ MaxTokens: 5, RefillPeriod: metav1.Duration{Duration: time.Minute}, }, @@ -527,23 +527,16 @@ func TestEnsureVmcpConfigConfigMap(t *testing.T) { assert.Contains(t, cm.Data, "config.yaml") assert.NotEmpty(t, cm.Annotations["toolhive.stacklok.dev/content-checksum"]) - var cfg vmcpconfig.Config + var cfg vmcpConfigMapConfig require.NoError(t, yaml.Unmarshal([]byte(cm.Data["config.yaml"]), &cfg)) require.NotNil(t, cfg.RateLimiting, "runtime config must include spec.rateLimiting") - rateLimiting := cfg.RateLimiting.Get() - perUser, ok := rateLimiting["perUser"].(map[string]any) - require.True(t, ok) - assert.EqualValues(t, 2, perUser["maxTokens"]) - tools, ok := rateLimiting["tools"].([]any) - require.True(t, ok) - require.Len(t, tools, 1) - tool, ok := tools[0].(map[string]any) - require.True(t, ok) - assert.Equal(t, "backend_a_echo", tool["name"]) - global, ok := tool["global"].(map[string]any) - require.True(t, ok) - assert.EqualValues(t, 5, global["maxTokens"]) + require.NotNil(t, cfg.RateLimiting.PerUser) + assert.EqualValues(t, 2, cfg.RateLimiting.PerUser.MaxTokens) + require.Len(t, cfg.RateLimiting.Tools, 1) + assert.Equal(t, "backend_a_echo", cfg.RateLimiting.Tools[0].Name) + require.NotNil(t, cfg.RateLimiting.Tools[0].Shared) + assert.EqualValues(t, 5, cfg.RateLimiting.Tools[0].Shared.MaxTokens) } // TestSetAuthConfigConditions tests that auth config conditions reflect the current state diff --git a/cmd/thv-operator/pkg/vmcpconfig/converter.go b/cmd/thv-operator/pkg/vmcpconfig/converter.go index 1b0a5ae140..cd31af6d69 100644 --- a/cmd/thv-operator/pkg/vmcpconfig/converter.go +++ b/cmd/thv-operator/pkg/vmcpconfig/converter.go @@ -6,7 +6,6 @@ package vmcpconfig import ( "context" - "encoding/json" "fmt" "github.com/go-logr/logr" @@ -20,7 +19,6 @@ import ( "github.com/stacklok/toolhive/cmd/thv-operator/pkg/oidc" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/spectoconfig" "github.com/stacklok/toolhive/pkg/authserver" - thvjson "github.com/stacklok/toolhive/pkg/json" "github.com/stacklok/toolhive/pkg/telemetry" "github.com/stacklok/toolhive/pkg/vmcp/auth/converters" authtypes "github.com/stacklok/toolhive/pkg/vmcp/auth/types" @@ -142,13 +140,6 @@ func (c *Converter) Convert( } config.SessionStorage = convertSessionStorage(vmcp) - if vmcp.Spec.RateLimiting != nil { - rateLimiting, err := convertRateLimiting(vmcp.Spec.RateLimiting) - if err != nil { - return nil, nil, fmt.Errorf("failed to convert rate limiting: %w", err) - } - config.RateLimiting = rateLimiting - } // Apply operational defaults (fills missing values) config.EnsureOperationalDefaults() @@ -166,22 +157,6 @@ func (c *Converter) Convert( return config, authServerRC, nil } -func convertRateLimiting(rateLimiting *mcpv1beta1.RateLimitConfig) (*thvjson.Map, error) { - if rateLimiting == nil { - return nil, nil - } - raw, err := json.Marshal(rateLimiting) - if err != nil { - return nil, err - } - var value map[string]any - if err := json.Unmarshal(raw, &value); err != nil { - return nil, err - } - converted := thvjson.NewMap(value) - return &converted, nil -} - // convertIncomingAuth converts IncomingAuthConfig from CRD to vmcp config. func (c *Converter) convertIncomingAuth( ctx context.Context, diff --git a/cmd/thv-operator/pkg/vmcpconfig/converter_test.go b/cmd/thv-operator/pkg/vmcpconfig/converter_test.go index 6e195e9b9e..cee72256af 100644 --- a/cmd/thv-operator/pkg/vmcpconfig/converter_test.go +++ b/cmd/thv-operator/pkg/vmcpconfig/converter_test.go @@ -97,97 +97,6 @@ func newTestConverterWithObjects(t *testing.T, resolver oidc.Resolver, objects . return converter } -func TestConverter_RateLimitingFromTopLevelSpec(t *testing.T) { - t.Parallel() - - vmcp := &mcpv1beta1.VirtualMCPServer{ - ObjectMeta: metav1.ObjectMeta{Name: "test-vmcp", Namespace: "default"}, - Spec: mcpv1beta1.VirtualMCPServerSpec{ - GroupRef: &mcpv1beta1.MCPGroupRef{Name: "test-group"}, - IncomingAuth: &mcpv1beta1.IncomingAuthConfig{Type: "anonymous"}, - RateLimiting: &mcpv1beta1.RateLimitConfig{ - Global: &mcpv1beta1.RateLimitBucket{ - MaxTokens: 10, - RefillPeriod: metav1.Duration{Duration: time.Minute}, - }, - Tools: []mcpv1beta1.ToolRateLimitConfig{ - { - Name: "backend_a_echo", - Global: &mcpv1beta1.RateLimitBucket{ - MaxTokens: 1, - RefillPeriod: metav1.Duration{Duration: time.Minute}, - }, - }, - }, - }, - }, - } - - converter := newTestConverter(t, newNoOpMockResolver(t)) - cfg, _, err := converter.Convert(context.Background(), vmcp, nil) - require.NoError(t, err) - require.NotNil(t, cfg.RateLimiting) - - rateLimiting := cfg.RateLimiting.Get() - global, ok := rateLimiting["global"].(map[string]any) - require.True(t, ok) - assert.EqualValues(t, 10, global["maxTokens"]) - tools, ok := rateLimiting["tools"].([]any) - require.True(t, ok) - require.Len(t, tools, 1) - tool, ok := tools[0].(map[string]any) - require.True(t, ok) - assert.Equal(t, "backend_a_echo", tool["name"]) -} - -func TestConverter_RateLimitingPreservesSharedAliasAndCopiesInput(t *testing.T) { - t.Parallel() - - vmcp := &mcpv1beta1.VirtualMCPServer{ - ObjectMeta: metav1.ObjectMeta{Name: "test-vmcp", Namespace: "default"}, - Spec: mcpv1beta1.VirtualMCPServerSpec{ - GroupRef: &mcpv1beta1.MCPGroupRef{Name: "test-group"}, - IncomingAuth: &mcpv1beta1.IncomingAuthConfig{Type: "anonymous"}, - RateLimiting: &mcpv1beta1.RateLimitConfig{ - Shared: &mcpv1beta1.RateLimitBucket{ - MaxTokens: 10, - RefillPeriod: metav1.Duration{Duration: time.Minute}, - }, - Tools: []mcpv1beta1.ToolRateLimitConfig{ - { - Name: "backend_a_echo", - Shared: &mcpv1beta1.RateLimitBucket{ - MaxTokens: 1, - RefillPeriod: metav1.Duration{Duration: time.Second}, - }, - }, - }, - }, - }, - } - - converter := newTestConverter(t, newNoOpMockResolver(t)) - cfg, _, err := converter.Convert(context.Background(), vmcp, nil) - require.NoError(t, err) - require.NotNil(t, cfg.RateLimiting) - - vmcp.Spec.RateLimiting.Shared.MaxTokens = 99 - vmcp.Spec.RateLimiting.Tools[0].Shared.MaxTokens = 88 - - rateLimiting := cfg.RateLimiting.Get() - shared, ok := rateLimiting["shared"].(map[string]any) - require.True(t, ok) - assert.EqualValues(t, 10, shared["maxTokens"]) - tools, ok := rateLimiting["tools"].([]any) - require.True(t, ok) - require.Len(t, tools, 1) - tool, ok := tools[0].(map[string]any) - require.True(t, ok) - toolShared, ok := tool["shared"].(map[string]any) - require.True(t, ok) - assert.EqualValues(t, 1, toolShared["maxTokens"]) -} - func TestConverter_OIDCResolution(t *testing.T) { t.Parallel() diff --git a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go index dc00607240..6a74efd2aa 100644 --- a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go +++ b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go @@ -113,7 +113,7 @@ var _ = Describe("CEL Validation for SessionStorageConfig on VirtualMCPServer", It("should reject rate limiting without redis session storage", func() { vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-no-redis", nil) vmcp.Spec.RateLimiting = &mcpv1beta1.RateLimitConfig{ - Global: &mcpv1beta1.RateLimitBucket{ + Shared: &mcpv1beta1.RateLimitBucket{ MaxTokens: 1, RefillPeriod: metav1.Duration{Duration: time.Minute}, }, diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml index 33aa0febec..44eb6f7a02 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml @@ -320,28 +320,6 @@ spec: RateLimiting defines rate limiting configuration for the MCP server. Requires Redis session storage to be configured for distributed rate limiting. properties: - global: - description: Global is a token bucket shared across all users - for the entire server. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object perUser: description: |- PerUser is a token bucket applied independently to each authenticated user @@ -368,8 +346,8 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use global - instead. + description: Shared is a token bucket shared across all users + for the entire server. properties: maxTokens: description: |- @@ -397,29 +375,8 @@ spec: items: description: |- ToolRateLimitConfig defines rate limits for a specific tool. - At least one of global, shared, or perUser must be configured. + At least one of shared or perUser must be configured. properties: - global: - description: Global token bucket for this specific tool. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object name: description: Name is the MCP tool name this limit applies to. @@ -448,8 +405,7 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use - global instead. + description: Shared token bucket for this specific tool. properties: maxTokens: description: |- @@ -473,19 +429,17 @@ spec: - name type: object x-kubernetes-validations: - - message: at least one of global, shared, or perUser must be - configured - rule: has(self.global) || has(self.shared) || has(self.perUser) + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) type: array x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map type: object x-kubernetes-validations: - - message: at least one of global, shared, perUser, or tools must - be configured - rule: has(self.global) || has(self.shared) || has(self.perUser) - || (has(self.tools) && size(self.tools) > 0) + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of proxy runner (thv run) pod replicas. @@ -1215,28 +1169,6 @@ spec: RateLimiting defines rate limiting configuration for the MCP server. Requires Redis session storage to be configured for distributed rate limiting. properties: - global: - description: Global is a token bucket shared across all users - for the entire server. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object perUser: description: |- PerUser is a token bucket applied independently to each authenticated user @@ -1263,8 +1195,8 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use global - instead. + description: Shared is a token bucket shared across all users + for the entire server. properties: maxTokens: description: |- @@ -1292,29 +1224,8 @@ spec: items: description: |- ToolRateLimitConfig defines rate limits for a specific tool. - At least one of global, shared, or perUser must be configured. + At least one of shared or perUser must be configured. properties: - global: - description: Global token bucket for this specific tool. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object name: description: Name is the MCP tool name this limit applies to. @@ -1343,8 +1254,7 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use - global instead. + description: Shared token bucket for this specific tool. properties: maxTokens: description: |- @@ -1368,19 +1278,17 @@ spec: - name type: object x-kubernetes-validations: - - message: at least one of global, shared, or perUser must be - configured - rule: has(self.global) || has(self.shared) || has(self.perUser) + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) type: array x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map type: object x-kubernetes-validations: - - message: at least one of global, shared, perUser, or tools must - be configured - rule: has(self.global) || has(self.shared) || has(self.perUser) - || (has(self.tools) && size(self.tools) > 0) + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of proxy runner (thv run) pod replicas. diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml index bbc8df3509..913eb6ca9e 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml @@ -2247,28 +2247,6 @@ spec: RateLimiting defines rate limiting configuration for the Virtual MCP server. Requires Redis session storage to be configured for distributed rate limiting. properties: - global: - description: Global is a token bucket shared across all users - for the entire server. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object perUser: description: |- PerUser is a token bucket applied independently to each authenticated user @@ -2295,8 +2273,8 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use global - instead. + description: Shared is a token bucket shared across all users + for the entire server. properties: maxTokens: description: |- @@ -2324,29 +2302,8 @@ spec: items: description: |- ToolRateLimitConfig defines rate limits for a specific tool. - At least one of global, shared, or perUser must be configured. + At least one of shared or perUser must be configured. properties: - global: - description: Global token bucket for this specific tool. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object name: description: Name is the MCP tool name this limit applies to. @@ -2375,8 +2332,7 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use - global instead. + description: Shared token bucket for this specific tool. properties: maxTokens: description: |- @@ -2400,19 +2356,17 @@ spec: - name type: object x-kubernetes-validations: - - message: at least one of global, shared, or perUser must be - configured - rule: has(self.global) || has(self.shared) || has(self.perUser) + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) type: array x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map type: object x-kubernetes-validations: - - message: at least one of global, shared, perUser, or tools must - be configured - rule: has(self.global) || has(self.shared) || has(self.perUser) - || (has(self.tools) && size(self.tools) > 0) + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of vMCP pod replicas. @@ -4925,28 +4879,6 @@ spec: RateLimiting defines rate limiting configuration for the Virtual MCP server. Requires Redis session storage to be configured for distributed rate limiting. properties: - global: - description: Global is a token bucket shared across all users - for the entire server. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object perUser: description: |- PerUser is a token bucket applied independently to each authenticated user @@ -4973,8 +4905,8 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use global - instead. + description: Shared is a token bucket shared across all users + for the entire server. properties: maxTokens: description: |- @@ -5002,29 +4934,8 @@ spec: items: description: |- ToolRateLimitConfig defines rate limits for a specific tool. - At least one of global, shared, or perUser must be configured. + At least one of shared or perUser must be configured. properties: - global: - description: Global token bucket for this specific tool. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object name: description: Name is the MCP tool name this limit applies to. @@ -5053,8 +4964,7 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use - global instead. + description: Shared token bucket for this specific tool. properties: maxTokens: description: |- @@ -5078,19 +4988,17 @@ spec: - name type: object x-kubernetes-validations: - - message: at least one of global, shared, or perUser must be - configured - rule: has(self.global) || has(self.shared) || has(self.perUser) + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) type: array x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map type: object x-kubernetes-validations: - - message: at least one of global, shared, perUser, or tools must - be configured - rule: has(self.global) || has(self.shared) || has(self.perUser) - || (has(self.tools) && size(self.tools) > 0) + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of vMCP pod replicas. diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml index 8f07967a66..a248508d41 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml @@ -323,28 +323,6 @@ spec: RateLimiting defines rate limiting configuration for the MCP server. Requires Redis session storage to be configured for distributed rate limiting. properties: - global: - description: Global is a token bucket shared across all users - for the entire server. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object perUser: description: |- PerUser is a token bucket applied independently to each authenticated user @@ -371,8 +349,8 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use global - instead. + description: Shared is a token bucket shared across all users + for the entire server. properties: maxTokens: description: |- @@ -400,29 +378,8 @@ spec: items: description: |- ToolRateLimitConfig defines rate limits for a specific tool. - At least one of global, shared, or perUser must be configured. + At least one of shared or perUser must be configured. properties: - global: - description: Global token bucket for this specific tool. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object name: description: Name is the MCP tool name this limit applies to. @@ -451,8 +408,7 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use - global instead. + description: Shared token bucket for this specific tool. properties: maxTokens: description: |- @@ -476,19 +432,17 @@ spec: - name type: object x-kubernetes-validations: - - message: at least one of global, shared, or perUser must be - configured - rule: has(self.global) || has(self.shared) || has(self.perUser) + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) type: array x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map type: object x-kubernetes-validations: - - message: at least one of global, shared, perUser, or tools must - be configured - rule: has(self.global) || has(self.shared) || has(self.perUser) - || (has(self.tools) && size(self.tools) > 0) + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of proxy runner (thv run) pod replicas. @@ -1218,28 +1172,6 @@ spec: RateLimiting defines rate limiting configuration for the MCP server. Requires Redis session storage to be configured for distributed rate limiting. properties: - global: - description: Global is a token bucket shared across all users - for the entire server. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object perUser: description: |- PerUser is a token bucket applied independently to each authenticated user @@ -1266,8 +1198,8 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use global - instead. + description: Shared is a token bucket shared across all users + for the entire server. properties: maxTokens: description: |- @@ -1295,29 +1227,8 @@ spec: items: description: |- ToolRateLimitConfig defines rate limits for a specific tool. - At least one of global, shared, or perUser must be configured. + At least one of shared or perUser must be configured. properties: - global: - description: Global token bucket for this specific tool. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object name: description: Name is the MCP tool name this limit applies to. @@ -1346,8 +1257,7 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use - global instead. + description: Shared token bucket for this specific tool. properties: maxTokens: description: |- @@ -1371,19 +1281,17 @@ spec: - name type: object x-kubernetes-validations: - - message: at least one of global, shared, or perUser must be - configured - rule: has(self.global) || has(self.shared) || has(self.perUser) + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) type: array x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map type: object x-kubernetes-validations: - - message: at least one of global, shared, perUser, or tools must - be configured - rule: has(self.global) || has(self.shared) || has(self.perUser) - || (has(self.tools) && size(self.tools) > 0) + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of proxy runner (thv run) pod replicas. diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml index cc804a054c..1429da0fbe 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml @@ -2250,28 +2250,6 @@ spec: RateLimiting defines rate limiting configuration for the Virtual MCP server. Requires Redis session storage to be configured for distributed rate limiting. properties: - global: - description: Global is a token bucket shared across all users - for the entire server. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object perUser: description: |- PerUser is a token bucket applied independently to each authenticated user @@ -2298,8 +2276,8 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use global - instead. + description: Shared is a token bucket shared across all users + for the entire server. properties: maxTokens: description: |- @@ -2327,29 +2305,8 @@ spec: items: description: |- ToolRateLimitConfig defines rate limits for a specific tool. - At least one of global, shared, or perUser must be configured. + At least one of shared or perUser must be configured. properties: - global: - description: Global token bucket for this specific tool. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object name: description: Name is the MCP tool name this limit applies to. @@ -2378,8 +2335,7 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use - global instead. + description: Shared token bucket for this specific tool. properties: maxTokens: description: |- @@ -2403,19 +2359,17 @@ spec: - name type: object x-kubernetes-validations: - - message: at least one of global, shared, or perUser must be - configured - rule: has(self.global) || has(self.shared) || has(self.perUser) + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) type: array x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map type: object x-kubernetes-validations: - - message: at least one of global, shared, perUser, or tools must - be configured - rule: has(self.global) || has(self.shared) || has(self.perUser) - || (has(self.tools) && size(self.tools) > 0) + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of vMCP pod replicas. @@ -4928,28 +4882,6 @@ spec: RateLimiting defines rate limiting configuration for the Virtual MCP server. Requires Redis session storage to be configured for distributed rate limiting. properties: - global: - description: Global is a token bucket shared across all users - for the entire server. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object perUser: description: |- PerUser is a token bucket applied independently to each authenticated user @@ -4976,8 +4908,8 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use global - instead. + description: Shared is a token bucket shared across all users + for the entire server. properties: maxTokens: description: |- @@ -5005,29 +4937,8 @@ spec: items: description: |- ToolRateLimitConfig defines rate limits for a specific tool. - At least one of global, shared, or perUser must be configured. + At least one of shared or perUser must be configured. properties: - global: - description: Global token bucket for this specific tool. - properties: - maxTokens: - description: |- - MaxTokens is the maximum number of tokens (bucket capacity). - This is also the burst size: the maximum number of requests that can be served - instantaneously before the bucket is depleted. - format: int32 - minimum: 1 - type: integer - refillPeriod: - description: |- - RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. - The effective refill rate is maxTokens / refillPeriod tokens per second. - Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). - type: string - required: - - maxTokens - - refillPeriod - type: object name: description: Name is the MCP tool name this limit applies to. @@ -5056,8 +4967,7 @@ spec: - refillPeriod type: object shared: - description: Shared is a deprecated alias for Global. Use - global instead. + description: Shared token bucket for this specific tool. properties: maxTokens: description: |- @@ -5081,19 +4991,17 @@ spec: - name type: object x-kubernetes-validations: - - message: at least one of global, shared, or perUser must be - configured - rule: has(self.global) || has(self.shared) || has(self.perUser) + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) type: array x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map type: object x-kubernetes-validations: - - message: at least one of global, shared, perUser, or tools must - be configured - rule: has(self.global) || has(self.shared) || has(self.perUser) - || (has(self.tools) && size(self.tools) > 0) + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of vMCP pod replicas. diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md index 2b66ac030d..9edb64f5ba 100644 --- a/docs/operator/crd-api.md +++ b/docs/operator/crd-api.md @@ -2687,7 +2687,7 @@ _Appears in:_ RateLimitBucket defines a token bucket configuration with a maximum capacity -and a refill period. Used by both shared (global) and per-user rate limits. +and a refill period. Used by both shared and per-user rate limits. @@ -2716,8 +2716,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `global` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | Global is a token bucket shared across all users for the entire server. | | Optional: \{\}
| -| `shared` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | Shared is a deprecated alias for Global. Use global instead. | | Optional: \{\}
| +| `shared` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | Shared is a token bucket shared across all users for the entire server. | | Optional: \{\}
| | `perUser` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | PerUser is a token bucket applied independently to each authenticated user
at the server level. Requires authentication to be enabled.
Each unique userID creates Redis keys that expire after 2x refillPeriod.
Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. | | Optional: \{\}
| | `tools` _[api.v1beta1.ToolRateLimitConfig](#apiv1beta1toolratelimitconfig) array_ | Tools defines per-tool rate limit overrides.
Each entry applies additional rate limits to calls targeting a specific tool name.
A request must pass both the server-level limit and the per-tool limit. | | Optional: \{\}
| @@ -3132,7 +3131,7 @@ _Appears in:_ ToolRateLimitConfig defines rate limits for a specific tool. -At least one of global, shared, or perUser must be configured. +At least one of shared or perUser must be configured. @@ -3142,8 +3141,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | | `name` _string_ | Name is the MCP tool name this limit applies to. | | MinLength: 1
Required: \{\}
| -| `global` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | Global token bucket for this specific tool. | | Optional: \{\}
| -| `shared` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | Shared is a deprecated alias for Global. Use global instead. | | Optional: \{\}
| +| `shared` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | Shared token bucket for this specific tool. | | Optional: \{\}
| | `perUser` _[api.v1beta1.RateLimitBucket](#apiv1beta1ratelimitbucket)_ | PerUser token bucket configuration for this tool. | | Optional: \{\}
| diff --git a/docs/server/docs.go b/docs/server/docs.go index bad149a9d7..a108f6087f 100644 --- a/docs/server/docs.go +++ b/docs/server/docs.go @@ -60,9 +60,6 @@ const docTemplate = `{ "github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitConfig": { "description": "RateLimitConfig contains the CRD rate limiting configuration.\nWhen set, rate limiting middleware is added to the proxy middleware chain.", "properties": { - "global": { - "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket" - }, "perUser": { "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket" }, @@ -82,9 +79,6 @@ const docTemplate = `{ }, "github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.ToolRateLimitConfig": { "properties": { - "global": { - "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket" - }, "name": { "description": "Name is the MCP tool name this limit applies to.\n+kubebuilder:validation:Required\n+kubebuilder:validation:MinLength=1", "type": "string" diff --git a/docs/server/swagger.json b/docs/server/swagger.json index 6269efbf3d..8a5d8df9b5 100644 --- a/docs/server/swagger.json +++ b/docs/server/swagger.json @@ -53,9 +53,6 @@ "github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitConfig": { "description": "RateLimitConfig contains the CRD rate limiting configuration.\nWhen set, rate limiting middleware is added to the proxy middleware chain.", "properties": { - "global": { - "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket" - }, "perUser": { "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket" }, @@ -75,9 +72,6 @@ }, "github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.ToolRateLimitConfig": { "properties": { - "global": { - "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket" - }, "name": { "description": "Name is the MCP tool name this limit applies to.\n+kubebuilder:validation:Required\n+kubebuilder:validation:MinLength=1", "type": "string" diff --git a/docs/server/swagger.yaml b/docs/server/swagger.yaml index 2799158f5c..bdc1cac8a0 100644 --- a/docs/server/swagger.yaml +++ b/docs/server/swagger.yaml @@ -52,8 +52,6 @@ components: RateLimitConfig contains the CRD rate limiting configuration. When set, rate limiting middleware is added to the proxy middleware chain. properties: - global: - $ref: '#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket' perUser: $ref: '#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket' shared: @@ -73,8 +71,6 @@ components: type: object github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.ToolRateLimitConfig: properties: - global: - $ref: '#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1beta1.RateLimitBucket' name: description: |- Name is the MCP tool name this limit applies to. diff --git a/pkg/ratelimit/internal/bucket/bucket.go b/pkg/ratelimit/internal/bucket/bucket.go index 28903bcdcd..d68a1709c8 100644 --- a/pkg/ratelimit/internal/bucket/bucket.go +++ b/pkg/ratelimit/internal/bucket/bucket.go @@ -90,7 +90,7 @@ type TokenBucket struct { } // New creates a TokenBucket. The Redis key is derived from namespace, server -// name, and suffix (e.g., "global" or "global:tool:search"). +// name, and suffix (e.g., "shared" or "shared:tool:search"). func New(namespace, serverName, suffix string, maxTokens int32, refillPeriod time.Duration) *TokenBucket { refillSec := refillPeriod.Seconds() return &TokenBucket{ diff --git a/pkg/ratelimit/limiter.go b/pkg/ratelimit/limiter.go index 6679d6db4a..3490c1cbaa 100644 --- a/pkg/ratelimit/limiter.go +++ b/pkg/ratelimit/limiter.go @@ -46,10 +46,10 @@ func NewLimiter(client redis.Cmdable, namespace, name string, crd *v1beta1.RateL l := &limiter{client: client} - if global, suffix := serverGlobalBucket(crd); global != nil { - b, err := newBucket(namespace, name, suffix, global) + if crd.Shared != nil { + b, err := newBucket(namespace, name, "shared", crd.Shared) if err != nil { - return nil, fmt.Errorf("%s bucket: %w", suffix, err) + return nil, fmt.Errorf("shared bucket: %w", err) } l.serverBucket = b } @@ -63,10 +63,10 @@ func NewLimiter(client redis.Cmdable, namespace, name string, crd *v1beta1.RateL } for _, t := range crd.Tools { - if global, suffix := toolGlobalBucket(&t); global != nil { - b, err := newBucket(namespace, name, suffix+":tool:"+t.Name, global) + if t.Shared != nil { + b, err := newBucket(namespace, name, "shared:tool:"+t.Name, t.Shared) if err != nil { - return nil, fmt.Errorf("tool %q %s bucket: %w", t.Name, suffix, err) + return nil, fmt.Errorf("tool %q shared bucket: %w", t.Name, err) } if l.toolBuckets == nil { l.toolBuckets = make(map[string]*bucket.TokenBucket) @@ -88,26 +88,6 @@ func NewLimiter(client redis.Cmdable, namespace, name string, crd *v1beta1.RateL return l, nil } -func serverGlobalBucket(crd *v1beta1.RateLimitConfig) (*v1beta1.RateLimitBucket, string) { - if crd.Global != nil { - return crd.Global, "global" - } - if crd.Shared != nil { - return crd.Shared, "shared" - } - return nil, "" -} - -func toolGlobalBucket(tool *v1beta1.ToolRateLimitConfig) (*v1beta1.RateLimitBucket, string) { - if tool.Global != nil { - return tool.Global, "global" - } - if tool.Shared != nil { - return tool.Shared, "shared" - } - return nil, "" -} - // bucketSpec holds deferred bucket parameters for per-user buckets that are // created on the fly in Allow() because the userID is not known at construction time. type bucketSpec struct { diff --git a/pkg/ratelimit/limiter_test.go b/pkg/ratelimit/limiter_test.go index aea2b1636a..147e22cd7e 100644 --- a/pkg/ratelimit/limiter_test.go +++ b/pkg/ratelimit/limiter_test.go @@ -70,7 +70,7 @@ func TestNewLimiter_ZeroDuration(t *testing.T) { assert.Contains(t, err.Error(), "refillPeriod must be positive") } -func TestLimiter_ServerGlobalExhausted(t *testing.T) { +func TestLimiter_ServerSharedExhausted(t *testing.T) { t.Parallel() client, _ := newTestClient(t) ctx := t.Context() @@ -93,7 +93,7 @@ func TestLimiter_ServerGlobalExhausted(t *testing.T) { assert.Greater(t, d.RetryAfter, time.Duration(0)) } -func TestLimiter_SharedAliasUsesLegacyRedisKeys(t *testing.T) { +func TestLimiter_SharedUsesRedisKeys(t *testing.T) { t.Parallel() client, _ := newTestClient(t) ctx := t.Context() @@ -114,45 +114,10 @@ func TestLimiter_SharedAliasUsesLegacyRedisKeys(t *testing.T) { require.NoError(t, err) require.True(t, d.Allowed) - legacyServerKey := "thv:rl:{ns:srv}:shared" - legacyToolKey := "thv:rl:{ns:srv}:shared:tool:search" - newServerKey := "thv:rl:{ns:srv}:global" - newToolKey := "thv:rl:{ns:srv}:global:tool:search" + serverKey := "thv:rl:{ns:srv}:shared" + toolKey := "thv:rl:{ns:srv}:shared:tool:search" - exists, err := client.Exists(ctx, legacyServerKey, legacyToolKey).Result() - require.NoError(t, err) - assert.Equal(t, int64(2), exists) - - exists, err = client.Exists(ctx, newServerKey, newToolKey).Result() - require.NoError(t, err) - assert.Equal(t, int64(0), exists) -} - -func TestLimiter_GlobalUsesGlobalRedisKeys(t *testing.T) { - t.Parallel() - client, _ := newTestClient(t) - ctx := t.Context() - - crd := &v1beta1.RateLimitConfig{ - Global: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, - Tools: []v1beta1.ToolRateLimitConfig{ - { - Name: "search", - Global: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, - }, - }, - } - l, err := NewLimiter(client, "ns", "srv", crd) - require.NoError(t, err) - - d, err := l.Allow(ctx, "search", "") - require.NoError(t, err) - require.True(t, d.Allowed) - - exists, err := client.Exists(ctx, - "thv:rl:{ns:srv}:global", - "thv:rl:{ns:srv}:global:tool:search", - ).Result() + exists, err := client.Exists(ctx, serverKey, toolKey).Result() require.NoError(t, err) assert.Equal(t, int64(2), exists) } diff --git a/pkg/vmcp/config/config.go b/pkg/vmcp/config/config.go index 4d46cf9032..7a2c699290 100644 --- a/pkg/vmcp/config/config.go +++ b/pkg/vmcp/config/config.go @@ -173,14 +173,6 @@ type Config struct { // the THV_SESSION_REDIS_PASSWORD environment variable. // +optional SessionStorage *SessionStorageConfig `json:"sessionStorage,omitempty" yaml:"sessionStorage,omitempty"` - - // RateLimiting configures request rate limits for tools/call. - // Kubernetes users should set VirtualMCPServer.spec.rateLimiting; the operator - // copies that top-level field here for the vMCP runtime. - // This remains YAML-only so it can be written to the runtime config ConfigMap - // without exposing spec.config.rateLimiting in the Kubernetes API. - // +optional - RateLimiting *thvjson.Map `json:"-" yaml:"rateLimiting,omitempty"` } // IncomingAuthConfig configures client authentication to the virtual MCP server. diff --git a/pkg/vmcp/config/config_test.go b/pkg/vmcp/config/config_test.go index da9e88aeda..5f652ebce0 100644 --- a/pkg/vmcp/config/config_test.go +++ b/pkg/vmcp/config/config_test.go @@ -4,7 +4,6 @@ package config import ( - "encoding/json" "fmt" "os" "path/filepath" @@ -16,36 +15,10 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "gopkg.in/yaml.v3" - thvjson "github.com/stacklok/toolhive/pkg/json" authtypes "github.com/stacklok/toolhive/pkg/vmcp/auth/types" ) -func TestConfigRateLimitingIsYAMLOnly(t *testing.T) { - t.Parallel() - - rateLimiting := thvjson.NewMap(map[string]any{ - "global": map[string]any{ - "maxTokens": 10, - "refillPeriod": "1m0s", - }, - }) - cfg := Config{ - Name: "test-vmcp", - RateLimiting: &rateLimiting, - } - - jsonBytes, err := json.Marshal(cfg) - require.NoError(t, err) - assert.NotContains(t, string(jsonBytes), "rateLimiting") - - yamlBytes, err := yaml.Marshal(cfg) - require.NoError(t, err) - assert.Contains(t, string(yamlBytes), "rateLimiting:") - assert.Contains(t, string(yamlBytes), "maxTokens: 10") -} - func TestOutgoingAuthConfig_ResolveForBackend(t *testing.T) { t.Parallel() diff --git a/pkg/vmcp/config/zz_generated.deepcopy.go b/pkg/vmcp/config/zz_generated.deepcopy.go index 018d6cabdd..80861bff11 100644 --- a/pkg/vmcp/config/zz_generated.deepcopy.go +++ b/pkg/vmcp/config/zz_generated.deepcopy.go @@ -204,10 +204,6 @@ func (in *Config) DeepCopyInto(out *Config) { *out = new(SessionStorageConfig) **out = **in } - if in.RateLimiting != nil { - in, out := &in.RateLimiting, &out.RateLimiting - *out = (*in).DeepCopy() - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Config. From dc566e1c6b1d1859e47fea2acfcad95d1002de1f Mon Sep 17 00:00:00 2001 From: Sanskarzz Date: Sat, 2 May 2026 21:28:39 +0530 Subject: [PATCH 4/4] removed wrapper and reverted path marshall directly Signed-off-by: Sanskarzz --- .../virtualmcpserver_vmcpconfig.go | 13 +------- .../virtualmcpserver_vmcpconfig_test.go | 31 ++----------------- 2 files changed, 4 insertions(+), 40 deletions(-) diff --git a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig.go b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig.go index 7a37984b56..b4c4956371 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig.go @@ -82,14 +82,9 @@ func (r *VirtualMCPServerReconciler) ensureVmcpConfigConfigMap( } // Marshal the serializable Config to YAML for storage in ConfigMap. - // RateLimiting is a top-level VirtualMCPServer spec field, not part of - // spec.config, so add it only when writing the runtime ConfigMap. // Note: gopkg.in/yaml.v3 produces deterministic output by sorting map keys alphabetically. // This ensures stable checksums for triggering pod rollouts only when content actually changes. - vmcpConfigYAML, err := yaml.Marshal(vmcpConfigMapConfig{ - Config: *config, - RateLimiting: vmcp.Spec.RateLimiting, - }) + vmcpConfigYAML, err := yaml.Marshal(config) if err != nil { return fmt.Errorf("failed to marshal vmcp config: %w", err) } @@ -135,12 +130,6 @@ func (r *VirtualMCPServerReconciler) ensureVmcpConfigConfigMap( return nil } -type vmcpConfigMapConfig struct { - vmcpconfig.Config `yaml:",inline"` - - RateLimiting *mcpv1beta1.RateLimitConfig `yaml:"rateLimiting,omitempty"` -} - // populateOptimizerEmbeddingService wires the EmbeddingServer URL into the optimizer // config and emits warnings for non-recommended configurations. // diff --git a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go index 5e7c095cc5..2da5a24a45 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go @@ -459,25 +459,6 @@ func TestEnsureVmcpConfigConfigMap(t *testing.T) { }, Spec: mcpv1beta1.VirtualMCPServerSpec{ GroupRef: &mcpv1beta1.MCPGroupRef{Name: "test-group"}, - SessionStorage: &mcpv1beta1.SessionStorageConfig{ - Provider: mcpv1beta1.SessionStorageProviderRedis, - Address: "redis.default.svc.cluster.local:6379", - }, - RateLimiting: &mcpv1beta1.RateLimitConfig{ - PerUser: &mcpv1beta1.RateLimitBucket{ - MaxTokens: 2, - RefillPeriod: metav1.Duration{Duration: time.Minute}, - }, - Tools: []mcpv1beta1.ToolRateLimitConfig{ - { - Name: "backend_a_echo", - Shared: &mcpv1beta1.RateLimitBucket{ - MaxTokens: 5, - RefillPeriod: metav1.Duration{Duration: time.Minute}, - }, - }, - }, - }, }, } @@ -527,16 +508,10 @@ func TestEnsureVmcpConfigConfigMap(t *testing.T) { assert.Contains(t, cm.Data, "config.yaml") assert.NotEmpty(t, cm.Annotations["toolhive.stacklok.dev/content-checksum"]) - var cfg vmcpConfigMapConfig + var cfg vmcpconfig.Config require.NoError(t, yaml.Unmarshal([]byte(cm.Data["config.yaml"]), &cfg)) - require.NotNil(t, cfg.RateLimiting, "runtime config must include spec.rateLimiting") - - require.NotNil(t, cfg.RateLimiting.PerUser) - assert.EqualValues(t, 2, cfg.RateLimiting.PerUser.MaxTokens) - require.Len(t, cfg.RateLimiting.Tools, 1) - assert.Equal(t, "backend_a_echo", cfg.RateLimiting.Tools[0].Name) - require.NotNil(t, cfg.RateLimiting.Tools[0].Shared) - assert.EqualValues(t, 5, cfg.RateLimiting.Tools[0].Shared.MaxTokens) + assert.Equal(t, "test-vmcp", cfg.Name) + assert.Equal(t, "test-group", cfg.Group) } // TestSetAuthConfigConditions tests that auth config conditions reflect the current state